def test_invalid_predictor(self):
    """
    Check that an unrecognised predictor name is rejected.

    "foo" is not one of the accepted predictor values, so the utility
    is expected to raise a ValueError whose message mentions the
    requested predictor value.
    """
    self.assertRaisesRegex(
        ValueError,
        "The requested value for the predictor",
        check_predictor,
        "foo",
    )
def test_lowercasing(self):
    """
    Check that a mixed-case predictor name is returned in lowercase.
    """
    self.assertEqual(check_predictor("MeaN"), "mean")
def test_realizations(self):
    """
    Check that predictor = "realizations" is accepted without an
    exception and is returned as the lowercase string.
    """
    self.assertEqual(check_predictor("realizations"), "realizations")
def __init__(self, predictor="mean"):
    """
    Set up a plugin that takes EMOS coefficients, estimated from
    historical forecasts and their corresponding truths, and applies
    them to the current forecast to produce the location and scale
    parameters describing the calibrated distribution.

    Args:
        predictor (str):
            Form of the predictor that was used to calculate the
            location parameter when the EMOS coefficients were
            estimated. The supported values are the ensemble mean
            ("mean") and the ensemble realizations ("realizations").
    """
    # Validate up front so that an unsupported name fails at construction.
    check_predictor(predictor)
    # The value is stored exactly as supplied by the caller.
    self.predictor = predictor
def test_realizations():
    """
    Check that predictor = "realizations" is accepted by the utility,
    i.e. the call completes without raising an exception.
    """
    check_predictor("realizations")
def test_mean():
    """
    Check that predictor = "mean" is accepted by the utility,
    i.e. the call completes without raising an exception.
    """
    check_predictor("mean")
def process(self, historic_forecast, truth, landsea_mask=None):
    """
    Using Nonhomogeneous Gaussian Regression/Ensemble Model Output
    Statistics, estimate the required coefficients from historical
    forecasts.

    The main contents of this method is:

    1. Check that the predictor is valid.
    2. Filter the historic forecasts and truth to ensure that these
       inputs match in validity time.
    3. Apply unit conversion to ensure that the historic forecasts and
       truth have the desired units for calibration.
    4. Calculate the variance of the historic forecasts. If the chosen
       predictor is the mean, also calculate the mean of the historic
       forecasts.
    5. If a land-sea mask is provided then mask out sea points in the
       truth and predictor from the historic forecasts.
    6. Calculate initial guess at coefficient values by performing a
       linear regression, if requested, otherwise default values are
       used.
    7. Perform minimisation.

    Args:
        historic_forecast (iris.cube.Cube):
            The cube containing the historical forecasts used
            for calibration.
        truth (iris.cube.Cube):
            The cube containing the truth used for calibration.
        landsea_mask (iris.cube.Cube):
            The optional cube containing a land-sea mask. If provided,
            only land points are used to calculate the coefficients.
            Within the land-sea mask cube land points should be
            specified as ones, and sea points as zeros.

    Returns:
        iris.cube.Cube:
            Cube containing the coefficients estimated using EMOS.
            The cube contains a coefficient_index dimension coordinate
            and a coefficient_name auxiliary coordinate.

    Raises:
        ValueError: If either the historic_forecast or truth cubes were not
            passed in.
        ValueError: If the units of the historic and truth cubes do not
            match. The message reports both sets of units.

    """
    if not (historic_forecast and truth):
        raise ValueError("historic_forecast and truth cubes must be "
                         "provided.")

    # Ensure predictor is valid.
    check_predictor(self.predictor)

    historic_forecast, truth = (
        filter_non_matching_cubes(historic_forecast, truth))

    # Make sure inputs have the same units.
    if self.desired_units:
        historic_forecast.convert_units(self.desired_units)
        truth.convert_units(self.desired_units)

    if historic_forecast.units != truth.units:
        # Fill in the placeholders so the error names the mismatching
        # units rather than showing literal "{}".
        msg = ("The historic forecast units of {} do not match "
               "the truth units {}. These units must match, so that "
               "the coefficients can be estimated.".format(
                   historic_forecast.units, truth.units))
        raise ValueError(msg)

    if self.predictor.lower() == "mean":
        # A single predictor field, so no realization count is needed.
        no_of_realizations = None
        forecast_predictor = collapsed(
            historic_forecast, "realization", iris.analysis.MEAN)
    elif self.predictor.lower() == "realizations":
        no_of_realizations = len(
            historic_forecast.coord("realization").points)
        forecast_predictor = historic_forecast

    forecast_var = collapsed(
        historic_forecast, "realization", iris.analysis.VARIANCE)

    # If a landsea_mask is provided mask out the sea points
    if landsea_mask:
        self.mask_cube(forecast_predictor, landsea_mask)
        self.mask_cube(forecast_var, landsea_mask)
        self.mask_cube(truth, landsea_mask)

    # Computing initial guess for EMOS coefficients
    initial_guess = self.compute_initial_guess(
        truth, forecast_predictor, self.predictor,
        self.ESTIMATE_COEFFICIENTS_FROM_LINEAR_MODEL_FLAG,
        no_of_realizations=no_of_realizations)

    # Calculate coefficients if there are no nans in the initial guess.
    # Otherwise the initial guess is passed through unchanged.
    if np.any(np.isnan(initial_guess)):
        optimised_coeffs = initial_guess
    else:
        optimised_coeffs = (
            self.minimiser(
                initial_guess, forecast_predictor,
                truth, forecast_var,
                self.predictor,
                self.distribution.lower()))
    coefficients_cube = (
        self.create_coefficients_cube(optimised_coeffs, historic_forecast))
    return coefficients_cube
def __init__(self, distribution, current_cycle, desired_units=None,
             predictor="mean", tolerance=0.01, max_iterations=1000):
    """
    Create an ensemble calibration plugin that, for Nonhomogeneous Gaussian
    Regression, calculates coefficients based on historical forecasts and
    applies the coefficients to the current forecast.

    Further information is available in the
    :mod:`module level docstring \
<improver.calibration.ensemble_calibration>`.

    Args:
        distribution (str):
            Name of distribution. Assume that a calibrated version of the
            current forecast could be represented using this distribution.
        current_cycle (str):
            The current cycle in YYYYMMDDTHHMMZ format e.g. 20171122T0100Z.
            This is used to create a forecast_reference_time coordinate
            on the resulting EMOS coefficients cube.
        desired_units (str or cf_units.Unit):
            The unit that you would like the calibration to be undertaken
            in. The current forecast, historical forecast and truth will be
            converted as required.
        predictor (str):
            String to specify the form of the predictor used to calculate
            the location parameter when estimating the EMOS coefficients.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the predictors.
        tolerance (float):
            The tolerance for the Continuous Ranked Probability
            Score (CRPS) calculated by the minimisation. The CRPS is in
            the units of the variable being calibrated. The tolerance is
            therefore representative of how close to the actual value are
            we aiming to forecast for a particular variable. Once multiple
            iterations result in a CRPS equal to the same value within the
            specified tolerance, the minimisation will terminate.
        max_iterations (int):
            The maximum number of iterations allowed until the
            minimisation has converged to a stable solution. If the
            maximum number of iterations is reached, but the minimisation
            has not yet converged to a stable solution, then the available
            solution is used anyway, and a warning is raised. If the
            predictor is "realizations", then the number of iterations may
            require increasing, as there will be more coefficients to
            solve for.

    Raises:
        ValueError: If the given distribution is not valid.

    Warns:
        ImportWarning: If the statsmodels module can't be found and the
            predictor is "realizations".

    """
    valid_distributions = (ContinuousRankedProbabilityScoreMinimisers().
                           minimisation_dict.keys())
    if distribution not in valid_distributions:
        msg = ("Given distribution {} not available. Available "
               "distributions are {}".format(
                   distribution, valid_distributions))
        raise ValueError(msg)
    self.distribution = distribution
    self.current_cycle = current_cycle
    self.desired_units = desired_units
    # Ensure predictor is valid.
    check_predictor(predictor)
    self.predictor = predictor
    self.tolerance = tolerance
    self.max_iterations = max_iterations
    self.minimiser = ContinuousRankedProbabilityScoreMinimisers(
        tolerance=self.tolerance, max_iterations=self.max_iterations)

    # Setting default values for coeff_names. Beta is the final
    # coefficient name in the list, as there can potentially be
    # multiple beta coefficients if the ensemble realizations, rather
    # than the ensemble mean, are provided as the predictor.
    self.coeff_names = ["gamma", "delta", "alpha", "beta"]

    # The "imp" module is deprecated and was removed in Python 3.12, so
    # probe for statsmodels with importlib instead. find_spec returns None
    # for a top-level module that is not installed.
    import importlib.util
    if importlib.util.find_spec("statsmodels") is None:
        statsmodels_found = False
        # Only the realizations predictor needs statsmodels for its
        # initial guess, so only warn in that case.
        if predictor.lower() == "realizations":
            msg = (
                "The statsmodels can not be imported. "
                "Will not be able to calculate an initial guess from "
                "the individual ensemble realizations. "
                "A default initial guess will be used without "
                "estimating coefficients from a linear model.")
            warnings.warn(msg, ImportWarning)
    else:
        statsmodels_found = True
        import statsmodels.api as sm
        self.sm = sm
    self.statsmodels_found = statsmodels_found
def process(
        self, initial_guess, forecast_predictor, truth, forecast_var,
        predictor, distribution):
    """
    Function to pass a given function to the scipy minimize
    function to estimate optimised values for the coefficients.

    Further information is available in the
    :mod:`module level docstring \
<improver.calibration.ensemble_calibration>`.

    Args:
        initial_guess (list):
            List of initial guesses for the coefficients, used as the
            starting point of the minimisation.
            Order of coefficients is [gamma, delta, alpha, beta].
        forecast_predictor (iris.cube.Cube):
            Cube containing the fields to be used as the predictor,
            either the ensemble mean or the ensemble realizations.
        truth (iris.cube.Cube):
            Cube containing the field, which will be used as truth.
        forecast_var (iris.cube.Cube):
            Cube containing the field containing the ensemble variance.
        predictor (str):
            String to specify the form of the predictor used to calculate
            the location parameter when estimating the EMOS coefficients.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the predictors.
        distribution (str):
            String used to access the appropriate function for use in the
            minimisation within self.minimisation_dict.

    Returns:
        list of float:
            List of optimised coefficients.
            Order of coefficients is [gamma, delta, alpha, beta].

    Raises:
        KeyError: If the distribution is not supported.

    Warns:
        Warning: If the minimisation did not converge.

    """
    def calculate_percentage_change_in_last_iteration(allvecs):
        """
        Calculate the percentage change that has occurred within
        the last iteration of the minimisation. If the percentage change
        between the last iteration and the last-but-one iteration exceeds
        the threshold, a warning message is printed.

        Args:
            allvecs (list):
                List of numpy arrays containing the optimised coefficients,
                after each iteration.

        Warns:
            Warning: If a satisfactory minimisation has not been achieved.

        """
        last_iteration_percentage_change = np.absolute(
            (allvecs[-1] - allvecs[-2]) / allvecs[-2])*100
        if (np.any(last_iteration_percentage_change >
                   self.TOLERATED_PERCENTAGE_CHANGE)):
            np.set_printoptions(suppress=True)
            msg = ("The final iteration resulted in a percentage change "
                   "that is greater than the accepted threshold of 5% "
                   "i.e. {}. "
                   "\nA satisfactory minimisation has not been achieved. "
                   "\nLast iteration: {}, "
                   "\nLast-but-one iteration: {}"
                   "\nAbsolute difference: {}\n").format(
                       last_iteration_percentage_change, allvecs[-1],
                       allvecs[-2], np.absolute(allvecs[-2]-allvecs[-1]))
            warnings.warn(msg)

    try:
        minimisation_function = self.minimisation_dict[distribution]
    except KeyError as err:
        # Separate the two sentences in the message and chain the original
        # lookup failure as the explicit cause.
        msg = ("Distribution requested {} is not supported in {}. "
               "Error message is {}".format(
                   distribution, self.minimisation_dict, err))
        raise KeyError(msg) from err

    # Ensure predictor is valid.
    check_predictor(predictor)

    # Flatten the data arrays and remove any missing data.
    truth_data = flatten_ignoring_masked_data(truth.data)
    forecast_var_data = flatten_ignoring_masked_data(forecast_var.data)
    if predictor.lower() == "mean":
        forecast_predictor_data = flatten_ignoring_masked_data(
            forecast_predictor.data)
    elif predictor.lower() == "realizations":
        enforce_coordinate_ordering(forecast_predictor, "realization")
        # Need to transpose this array so there are columns for each
        # ensemble member rather than rows.
        forecast_predictor_data = flatten_ignoring_masked_data(
            forecast_predictor.data, preserve_leading_dimension=True).T

    # Increased precision is needed for stable coefficient calculation.
    # The resulting coefficients are cast to float32 prior to output.
    initial_guess = np.array(initial_guess, dtype=np.float64)
    forecast_predictor_data = forecast_predictor_data.astype(np.float64)
    forecast_var_data = forecast_var_data.astype(np.float64)
    truth_data = truth_data.astype(np.float64)
    sqrt_pi = np.sqrt(np.pi).astype(np.float64)

    # return_all=True keeps the coefficient vector from every iteration so
    # that the change over the final iteration can be inspected below.
    optimised_coeffs = minimize(
        minimisation_function, initial_guess,
        args=(forecast_predictor_data, truth_data,
              forecast_var_data, sqrt_pi, predictor),
        method="Nelder-Mead", tol=self.tolerance,
        options={"maxiter": self.max_iterations, "return_all": True})

    if not optimised_coeffs.success:
        msg = ("Minimisation did not result in convergence after "
               "{} iterations. \n{}".format(
                   self.max_iterations, optimised_coeffs.message))
        warnings.warn(msg)
    calculate_percentage_change_in_last_iteration(optimised_coeffs.allvecs)
    return optimised_coeffs.x.astype(np.float32)