def test_5d_cube(self):
    """
    Check the values returned by the utility for a 5d cube.

    Two temperature cubes, each carrying a different scalar height
    coordinate, are merged into a single cube before conversion.
    """
    cubes = iris.cube.CubeList()
    for height in (5, 10):
        cube = set_up_temperature_cube()
        cube.add_aux_coord(
            iris.coords.AuxCoord([height], standard_name="height"))
        cubes.append(cube)
    merged_cube = cubes.merge_cube()

    # The expected array is the same nine rows repeated once per height.
    rows_for_one_height = np.array([
        [226.15, 230.15, 232.15],
        [237.4, 241.4, 243.4],
        [248.65, 252.65, 254.65],
        [259.9, 263.9, 265.9],
        [271.15, 275.15, 277.15],
        [282.4, 286.4, 288.4],
        [293.65, 297.65, 299.65],
        [304.9, 308.9, 310.9],
        [316.15, 320.15, 322.15],
    ])
    expected = np.vstack([rows_for_one_height, rows_for_one_height])

    result = convert_cube_data_to_2d(merged_cube)
    self.assertArrayAlmostEqual(result, expected, decimal=5)
def test_basic_realizations_predictor(self):
    """
    Check the value returned by the truncated normal CRPS minimiser
    when the ensemble realizations are used as the predictor.

    The result is expected to be a np.float64 scalar.
    """
    start_values = np.array([5, 1, 0, 1, 1, 1], dtype=np.float32)

    wind_speed_cube = set_up_wind_speed_cube()
    predictor_cube = wind_speed_cube.copy()
    variance_cube = wind_speed_cube.collapsed(
        "realization", iris.analysis.VARIANCE)
    truth_cube = wind_speed_cube.collapsed(
        "realization", iris.analysis.MAX)

    # All inputs to the minimiser are cast to float32.
    predictor_values = convert_cube_data_to_2d(predictor_cube)
    predictor_values = predictor_values.astype(np.float32)
    variance_values = variance_cube.data.flatten().astype(np.float32)
    truth_values = truth_cube.data.flatten().astype(np.float32)
    sqrt_pi = np.sqrt(np.pi).astype(np.float32)

    result = Plugin().truncated_normal_crps_minimiser(
        start_values, predictor_values, truth_values, variance_values,
        sqrt_pi, "realizations")

    self.assertIsInstance(result, np.float64)
    self.assertAlmostEqual(result, 533.487612959)
def test_basic_members_predictor(self):
    """
    Check the value returned by the normal CRPS minimiser when the
    ensemble members are used as the predictor.

    The result is expected to be a np.float64 scalar.
    """
    start_values = np.array([5, 1, 0, 1, 1, 1], dtype=np.float32)

    temperature_cube = set_up_temperature_cube()
    predictor_cube = temperature_cube.copy()
    variance_cube = temperature_cube.collapsed(
        "realization", iris.analysis.VARIANCE)
    truth_cube = temperature_cube.collapsed(
        "realization", iris.analysis.MAX)

    # All inputs to the minimiser are cast to float32.
    predictor_values = convert_cube_data_to_2d(predictor_cube)
    predictor_values = predictor_values.astype(np.float32)
    variance_values = variance_cube.data.flatten().astype(np.float32)
    truth_values = truth_cube.data.flatten().astype(np.float32)
    sqrt_pi = np.sqrt(np.pi).astype(np.float32)

    result = Plugin().normal_crps_minimiser(
        start_values, predictor_values, truth_values, variance_values,
        sqrt_pi, "members")

    self.assertIsInstance(result, np.float64)
    self.assertAlmostEqual(result, 4886.94724835)
def test_change_coordinate(self):
    """
    Check the values returned by the utility when the cube is sliced
    along the longitude coordinate.
    """
    flattened = self.data.flatten()
    # Reorder the reference data to the layout asserted for longitude
    # slicing.
    expected = flattened.reshape(9, 3).T.reshape(9, 3)
    result = convert_cube_data_to_2d(self.cube, coord="longitude")
    self.assertArrayAlmostEqual(result, expected)
def test_no_transpose(self):
    """
    Check the values returned by the utility when transposing after
    slicing is switched off.
    """
    expected = self.data.transpose()
    self.assertArrayAlmostEqual(
        convert_cube_data_to_2d(self.cube, transpose=False), expected)
def test_1d_cube(self):
    """
    Check the values returned by the utility when a 1d cube is input.
    """
    # Index the first three dimensions to leave a 1d cube.
    one_dimensional_cube = set_up_temperature_cube()[0, 0, 0, :]
    # Expected result is a single column of three values.
    expected = np.array([[226.15], [237.4], [248.65]])
    result = convert_cube_data_to_2d(one_dimensional_cube)
    self.assertArrayAlmostEqual(result, expected, decimal=5)
def test_3d_cube(self):
    """
    Check the values returned by the utility when a 3d cube is input.
    """
    # Index the leading dimension to leave a 3d cube.
    three_dimensional_cube = set_up_temperature_cube()[0]
    # Expected result is a single column of nine values.
    expected = np.array([
        226.15, 237.4, 248.65, 259.9, 271.15,
        282.4, 293.65, 304.9, 316.15,
    ]).reshape(-1, 1)
    result = convert_cube_data_to_2d(three_dimensional_cube)
    self.assertArrayAlmostEqual(result, expected)
def setUp(self):
    """
    Build the temperature cubes and flattened float64 arrays used as
    inputs by the tests.
    """
    super().setUp()
    historic_forecast = self.historic_temperature_forecast_cube

    # Cubes derived from the historic temperature forecast.
    self.forecast_predictor_mean = historic_forecast.collapsed(
        "realization", iris.analysis.MEAN)
    self.forecast_predictor_realizations = historic_forecast.copy()
    self.forecast_variance = historic_forecast.collapsed(
        "realization", iris.analysis.VARIANCE)
    # The realization maximum stands in for the truth.
    self.truth = historic_forecast.collapsed(
        "realization", iris.analysis.MAX)

    # Data arrays derived from the cubes above, all cast to float64.
    mean_values = self.forecast_predictor_mean.data.flatten()
    self.forecast_predictor_data = mean_values.astype(np.float64)
    realization_values = convert_cube_data_to_2d(historic_forecast.copy())
    self.forecast_predictor_data_realizations = (
        realization_values.astype(np.float64))
    variance_values = self.forecast_variance.data.flatten()
    self.forecast_variance_data = variance_values.astype(np.float64)
    self.truth_data = self.truth.data.flatten().astype(np.float64)
def test_check_values(self):
    """Check that the utility reproduces the reference data values."""
    expected = self.data
    self.assertArrayAlmostEqual(
        convert_cube_data_to_2d(self.cube), expected)
def test_basic(self):
    """Check that the utility returns a numpy array."""
    self.assertIsInstance(convert_cube_data_to_2d(self.cube), np.ndarray)
def crps_minimiser_wrapper(self, initial_guess, forecast_predictor, truth,
                           forecast_var, predictor_of_mean_flag,
                           distribution):
    """
    Pass the minimisation function selected by ``distribution`` to the
    scipy minimize function (Nelder-Mead) to estimate optimised values
    for the coefficients.

    Args:
        initial_guess (List):
            Initial guess for the coefficients, used as the starting
            point of the minimisation.
            Order of coefficients is [c, d, a, b].
        forecast_predictor (iris.cube.Cube):
            Cube containing the fields to be used as the predictor,
            either the ensemble mean or the ensemble realizations.
        truth (iris.cube.Cube):
            Cube containing the field, which will be used as truth.
        forecast_var (iris.cube.Cube):
            Cube containing the ensemble variance.
        predictor_of_mean_flag (String):
            String to specify the input to calculate the calibrated mean.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the
            predictors.
        distribution (String):
            String used to access the appropriate minimisation function
            within self.minimisation_dict.

    Returns:
        optimised_coeffs (numpy array):
            The ``x`` attribute of the scipy minimisation result, i.e.
            the optimised coefficients.
            Order of coefficients is [c, d, a, b].

    Raises:
        KeyError: If the distribution is not a key of
            self.minimisation_dict. The original lookup error is
            attached as the cause.

    Warns:
        Warning: If the minimisation did not report success, or if the
            final iteration changed any coefficient by more than
            self.TOLERATED_PERCENTAGE_CHANGE percent.
    """

    def calculate_percentage_change_in_last_iteration(allvecs):
        """
        Calculate the percentage change that has occurred within
        the last iteration of the minimisation. If the percentage change
        between the last iteration and the last-but-one iteration exceeds
        the threshold, a warning message is raised.

        Args:
            allvecs (List):
                List of numpy arrays containing the optimised
                coefficients, after each iteration.
        """
        # A change can only be computed when two iterates are available.
        if len(allvecs) < 2:
            return
        last_iteration_percentage_change = np.absolute(
            (allvecs[-1] - allvecs[-2]) / allvecs[-2]) * 100
        if np.any(last_iteration_percentage_change >
                  self.TOLERATED_PERCENTAGE_CHANGE):
            # Suppress scientific notation only while formatting the
            # message, then restore the caller's print options so that
            # global numpy state is not altered as a side effect.
            saved_printoptions = np.get_printoptions()
            np.set_printoptions(suppress=True)
            try:
                # NOTE(review): the message hard-codes "5%" while the
                # comparison uses self.TOLERATED_PERCENTAGE_CHANGE;
                # confirm that the two agree.
                msg = ("\nThe final iteration resulted in a percentage "
                       "change "
                       "that is greater than the accepted threshold of 5% "
                       "i.e. {}. "
                       "\nA satisfactory minimisation has not been "
                       "achieved. "
                       "\nLast iteration: {}, "
                       "\nLast-but-one iteration: {}"
                       "\nAbsolute difference: {}\n").format(
                           last_iteration_percentage_change,
                           allvecs[-1], allvecs[-2],
                           np.absolute(allvecs[-2] - allvecs[-1]))
            finally:
                np.set_printoptions(**saved_printoptions)
            warnings.warn(msg)

    try:
        minimisation_function = self.minimisation_dict[distribution]
    except KeyError as err:
        msg = ("Distribution requested {} is not supported in {}. "
               "Error message is {}".format(
                   distribution, self.minimisation_dict, err))
        raise KeyError(msg) from err

    # Ensure predictor_of_mean_flag is valid.
    check_predictor_of_mean_flag(predictor_of_mean_flag)

    if predictor_of_mean_flag.lower() in ["mean"]:
        forecast_predictor_data = forecast_predictor.data.flatten()
        truth_data = truth.data.flatten()
        forecast_var_data = forecast_var.data.flatten()
    elif predictor_of_mean_flag.lower() in ["realizations"]:
        truth_data = truth.data.flatten()
        # The realization coordinate is moved to the front before the
        # data is converted to a 2d array.
        forecast_predictor = (
            enforce_coordinate_ordering(
                forecast_predictor, "realization"))
        forecast_predictor_data = convert_cube_data_to_2d(
            forecast_predictor)
        forecast_var_data = forecast_var.data.flatten()

    # All inputs to the minimisation are cast to float32.
    initial_guess = np.array(initial_guess, dtype=np.float32)
    forecast_predictor_data = forecast_predictor_data.astype(np.float32)
    forecast_var_data = forecast_var_data.astype(np.float32)
    truth_data = truth_data.astype(np.float32)
    sqrt_pi = np.sqrt(np.pi).astype(np.float32)

    optimised_coeffs = minimize(
        minimisation_function, initial_guess,
        args=(forecast_predictor_data, truth_data,
              forecast_var_data, sqrt_pi, predictor_of_mean_flag),
        method="Nelder-Mead",
        options={"maxiter": self.MAX_ITERATIONS, "return_all": True})

    if not optimised_coeffs.success:
        msg = ("Minimisation did not result in convergence after "
               "{} iterations. \n{}".format(
                   self.MAX_ITERATIONS, optimised_coeffs.message))
        warnings.warn(msg)
    calculate_percentage_change_in_last_iteration(optimised_coeffs.allvecs)
    return optimised_coeffs.x
def process(self, initial_guess, forecast_predictor, truth, forecast_var,
            predictor_of_mean_flag, distribution):
    """
    Pass the function selected by ``distribution`` to the scipy minimize
    function (Nelder-Mead) to estimate optimised values for the
    coefficients.

    If the predictor_of_mean_flag is the ensemble mean, this function
    estimates values for alpha, beta, gamma and delta based on the
    equation:
    N(alpha + beta * ensemble_mean, gamma + delta * ensemble_variance),
    where N is a chosen distribution.

    If the predictor_of_mean_flag is the ensemble realizations, this
    function estimates values for alpha, beta, gamma and delta based on
    the equation:

    .. math::
      N(alpha + beta0 * realization0 + beta1 * realization1,

      gamma + delta * ensemble\\_variance)

    where N is a chosen distribution and the number of beta terms
    depends on the number of realizations provided.

    Args:
        initial_guess (list):
            Initial guess for the coefficients, used as the starting
            point of the minimisation.
            Order of coefficients is [gamma, delta, alpha, beta].
        forecast_predictor (iris.cube.Cube):
            Cube containing the fields to be used as the predictor,
            either the ensemble mean or the ensemble realizations.
        truth (iris.cube.Cube):
            Cube containing the field, which will be used as truth.
        forecast_var (iris.cube.Cube):
            Cube containing the ensemble variance.
        predictor_of_mean_flag (str):
            String to specify the input to calculate the calibrated mean.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the
            predictors.
        distribution (str):
            String used to access the appropriate function for use in the
            minimisation within self.minimisation_dict.

    Returns:
        optimised_coeffs (numpy array):
            Optimised coefficients cast to float32.
            Order of coefficients is [gamma, delta, alpha, beta].

    Raises:
        KeyError: If the distribution is not supported. The original
            lookup error is attached as the cause.

    Warns:
        Warning: If the minimisation did not converge, or if the final
            iteration changed any coefficient by more than
            self.TOLERATED_PERCENTAGE_CHANGE percent.
    """

    def calculate_percentage_change_in_last_iteration(allvecs):
        """
        Calculate the percentage change that has occurred within
        the last iteration of the minimisation. If the percentage change
        between the last iteration and the last-but-one iteration exceeds
        the threshold, a warning message is raised.

        Args:
            allvecs (list):
                List of numpy arrays containing the optimised
                coefficients, after each iteration.

        Warns:
            Warning: If a satisfactory minimisation has not been
                achieved.
        """
        # A change can only be computed when two iterates are available.
        if len(allvecs) < 2:
            return
        last_iteration_percentage_change = np.absolute(
            (allvecs[-1] - allvecs[-2]) / allvecs[-2]) * 100
        if np.any(last_iteration_percentage_change >
                  self.TOLERATED_PERCENTAGE_CHANGE):
            # Suppress scientific notation only while formatting the
            # message, then restore the caller's print options so that
            # global numpy state is not altered as a side effect.
            saved_printoptions = np.get_printoptions()
            np.set_printoptions(suppress=True)
            try:
                # NOTE(review): the message hard-codes "5%" while the
                # comparison uses self.TOLERATED_PERCENTAGE_CHANGE;
                # confirm that the two agree.
                msg = ("The final iteration resulted in a percentage "
                       "change "
                       "that is greater than the accepted threshold of 5% "
                       "i.e. {}. "
                       "\nA satisfactory minimisation has not been "
                       "achieved. "
                       "\nLast iteration: {}, "
                       "\nLast-but-one iteration: {}"
                       "\nAbsolute difference: {}\n").format(
                           last_iteration_percentage_change,
                           allvecs[-1], allvecs[-2],
                           np.absolute(allvecs[-2] - allvecs[-1]))
            finally:
                np.set_printoptions(**saved_printoptions)
            warnings.warn(msg)

    try:
        minimisation_function = self.minimisation_dict[distribution]
    except KeyError as err:
        msg = ("Distribution requested {} is not supported in {}. "
               "Error message is {}".format(
                   distribution, self.minimisation_dict, err))
        raise KeyError(msg) from err

    # Ensure predictor_of_mean_flag is valid.
    check_predictor_of_mean_flag(predictor_of_mean_flag)

    if predictor_of_mean_flag.lower() == "mean":
        forecast_predictor_data = forecast_predictor.data.flatten()
        truth_data = truth.data.flatten()
        forecast_var_data = forecast_var.data.flatten()
    elif predictor_of_mean_flag.lower() == "realizations":
        truth_data = truth.data.flatten()
        # The realization coordinate is moved to the front before the
        # data is converted to a 2d array.
        forecast_predictor = (
            enforce_coordinate_ordering(
                forecast_predictor, "realization"))
        forecast_predictor_data = convert_cube_data_to_2d(
            forecast_predictor)
        forecast_var_data = forecast_var.data.flatten()

    # Increased precision is needed for stable coefficient calculation.
    # The resulting coefficients are cast to float32 prior to output.
    initial_guess = np.array(initial_guess, dtype=np.float64)
    forecast_predictor_data = forecast_predictor_data.astype(np.float64)
    forecast_var_data = forecast_var_data.astype(np.float64)
    truth_data = truth_data.astype(np.float64)
    sqrt_pi = np.sqrt(np.pi).astype(np.float64)

    optimised_coeffs = minimize(
        minimisation_function, initial_guess,
        args=(forecast_predictor_data, truth_data,
              forecast_var_data, sqrt_pi, predictor_of_mean_flag),
        method="Nelder-Mead",
        options={"maxiter": self.max_iterations, "return_all": True})

    if not optimised_coeffs.success:
        msg = ("Minimisation did not result in convergence after "
               "{} iterations. \n{}".format(
                   self.max_iterations, optimised_coeffs.message))
        warnings.warn(msg)
    calculate_percentage_change_in_last_iteration(optimised_coeffs.allvecs)
    return optimised_coeffs.x.astype(np.float32)
def _interpolate_percentiles(self, forecast_at_percentiles,
                             desired_percentiles, bounds_pairing,
                             percentile_coord):
    """
    Linearly interpolate a forecast from its original set of percentiles
    onto a new set of percentiles.

    Args:
        forecast_at_percentiles (Iris CubeList or Iris Cube):
            Cube or CubeList expected to contain a percentile coordinate.
        desired_percentiles (Numpy array):
            Array of the desired percentiles.
        bounds_pairing (Tuple):
            Lower and upper bound to be used as the ends of the
            cumulative distribution function.
        percentile_coord (String):
            Name of required percentile coordinate.

    Returns:
        percentile_cube (iris cube.Cube):
            Cube containing values for the required diagnostic e.g.
            air_temperature at the required percentiles.
    """
    source_percentiles = (
        forecast_at_percentiles.coord(percentile_coord).points)

    # The percentile dimension must lead, so that the 2d conversion
    # yields the data in the desired order.
    forecast_at_percentiles = enforce_coordinate_ordering(
        forecast_at_percentiles, percentile_coord)
    forecast_2d = convert_cube_data_to_2d(
        forecast_at_percentiles, coord=percentile_coord)

    source_percentiles, forecast_2d = (
        self._add_bounds_to_percentiles_and_forecast_at_percentiles(
            source_percentiles, forecast_2d, bounds_pairing))

    # One column of interpolated values per row of the 2d forecast.
    number_of_rows = forecast_2d.shape[0]
    interpolated = np.empty(
        (len(desired_percentiles), number_of_rows), dtype=np.float32)
    for row_index, row_values in enumerate(forecast_2d):
        interpolated[:, row_index] = np.interp(
            desired_percentiles, source_percentiles, row_values)

    # Put the percentile dimension first, followed by any other
    # dimension coordinates.
    reshaped_data = restore_non_probabilistic_dimensions(
        interpolated, forecast_at_percentiles, percentile_coord,
        len(desired_percentiles))

    # Only the first slice is needed, as a template without the
    # percentile coordinate.
    for template_cube in forecast_at_percentiles.slices_over(
            percentile_coord):
        template_cube.remove_coord(percentile_coord)
        break

    return create_cube_with_percentiles(
        desired_percentiles, template_cube, reshaped_data,
        custom_name=percentile_coord)
def _apply_params(self, forecast_predictors, forecast_vars): """ Function to apply EMOS coefficients to all required dates. Args: forecast_predictors (iris.cube.Cube): Cube containing the forecast predictor e.g. ensemble mean or ensemble realizations. forecast_vars (iris.cube.Cube): Cube containing the forecast variance e.g. ensemble variance. Returns: (tuple) : tuple containing: **calibrated_forecast_predictor** (iris.cube.Cube): Cube containing the calibrated version of the ensemble predictor, either the ensemble mean or the ensemble realizations. **calibrated_forecast_variance** (iris.cube.Cube): Cube containing the calibrated version of the ensemble variance, either the ensemble mean or the ensemble realizations. """ optimised_coeffs = ( dict(zip(self.coefficients_cube.coord("coefficient_name").points, self.coefficients_cube.data))) # Calculate the predicted mean based on whether the coefficients # were estimated using the mean as the predictor or using the # ensemble realizations as the predictor. if self.predictor_of_mean_flag.lower() == "mean": # Calculate predicted mean = a + b*X, where X is the # raw ensemble mean. In this case, b = beta. a_and_b = [optimised_coeffs["alpha"], optimised_coeffs["beta"]] forecast_predictor_flat = forecast_predictors.data.flatten() col_of_ones = ( np.ones(forecast_predictor_flat.shape, dtype=np.float32)) ones_and_mean = ( np.column_stack((col_of_ones, forecast_predictor_flat))) predicted_mean = np.dot(ones_and_mean, a_and_b) calibrated_forecast_predictor = forecast_predictors elif self.predictor_of_mean_flag.lower() == "realizations": # Calculate predicted mean = a + b*X, where X is the # raw ensemble mean. In this case, b = beta^2. 
beta_values = np.array([], dtype=np.float32) for key in optimised_coeffs.keys(): if key.startswith("beta"): beta_values = np.append(beta_values, optimised_coeffs[key]) a_and_b = np.append(optimised_coeffs["alpha"], beta_values**2) forecast_predictor_flat = ( convert_cube_data_to_2d(forecast_predictors)) forecast_var_flat = forecast_vars.data.flatten() col_of_ones = np.ones(forecast_var_flat.shape, dtype=np.float32) ones_and_mean = ( np.column_stack((col_of_ones, forecast_predictor_flat))) predicted_mean = np.dot(ones_and_mean, a_and_b) # Calculate mean of ensemble realizations, as only the # calibrated ensemble mean will be returned. calibrated_forecast_predictor = ( forecast_predictors.collapsed( "realization", iris.analysis.MEAN)) xlen = len(forecast_predictors.coord(axis="x").points) ylen = len(forecast_predictors.coord(axis="y").points) calibrated_forecast_predictor.data = ( np.reshape(predicted_mean, (ylen, xlen))) calibrated_forecast_var = forecast_vars # Calculating the predicted variance, based on the # raw variance S^2, where predicted variance = c + dS^2, # where c = (gamma)^2 and d = (delta)^2 calibrated_forecast_var.data = ( optimised_coeffs["gamma"]**2 + optimised_coeffs["delta"]**2 * forecast_vars.data) return calibrated_forecast_predictor, calibrated_forecast_var
def _interpolate_percentiles(
        self, forecast_at_percentiles, desired_percentiles,
        bounds_pairing):
    """
    Linearly interpolate a forecast from its original set of percentiles
    onto a new set of percentiles.

    Parameters
    ----------
    forecast_at_percentiles : Iris CubeList or Iris Cube
        Cube or CubeList expected to contain a percentile coordinate.
    desired_percentiles : Numpy array
        Array of the desired percentiles.
    bounds_pairing : Tuple
        Lower and upper bound to be used as the ends of the
        cumulative distribution function.

    Returns
    -------
    percentile_cube : Iris cube
        Cube containing values for the required diagnostic e.g.
        air_temperature at the required percentiles.

    """
    source_percentiles = forecast_at_percentiles.coord(
        "percentile_over_realization").points

    # The percentile dimension must lead, so that the 2d conversion
    # yields the data in the desired order.
    forecast_at_percentiles = ensure_dimension_is_the_zeroth_dimension(
        forecast_at_percentiles, "percentile_over_realization")
    forecast_2d = convert_cube_data_to_2d(
        forecast_at_percentiles, coord="percentile_over_realization")

    source_percentiles, forecast_2d = (
        self._add_bounds_to_percentiles_and_forecast_at_percentiles(
            source_percentiles, forecast_2d, bounds_pairing))

    # One column of interpolated values per row of the 2d forecast.
    number_of_rows = forecast_2d.shape[0]
    interpolated = np.empty((len(desired_percentiles), number_of_rows))
    for row_index, row_values in enumerate(forecast_2d):
        interpolated[:, row_index] = np.interp(
            desired_percentiles, source_percentiles, row_values)

    # Put the percentile dimension first, followed by any other
    # dimension coordinates.
    reshaped_data = restore_non_probabilistic_dimensions(
        interpolated, forecast_at_percentiles,
        "percentile_over_realization", len(desired_percentiles))

    # Only the first slice is needed, as a template without the
    # percentile coordinate.
    for template_cube in forecast_at_percentiles.slices_over(
            "percentile_over_realization"):
        template_cube.remove_coord("percentile_over_realization")
        break

    return create_cube_with_percentiles(
        desired_percentiles, template_cube, reshaped_data)
def compute_initial_guess(self, truth, forecast_predictor,
                          predictor_of_mean_flag,
                          estimate_coefficients_from_linear_model_flag,
                          no_of_realizations=None):
    """
    Function to compute initial guess of the alpha, beta, gamma
    and delta components of the EMOS coefficients by linear regression
    of the forecast predictor and the truth, if requested. Otherwise,
    default values for the coefficients will be used.

    If the predictor_of_mean_flag is "mean", then the order of
    the initial_guess is [gamma, delta, alpha, beta]. Otherwise, if the
    predictor_of_mean_flag is "realizations" then the order of the
    initial_guess is [gamma, delta, alpha, beta0, beta1, beta2], where
    the number of beta variables will correspond to the number of
    realizations. In this example initial guess with three beta
    variables, there will correspondingly be three realizations.

    The coefficients relate to adjustments to the ensemble mean or the
    ensemble realizations, and adjustments to the ensemble variance:

    .. math::
        alpha + beta * ensemble\\_mean

    or

    .. math::
        alpha + beta0 * realization1 + beta1 * realization2

    .. math::
        gamma + delta * ensemble\\_variance

    The default values for the initial guesses are in
    [gamma, delta, alpha, beta] ordering:

    * For the ensemble mean, the default initial guess: [0, 1, 0, 1]
      assumes that the raw forecast is skilful and the expected
      adjustments are small.

    * For the ensemble realizations, each beta is set to
      sqrt(1 / no_of_realizations), so that every realization is given
      the same initial value. For three realizations this is effectively
      a weight of 1/3. each, i.e. [0, 1, 0, 1/3., 1/3., 1/3.], once the
      beta values are squared.

    If linear regression is enabled, the alpha and beta coefficients
    associated with the ensemble mean or ensemble realizations are
    modified based on the results from the linear regression fit. For
    the ensemble realizations this requires statsmodels
    (self.statsmodels_found); otherwise the default guess is used.

    Args:
        truth (iris.cube.Cube):
            Cube containing the field, which will be used as truth.
        forecast_predictor (iris.cube.Cube):
            Cube containing the fields to be used as the predictor,
            either the ensemble mean or the ensemble realizations.
        predictor_of_mean_flag (str):
            String to specify the input to calculate the calibrated mean.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the
            predictors.
        estimate_coefficients_from_linear_model_flag (bool):
            Flag whether coefficients should be estimated from
            the linear regression, or static estimates should be used.
        no_of_realizations (int):
            Number of realizations, if ensemble realizations are to be
            used as predictors. Default is None.

    Returns:
        initial_guess (numpy array):
            float32 array of coefficients to be used as initial guess.
            Order of coefficients is [gamma, delta, alpha, beta].
    """
    if (predictor_of_mean_flag.lower() == "mean" and
            not estimate_coefficients_from_linear_model_flag):
        initial_guess = [0, 1, 0, 1]
    elif (predictor_of_mean_flag.lower() == "realizations" and
          not estimate_coefficients_from_linear_model_flag):
        initial_guess = [0, 1, 0] + np.repeat(
            np.sqrt(1. / no_of_realizations), no_of_realizations).tolist()
    elif estimate_coefficients_from_linear_model_flag:
        if predictor_of_mean_flag.lower() == "mean":
            truth_flat = truth.data.flatten()
            forecast_flat = forecast_predictor.data.flatten()
            # Keep only the points where both truth and forecast are
            # not NaN. np.vstack replaces the deprecated np.row_stack.
            combined_not_nan = np.all(
                np.vstack(
                    [~np.isnan(truth_flat), ~np.isnan(forecast_flat)]),
                axis=0)
            if not np.any(combined_not_nan):
                # No valid pairs to regress on.
                gradient, intercept = ([np.nan, np.nan])
            else:
                gradient, intercept, _, _, _ = stats.linregress(
                    forecast_flat[combined_not_nan],
                    truth_flat[combined_not_nan])
            initial_guess = [0, 1, intercept, gradient]
        elif predictor_of_mean_flag.lower() == "realizations":
            if self.statsmodels_found:
                truth_data = truth.data.flatten()
                forecast_predictor = (
                    enforce_coordinate_ordering(
                        forecast_predictor, "realization"))
                forecast_data = np.array(
                    convert_cube_data_to_2d(
                        forecast_predictor, transpose=False),
                    dtype=np.float32)
                # Keep only the points where the truth and every row of
                # the forecast data are not NaN.
                combined_not_nan = np.all(
                    np.vstack(
                        [~np.isnan(truth_data), ~np.isnan(forecast_data)]),
                    axis=0)
                val = self.sm.add_constant(
                    forecast_data[:, combined_not_nan].T)
                est = self.sm.OLS(truth_data[combined_not_nan], val).fit()
                intercept = est.params[0]
                gradient = est.params[1:]
                initial_guess = [0, 1, intercept] + gradient.tolist()
            else:
                # Without statsmodels, fall back to the default guess.
                initial_guess = (
                    [0, 1, 0] +
                    np.repeat(np.sqrt(1. / no_of_realizations),
                              no_of_realizations).tolist())
    return np.array(initial_guess, dtype=np.float32)
def compute_initial_guess(self, truth, forecast_predictor,
                          predictor_of_mean_flag,
                          estimate_coefficients_from_linear_model_flag,
                          no_of_realizations=None):
    """
    Function to compute initial guess of the a and beta components of
    the EMOS coefficients by linear regression of the forecast predictor
    and the truth, if requested. Otherwise, default values for a and b
    will be used.

    Default values have been chosen based on Figure 8 in the
    2017 ensemble calibration report available on the
    Science Plugin Documents Confluence page.

    For the ensemble realizations, linear regression requires
    statsmodels (self.statsmodels_found); otherwise the default guess
    is used.

    Args:
        truth (Iris cube):
            Cube containing the field, which will be used as truth.
        forecast_predictor (Iris cube):
            Cube containing the fields to be used as the predictor,
            either the ensemble mean or the ensemble realizations.
        predictor_of_mean_flag (String):
            String to specify the input to calculate the calibrated mean.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the
            predictors.
        estimate_coefficients_from_linear_model_flag (Logical):
            Flag whether coefficients should be estimated from
            the linear regression, or static estimates should be used.
        no_of_realizations (Int):
            Number of realizations, if ensemble realizations are to be
            used as predictors. Default is None.

    Returns:
        initial_guess (List):
            List of coefficients to be used as initial guess.
            Order of coefficients is [c, d, a, b].
    """
    if (predictor_of_mean_flag.lower() in ["mean"] and
            not estimate_coefficients_from_linear_model_flag):
        initial_guess = [1, 1, 0, 1]
    elif (predictor_of_mean_flag.lower() in ["realizations"] and
          not estimate_coefficients_from_linear_model_flag):
        initial_guess = [1, 1, 0] + np.repeat(
            1, no_of_realizations).tolist()
    elif estimate_coefficients_from_linear_model_flag:
        if predictor_of_mean_flag.lower() in ["mean"]:
            truth_flat = truth.data.flatten()
            forecast_flat = forecast_predictor.data.flatten()
            # Keep only the points where both truth and forecast are
            # not NaN. np.vstack replaces the deprecated np.row_stack.
            combined_not_nan = np.all(
                np.vstack(
                    [~np.isnan(truth_flat), ~np.isnan(forecast_flat)]),
                axis=0)
            if not np.any(combined_not_nan):
                # No valid pairs to regress on.
                gradient, intercept = ([np.nan, np.nan])
            else:
                gradient, intercept, _, _, _ = stats.linregress(
                    forecast_flat[combined_not_nan],
                    truth_flat[combined_not_nan])
            initial_guess = [1, 1, intercept, gradient]
        elif predictor_of_mean_flag.lower() in ["realizations"]:
            if self.statsmodels_found:
                truth_data = truth.data.flatten()
                forecast_predictor = (
                    enforce_coordinate_ordering(
                        forecast_predictor, "realization"))
                forecast_data = np.array(
                    convert_cube_data_to_2d(
                        forecast_predictor, transpose=False))
                # Keep only the points where the truth and every row of
                # the forecast data are not NaN.
                combined_not_nan = np.all(
                    np.vstack(
                        [~np.isnan(truth_data), ~np.isnan(forecast_data)]),
                    axis=0)
                val = self.sm.add_constant(
                    forecast_data[:, combined_not_nan].T)
                est = self.sm.OLS(truth_data[combined_not_nan], val).fit()
                intercept = est.params[0]
                gradient = est.params[1:]
                initial_guess = [1, 1, intercept] + gradient.tolist()
            else:
                # Without statsmodels, fall back to the default guess.
                initial_guess = (
                    [1, 1, 0] + np.repeat(1, no_of_realizations).tolist())
    return initial_guess
def _apply_params(self, forecast_predictors, forecast_vars, optimised_coeffs,
                  coeff_names, predictor_of_mean_flag):
    """
    Function to apply EMOS coefficients to all required dates.

    Dates without coefficients are passed through uncalibrated, with a
    warning, and a NaN-filled entry is stored in optimised_coeffs for
    that date.

    Args:
        forecast_predictors (Iris cube):
            Cube containing the forecast predictor e.g. ensemble mean
            or ensemble realizations.
        forecast_vars (Iris cube.):
            Cube containing the forecast variance e.g. ensemble variance.
        optimised_coeffs (Dictionary):
            Coefficients for all dates, keyed by date. Modified in
            place for dates that have no coefficients.
        coeff_names (List):
            Coefficient names.
        predictor_of_mean_flag (String):
            String to specify the input to calculate the calibrated mean.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the predictors.

    Returns:
        (tuple) : tuple containing:
            **calibrated_forecast_predictor_all_dates** (CubeList):
                List of cubes containing the calibrated forecast predictor.
            **calibrated_forecast_var_all_dates** (CubeList):
                List of cubes containing the calibrated forecast variance.
            **calibrated_forecast_coefficients_all_dates** (CubeList):
                List of cubes containing the coefficients used for
                calibration.

    Raises:
        ValueError: If fewer coefficients than coefficient names are
            available for a date.
        ValueError: If predictor_of_mean_flag is neither "mean" nor
            "realizations" when a date is calibrated.
    """
    calibrated_forecast_predictor_all_dates = iris.cube.CubeList()
    calibrated_forecast_var_all_dates = iris.cube.CubeList()
    calibrated_forecast_coefficients_all_dates = iris.cube.CubeList()

    for forecast_predictor, forecast_var in zip(
            forecast_predictors.slices_over("time"),
            forecast_vars.slices_over("time")):
        date = iris_time_to_datetime(
            forecast_predictor.coord("time").copy())[0]
        constr = iris.Constraint(time=date)
        forecast_predictor_at_date = forecast_predictor.extract(constr)
        forecast_var_at_date = forecast_var.extract(constr)

        # If the coefficients are not available for the date, use the
        # raw ensemble forecast as the calibrated ensemble forecast.
        if date not in optimised_coeffs:
            msg = ("Ensemble calibration not available "
                   "for forecasts with start time of {}. "
                   "Coefficients not available".format(
                       date.strftime("%Y%m%d%H%M")))
            warnings.warn(msg)
            calibrated_forecast_predictor_at_date = (
                forecast_predictor_at_date.copy())
            calibrated_forecast_var_at_date = forecast_var_at_date.copy()
            optimised_coeffs[date] = np.full(len(coeff_names), np.nan)
            # Pass the per-date coefficients, as the calibrated branch
            # below does, rather than the whole dictionary.
            coeff_cubes = self._create_coefficient_cube(
                forecast_predictor_at_date,
                optimised_coeffs[date], coeff_names)
        else:
            optimised_coeffs_at_date = optimised_coeffs[date]

            # Assigning coefficients to coefficient names.
            if len(optimised_coeffs_at_date) == len(coeff_names):
                optimised_coeffs_at_date = dict(
                    zip(coeff_names, optimised_coeffs_at_date))
            elif len(optimised_coeffs_at_date) > len(coeff_names):
                # The surplus values are folded into "beta", so that
                # "beta" becomes an array.
                excess_beta = (
                    optimised_coeffs_at_date[len(coeff_names):].tolist())
                optimised_coeffs_at_date = dict(
                    zip(coeff_names, optimised_coeffs_at_date))
                optimised_coeffs_at_date["beta"] = np.array(
                    [optimised_coeffs_at_date["beta"]] + excess_beta)
            else:
                msg = ("Number of coefficient names {} with names {} "
                       "is not equal to the number of "
                       "optimised_coeffs_at_date values {} "
                       "with values {} or the number of "
                       "coefficients is not greater than the "
                       "number of coefficient names. Can not continue "
                       "if the number of coefficient names out number "
                       "the number of coefficients".format(
                           len(coeff_names), coeff_names,
                           len(optimised_coeffs_at_date),
                           optimised_coeffs_at_date))
                raise ValueError(msg)

            if predictor_of_mean_flag.lower() in ["mean"]:
                # Calculate predicted mean = a + b*X, where X is the
                # raw ensemble mean. In this case, b = beta.
                beta = [
                    optimised_coeffs_at_date["a"],
                    optimised_coeffs_at_date["beta"]
                ]
                forecast_predictor_flat = (
                    forecast_predictor_at_date.data.flatten())
                new_col = np.ones(forecast_predictor_flat.shape)
                all_data = np.column_stack(
                    (new_col, forecast_predictor_flat))
                predicted_mean = np.dot(all_data, beta)
                calibrated_forecast_predictor_at_date = (
                    forecast_predictor_at_date)
            elif predictor_of_mean_flag.lower() in ["realizations"]:
                # Calculate predicted mean = a + b*X, where X is the
                # raw ensemble realizations. In this case, b = beta^2.
                beta = np.concatenate(
                    [[optimised_coeffs_at_date["a"]],
                     optimised_coeffs_at_date["beta"]**2])
                # Order the cube that is actually converted below.
                # Previously the reordered cube was bound to the loop
                # variable and never used.
                forecast_predictor_at_date = enforce_coordinate_ordering(
                    forecast_predictor_at_date, "realization")
                forecast_predictor_flat = (
                    convert_cube_data_to_2d(forecast_predictor_at_date))
                forecast_var_flat = forecast_var_at_date.data.flatten()
                new_col = np.ones(forecast_var_flat.shape)
                all_data = np.column_stack(
                    (new_col, forecast_predictor_flat))
                predicted_mean = np.dot(all_data, beta)
                # Calculate mean of ensemble realizations, as only the
                # calibrated ensemble mean will be returned.
                calibrated_forecast_predictor_at_date = (
                    forecast_predictor_at_date.collapsed(
                        "realization", iris.analysis.MEAN))
            else:
                # Without this guard predicted_mean would be undefined,
                # or left over from a previous date.
                raise ValueError(
                    "Unsupported predictor_of_mean_flag: {!r}. Supported "
                    "values are 'mean' and 'realizations'.".format(
                        predictor_of_mean_flag))

            xlen = len(forecast_predictor_at_date.coord(axis="x").points)
            ylen = len(forecast_predictor_at_date.coord(axis="y").points)
            predicted_mean = np.reshape(predicted_mean, (ylen, xlen))
            calibrated_forecast_predictor_at_date.data = predicted_mean

            # Calculating the predicted variance, based on the
            # raw variance S^2, where predicted variance = c + dS^2,
            # where c = (gamma)^2 and d = (delta)^2
            predicted_var = (optimised_coeffs_at_date["gamma"]**2 +
                             optimised_coeffs_at_date["delta"]**2 *
                             forecast_var_at_date.data)

            calibrated_forecast_var_at_date = forecast_var_at_date
            calibrated_forecast_var_at_date.data = predicted_var

            coeff_cubes = self._create_coefficient_cube(
                calibrated_forecast_predictor_at_date,
                optimised_coeffs[date], coeff_names)

        calibrated_forecast_predictor_all_dates.append(
            calibrated_forecast_predictor_at_date)
        calibrated_forecast_var_all_dates.append(
            calibrated_forecast_var_at_date)
        calibrated_forecast_coefficients_all_dates.extend(coeff_cubes)

    return (calibrated_forecast_predictor_all_dates,
            calibrated_forecast_var_all_dates,
            calibrated_forecast_coefficients_all_dates)
def _probabilities_to_percentiles(self, forecast_probabilities, percentiles,
                                  bounds_pairing):
    """
    Conversion of probabilities to percentiles through the construction
    of an cumulative distribution function. This is effectively
    constructed by linear interpolation from the probabilities associated
    with each threshold to a set of percentiles.

    Args:
        forecast_probabilities (Iris cube):
            Cube with a threshold coordinate. Its
            'relative_to_threshold' attribute must be 'above' or 'below'.
        percentiles (Numpy array):
            Array of percentiles, at which the corresponding values will be
            calculated.
        bounds_pairing (Tuple):
            Lower and upper bound to be used as the ends of the
            cumulative distribution function.

    Returns:
        percentile_cube (Iris cube):
            Cube containing values for the required diagnostic e.g.
            air_temperature at the required percentiles.

    Raises:
        NotImplementedError: If the 'relative_to_threshold' attribute is
            neither 'above' nor 'below'.

    Warns:
        UserWarning: If the probabilities used to construct the CDF are
            not ascending.
    """
    threshold_coord = forecast_probabilities.coord("threshold")
    threshold_name = threshold_coord.name()
    threshold_unit = threshold_coord.units
    threshold_points = threshold_coord.points

    # Ensure that the threshold dimension is first, so that the
    # conversion to a 2d array produces data in the desired order.
    forecast_probabilities = enforce_coordinate_ordering(
        forecast_probabilities, threshold_name)
    prob_slices = convert_cube_data_to_2d(
        forecast_probabilities, coord=threshold_name)

    # The requirement below for a monotonically changing probability
    # across thresholds can be thwarted by precision errors of order 1E-10,
    # as such, here we round to a precision of 9 decimal places.
    prob_slices = np.around(prob_slices, 9)

    # Invert probabilities for data thresholded above thresholds.
    relation = forecast_probabilities.attributes['relative_to_threshold']
    if relation == 'above':
        probabilities_for_cdf = 1 - prob_slices
    elif relation == 'below':
        probabilities_for_cdf = prob_slices
    else:
        msg = ("Probabilities to percentiles only implemented for "
               "thresholds above or below a given value. "
               "The relation to threshold is given as {}".format(relation))
        raise NotImplementedError(msg)

    threshold_points, probabilities_for_cdf = (
        self._add_bounds_to_thresholds_and_probabilities(
            threshold_points, probabilities_for_cdf, bounds_pairing))

    if np.any(np.diff(probabilities_for_cdf) < 0):
        msg = ("The probability values used to construct the "
               "Cumulative Distribution Function (CDF) "
               "must be ascending i.e. in order to yield "
               "a monotonically increasing CDF. "
               "The probabilities are {}".format(probabilities_for_cdf))
        warnings.warn(msg)

    # Express the percentiles as fractions for the interpolation, and keep
    # the percentages themselves as float32 for the output cube. Converting
    # directly avoids the rounding error of dividing by 100 and then
    # multiplying back in float32.
    percentile_fractions = np.array(
        [x / 100.0 for x in percentiles], dtype=np.float32)
    percentiles = np.array(percentiles, dtype=np.float32)

    forecast_at_percentiles = np.empty(
        (len(percentiles), probabilities_for_cdf.shape[0]),
        dtype=np.float32)
    # Each row of probabilities_for_cdf is interpolated independently.
    for index in range(probabilities_for_cdf.shape[0]):
        forecast_at_percentiles[:, index] = np.interp(
            percentile_fractions, probabilities_for_cdf[index, :],
            threshold_points)

    # Reshape forecast_at_percentiles, so the percentiles dimension is
    # first, and any other dimension coordinates follow.
    forecast_at_percentiles = restore_non_probabilistic_dimensions(
        forecast_at_percentiles, forecast_probabilities,
        threshold_name, len(percentiles))

    # Use the first threshold slice as a template for the output cube,
    # after stripping the probability-specific metadata.
    template_cube = next(
        iter(forecast_probabilities.slices_over(threshold_name)))
    template_cube.rename(
        template_cube.name().replace("probability_of_", ""))
    template_cube.remove_coord(threshold_name)
    template_cube.attributes.pop('relative_to_threshold')

    percentile_cube = create_cube_with_percentiles(
        percentiles, template_cube, forecast_at_percentiles,
        custom_name='percentile', cube_unit=threshold_unit)
    return percentile_cube
def _probabilities_to_percentiles(self, forecast_probabilities, percentiles,
                                  bounds_pairing):
    """
    Conversion of probabilities to percentiles through the construction
    of an cumulative distribution function. This is effectively
    constructed by linear interpolation from the probabilities associated
    with each threshold to a set of percentiles.

    Parameters
    ----------
    forecast_probabilities : Iris cube
        Cube with a threshold coordinate. The probabilities are always
        inverted (1 - probability) before the CDF is built.
    percentiles : Numpy array
        Array of percentiles, at which the corresponding values will be
        calculated.
    bounds_pairing : Tuple
        Lower and upper bound to be used as the ends of the
        cumulative distribution function.

    Returns
    -------
    percentile_cube : Iris cube
        Cube containing values for the required diagnostic e.g.
        air_temperature at the required percentiles.

    Raises
    ------
    ValueError
        If the probabilities used to construct the CDF are not
        ascending.

    """
    threshold_coord = forecast_probabilities.coord("threshold")
    threshold_name = threshold_coord.name()
    threshold_points = threshold_coord.points

    # Ensure that the threshold dimension is first, so that the
    # conversion to a 2d array produces data in the desired order.
    forecast_probabilities = ensure_dimension_is_the_zeroth_dimension(
        forecast_probabilities, threshold_name)
    prob_slices = convert_cube_data_to_2d(
        forecast_probabilities, coord=threshold_name)

    # Invert probabilities
    probabilities_for_cdf = 1 - prob_slices

    threshold_points, probabilities_for_cdf = (
        self._add_bounds_to_thresholds_and_probabilities(
            threshold_points, probabilities_for_cdf, bounds_pairing))

    if np.any(np.diff(probabilities_for_cdf) < 0):
        msg = ("The probability values used to construct the "
               "Cumulative Distribution Function (CDF) "
               "must be ascending i.e. in order to yield "
               "a monotonically increasing CDF. "
               "The probabilities are {}".format(probabilities_for_cdf))
        raise ValueError(msg)

    # Express the percentiles as fractions for the interpolation only.
    percentile_fractions = [x / 100.0 for x in percentiles]
    # Keep the requested percentages for the output cube. Dividing by 100
    # and multiplying back is not exact in floating point (7 becomes
    # 7.000000000000001). Multiplying by 1.0 is exact and still promotes
    # integer percentiles to floats, as the previous round trip did.
    percentiles = [x * 1.0 for x in percentiles]

    forecast_at_percentiles = np.empty(
        (len(percentiles), probabilities_for_cdf.shape[0]))
    # Each row of probabilities_for_cdf is interpolated independently.
    for index in range(probabilities_for_cdf.shape[0]):
        forecast_at_percentiles[:, index] = np.interp(
            percentile_fractions, probabilities_for_cdf[index, :],
            threshold_points)

    # Reshape forecast_at_percentiles, so the percentiles dimension is
    # first, and any other dimension coordinates follow.
    forecast_at_percentiles = restore_non_probabilistic_dimensions(
        forecast_at_percentiles, forecast_probabilities,
        threshold_name, len(percentiles))

    # Use the first threshold slice, without its threshold coordinate, as
    # a template for the output cube.
    template_cube = next(
        iter(forecast_probabilities.slices_over(threshold_name)))
    template_cube.remove_coord(threshold_name)

    percentile_cube = create_cube_with_percentiles(
        percentiles, template_cube, forecast_at_percentiles)
    return percentile_cube