def test_alternative_percentiles(self):
    """Check the calibrated forecast is returned at the requested percentiles.

    The plugin is constructed with ``self.alternative_percentiles`` and the
    percentile coordinate of the output must match those values exactly.
    """
    plugin = ApplyEMOS(percentiles=self.alternative_percentiles)
    calibrated = plugin(self.percentiles, self.coefficients, realizations_count=3)
    percentile_points = calibrated.coord("percentile").points
    self.assertArrayEqual(percentile_points, self.alternative_percentiles)
def test_alternative_string_percentiles(self):
    """Check string-valued percentiles are accepted by the plugin.

    The requested percentiles are passed in as strings; the percentile
    coordinate of the output must still match the numeric values held in
    ``self.alternative_percentiles``.
    """
    requested = [str(value) for value in self.alternative_percentiles]
    plugin = ApplyEMOS(percentiles=requested)
    calibrated = plugin(self.percentiles, self.coefficients, realizations_count=3)
    percentile_points = calibrated.coord("percentile").points
    self.assertArrayEqual(percentile_points, self.alternative_percentiles)
def test_null_probabilities(self):
    """Apply "neutral" EMOS coefficients to a probability forecast.

    The output is expected to remain a probability cube. The mean and the
    probabilities of 0 and 1 are not preserved.
    """
    # One constant 3x3 field per slice along the leading dimension.
    slice_values = (0.9999999, 0.9452005, 0.02274995)
    expected_data = np.array([np.full((3, 3), value) for value in slice_values])

    plugin = ApplyEMOS()
    calibrated = plugin(self.probabilities, self.coefficients, realizations_count=3)

    self.assertIn("probability_of", calibrated.name())
    self.assertArrayAlmostEqual(calibrated.data, expected_data)
def test_percentiles_in_probabilities_out(self):
    """Calibrate a percentile forecast and request probability output.

    "Neutral" EMOS coefficients are applied to the percentile input, and a
    probability template is supplied so that the result is returned as a
    probability cube.
    """
    # One constant 3x3 field per slice along the leading dimension.
    slice_values = (1.0, 0.977250, 0.001350)
    expected_data = np.array([np.full((3, 3), value) for value in slice_values])

    plugin = ApplyEMOS()
    calibrated = plugin(
        self.percentiles,
        self.coefficients,
        realizations_count=3,
        prob_template=self.probabilities,
    )

    self.assertIn("probability_of", calibrated.name())
    self.assertArrayAlmostEqual(calibrated.data, expected_data)
def test_invalid_attribute(self):
    """Check for an error when coefficient cubes disagree on distribution.

    Only the first coefficients cube has its "distribution" attribute
    changed, so the cubes in the cubelist no longer share one value.
    """
    first_cube = self.coefficients[0]
    first_cube.attributes["distribution"] = "truncnorm"

    expected_message = "Coefficients must share the same"
    with self.assertRaisesRegex(AttributeError, expected_message):
        ApplyEMOS()(self.percentiles, self.coefficients, realizations_count=3)
def test_additional_predictor(self):
    """Calibrate with a static additional predictor supplied."""
    altitude = set_up_variable_cube(
        np.ones((3, 3), dtype=np.float32), name="surface_altitude", units="m"
    )
    # Strip the time-related coordinates from the additional predictor.
    time_coord_names = ["time", "forecast_reference_time", "forecast_period"]
    for coord_name in time_coord_names:
        altitude.remove_coord(coord_name)

    predictors = CubeList([self.realizations, altitude])
    coefficients = build_coefficients_cubelist(
        self.realizations, [0, [0.9, 0.1], 0, 1], predictors
    )

    # One constant 3x3 field per slice along the leading dimension.
    slice_values = (9.325102, 9.46, 9.594898)
    expected_data = np.array([np.full((3, 3), value) for value in slice_values])

    plugin = ApplyEMOS()
    calibrated = plugin(
        self.percentiles,
        coefficients,
        additional_fields=CubeList([altitude]),
        realizations_count=3,
    )
    self.assertArrayAlmostEqual(calibrated.data, expected_data)
def test_missing_attribute(self):
    """Check for an error when one cube lacks the distribution attribute.

    The "distribution" attribute is removed from the first coefficients
    cube only, leaving the cubelist inconsistent.
    """
    first_cube = self.coefficients[0]
    first_cube.attributes.pop("distribution")

    expected_message = "Coefficients must share the same"
    with self.assertRaisesRegex(AttributeError, expected_message):
        ApplyEMOS()(self.percentiles, self.coefficients, realizations_count=3)
def test_completely_missing_attribute(self):
    """Check for an error when no cube has the distribution attribute.

    The "distribution" attribute is removed from every cube within the
    coefficients cubelist before the plugin is called.
    """
    for coefficient_cube in self.coefficients:
        coefficient_cube.attributes.pop("distribution")

    expected_message = (
        "The distribution attribute must be specified on all coefficients cubes."
    )
    with self.assertRaisesRegex(AttributeError, expected_message):
        ApplyEMOS()(self.percentiles, self.coefficients, realizations_count=3)
def test_realizations_additional_predictor_at_sites(self):
    """Calibrate a site forecast with an additional predictor supplied."""
    # The same row of four values is expected in each of the four rows.
    expected_row = [2.44, 3.17, 4.26, 4.99]
    expected_data = np.tile(expected_row, (4, 1))

    extra_fields = CubeList([self.spot_altitude_cube])
    plugin = ApplyEMOS()
    calibrated = plugin(
        self.realizations_spot_cube,
        self.spot_coefficients,
        additional_fields=extra_fields,
        realizations_count=3,
    )
    self.assertArrayAlmostEqual(calibrated.data, expected_data)
def test_null_percentiles(self):
    """Apply "neutral" EMOS coefficients to a percentile forecast.

    The effect is small but non-zero due to limited sampling of the
    distribution. Both the data and its mean are compared with the
    expected values stored on the test case.
    """
    plugin = ApplyEMOS()
    calibrated = plugin(self.percentiles, self.coefficients, realizations_count=3)

    dim_coord_names = get_dim_coord_names(calibrated)
    self.assertIn("percentile", dim_coord_names)
    self.assertArrayAlmostEqual(calibrated.data, self.null_percentiles_expected)

    calibrated_mean = np.mean(calibrated.data)
    self.assertAlmostEqual(calibrated_mean, self.null_percentiles_expected_mean)
def test_null_realizations(self):
    """Apply "neutral" EMOS coefficients to a realization forecast.

    The output must keep a realization dimension, match the expected
    values and have the same mean as the input realizations.
    """
    # Taken from the input before the plugin is called.
    expected_mean = np.mean(self.realizations.data)

    # One constant 3x3 field per slice along the leading dimension.
    slice_values = (10.433333, 10.670206, 10.196461)
    expected_data = np.array([np.full((3, 3), value) for value in slice_values])

    plugin = ApplyEMOS()
    calibrated = plugin(self.realizations, self.coefficients)

    self.assertIn("realization", get_dim_coord_names(calibrated))
    self.assertArrayAlmostEqual(calibrated.data, expected_data)
    self.assertAlmostEqual(np.mean(calibrated.data), expected_mean)
def test_additional_predictor_site_mismatch(self):
    """Check for an error when forecast and predictor sites differ.

    The first entry of the altitude cube is sliced off, so the additional
    predictor no longer covers the same sites as the forecast.
    """
    truncated_altitude = self.spot_altitude_cube[1:]
    extra_fields = CubeList([truncated_altitude])

    expected_pattern = (
        "The forecast and additional predictors.*The mismatching sites are.*03001"
    )
    with self.assertRaisesRegex(ValueError, expected_pattern):
        ApplyEMOS()(
            self.realizations_spot_cube,
            self.spot_coefficients,
            additional_fields=extra_fields,
            realizations_count=3,
        )
def test_land_sea_mask_input_output_format(self):
    """Check a land-sea mask cannot be combined with a format change.

    A percentile forecast is supplied together with a probability template
    (requesting a different output format) and a land-sea mask; this
    combination must raise a ValueError.
    """
    expected_message = "If supplying a land-sea mask"
    with self.assertRaisesRegex(ValueError, expected_message):
        ApplyEMOS()(
            self.percentiles,
            self.coefficients,
            realizations_count=3,
            land_sea_mask=self.land_sea_mask,
            prob_template=self.probabilities,
        )
def test_null_percentiles_frt_fp_mismatch(self):
    """Apply "neutral" EMOS coefficients when the forecast times are offset.

    The forecast reference time of the percentile forecast is moved 15
    minutes later and the forecast period is shortened by the same amount,
    so the forecast no longer shares these values with the unmodified input.
    The calibrated output is expected to carry the forecast's own forecast
    reference time and forecast period, with the same data as the standard
    null percentiles case.
    """
    percentiles = self.percentiles.copy()
    # NOTE(review): assumes both coordinates are expressed in seconds.
    mins_15_to_secs = 900

    frt_coord = percentiles.coord("forecast_reference_time")
    fp_coord = percentiles.coord("forecast_period")
    frt_coord.points = frt_coord.points + mins_15_to_secs
    fp_coord.points = fp_coord.points - mins_15_to_secs
    expected_frt = frt_coord.points
    expected_fp = fp_coord.points

    result = ApplyEMOS()(percentiles, self.coefficients, realizations_count=3)

    # Coordinate points are numpy arrays. unittest's assertAlmostEqual is
    # designed for scalars and only happens to work for one-element arrays,
    # so the array-aware assertion is used instead.
    self.assertArrayAlmostEqual(
        result.coord("forecast_reference_time").points, expected_frt
    )
    self.assertArrayAlmostEqual(result.coord("forecast_period").points, expected_fp)
    self.assertArrayAlmostEqual(result.data, self.null_percentiles_expected)
    self.assertAlmostEqual(
        np.mean(result.data), self.null_percentiles_expected_mean
    )
def test_spread(self):
    """Apply EMOS coefficients that correct an underspread forecast.

    The mean of the calibrated data must equal the mean of the input
    percentiles.
    """
    self.coefficients.data = [1, 1, 0, 1]
    # Taken from the input before the plugin is called.
    expected_mean = np.mean(self.percentiles.data)

    # One constant 3x3 field per slice along the leading dimension.
    slice_values = (9.7121525, 10.4, 11.087847)
    expected_data = np.array([np.full((3, 3), value) for value in slice_values])

    plugin = ApplyEMOS()
    calibrated = plugin(self.percentiles, self.coefficients, realizations_count=3)

    self.assertArrayAlmostEqual(calibrated.data, expected_data)
    self.assertAlmostEqual(np.mean(calibrated.data), expected_mean)
def test_bias(self):
    """Apply EMOS coefficients that correct a bias.

    The mean of the calibrated data must equal the mean of the input
    percentiles shifted by 1.0.
    """
    self.coefficients.data = [0, 1, 1, 1]
    # Taken from the input before the plugin is called.
    expected_mean = np.mean(self.percentiles.data + 1.0)

    # One constant 3x3 field per slice along the leading dimension.
    slice_values = (11.265101, 11.4, 11.534898)
    expected_data = np.array([np.full((3, 3), value) for value in slice_values])

    plugin = ApplyEMOS()
    calibrated = plugin(self.percentiles, self.coefficients, realizations_count=3)

    self.assertArrayAlmostEqual(calibrated.data, expected_data)
    self.assertAlmostEqual(np.mean(calibrated.data), expected_mean)
def test_null_percentiles_truncnorm_standard_shape_parameters(self):
    """Apply "neutral" EMOS coefficients using a truncated normal.

    Every coefficients cube is labelled with the "truncnorm" distribution
    and shape parameters of [0, inf]. The result is expected to match the
    standard null percentiles case (a small but non-zero effect due to
    limited sampling of the distribution).
    """
    truncnorm_coefficients = iris.cube.CubeList([])
    for coefficient_cube in self.coefficients:
        attributes = coefficient_cube.attributes
        attributes["distribution"] = "truncnorm"
        attributes["shape_parameters"] = np.array([0, np.inf], np.float32)
        truncnorm_coefficients.append(coefficient_cube)

    plugin = ApplyEMOS()
    calibrated = plugin(
        self.percentiles, truncnorm_coefficients, realizations_count=3
    )

    self.assertIn("percentile", get_dim_coord_names(calibrated))
    self.assertArrayAlmostEqual(calibrated.data, self.null_percentiles_expected)
    calibrated_mean = np.mean(calibrated.data)
    self.assertAlmostEqual(calibrated_mean, self.null_percentiles_expected_mean)
def test_null_percentiles(self):
    """Apply "neutral" EMOS coefficients to a percentile forecast.

    The effect is small but non-zero due to limited sampling of the
    distribution. The mean of the calibrated data must equal the mean of
    the input percentiles.
    """
    # Taken from the input before the plugin is called.
    expected_mean = np.mean(self.percentiles.data)

    # One constant 3x3 field per slice along the leading dimension.
    slice_values = (10.265101, 10.4, 10.534898)
    expected_data = np.array([np.full((3, 3), value) for value in slice_values])

    plugin = ApplyEMOS()
    calibrated = plugin(self.percentiles, self.coefficients, realizations_count=3)

    self.assertIn("percentile", get_dim_coord_names(calibrated))
    self.assertArrayAlmostEqual(calibrated.data, expected_data)
    self.assertAlmostEqual(np.mean(calibrated.data), expected_mean)
def test_land_sea_mask(self):
    """Check coefficients can be applied to "land" points only.

    Only the first slice of the calibrated data is compared with the
    expected values.
    """
    # Update the "gamma" value.
    gamma_cube = self.coefficients[2]
    gamma_cube.data = 1

    expected_data_slice = np.array(
        [
            [9.7121525, 9.7121525, 10.2],
            [9.7121525, 9.7121525, 10.2],
            [9.7121525, 10.2, 10.2],
        ]
    )

    plugin = ApplyEMOS()
    calibrated = plugin(
        self.percentiles,
        self.coefficients,
        land_sea_mask=self.land_sea_mask,
        realizations_count=3,
    )
    first_slice = calibrated.data[0]
    self.assertArrayAlmostEqual(first_slice, expected_data_slice)
def test_land_sea_mask(self):
    """Check coefficients can be applied to "land" points only.

    A 3x3 land-sea mask cube is built locally. Only the first slice of the
    calibrated data is compared with the expected values.
    """
    mask_values = [[1, 1, 0], [1, 1, 0], [1, 0, 0]]
    land_sea_data = np.array(mask_values, dtype=np.int32)
    land_sea_mask = set_up_variable_cube(
        land_sea_data, name="land_binary_mask", units="1"
    )

    self.coefficients.data = [1, 1, 0, 1]

    # The expected value is 9.7121525 where the mask is 1 and 10.2 where
    # the mask is 0.
    expected_data_slice = np.array(
        [
            [9.7121525, 9.7121525, 10.2],
            [9.7121525, 9.7121525, 10.2],
            [9.7121525, 10.2, 10.2],
        ]
    )

    plugin = ApplyEMOS()
    calibrated = plugin(
        self.percentiles,
        self.coefficients,
        land_sea_mask=land_sea_mask,
        realizations_count=3,
    )
    first_slice = calibrated.data[0]
    self.assertArrayAlmostEqual(first_slice, expected_data_slice)
def test_null_percentiles_truncnorm_alternative_shape_parameters(self):
    """Apply "neutral" EMOS coefficients using a shifted truncated normal.

    Every coefficients cube is labelled with the "truncnorm" distribution
    and shape parameters of [10, inf], chosen so that the truncation has a
    visible effect: the mean of the calibrated data must differ from the
    mean of the input percentiles.
    """
    truncnorm_coefficients = iris.cube.CubeList([])
    for coefficient_cube in self.coefficients:
        attributes = coefficient_cube.attributes
        attributes["distribution"] = "truncnorm"
        attributes["shape_parameters"] = np.array([10, np.inf], np.float32)
        truncnorm_coefficients.append(coefficient_cube)

    # Taken from the input before the plugin is called.
    expected_mean = np.mean(self.percentiles.data)

    # One constant 3x3 field per slice along the leading dimension.
    slice_values = (10.275656, 10.405704, 10.5385)
    expected_data = np.array([np.full((3, 3), value) for value in slice_values])

    plugin = ApplyEMOS()
    calibrated = plugin(
        self.percentiles, truncnorm_coefficients, realizations_count=3
    )

    self.assertIn("percentile", get_dim_coord_names(calibrated))
    self.assertArrayAlmostEqual(calibrated.data, expected_data)
    self.assertNotAlmostEqual(np.mean(calibrated.data), expected_mean)
def process(
    *cubes: cli.inputcubelist,
    realizations_count: int = None,
    randomise=False,
    random_seed: int = None,
    ignore_ecc_bounds=False,
    tolerate_time_mismatch=False,
    predictor="mean",
    land_sea_mask_name: str = None,
    percentiles: cli.comma_separated_list = None,
):
    """Apply coefficients for Ensemble Model Output Statistics (EMOS).

    EMOS is otherwise known as Non-homogeneous Gaussian Regression (NGR).
    The coefficients are applied to the supplied forecast in order to
    calibrate it, and the calibrated forecast is returned as a cube. If no
    coefficients are provided a warning is issued and an uncalibrated
    forecast is returned instead: the probability template if one was
    supplied, otherwise the input forecast (resampled to the requested
    percentiles when these are given).

    Args:
        cubes (iris.cube.CubeList):
            A list of cubes containing:
            - A Cube containing the forecast to be calibrated. The input
            format could be either realizations, probabilities or
            percentiles.
            - A cubelist containing the coefficients used for calibration
            or None. If none then the input, or probability template if
            provided, is returned unchanged.
            - Optionally, cubes representing static additional predictors.
            These static additional predictors are expected not to have a
            time coordinate.
            - Optionally, a cube containing the land-sea mask on the same
            domain as the forecast that is to be calibrated. Land points
            are specified by ones and sea points are specified by zeros.
            The presence of a land-sea mask will enable land-only
            calibration, in which sea points are returned without the
            application of calibration. If a land-sea mask is provided,
            the land_sea_mask_name must also be provided, in order to
            identify the land-sea mask.
            - Optionally, a cube containing a probability forecast that
            will be used as a template when generating probability output
            when the input format of the forecast cube is not
            probabilities i.e. realizations or percentiles. If no
            coefficients are provided and a probability template is
            provided, the probability template forecast will be returned
            as the uncalibrated probability forecast.
        realizations_count (int):
            Option to specify the number of ensemble realizations that
            will be created from probabilities or percentiles when
            applying the EMOS coefficients.
        randomise (bool):
            Option to reorder the post-processed forecasts randomly. If
            not set, the ordering of the raw ensemble is used. This option
            is only valid when the input format is realizations.
        random_seed (int):
            Option to specify a value for the random seed for testing
            purposes, otherwise the default random seed behaviour is
            utilised. The random seed is used in the generation of the
            random numbers used for either the randomise option to order
            the input percentiles randomly, rather than use the ordering
            from the raw ensemble, or for splitting tied values within the
            raw ensemble, so that the values from the input percentiles
            can be ordered to match the raw ensemble.
        ignore_ecc_bounds (bool):
            If True, where the percentiles exceed the ECC bounds range,
            raises a warning rather than an exception. This occurs when
            the current forecast is in the form of probabilities and is
            converted to percentiles, as part of converting the input
            probabilities into realizations.
        tolerate_time_mismatch (bool):
            If True, tolerate a mismatch in validity time and forecast
            period for coefficients vs forecasts. Use with caution!
        predictor (str):
            String to specify the form of the predictor used to calculate
            the location parameter when estimating the EMOS coefficients.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the predictors.
        land_sea_mask_name (str):
            Name of the land-sea mask cube. This must be provided if a
            land-sea mask is provided within the list of input cubes, in
            order to identify the land-sea mask. Providing the land-sea
            mask ensures that only land points will be calibrated.
        percentiles (List[float]):
            The set of percentiles used to create the calibrated forecast.

    Returns:
        iris.cube.Cube:
            The calibrated forecast cube.
    """
    import warnings

    import numpy as np

    from improver.calibration import split_forecasts_and_coeffs
    from improver.calibration.ensemble_calibration import ApplyEMOS
    from improver.ensemble_copula_coupling.ensemble_copula_coupling import (
        ResamplePercentiles,
    )

    sorted_inputs = split_forecasts_and_coeffs(cubes, land_sea_mask_name)
    (
        forecast,
        coefficients,
        additional_predictors,
        land_sea_mask,
        prob_template,
    ) = sorted_inputs

    # Normal path: coefficients are available, so calibrate the forecast.
    if coefficients is not None:
        plugin = ApplyEMOS(percentiles=percentiles)
        return plugin(
            forecast,
            coefficients,
            additional_fields=additional_predictors,
            land_sea_mask=land_sea_mask,
            prob_template=prob_template,
            realizations_count=realizations_count,
            ignore_ecc_bounds=ignore_ecc_bounds,
            tolerate_time_mismatch=tolerate_time_mismatch,
            predictor=predictor,
            randomise=randomise,
            random_seed=random_seed,
        )

    # No coefficients: a supplied probability template takes precedence
    # over the input forecast as the uncalibrated output.
    if prob_template:
        warnings.warn(
            "There are no coefficients provided for calibration. As a "
            "probability template has been provided with the aim of "
            "creating a calibrated probability forecast, the probability "
            "template will be returned as the uncalibrated probability "
            "forecast."
        )
        return prob_template

    # No coefficients and no template: return the forecast itself, at the
    # requested percentiles when these were given.
    if percentiles:
        percentiles = [np.float32(p) for p in percentiles]
        resampler = ResamplePercentiles(ecc_bounds_warning=ignore_ecc_bounds)
        forecast = resampler(forecast, percentiles=percentiles)

    warnings.warn(
        "There are no coefficients provided for calibration. The "
        "uncalibrated forecast will be returned."
    )
    return forecast
def process(
    cube: cli.inputcube,
    coefficients: inputcoeffs = None,
    land_sea_mask: cli.inputcube = None,
    *,
    realizations_count: int = None,
    randomise=False,
    random_seed: int = None,
    ignore_ecc_bounds=False,
    predictor="mean",
):
    """Apply coefficients for Ensemble Model Output Statistics (EMOS).

    EMOS is otherwise known as Non-homogeneous Gaussian Regression (NGR).
    The coefficients are applied to the supplied forecast in order to
    calibrate it, and the calibrated forecast is returned as a cube. If no
    coefficients are provided a warning is issued and the input forecast is
    returned unchanged.

    Args:
        cube (iris.cube.Cube):
            A Cube containing the forecast to be calibrated. The input
            format could be either realizations, probabilities or
            percentiles.
        coefficients (iris.cube.CubeList):
            A cubelist containing the coefficients used for calibration or
            None. If none then the input is returned unchanged.
        land_sea_mask (iris.cube.Cube):
            A cube containing the land-sea mask on the same domain as the
            forecast that is to be calibrated. Land points are specified
            by ones and sea points are specified by zeros. If not None
            this argument will enable land-only calibration, in which sea
            points are returned without the application of calibration.
        realizations_count (int):
            Option to specify the number of ensemble realizations that
            will be created from probabilities or percentiles when
            applying the EMOS coefficients.
        randomise (bool):
            Option to reorder the post-processed forecasts randomly. If
            not set, the ordering of the raw ensemble is used. This option
            is only valid when the input format is realizations.
        random_seed (int):
            Option to specify a value for the random seed for testing
            purposes, otherwise the default random seed behaviour is
            utilised. The random seed is used in the generation of the
            random numbers used for either the randomise option to order
            the input percentiles randomly, rather than use the ordering
            from the raw ensemble, or for splitting tied values within the
            raw ensemble, so that the values from the input percentiles
            can be ordered to match the raw ensemble.
        ignore_ecc_bounds (bool):
            If True, where the percentiles exceed the ECC bounds range,
            raises a warning rather than an exception. This occurs when
            the current forecast is in the form of probabilities and is
            converted to percentiles, as part of converting the input
            probabilities into realizations.
        predictor (str):
            String to specify the form of the predictor used to calculate
            the location parameter when estimating the EMOS coefficients.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the predictors.

    Returns:
        iris.cube.Cube:
            The calibrated forecast cube.

    Raises:
        ValueError:
            If a land-sea mask is supplied whose name is not
            "land_binary_mask". Further validation errors may be raised by
            the ApplyEMOS plugin itself.
    """
    import warnings

    from improver.calibration.ensemble_calibration import ApplyEMOS

    # Without coefficients there is nothing to apply.
    if coefficients is None:
        warnings.warn(
            "There are no coefficients provided for calibration. The "
            "uncalibrated forecast will be returned."
        )
        return cube

    # An absent mask is acceptable; a supplied one must carry the right name.
    mask_is_acceptable = (
        not land_sea_mask or land_sea_mask.name() == "land_binary_mask"
    )
    if not mask_is_acceptable:
        raise ValueError(
            "The land_sea_mask cube does not have the name 'land_binary_mask'"
        )

    plugin_options = dict(
        land_sea_mask=land_sea_mask,
        realizations_count=realizations_count,
        ignore_ecc_bounds=ignore_ecc_bounds,
        predictor=predictor,
        randomise=randomise,
        random_seed=random_seed,
    )
    return ApplyEMOS()(cube, coefficients, **plugin_options)
def test_error_realizations_count(self):
    """Check for an error when realizations_count is not provided.

    A percentile forecast is passed to the plugin without the
    realizations_count argument, which must raise a ValueError.
    """
    expected_message = "The 'realizations_count' argument must be defined"
    with self.assertRaisesRegex(ValueError, expected_message):
        ApplyEMOS()(self.percentiles, self.coefficients)
def process(
    cube: cli.inputcube,
    coefficients: cli.inputcube = None,
    land_sea_mask: cli.inputcube = None,
    *,
    distribution,
    realizations_count: int = None,
    randomise=False,
    random_seed: int = None,
    ignore_ecc_bounds=False,
    predictor="mean",
    shape_parameters: cli.comma_separated_list = None,
):
    """Apply coefficients for Ensemble Model Output Statistics (EMOS).

    EMOS is otherwise known as Non-homogeneous Gaussian Regression (NGR).
    The coefficients are applied to the supplied forecast in order to
    calibrate it, and the calibrated forecast is returned as a cube. If no
    coefficients are provided a warning is issued and the input forecast is
    returned unchanged.

    Args:
        cube (iris.cube.Cube):
            A Cube containing the forecast to be calibrated. The input
            format could be either realizations, probabilities or
            percentiles.
        coefficients (iris.cube.Cube):
            A cube containing the coefficients used for calibration or
            None. If none then the input is returned unchanged.
        land_sea_mask (iris.cube.Cube):
            A cube containing the land-sea mask on the same domain as the
            forecast that is to be calibrated. Land points are specified
            by ones and sea points are specified by zeros. If not None
            this argument will enable land-only calibration, in which sea
            points are returned without the application of calibration.
        distribution (str):
            The distribution for constructing realizations, percentiles or
            probabilities. This should typically match the distribution
            used for minimising the Continuous Ranked Probability Score
            when estimating the EMOS coefficients. The distributions
            available are those supported by :data:`scipy.stats`.
        realizations_count (int):
            Option to specify the number of ensemble realizations that
            will be created from probabilities or percentiles for input
            into EMOS.
        randomise (bool):
            Option to reorder the post-processed forecasts randomly. If
            not set, the ordering of the raw ensemble is used. This option
            is only valid when the input format is realizations.
        random_seed (int):
            Option to specify a value for the random seed for testing
            purposes, otherwise the default random seed behaviour is
            utilised. The random seed is used in the generation of the
            random numbers used for either the randomise option to order
            the input percentiles randomly, rather than use the ordering
            from the raw ensemble, or for splitting tied values within the
            raw ensemble, so that the values from the input percentiles
            can be ordered to match the raw ensemble.
        ignore_ecc_bounds (bool):
            If True, where the percentiles exceed the ECC bounds range,
            raises a warning rather than an exception. This occurs when
            the current forecast is in the form of probabilities and is
            converted to percentiles, as part of converting the input
            probabilities into realizations.
        predictor (str):
            String to specify the form of the predictor used to calculate
            the location parameter when estimating the EMOS coefficients.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the predictors.
        shape_parameters (float or str):
            The shape parameters required for defining the distribution
            specified by the distribution argument. The shape parameters
            should either be a number or 'inf' or '-inf' to represent
            infinity. Further details about appropriate shape parameters
            are available in scipy.stats. For the truncated normal
            distribution with a lower bound of zero, as available when
            estimating EMOS coefficients, the appropriate shape parameters
            are 0 and inf.

    Returns:
        iris.cube.Cube:
            The calibrated forecast cube.

    Raises:
        ValueError:
            If the forecast cube is named "emos_coefficients" or
            "land_binary_mask".
        ValueError:
            If the coefficients cube does not have the name
            "emos_coefficients".
        ValueError:
            If a land-sea mask is supplied whose name is not
            "land_binary_mask".
    """
    import warnings

    import numpy as np

    from improver.calibration.ensemble_calibration import ApplyEMOS

    # Guard against a coefficients or mask cube supplied as the forecast.
    forecast_name = cube.name()
    if forecast_name in ["emos_coefficients", "land_binary_mask"]:
        raise ValueError(
            "Invalid forecast cube provided (name '{}')".format(forecast_name)
        )

    # Without coefficients there is nothing to apply.
    if coefficients is None:
        warnings.warn(
            "There are no coefficients provided for calibration. The "
            "uncalibrated forecast will be returned."
        )
        return cube

    coefficients_name = coefficients.name()
    if coefficients_name != "emos_coefficients":
        raise ValueError(
            "Invalid coefficients cube provided (name '{}')".format(
                coefficients_name
            )
        )

    # An absent mask is acceptable; a supplied one must carry the right name.
    mask_is_acceptable = (
        not land_sea_mask or land_sea_mask.name() == "land_binary_mask"
    )
    if not mask_is_acceptable:
        raise ValueError(
            "The land_sea_mask cube does not have the "
            "name 'land_binary_mask'"
        )

    # Convert the supplied shape parameter values (e.g. 'inf') to floats.
    if shape_parameters:
        shape_parameters = list(map(np.float32, shape_parameters))

    plugin_options = dict(
        land_sea_mask=land_sea_mask,
        realizations_count=realizations_count,
        ignore_ecc_bounds=ignore_ecc_bounds,
        predictor=predictor,
        distribution=distribution,
        shape_parameters=shape_parameters,
        randomise=randomise,
        random_seed=random_seed,
    )
    return ApplyEMOS()(cube, coefficients, **plugin_options)