def test_if_nearly_identical_data(self):
    """
    Test the expected values are returned when almost every percentile
    holds an identical value. Tied values cause the underlying scipy
    function to yield NaN by default; for this application any NaN
    values are replaced with the predicted mean value for all
    probability thresholds.
    """
    data = np.array([
        [[[1.0, 1.0, 1.0], [4.0, 2.0, 2.0], [3.0, 3.0, 3.0]]],
        [[[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]]],
        [[[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]]],
    ])
    expected = np.array([
        [[[1.0, 1.0, 1.0], [1.18685838, 2.0, 2.0], [3.0, 3.0, 3.0]]],
        [[[1.0, 1.0, 1.0], [2.66666667, 2.0, 2.0], [3.0, 3.0, 3.0]]],
        [[[1.0, 1.0, 1.0], [4.14647495, 2.0, 2.0], [3.0, 3.0, 3.0]]],
    ])
    cube = self.temperature_cube
    cube.data = data
    predictor = cube.collapsed("realization", iris.analysis.MEAN)
    variance = cube.collapsed("realization", iris.analysis.VARIANCE)
    result = Plugin()._location_and_scale_parameters_to_percentiles(
        predictor, variance, cube, self.percentiles)
    self.assertArrayAlmostEqual(result.data, expected)
def test_multiple_keyword_arguments_error(self):
    """
    Test that an error is raised when the no_of_percentiles and the
    percentiles keyword arguments are both supplied at once.
    """
    cube = self.current_temperature_forecast_cube
    predictor = cube.collapsed("realization", iris.analysis.MEAN)
    variance = cube.collapsed("realization", iris.analysis.VARIANCE)
    raw_forecast = cube.copy()
    n_percentiles = len(raw_forecast.coord("realization").points)
    msg = "Please specify either the number of percentiles or"
    with self.assertRaisesRegex(ValueError, msg):
        Plugin().process(
            predictor,
            variance,
            cube,
            no_of_percentiles=n_percentiles,
            percentiles=[10, 25, 50, 75, 90],
        )
def test_list_of_percentiles(self):
    """
    Test that a cube containing the expected percentiles is returned
    when an explicit list of percentiles is requested.
    """
    cube = self.current_temperature_forecast_cube
    predictor = cube.collapsed("realization", iris.analysis.MEAN)
    variance = cube.collapsed("realization", iris.analysis.VARIANCE)
    percentiles = [10, 50, 90]
    expected = np.array([
        [[[225.56812, 236.81812, 248.06812],
          [259.3181, 270.5681, 281.8181],
          [293.0681, 304.3181, 315.5681]]],
        [[[229.48332, 240.73332, 251.98332],
          [263.2333, 274.4833, 285.7333],
          [296.9833, 308.2333, 319.4833]]],
        [[[233.39853, 244.64853, 255.89853],
          [267.1485, 278.3985, 289.6485],
          [300.8985, 312.1485, 323.3985]]],
    ])
    result = Plugin().process(
        predictor, variance, cube, percentiles=percentiles)
    perc_points = result.coord("percentile").points
    self.assertEqual(len(percentiles), len(perc_points))
    self.assertArrayAlmostEqual(percentiles, perc_points)
    self.assertArrayAlmostEqual(expected, result.data, decimal=4)
def test_if_identical_data(self):
    """
    Test the expected values are returned when every percentile holds
    an identical value. Tied values cause the underlying scipy
    function to yield NaN by default; for this application any NaN
    values are replaced with the predicted mean value for all
    probability thresholds.
    """
    data = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])
    # Duplicate the field across the realization dimension.
    data = np.repeat(data[None, None], 3, axis=0)
    # With identical values in every realization, each percentile is
    # expected to equal the mean.
    expected = data.astype(float)
    cube = self.temperature_cube
    cube.data = data
    predictor = cube.collapsed("realization", iris.analysis.MEAN)
    variance = cube.collapsed("realization", iris.analysis.VARIANCE)
    result = Plugin()._location_and_scale_parameters_to_percentiles(
        predictor, variance, cube, self.percentiles)
    self.assertArrayAlmostEqual(result.data, expected)
def test_number_of_percentiles(self):
    """
    Test that the returned cube contains one percentile per input
    realization.
    """
    cube = self.current_temperature_forecast_cube
    predictor = cube.collapsed("realization", iris.analysis.MEAN)
    variance = cube.collapsed("realization", iris.analysis.VARIANCE)
    raw_forecast = cube.copy()
    no_of_percentiles = len(raw_forecast.coord("realization").points)
    expected = np.array([
        [[[227.42273, 238.67273, 249.92273],
          [261.1727, 272.4227, 283.6727],
          [294.9227, 306.1727, 317.4227]]],
        [[[229.48332, 240.73332, 251.98332],
          [263.2333, 274.4833, 285.7333],
          [296.9833, 308.2333, 319.4833]]],
        [[[231.54391, 242.79391, 254.04391],
          [265.2939, 276.5439, 287.7939],
          [299.0439, 310.2939, 321.5439]]],
    ])
    result = Plugin().process(
        predictor, variance, cube, no_of_percentiles=no_of_percentiles)
    self.assertEqual(
        no_of_percentiles, len(result.coord("percentile").points))
    self.assertArrayAlmostEqual(expected, result.data, decimal=4)
def test_simple_data(self):
    """
    Test the generated percentile values for an idealised set of data
    values between 1 and 3 used to create the mean (location
    parameter) and the variance (scale parameter).
    """
    # One constant field per realization: all 1s, all 2s, all 3s.
    data = np.array([np.full((1, 3, 3), value) for value in (1, 2, 3)])
    expected = np.array([
        np.full((1, 3, 3), value)
        for value in (0.71844843, 2.0, 3.28155157)
    ])
    cube = self.temperature_cube
    cube.data = data
    predictor = cube.collapsed("realization", iris.analysis.MEAN)
    variance = cube.collapsed("realization", iris.analysis.VARIANCE)
    result = Plugin()._location_and_scale_parameters_to_percentiles(
        predictor, variance, cube, self.percentiles)
    self.assertArrayAlmostEqual(result.data, expected)
def test_number_of_percentiles(self):
    """
    Test that the returned cube contains the requested number of
    percentiles.
    """
    expected = np.array([
        [[227.42273, 238.67273, 249.92273],
         [261.1727, 272.4227, 283.6727],
         [294.9227, 306.1727, 317.4227]],
        [[229.48332, 240.73332, 251.98332],
         [263.2333, 274.4833, 285.7333],
         [296.9833, 308.2333, 319.4833]],
        [[231.54391, 242.79391, 254.04391],
         [265.2939, 276.5439, 287.7939],
         [299.0439, 310.2939, 321.5439]],
    ])
    result = Plugin().process(
        self.forecast_predictor,
        self.forecast_variance,
        self.cube,
        no_of_percentiles=self.no_of_percentiles,
    )
    self.assertEqual(
        self.no_of_percentiles, len(result.coord("percentile").points))
    self.assertArrayAlmostEqual(expected, result.data, decimal=4)
def test_simple_data_truncnorm_distribution(self):
    """
    Test that an iris.cube.Cube matching the expected data values is
    returned when cubes containing the location parameter and scale
    parameter are passed in. Here the ensemble mean and standard
    deviation act as proxies for the location and scale parameters,
    and the resulting percentiles are drawn from a truncated normal
    distribution.
    """
    # One constant field per realization: all 1s, all 2s, all 3s.
    self.temperature_cube.data = np.array(
        [np.full((3, 3), value) for value in (1, 2, 3)])
    expected_data = np.array(
        [np.full((3, 3), value) for value in (1.0121, 3.1677, 5.6412)])
    # Shift the ensemble mean so it acts as the location parameter of
    # the truncated normal distribution.
    location = self.temperature_cube.collapsed(
        "realization", iris.analysis.MEAN)
    location.data = location.data + 1
    # Shift the ensemble standard deviation so it acts as the scale
    # parameter of the truncated normal distribution.
    scale = self.temperature_cube.collapsed(
        "realization", iris.analysis.STD_DEV)
    scale.data = scale.data + 1
    plugin = Plugin(
        distribution="truncnorm",
        shape_parameters=np.array([0, np.inf], dtype=np.float32),
    )
    result = plugin._location_and_scale_parameters_to_percentiles(
        location, scale, self.temperature_cube, self.percentiles)
    self.assertIsInstance(result, Cube)
    np.testing.assert_allclose(result.data, expected_data, rtol=1.0e-4)
def test_simple_data_truncnorm_distribution(self):
    """
    Test that an iris.cube.Cube matching the expected data values is
    returned when cubes containing the location parameter and scale
    parameter are passed in. Here the ensemble mean and variance act
    as proxies for the location and scale parameters, and the
    resulting percentiles are drawn from a truncated normal
    distribution.
    """
    # One constant field per realization: all 1s, all 2s, all 3s.
    self.temperature_cube.data = np.array(
        [np.full((3, 3), value) for value in (1, 2, 3)])
    expected_data = np.array([
        np.full((3, 3), value)
        for value in (1.3042759, 3.0300407, 4.8261294)
    ])
    # Shift the ensemble mean so it acts as the location parameter of
    # the truncated normal distribution.
    location = self.temperature_cube.collapsed(
        "realization", iris.analysis.MEAN)
    location.data = location.data + 1
    # Shift the ensemble variance so it acts as the scale parameter of
    # the truncated normal distribution.
    scale = self.temperature_cube.collapsed(
        "realization", iris.analysis.VARIANCE)
    scale.data = scale.data + 1
    plugin = Plugin(
        distribution="truncnorm",
        shape_parameters=np.array([0, np.inf], dtype=np.float32),
    )
    result = plugin._location_and_scale_parameters_to_percentiles(
        location, scale, self.temperature_cube, self.percentiles)
    self.assertIsInstance(result, Cube)
    self.assertArrayAlmostEqual(result.data, expected_data)
def test_many_percentiles(self):
    """
    Test that an iris.cube.Cube is returned when a large number of
    percentiles is requested.
    """
    percentiles = np.linspace(1, 99, num=1000, endpoint=True)
    result = Plugin()._location_and_scale_parameters_to_percentiles(
        self.location_parameter, self.scale_parameter,
        self.temperature_cube, percentiles)
    self.assertIsInstance(result, Cube)
def test_negative_percentiles(self):
    """
    Test that an error is raised when negative probabilities are
    requested as percentiles.
    """
    msg = "NaNs are present within the result for the"
    with self.assertRaisesRegex(ValueError, msg):
        Plugin()._location_and_scale_parameters_to_percentiles(
            self.location_parameter, self.scale_parameter,
            self.temperature_cube, [-10, 10])
def test_check_data(self):
    """
    Test that an Iris.cube.Cube holding the expected percentile values
    is returned when cubes containing location and scale parameters,
    equivalent to the ensemble mean and ensemble variance, are passed
    in.
    """
    result = Plugin()._location_and_scale_parameters_to_percentiles(
        self.location_parameter, self.scale_parameter,
        self.temperature_cube, self.percentiles)
    self.assertIsInstance(result, Cube)
    np.testing.assert_allclose(result.data, self.data, rtol=1.e-4)
def test_masked_scale_parameter(self):
    """
    Test that the returned data carries the correct mask when the
    scale parameter is masked.
    """
    mask = np.array([[0, 0, 0], [0, 0, 0], [1, 0, 1]])
    # The 2-d mask is expected to propagate to every percentile level.
    expected = np.ma.masked_array(
        self.data, mask=np.broadcast_to(mask, (3, 3, 3)))
    self.scale_parameter.data = np.ma.masked_array(
        self.scale_parameter.data, mask=mask)
    result = Plugin()._location_and_scale_parameters_to_percentiles(
        self.location_parameter, self.scale_parameter,
        self.temperature_cube, self.percentiles)
    np.testing.assert_allclose(result.data, expected, rtol=1.e-4)
def test_basic(self):
    """Test that the plugin returns an Iris.cube.Cube."""
    cube = self.current_temperature_forecast_cube
    predictor = cube.collapsed("realization", iris.analysis.MEAN)
    variance = cube.collapsed("realization", iris.analysis.VARIANCE)
    raw_forecast = cube.copy()
    n_realizations = len(raw_forecast.coord("realization").points)
    result = Plugin().process(
        predictor, variance, cube, no_of_percentiles=n_realizations)
    self.assertIsInstance(result, Cube)
def test_spot_forecasts_check_data(self):
    """
    Test that an Iris.cube.Cube holding the expected percentile values
    is returned when a spot-forecast cube containing the mean
    (location parameter) and variance (scale parameter) is passed in.
    """
    expected = np.reshape(self.data, (3, 1, 9))
    cube = self.temperature_spot_cube
    predictor = cube.collapsed("realization", iris.analysis.MEAN)
    variance = cube.collapsed("realization", iris.analysis.VARIANCE)
    result = Plugin()._location_and_scale_parameters_to_percentiles(
        predictor, variance, cube, self.percentiles)
    self.assertIsInstance(result, Cube)
    self.assertArrayAlmostEqual(result.data, expected)
def test_both_masked(self):
    """
    Test that the returned data carries the combined mask when both
    the scale and location parameters are masked.
    """
    mask1 = np.array([[0, 1, 0], [0, 0, 0], [0, 0, 0]])
    mask2 = np.array([[0, 0, 0], [1, 0, 0], [0, 0, 0]])
    # The union of the two masks is expected on every percentile level.
    expected = np.ma.masked_array(
        self.data, mask=np.broadcast_to(mask1 + mask2, (3, 3, 3)))
    self.location_parameter.data = np.ma.masked_array(
        self.location_parameter.data, mask=mask1)
    self.scale_parameter.data = np.ma.masked_array(
        self.scale_parameter.data, mask=mask2)
    result = Plugin()._location_and_scale_parameters_to_percentiles(
        self.location_parameter, self.scale_parameter,
        self.temperature_cube, self.percentiles)
    np.testing.assert_allclose(result.data, expected, rtol=1.0e-4)
def test_list_of_percentiles(self):
    """
    Test that a cube containing the expected percentiles is returned
    when an explicit list of percentiles is requested.
    """
    percentiles = [10, 50, 90]
    expected = np.array([
        [[225.56812, 236.81812, 248.06812],
         [259.3181, 270.5681, 281.8181],
         [293.0681, 304.3181, 315.5681]],
        [[229.48332, 240.73332, 251.98332],
         [263.2333, 274.4833, 285.7333],
         [296.9833, 308.2333, 319.4833]],
        [[233.39853, 244.64853, 255.89853],
         [267.1485, 278.3985, 289.6485],
         [300.8985, 312.1485, 323.3985]],
    ])
    result = Plugin().process(
        self.forecast_predictor,
        self.forecast_variance,
        self.cube,
        percentiles=percentiles,
    )
    perc_points = result.coord("percentile").points
    self.assertEqual(len(percentiles), len(perc_points))
    self.assertArrayAlmostEqual(percentiles, perc_points)
    self.assertArrayAlmostEqual(expected, result.data, decimal=4)
def process(cube: cli.inputcube, coefficients: cli.inputcube = None,
            land_sea_mask: cli.inputcube = None, *, distribution,
            realizations_count: int = None, randomise=False,
            random_seed: int = None, ignore_ecc_bounds=False,
            predictor='mean',
            shape_parameters: cli.comma_separated_list = None):
    """Applying coefficients for Ensemble Model Output Statistics.

    Load in arguments for applying coefficients for Ensemble Model Output
    Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). The coefficients are applied to the forecast
    that is supplied, so as to calibrate the forecast. The calibrated
    forecast is written to a cube. If no coefficients are provided the input
    forecast is returned unchanged.

    Args:
        cube (iris.cube.Cube):
            A Cube containing the forecast to be calibrated. The input format
            could be either realizations, probabilities or percentiles.
        coefficients (iris.cube.Cube):
            A cube containing the coefficients used for calibration or None.
            If none then the input is returned unchanged.
        land_sea_mask (iris.cube.Cube):
            A cube containing the land-sea mask on the same domain as the
            forecast that is to be calibrated. Land points are specified by
            ones and sea points are specified by zeros. If not None this
            argument will enable land-only calibration, in which sea points
            are returned without the application of calibration.
        distribution (str):
            The distribution for constructing realizations, percentiles or
            probabilities. This should typically match the distribution used
            for minimising the Continuous Ranked Probability Score when
            estimating the EMOS coefficients. The distributions available are
            those supported by :data:`scipy.stats`.
        realizations_count (int):
            Option to specify the number of ensemble realizations that will
            be created from probabilities or percentiles for input into EMOS.
        randomise (bool):
            Option to reorder the post-processed forecasts randomly. If not
            set, the ordering of the raw ensemble is used. This option is
            only valid when the input format is realizations.
        random_seed (int):
            Option to specify a value for the random seed for testing
            purposes, otherwise the default random seed behaviour is
            utilised. The random seed is used in the generation of the
            random numbers used for either the randomise option to order the
            input percentiles randomly, rather than use the ordering from
            the raw ensemble, or for splitting tied values within the raw
            ensemble, so that the values from the input percentiles can be
            ordered to match the raw ensemble.
        ignore_ecc_bounds (bool):
            If True, where the percentiles exceed the ECC bounds range,
            raises a warning rather than an exception. This occurs when the
            current forecasts is in the form of probabilities and is
            converted to percentiles, as part of converting the input
            probabilities into realizations.
        predictor (str):
            String to specify the form of the predictor used to calculate
            the location parameter when estimating the EMOS coefficients.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the predictors.
        shape_parameters (float or str):
            The shape parameters required for defining the distribution
            specified by the distribution argument. The shape parameters
            should either be a number or 'inf' or '-inf' to represent
            infinity. Further details about appropriate shape parameters
            are available in scipy.stats. For the truncated normal
            distribution with a lower bound of zero, as available when
            estimating EMOS coefficients, the appropriate shape parameters
            are 0 and inf.

    Returns:
        iris.cube.Cube:
            The calibrated forecast cube.

    Raises:
        ValueError:
            If the current forecast is a coefficients cube.
        ValueError:
            If the coefficients cube does not have the right name of
            "emos_coefficients".
        ValueError:
            If the forecast type is 'percentiles' or 'probabilities' and the
            realizations_count argument is not provided.
    """
    import warnings

    import numpy as np
    from iris.exceptions import CoordinateNotFoundError

    from improver.calibration.ensemble_calibration import (
        ApplyCoefficientsFromEnsembleCalibration)
    from improver.ensemble_copula_coupling.ensemble_copula_coupling import (
        EnsembleReordering, ConvertLocationAndScaleParametersToPercentiles,
        ConvertLocationAndScaleParametersToProbabilities,
        ConvertProbabilitiesToPercentiles, RebadgePercentilesAsRealizations,
        ResamplePercentiles)
    from improver.calibration.utilities import merge_land_and_sea
    from improver.metadata.probabilistic import find_percentile_coordinate

    current_forecast = cube

    # Guard against ancillary cubes being passed in as the forecast.
    if current_forecast.name() in ['emos_coefficients', 'land_binary_mask']:
        msg = "The current forecast cube has the name {}"
        raise ValueError(msg.format(current_forecast.name()))

    # With no coefficients, calibration is a no-op: warn and return.
    if coefficients is None:
        msg = ("There are no coefficients provided for calibration. The "
               "uncalibrated forecast will be returned.")
        warnings.warn(msg)
        return current_forecast

    if coefficients.name() != 'emos_coefficients':
        msg = ("The current coefficients cube does not have the "
               "name 'emos_coefficients'")
        raise ValueError(msg)

    if land_sea_mask and land_sea_mask.name() != 'land_binary_mask':
        msg = ("The land_sea_mask cube does not have the "
               "name 'land_binary_mask'")
        raise ValueError(msg)

    # Keep an unmodified copy so the output can match the input forecast
    # type and, for land-only calibration, supply the uncalibrated sea
    # points.
    original_current_forecast = current_forecast.copy()

    # Determine the input forecast type from the cube: a percentile
    # coordinate implies percentiles, otherwise realizations; a
    # "probability_of" name overrides both below.
    try:
        find_percentile_coordinate(current_forecast)
        input_forecast_type = "percentiles"
    except CoordinateNotFoundError:
        input_forecast_type = "realizations"

    if current_forecast.name().startswith("probability_of"):
        input_forecast_type = "probabilities"
        conversion_plugin = ConvertProbabilitiesToPercentiles(
            ecc_bounds_warning=ignore_ecc_bounds)
    elif input_forecast_type == "percentiles":
        # Initialise plugin to resample percentiles so that the percentiles
        # are evenly spaced.
        conversion_plugin = ResamplePercentiles(
            ecc_bounds_warning=ignore_ecc_bounds)

    # Probabilities and percentiles must be converted to realizations
    # before EMOS coefficients can be applied.
    if input_forecast_type in ["percentiles", "probabilities"]:
        if not realizations_count:
            raise ValueError(
                "The current forecast has been provided as {0}. "
                "These {0} need to be converted to realizations "
                "for ensemble calibration. The realizations_count "
                "argument is used to define the number of realizations "
                "to construct from the input {0}, so if the "
                "current forecast is provided as {0} then "
                "realizations_count must be defined.".format(
                    input_forecast_type))
        current_forecast = conversion_plugin.process(
            current_forecast, no_of_percentiles=realizations_count)
        current_forecast = (
            RebadgePercentilesAsRealizations().process(current_forecast))

    # Apply coefficients as part of Ensemble Model Output Statistics (EMOS).
    ac = ApplyCoefficientsFromEnsembleCalibration(predictor=predictor)
    location_parameter, scale_parameter = ac.process(
        current_forecast, coefficients, landsea_mask=land_sea_mask)

    # Shape parameters arrive as strings from the CLI; cast for scipy.
    if shape_parameters:
        shape_parameters = [np.float32(x) for x in shape_parameters]

    # Convert the output forecast type (i.e. realizations, percentiles,
    # probabilities) to match the input forecast type.
    if input_forecast_type == "probabilities":
        result = ConvertLocationAndScaleParametersToProbabilities(
            distribution=distribution,
            shape_parameters=shape_parameters).process(
                location_parameter, scale_parameter,
                original_current_forecast)
    elif input_forecast_type == "percentiles":
        perc_coord = find_percentile_coordinate(original_current_forecast)
        result = ConvertLocationAndScaleParametersToPercentiles(
            distribution=distribution,
            shape_parameters=shape_parameters).process(
                location_parameter, scale_parameter,
                original_current_forecast, percentiles=perc_coord.points)
    elif input_forecast_type == "realizations":
        # Ensemble Copula Coupling to generate realizations
        # from the location and scale parameter.
        no_of_percentiles = len(current_forecast.coord('realization').points)
        percentiles = ConvertLocationAndScaleParametersToPercentiles(
            distribution=distribution,
            shape_parameters=shape_parameters).process(
                location_parameter, scale_parameter,
                original_current_forecast,
                no_of_percentiles=no_of_percentiles)
        result = EnsembleReordering().process(
            percentiles, current_forecast, random_ordering=randomise,
            random_seed=random_seed)

    if land_sea_mask:
        # Fill in masked sea points with uncalibrated data.
        merge_land_and_sea(result, original_current_forecast)

    return result