def test_coefficient_values_for_truncnorm_distribution(self):
    """Check coefficient values, names and shape parameters for truncnorm.

    The plugin is run on the wind speed forecast and truth fixtures using a
    truncated normal distribution. According to the original test notes, a
    linear least-squares regression provides the initial guess in this case.
    Every returned cube must also carry the ``shape_parameters`` attribute
    ``[0, inf]`` as float32.
    """
    plugin = Plugin("truncnorm")
    coefficients = plugin.process(
        self.historic_wind_speed_forecast_cube, self.wind_speed_truth_cube
    )

    values = np.array([cube.data for cube in coefficients])
    self.assertEMOSCoefficientsAlmostEqual(
        values, self.expected_mean_predictor_truncnorm
    )

    names = [cube.name() for cube in coefficients]
    self.assertArrayEqual(names, self.expected_coeff_names)

    for cube in coefficients:
        self.assertArrayEqual(
            cube.attributes["shape_parameters"],
            np.array([0, np.inf], dtype=np.float32),
        )
def test_realizations_predictor_estimate_coefficients_masked_halo(self):
    """Check the linear-model initial guess for masked-halo realizations.

    The ensemble realizations are used as the predictor and the initial
    guess is estimated using a linear model. Per the original test notes,
    the regression gives an intercept of 0.333333 with different weights for
    the realizations, because some realizations are closer to the truth.
    The inputs are the original data surrounded by a halo of masked NaNs,
    which is expected to give the same coefficients as the original data.
    """
    use_linear_model = True
    plugin = Plugin(self.distribution, self.desired_units)

    initial_guess = plugin.compute_initial_guess(
        self.truth_masked_halo,
        self.current_forecast_predictor_realizations_masked_halo,
        "realizations",
        use_linear_model,
        no_of_realizations=self.no_of_realizations,
    )

    self.assertArrayAlmostEqual(
        self.expected_realizations_predictor_with_linear_model, initial_guess
    )
def test_basic(self):
    """Check that process returns a CubeList with one cube per coefficient.

    The length of the result is compared against ``self.coeff_names``.
    """
    coefficients = Plugin(self.distribution).process(
        self.historic_temperature_forecast_cube, self.temperature_truth_cube
    )

    self.assertIsInstance(coefficients, iris.cube.CubeList)
    expected_count = len(self.coeff_names)
    self.assertEqual(len(coefficients), expected_count)
def test_missing_cube(self):
    """Test that an exception is raised if the truth cube is missing.

    ``None`` is passed in place of the truth, and the plugin is expected to
    raise a ValueError whose message matches ``.*cubes must be provided``.
    Only the missing-truth case is exercised here.

    The historic forecast is deliberately left in its original units: a unit
    conversion has no bearing on the missing-input check, so none is applied.
    """
    plugin = Plugin(self.distribution)
    msg = ".*cubes must be provided"
    with self.assertRaisesRegex(ValueError, msg):
        plugin.process(self.historic_temperature_forecast_cube, None)
def test_non_matching_units(self):
    """Check that mismatched forecast and truth units raise a ValueError.

    The historic forecast is converted to Fahrenheit while the truth cube is
    left untouched, and no desired units are given to the plugin.
    """
    forecast = self.historic_temperature_forecast_cube
    forecast.convert_units("Fahrenheit")
    plugin = Plugin(self.distribution)

    with self.assertRaisesRegex(ValueError, "The historic forecast units"):
        plugin.process(forecast, self.temperature_truth_cube)
def test_historic_forecast_unit_conversion(self):
    """Check the coefficients when the forecast arrives in different units.

    The historic forecast is converted to Fahrenheit before processing and
    the plugin is asked for Kelvin as the desired units; the resulting
    coefficients must still match the expected normal-distribution values.
    """
    forecast = self.historic_temperature_forecast_cube
    forecast.convert_units("Fahrenheit")

    plugin = Plugin(self.distribution, desired_units="Kelvin")
    coefficients = plugin.process(forecast, self.temperature_truth_cube)

    values = np.array([cube.data for cube in coefficients])
    self.assertEMOSCoefficientsAlmostEqual(
        values, self.expected_mean_predictor_norm
    )
def test_too_few_coefficients(self):
    """Check that a coefficient/name count mismatch raises a ValueError.

    Three coefficients are supplied to ``create_coefficients_cubelist`` for a
    truncated normal distribution with the ensemble mean as the predictor.
    """
    too_few_coeffs = [1, 2, 3]
    plugin = Plugin(
        distribution="truncnorm",
        desired_units="Fahrenheit",
        predictor="mean",
    )

    with self.assertRaisesRegex(ValueError, "The number of coefficients in"):
        plugin.create_coefficients_cubelist(
            too_few_coeffs, self.historic_forecast
        )
def test_coefficient_values_for_norm_distribution(self):
    """Check coefficient values and names for a normal distribution.

    According to the original test notes, a linear least-squares regression
    is used to construct the initial guess in this case.
    """
    coefficients = Plugin(self.distribution).process(
        self.historic_temperature_forecast_cube, self.temperature_truth_cube
    )

    values = np.array([cube.data for cube in coefficients])
    self.assertEMOSCoefficientsAlmostEqual(
        values, self.expected_mean_predictor_norm
    )

    names = [cube.name() for cube in coefficients]
    self.assertArrayEqual(names, self.expected_coeff_names)
def test_attributes_for_truncnorm(self):
    """Check the cube attributes produced for a truncated normal distribution.

    The expected attributes fixture is updated in place with the truncnorm
    distribution name and its ``[0, inf]`` float32 shape parameters before
    being compared against the attributes of every returned cube.
    """
    truncnorm = "truncnorm"
    self.attributes["distribution"] = truncnorm
    self.attributes["shape_parameters"] = np.array(
        [0, np.inf], dtype=np.float32
    )

    plugin = Plugin(
        distribution=truncnorm,
        desired_units=self.desired_units,
        predictor=self.predictor,
    )
    cubelist = plugin.create_coefficients_cubelist(
        self.optimised_coeffs, self.historic_forecast
    )

    for cube in cubelist:
        self.assertDictEqual(cube.attributes, self.attributes)
def test_coefficient_values_for_norm_distribution_max_iterations(self):
    """Check coefficient values and names when max_iterations is specified.

    The plugin is built for a normal distribution with ``max_iterations=800``
    and must still produce the expected coefficient values and names.
    """
    plugin = Plugin(self.distribution, max_iterations=800)
    coefficients = plugin.process(
        self.historic_temperature_forecast_cube, self.temperature_truth_cube
    )

    values = np.array([cube.data for cube in coefficients])
    self.assertEMOSCoefficientsAlmostEqual(
        values, self.expected_mean_predictor_norm
    )

    names = [cube.name() for cube in coefficients]
    self.assertArrayEqual(names, self.expected_coeff_names)
def test_basic_mean_predictor(self):
    """Check the default initial guess when the ensemble mean is the predictor.

    The linear-model flag is switched off, so (per the original test notes)
    the default values for the initial guess are expected. The result must
    be a numpy array.
    """
    use_linear_model = False
    plugin = Plugin(self.distribution, self.desired_units)

    initial_guess = plugin.compute_initial_guess(
        self.truth,
        self.current_forecast_predictor_mean,
        self.predictor,
        use_linear_model,
    )

    self.assertIsInstance(initial_guess, np.ndarray)
    self.assertArrayAlmostEqual(
        initial_guess, self.expected_mean_predictor_no_linear_model
    )
def setUp(self):
    """Create the land-sea mask cube, the plugin and a small cube to mask."""
    super().setUp()

    land_sea_values = np.array(
        [[0, 1, 0], [0, 1, 1], [1, 1, 0]], dtype=np.int32
    )
    self.mask_cube = set_up_variable_cube(
        land_sea_values, name="land_binary_mask", units="1"
    )

    # NOTE(review): elsewhere in these tests the second positional argument
    # of Plugin is the desired units, so this cycletime-like string looks
    # like a leftover - confirm against the Plugin signature.
    self.plugin = Plugin("norm", "20171110T0000Z")

    # Only the first two slices of the temperature truth cube are needed.
    self.cube3D = self.temperature_truth_cube[0:2, ...].copy()
def test_coefficients_norm_realizations_no_statsmodels(self):
    """Check coefficients for a normal distribution with realizations.

    The ensemble realizations are used as the predictor. The data of each
    returned cube is promoted to at least 1D and concatenated before being
    compared with the "no statsmodels" expected values.
    """
    plugin = Plugin(self.distribution, predictor="realizations")
    coefficients = plugin.process(
        self.historic_temperature_forecast_cube, self.temperature_truth_cube
    )

    flattened = np.concatenate(
        [np.atleast_1d(cube.data) for cube in coefficients]
    )
    self.assertEMOSCoefficientsAlmostEqual(
        flattened, self.expected_realizations_norm_no_statsmodels
    )

    names = [cube.name() for cube in coefficients]
    self.assertArrayEqual(names, self.expected_coeff_names)
def setUp(self):
    """Build the plugin, historic forecasts and expected metadata."""
    super().setUp()

    reference_time = datetime.datetime(2017, 11, 10, 0, 0)
    validity_time = datetime.datetime(2017, 11, 10, 4, 0)

    # Historic forecasts without and with a realization dimension.
    self.historic_forecast = _create_historic_forecasts(
        np.ones((3, 3), dtype=np.float32), validity_time, reference_time,
    ).merge_cube()
    self.historic_forecast_with_realizations = _create_historic_forecasts(
        np.ones((3, 3, 3), dtype=np.float32),
        validity_time,
        reference_time,
        realizations=[0, 1, 2],
    ).merge_cube()

    self.optimised_coeffs = np.array([0, 1, 2, 3], np.int32)
    self.distribution = "norm"
    self.desired_units = "degreesC"
    self.predictor = "mean"
    self.plugin = Plugin(
        distribution=self.distribution,
        desired_units=self.desired_units,
        predictor=self.predictor,
    )

    frt_coord = self.historic_forecast.coord("forecast_reference_time")
    self.expected_frt = frt_coord.cell(-1).point

    # Expected spatial metadata: the median point, and the overall extent of
    # the bounds once they have been guessed on the historic forecast.
    x_coord = self.historic_forecast.coord(axis="x")
    self.expected_x_coord_points = np.median(x_coord.points)
    x_coord.guess_bounds()
    x_bounds = self.historic_forecast.coord(axis="x").bounds
    self.expected_x_coord_bounds = np.array(
        [[np.min(x_bounds), np.max(x_bounds)]]
    )

    y_coord = self.historic_forecast.coord(axis="y")
    self.expected_y_coord_points = np.median(y_coord.points)
    y_coord.guess_bounds()
    y_bounds = self.historic_forecast.coord(axis="y").bounds
    self.expected_y_coord_bounds = np.array(
        [[np.min(y_bounds), np.max(y_bounds)]]
    )

    expected_attributes = generate_mandatory_attributes(
        [self.historic_forecast]
    )
    expected_attributes["diagnostic_standard_name"] = (
        self.historic_forecast.name()
    )
    expected_attributes["distribution"] = self.distribution
    expected_attributes["title"] = (
        "Ensemble Model Output Statistics coefficients"
    )
    self.attributes = expected_attributes
def test_coefficient_values_for_norm_distribution_mismatching_inputs(self):
    """Check coefficients when forecasts and truths only partially overlap.

    The third historic forecast and the first truth are dropped before
    merging, so the two inputs have some mismatches in validity time. The
    coefficient values and names for a normal distribution are then checked.
    """
    expected_values = [23.4593, 0.9128, 0.0041, 0.4885]

    forecasts_subset = self.historic_forecasts[:2] + self.historic_forecasts[3:]
    partial_forecast_cube = forecasts_subset.merge_cube()
    partial_truth_cube = self.truth[1:].merge_cube()

    coefficients = Plugin(self.distribution).process(
        partial_forecast_cube, partial_truth_cube
    )

    values = np.array([cube.data for cube in coefficients])
    self.assertEMOSCoefficientsAlmostEqual(values, expected_values)

    names = [cube.name() for cube in coefficients]
    self.assertArrayEqual(names, self.expected_coeff_names)
def test_mean_predictor_estimate_coefficients(self):
    """Check the linear-model initial guess with the ensemble mean predictor.

    Per the original test notes, the truth and the forecast differ by an
    offset of one during the training period, so the linear regression is
    expected to give a gradient of 1 and an intercept of 1.
    """
    use_linear_model = True
    plugin = Plugin(self.distribution, self.desired_units)

    initial_guess = plugin.compute_initial_guess(
        self.truth,
        self.current_forecast_predictor_mean,
        self.predictor,
        use_linear_model,
    )

    self.assertArrayAlmostEqual(
        self.expected_mean_predictor_with_linear_model, initial_guess
    )
def test_coefficients_from_realizations(self):
    """Check the coefficient cubes when realizations are the predictor.

    Six coefficients are supplied. The returned cube names must match the
    expected names, and the ``emos_coefficient_beta`` cube must carry the
    realization points of the historic forecast.
    """
    coeffs = [0, 1, 2, 3, 4, 5]
    plugin = Plugin(
        distribution=self.distribution,
        desired_units=self.desired_units,
        predictor="realizations",
    )

    cubelist = plugin.create_coefficients_cubelist(
        coeffs, self.historic_forecast_with_realizations
    )

    names = [cube.name() for cube in cubelist]
    self.assertEqual(names, self.expected_coeff_names)

    beta_cube = cubelist.extract("emos_coefficient_beta", strict=True)
    expected_points = self.historic_forecast_with_realizations.coord(
        "realization"
    ).points
    self.assertArrayEqual(beta_cube.coord("realization").points, expected_points)
def test_coefficient_values_for_norm_distribution_landsea_mask(self):
    """Check coefficient values and names when a land-sea mask is supplied.

    Per the original test notes, the input data is surrounded by a halo that
    the land-sea mask masks out, so the coefficients are expected to match
    those from the original data. A linear least-squares regression is
    described as providing the initial guess.
    """
    coefficients = Plugin(self.distribution).process(
        self.historic_temperature_forecast_cube_halo,
        self.temperature_truth_cube_halo,
        landsea_mask=self.landsea_cube,
    )

    values = np.array([cube.data for cube in coefficients])
    self.assertEMOSCoefficientsAlmostEqual(
        values, self.expected_mean_predictor_norm
    )

    names = [cube.name() for cube in coefficients]
    self.assertArrayEqual(names, self.expected_coeff_names)
def test_coefficients_norm_distribution_default_initial_guess(self):
    """Check coefficients when the default initial guess is used.

    ``ESTIMATE_COEFFICIENTS_FROM_LINEAR_MODEL_FLAG`` is switched off on the
    plugin instance so that the default initial guess is used instead of a
    linear least-squares regression. The original test notes state that
    reducing the tolerance would bring these coefficients closer to those
    obtained with the regression-based initial guess.
    """
    expected_values = [-0.0001, 0.9974, 0.0001, 1.0374]

    plugin = Plugin(self.distribution)
    plugin.ESTIMATE_COEFFICIENTS_FROM_LINEAR_MODEL_FLAG = False
    coefficients = plugin.process(
        self.historic_temperature_forecast_cube, self.temperature_truth_cube
    )

    values = np.array([cube.data for cube in coefficients])
    self.assertEMOSCoefficientsAlmostEqual(values, expected_values)

    names = [cube.name() for cube in coefficients]
    self.assertArrayEqual(names, self.expected_coeff_names)
def setUp(self):
    """Build coefficient cubelists and the expected parameter data.

    Coefficient cubelists are created for the ensemble mean predictor and
    for the ensemble realizations predictor (once with the "statsmodels"
    expected coefficients and once with the "no statsmodels" ones). The
    coefficient values come from expected-coefficient fixtures provided by
    the parent set-up; the original notes describe them as the expected
    outputs of the coefficient-estimation tests, built from the same
    underlying ensemble realizations.
    """
    super().setUp()
    template_cube = self.current_temperature_forecast_cube

    # Ensemble mean as the predictor.
    mean_estimator = EstimateCoefficientsForEnsembleCalibration(
        "norm", desired_units="Celsius"
    )
    self.coeffs_from_mean = mean_estimator.create_coefficients_cubelist(
        self.expected_mean_predictor_norm, template_cube,
    )

    # Ensemble realizations as the predictor, using the coefficients
    # expected when statsmodels is used.
    statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
        "norm", desired_units="Celsius", predictor="realizations"
    )
    self.coeffs_from_statsmodels_realizations = (
        statsmodels_estimator.create_coefficients_cubelist(
            self.expected_realizations_norm_statsmodels, template_cube,
        )
    )

    # Ensemble realizations as the predictor, using the coefficients
    # expected when statsmodels is not used.
    no_statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
        "norm", desired_units="Celsius", predictor="realizations"
    )
    self.coeffs_from_no_statsmodels_realizations = (
        no_statsmodels_estimator.create_coefficients_cubelist(
            self.expected_realizations_norm_no_statsmodels, template_cube,
        )
    )

    # Expected data shared by several tests.
    self.expected_loc_param_mean = np.array(
        [
            [273.7854, 274.6913, 275.4461],
            [276.8652, 277.6502, 278.405],
            [279.492, 280.1562, 280.9715],
        ],
        dtype=np.float32,
    )
    self.expected_scale_param_mean = np.array(
        [
            [0.1952, 0.1974, 0.0117],
            [0.0226, 0.0197, 0.0117],
            [0.0532, 0.0029, 0.0007],
        ],
        dtype=np.float32,
    )
    self.expected_loc_param_statsmodels_realizations = np.array(
        [
            [274.1395, 275.0975, 275.258],
            [276.9771, 277.3487, 278.3144],
            [280.0085, 280.2506, 281.1632],
        ],
        dtype=np.float32,
    )
    self.expected_loc_param_no_statsmodels_realizations = np.array(
        [
            [273.4695, 274.4673, 275.3034],
            [276.8648, 277.733, 278.5632],
            [279.7562, 280.4913, 281.3889],
        ],
        dtype=np.float32,
    )

    # Output cubes wrapping the expected mean-predictor data.
    self.expected_loc_param_mean_cube = set_up_variable_cube(
        self.expected_loc_param_mean,
        name="location_parameter",
        units="K",
        attributes=MANDATORY_ATTRIBUTE_DEFAULTS,
    )
    self.expected_scale_param_mean_cube = set_up_variable_cube(
        self.expected_scale_param_mean,
        name="scale_parameter",
        units="Kelvin^2",
        attributes=MANDATORY_ATTRIBUTE_DEFAULTS,
    )
def setUp(self):
    """Build coefficient cubelists and the expected parameter data.

    Cubelists are created for: the ensemble mean predictor, a time-shifted
    variant of it, point-by-point mean coefficients, the ensemble
    realizations predictor, per-site realization coefficients, and the
    ensemble mean with an additional altitude predictor. The coefficient
    values come from expected-coefficient fixtures provided by the parent
    set-up; the original notes describe them as the expected outputs of the
    coefficient-estimation tests.
    """
    super().setUp()
    historic_cube = self.historic_temperature_forecast_cube

    # Ensemble mean as the predictor.
    mean_estimator = EstimateCoefficientsForEnsembleCalibration(
        "norm", desired_units="Celsius"
    )
    self.coeffs_from_mean = mean_estimator.create_coefficients_cubelist(
        self.expected_mean_pred_norm, historic_cube, CubeList([historic_cube]),
    )

    # Ensemble mean as the predictor, with the time and forecast_period
    # points of the template cube shifted by 3600.
    shifted_cube = historic_cube.copy()
    for coord_name in ["time", "forecast_period"]:
        shifted_coord = shifted_cube.coord(coord_name)
        shifted_cube.coord(coord_name).points = [
            point + 3600 for point in shifted_coord.points
        ]
    self.coeffs_from_mean_timeshift = mean_estimator.create_coefficients_cubelist(
        self.expected_mean_pred_norm, shifted_cube, CubeList([shifted_cube]),
    )

    # Ensemble mean as the predictor with separate coefficients at each
    # point: the four coefficients are repeated over a 3x3 grid.
    point_by_point_estimator = EstimateCoefficientsForEnsembleCalibration(
        "norm", point_by_point=True, desired_units="Celsius"
    )
    repeated_coeffs = np.stack([self.expected_mean_pred_norm] * 9)
    point_by_point_predictor = repeated_coeffs.T.reshape(4, 3, 3)
    self.coeffs_from_mean_point_by_point = (
        point_by_point_estimator.create_coefficients_cubelist(
            point_by_point_predictor, historic_cube, CubeList([historic_cube]),
        )
    )

    # Ensemble realizations as the predictor.
    realizations_estimator = EstimateCoefficientsForEnsembleCalibration(
        "norm", desired_units="Celsius", predictor="realizations"
    )
    self.coeffs_from_realizations = (
        realizations_estimator.create_coefficients_cubelist(
            self.expected_realizations_norm,
            historic_cube,
            CubeList([historic_cube]),
        )
    )

    # Ensemble realizations as the predictor with separate coefficients at
    # each site; anything that is not already 1D is squeezed.
    expected_realizations_each_site = []
    for array in self.expected_realizations_each_site.values():
        if array.ndim != 1:
            array = np.squeeze(array)
        expected_realizations_each_site.append(array)
    sites_estimator = EstimateCoefficientsForEnsembleCalibration(
        "norm", predictor="realizations", point_by_point=True
    )
    # NOTE(review): the template is the spot cube but the cubelist passed as
    # the third argument holds the gridded cube - confirm this is intended.
    self.coeffs_from_realizations_sites = (
        sites_estimator.create_coefficients_cubelist(
            expected_realizations_each_site,
            self.historic_forecast_spot_cube,
            CubeList([historic_cube]),
        )
    )

    # Ensemble mean with an additional static predictor (surface altitude),
    # which carries no time coordinates.
    self.altitude = set_up_variable_cube(
        np.ones((3, 3), dtype=np.float32), name="surface_altitude", units="m"
    )
    for coord in ["time", "forecast_reference_time", "forecast_period"]:
        self.altitude.remove_coord(coord)
    altitude_estimator = EstimateCoefficientsForEnsembleCalibration(
        "norm", desired_units="Celsius"
    )
    self.coeffs_from_mean_alt = altitude_estimator.create_coefficients_cubelist(
        self.expected_mean_pred_norm_alt,
        historic_cube,
        CubeList([historic_cube, self.altitude]),
    )

    # Expected data shared by several tests.
    self.expected_loc_param_mean = np.array(
        [
            [273.7014, 274.6534, 275.4469],
            [276.9385, 277.7636, 278.5570],
            [279.6996, 280.1122, 281.2547],
        ],
        dtype=np.float32,
    )
    self.expected_scale_param_mean = np.array(
        [
            [0.4813, 0.4840, 0.1295],
            [0.1647, 0.1538, 0.1295],
            [0.2517, 0.3393, 0.1076],
        ],
        dtype=np.float32,
    )
    self.expected_loc_param_realizations = np.array(
        [
            [274.388, 275.3053, 275.4492],
            [277.1295, 277.3866, 278.4672],
            [280.2007, 280.3929, 281.2602],
        ],
        dtype=np.float32,
    )
    self.expected_loc_param_realizations_sites = np.array(
        [277.7531, 277.4529, 277.553, 277.2528], dtype=np.float32,
    )
    self.expected_scale_param_realizations_sites = np.array(
        [0.0005, 0.0005, 0.0005, 0.0005], dtype=np.float32
    )
    self.expected_loc_param_mean_alt = np.array(
        [
            [275.18134, 276.18134, 277.01465],
            [278.58133, 279.44797, 280.2813],
            [281.48132, 281.91464, 283.11465],
        ],
        dtype=np.float32,
    )
    self.expected_scale_param_mean_alt = np.array(
        [
            [0.6593, 0.663, 0.1756],
            [0.2242, 0.2093, 0.1756],
            [0.3441, 0.4645, 0.1452],
        ],
        dtype=np.float32,
    )

    # Output cubes wrapping the expected mean-predictor data.
    self.expected_loc_param_mean_cube = set_up_variable_cube(
        self.expected_loc_param_mean,
        name="location_parameter",
        units="K",
        attributes=MANDATORY_ATTRIBUTE_DEFAULTS,
    )
    self.expected_scale_param_mean_cube = set_up_variable_cube(
        self.expected_scale_param_mean,
        name="scale_parameter",
        units="K",
        attributes=MANDATORY_ATTRIBUTE_DEFAULTS,
    )
class Test_mask_cube(SetupCubes):
    """Tests for the mask_cube method."""

    @ManageWarnings(ignored_messages=IGNORED_MESSAGES, warning_types=WARNING_TYPES)
    def setUp(self):
        """Create the land-sea mask cube, the plugin and a small cube to mask."""
        super().setUp()

        land_sea_values = np.array(
            [[0, 1, 0], [0, 1, 1], [1, 1, 0]], dtype=np.int32
        )
        self.mask_cube = set_up_variable_cube(
            land_sea_values, name="land_binary_mask", units="1"
        )

        # NOTE(review): the second positional argument looks like a
        # cycletime; confirm what Plugin expects in that position.
        self.plugin = Plugin("norm", "20171110T0000Z")

        # Only the first two slices of the temperature truth cube are needed.
        self.cube3D = self.temperature_truth_cube[0:2, ...].copy()

    @ManageWarnings(ignored_messages=IGNORED_MESSAGES, warning_types=WARNING_TYPES)
    def test_basic(self):
        """Check that a 3D cube is masked in place wherever the mask is zero."""
        masked_plane = np.array(
            [
                [np.nan, 273.15, np.nan],
                [np.nan, 275.75, 276.55],
                [278.05, 278.35, np.nan],
            ],
            dtype=np.float32,
        )
        # The same masked plane is expected in both slices of the cube.
        expected = np.ma.masked_invalid(np.stack([masked_plane, masked_plane]))

        self.plugin.mask_cube(self.cube3D, self.mask_cube)

        masked_data = self.cube3D.data
        self.assertArrayAlmostEqual(expected.data, masked_data.data)
        self.assertArrayEqual(
            np.ma.getmask(expected), np.ma.getmask(masked_data)
        )

    @ManageWarnings(ignored_messages=IGNORED_MESSAGES, warning_types=WARNING_TYPES)
    def test_basic_2D_input_cube(self):
        """Check that a 2D cube is masked in place wherever the mask is zero."""
        cube2D = self.cube3D[0].copy()
        expected = np.ma.masked_invalid(
            np.array(
                [
                    [np.nan, 273.15, np.nan],
                    [np.nan, 275.75, 276.55],
                    [278.05, 278.35, np.nan],
                ],
                dtype=np.float32,
            )
        )

        self.plugin.mask_cube(cube2D, self.mask_cube)

        masked_data = cube2D.data
        self.assertArrayAlmostEqual(expected.data, masked_data.data)
        self.assertArrayEqual(
            np.ma.getmask(expected), np.ma.getmask(masked_data)
        )

    @ManageWarnings(ignored_messages=IGNORED_MESSAGES, warning_types=WARNING_TYPES)
    def test_fail_mismatched_arrays(self):
        """Check that an IndexError is raised for incompatible input shapes."""
        # Dropping the final dimension leaves a cube the mask cannot match.
        mismatched_cube = self.cube3D[..., 0].copy()
        expected_message = "Cube and landsea_mask shapes are not compatible."

        with self.assertRaisesRegex(IndexError, expected_message):
            self.plugin.mask_cube(mismatched_cube, self.mask_cube)

    @ManageWarnings(ignored_messages=IGNORED_MESSAGES, warning_types=WARNING_TYPES)
    def test_fail_transposed_input(self):
        """Check that an IndexError is raised when the input cube is transposed."""
        self.cube3D.transpose((2, 0, 1))
        expected_message = "Cube and landsea_mask shapes are not compatible."

        with self.assertRaisesRegex(IndexError, expected_message):
            self.plugin.mask_cube(self.cube3D, self.mask_cube)
class Test_create_coefficients_cubelist(SetupExpectedCoefficients):
    """Test the create_coefficients_cubelist method."""

    @ManageWarnings(ignored_messages=IGNORED_MESSAGES, warning_types=WARNING_TYPES)
    def setUp(self):
        """Set up the plugin, historic forecast cubes and expected metadata."""
        super().setUp()
        frt_dt = datetime.datetime(2017, 11, 10, 0, 0)
        time_dt = datetime.datetime(2017, 11, 10, 4, 0)

        data = np.ones((3, 3), dtype=np.float32)
        self.historic_forecast = _create_historic_forecasts(
            data, time_dt, frt_dt,
        ).merge_cube()

        data_with_realizations = np.ones((3, 3, 3), dtype=np.float32)
        self.historic_forecast_with_realizations = _create_historic_forecasts(
            data_with_realizations, time_dt, frt_dt, realizations=[0, 1, 2],
        ).merge_cube()

        self.optimised_coeffs = np.array([0, 1, 2, 3], np.int32)
        self.distribution = "norm"
        self.desired_units = "degreesC"
        self.predictor = "mean"
        self.plugin = Plugin(
            distribution=self.distribution,
            desired_units=self.desired_units,
            predictor=self.predictor,
        )

        self.expected_frt = (
            self.historic_forecast.coord("forecast_reference_time").cell(-1).point
        )
        # Take a copy so the expectation is independent of the input cube:
        # cube.coord() hands back the cube's own coordinate object, so an
        # un-copied reference would silently follow any in-place change made
        # to the input and the comparison could never fail because of it.
        self.expected_fp = self.historic_forecast.coord("forecast_period").copy()

        self.expected_x_coord_points = np.median(
            self.historic_forecast.coord(axis="x").points
        )
        self.expected_x_coord_bounds = np.array(
            [
                [
                    np.min(self.historic_forecast.coord(axis="x").bounds),
                    np.max(self.historic_forecast.coord(axis="x").bounds),
                ]
            ]
        )
        self.expected_y_coord_points = np.median(
            self.historic_forecast.coord(axis="y").points
        )
        self.expected_y_coord_bounds = np.array(
            [
                [
                    np.min(self.historic_forecast.coord(axis="y").bounds),
                    np.max(self.historic_forecast.coord(axis="y").bounds),
                ]
            ]
        )

        self.attributes = generate_mandatory_attributes([self.historic_forecast])
        self.attributes["diagnostic_standard_name"] = self.historic_forecast.name()
        self.attributes["distribution"] = self.distribution
        self.attributes["title"] = "Ensemble Model Output Statistics coefficients"

    @ManageWarnings(ignored_messages=IGNORED_MESSAGES, warning_types=WARNING_TYPES)
    def test_coefficients_from_mean(self):
        """Test that the expected coefficient cubes are returned when the
        ensemble mean is used as the predictor.

        Four cubes are expected, each with the expected forecast reference
        time, forecast period, x/y points and bounds, and attributes.
        """
        result = self.plugin.create_coefficients_cubelist(
            self.optimised_coeffs, self.historic_forecast
        )
        self.assertEqual(len(result), 4)
        for cube in result:
            self.assertEqual(
                cube.coord("forecast_reference_time").cell(0).point,
                self.expected_frt,
            )
            self.assertEqual(cube.coord("forecast_period"), self.expected_fp)
            self.assertArrayAlmostEqual(
                cube.coord(axis="x").points, self.expected_x_coord_points
            )
            self.assertArrayAlmostEqual(
                cube.coord(axis="x").bounds, self.expected_x_coord_bounds
            )
            self.assertArrayAlmostEqual(
                cube.coord(axis="y").points, self.expected_y_coord_points
            )
            self.assertArrayAlmostEqual(
                cube.coord(axis="y").bounds, self.expected_y_coord_bounds
            )
            self.assertDictEqual(cube.attributes, self.attributes)
        self.assertEqual(
            [cube.name() for cube in result], self.expected_coeff_names
        )

    @ManageWarnings(ignored_messages=IGNORED_MESSAGES, warning_types=WARNING_TYPES)
    def test_coordinates_from_mean_period_diagnostic(self):
        """Test that the time coordinates are as expected when the historic
        forecasts are time-bounded diagnostics, e.g. maximum in hour."""
        fp_bounds = [10800, 14400]
        # The bounds are applied to the input and to the expectation
        # separately.
        self.historic_forecast.coord("forecast_period").bounds = fp_bounds
        self.expected_fp.bounds = fp_bounds
        result = self.plugin.create_coefficients_cubelist(
            self.optimised_coeffs, self.historic_forecast
        )
        self.assertEqual(len(result), 4)
        for cube in result:
            self.assertEqual(
                cube.coord("forecast_reference_time").cell(0).point,
                self.expected_frt,
            )
            self.assertEqual(cube.coord("forecast_period"), self.expected_fp)

    @ManageWarnings(ignored_messages=IGNORED_MESSAGES, warning_types=WARNING_TYPES)
    def test_exception_for_multi_valued_forecast_period(self):
        """Test that an exception is raised if the forecast_period is
        multi-valued. This is simply to demonstrate that only single valued
        forecast periods are expected."""
        fps = [0, 3600, 7200, 10800, 14400]
        fp_coord = self.historic_forecast.coord("forecast_period").copy(fps)
        # Swap the scalar forecast_period for one spanning dimension 0.
        self.historic_forecast.remove_coord("forecast_period")
        self.historic_forecast.add_aux_coord(fp_coord, 0)
        msg = "The forecast period must be the same"
        with self.assertRaisesRegex(ValueError, msg):
            self.plugin.create_coefficients_cubelist(
                self.optimised_coeffs, self.historic_forecast
            )

    @ManageWarnings(ignored_messages=IGNORED_MESSAGES, warning_types=WARNING_TYPES)
    def test_attributes_for_truncnorm(self):
        """Test that the expected attributes are created for a truncated
        normal distribution."""
        distribution = "truncnorm"
        self.attributes["distribution"] = distribution
        self.attributes["shape_parameters"] = np.array(
            [0, np.inf], dtype=np.float32
        )
        plugin = Plugin(
            distribution=distribution,
            desired_units=self.desired_units,
            predictor=self.predictor,
        )
        result = plugin.create_coefficients_cubelist(
            self.optimised_coeffs, self.historic_forecast
        )
        for cube in result:
            self.assertDictEqual(cube.attributes, self.attributes)

    @ManageWarnings(ignored_messages=IGNORED_MESSAGES, warning_types=WARNING_TYPES)
    def test_coefficients_from_realizations(self):
        """Test that the expected coefficient cubes are returned when the
        ensemble realizations are used as the predictor."""
        predictor = "realizations"
        optimised_coeffs = [0, 1, 2, 3, 4, 5]
        plugin = Plugin(
            distribution=self.distribution,
            desired_units=self.desired_units,
            predictor=predictor,
        )
        result = plugin.create_coefficients_cubelist(
            optimised_coeffs, self.historic_forecast_with_realizations
        )
        self.assertEqual(
            [cube.name() for cube in result], self.expected_coeff_names
        )
        self.assertArrayEqual(
            result.extract("emos_coefficient_beta", strict=True)
            .coord("realization")
            .points,
            self.historic_forecast_with_realizations.coord("realization").points,
        )

    @ManageWarnings(ignored_messages=IGNORED_MESSAGES, warning_types=WARNING_TYPES)
    def test_too_few_coefficients(self):
        """Test that an exception is raised if the number of coefficients
        provided for creating the coefficients cube is not equal to the
        number of coefficient names."""
        distribution = "truncnorm"
        desired_units = "Fahrenheit"
        predictor = "mean"
        optimised_coeffs = [1, 2, 3]
        plugin = Plugin(
            distribution=distribution,
            desired_units=desired_units,
            predictor=predictor,
        )
        msg = "The number of coefficients in"
        with self.assertRaisesRegex(ValueError, msg):
            plugin.create_coefficients_cubelist(
                optimised_coeffs, self.historic_forecast
            )
def process(
    forecast: cli.inputpath,
    truth: cli.inputpath,
    additional_predictors: cli.inputcubelist = None,
    *,
    diagnostic,
    cycletime,
    forecast_period,
    training_length,
    distribution,
    point_by_point=False,
    use_default_initial_guess=False,
    units=None,
    predictor="mean",
    tolerance: float = 0.02,
    max_iterations: int = 1000,
    percentiles: cli.comma_separated_list = None,
    experiment: str = None,
):
    """Estimate coefficients for Ensemble Model Output Statistics.

    Loads in arguments for estimating coefficients for Ensemble Model
    Output Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). Two sources of input data must be provided as
    Parquet files: historical forecasts and historical truth data (to use
    in calibration). The estimated coefficients are output as a CubeList.

    Args:
        forecast (pathlib.Path):
            The path to a Parquet file containing the historical forecasts
            to be used for calibration. The expected columns within the
            Parquet file are: forecast, blend_time, forecast_period,
            forecast_reference_time, time, wmo_id, percentile, diagnostic,
            latitude, longitude, period, height, cf_name, units.
        truth (pathlib.Path):
            The path to a Parquet file containing the truths to be used
            for calibration. The expected columns within the Parquet file
            are: ob_value, time, wmo_id, diagnostic, latitude, longitude
            and altitude. An IOError is raised if no rows are found for
            the requested diagnostic.
        additional_predictors (iris.cube.CubeList):
            Cubes of static additional predictors to be used, in addition
            to the forecast, when estimating the EMOS coefficients. Each
            cube is extracted to the WMO IDs found on the truth cube.
        diagnostic (str):
            The name of the diagnostic to be calibrated within the forecast
            and truth tables. This name is used to filter the Parquet file
            when reading from disk.
        cycletime (str):
            Cycletime of a format similar to 20170109T0000Z. The blend
            times read from the forecast table end one day before this
            cycletime, less the whole days within the forecast period.
        forecast_period (int):
            Forecast period to be calibrated in seconds.
        training_length (int):
            Number of days within the training period.
        distribution (str):
            The distribution that will be used for minimising the
            Continuous Ranked Probability Score when estimating the EMOS
            coefficients. This will be dependent upon the input phenomenon.
        point_by_point (bool):
            If True, coefficients are calculated independently for each
            point within the input cube by creating an initial guess and
            minimising each grid point independently. If False, a single
            set of coefficients is calculated using all points.
            Warning: This option is memory intensive and is unsuitable for
            gridded input. Using a default initial guess may reduce the
            memory overhead option.
        use_default_initial_guess (bool):
            If True, use the default initial guess. The default initial
            guess assumes no adjustments are required to the initial choice
            of predictor to generate the calibrated distribution. This
            means coefficients of 1 for the multiplicative coefficients and
            0 for the additive coefficients. If False, the initial guess is
            computed.
        units (str):
            The units that calibration should be undertaken in. The
            historical forecast and truth will be converted as required.
        predictor (str):
            String to specify the form of the predictor used to calculate
            the location parameter when estimating the EMOS coefficients.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as options.
        tolerance (float):
            The tolerance for the Continuous Ranked Probability Score
            (CRPS) calculated by the minimisation. Once multiple iterations
            result in a CRPS equal to the same value within the specified
            tolerance, the minimisation will terminate.
        max_iterations (int):
            The maximum number of iterations allowed until the minimisation
            has converged to a stable solution. If the maximum number of
            iterations is reached but the minimisation has not yet
            converged to a stable solution, then the available solution is
            used anyway, and a warning is raised. If the predictor is
            "realizations", then the number of iterations may require
            increasing, as there will be more coefficients to solve.
        percentiles (List[float]):
            The set of percentiles to be used for estimating EMOS
            coefficients. These should be a set of equally spaced
            quantiles.
        experiment (str):
            A value within the experiment column to select from the
            forecast table.

    Returns:
        iris.cube.CubeList:
            CubeList containing the coefficients estimated using EMOS. Each
            coefficient is stored in a separate cube. None is returned if
            the tables do not yield both a forecast cube and a truth cube.
    """
    import iris
    import pandas as pd
    from iris.cube import CubeList

    from improver.calibration.dataframe_utilities import (
        forecast_and_truth_dataframes_to_cubes,
    )
    from improver.calibration.ensemble_calibration import (
        EstimateCoefficientsForEnsembleCalibration,
    )

    # Work out the daily blend times that make up the training period. The
    # period ends one day before the cycletime, less the whole days of the
    # forecast period.
    lead_time = pd.Timedelta(int(forecast_period), unit="seconds")
    final_blend_time = (
        pd.Timestamp(cycletime) - pd.Timedelta(1, unit="days") - lead_time.floor("D")
    )
    # tz_localize(None) is used to facilitate filtering, although the
    # dataframe is expected to be timezone aware upon load.
    blend_times = pd.date_range(
        end=final_blend_time, periods=int(training_length), freq="D"
    ).tz_localize(None)

    # Forecasts are filtered by diagnostic and blend_time while reading.
    diagnostic_filter = ("diagnostic", "==", diagnostic)
    forecast_filters = [[diagnostic_filter, ("blend_time", "in", blend_times)]]
    forecast_df = pd.read_parquet(forecast, filters=forecast_filters)

    # Truths are filtered by diagnostic only.
    truth_filters = [[diagnostic_filter]]
    truth_df = pd.read_parquet(truth, filters=truth_filters)
    if truth_df.empty:
        raise IOError(
            f"The requested filepath {truth} does not contain the "
            f"requested contents: {truth_filters}"
        )

    forecast_cube, truth_cube = forecast_and_truth_dataframes_to_cubes(
        forecast_df,
        truth_df,
        cycletime,
        forecast_period,
        training_length,
        percentiles=percentiles,
        experiment=experiment,
    )
    # Both cubes are needed to estimate coefficients.
    if not (forecast_cube and truth_cube):
        return None

    # Restrict the additional predictors to the WMO IDs of the truth cube.
    if additional_predictors:
        wmo_constraint = iris.Constraint(wmo_id=truth_cube.coord("wmo_id").points)
        additional_predictors = CubeList(
            [field.extract(wmo_constraint) for field in additional_predictors]
        )

    estimator_options = {
        "point_by_point": point_by_point,
        "use_default_initial_guess": use_default_initial_guess,
        "desired_units": units,
        "predictor": predictor,
        "tolerance": tolerance,
        "max_iterations": max_iterations,
    }
    estimator = EstimateCoefficientsForEnsembleCalibration(
        distribution, **estimator_options
    )
    return estimator(
        forecast_cube, truth_cube, additional_fields=additional_predictors
    )
def setUp(self):
    """Create the coefficient cubelists and expected values used by the tests.

    Coefficient cubelists are built for the cases where either the
    ensemble mean or the ensemble realizations have been used as the
    predictor. The coefficients have been constructed from the same
    underlying set of ensemble realizations, so application of these
    coefficients would be expected to give similar results. The values
    for the coefficients are taken from the SetupExpectedCoefficients
    class, where they are the expected outputs from the tests to
    estimate the coefficients."""
    super().setUp()

    def build_coefficients(coefficients, historic_forecast, **estimator_options):
        """Create a coefficients cubelist with a "norm" distribution estimator."""
        estimator = EstimateCoefficientsForEnsembleCalibration(
            "norm", **estimator_options
        )
        return estimator.create_coefficients_cubelist(
            coefficients, historic_forecast
        )

    def float32_array(values):
        """Wrap the supplied values in a float32 numpy array."""
        return np.array(values, dtype=np.float32)

    temperature_forecast = self.historic_temperature_forecast_cube

    # Ensemble mean as the predictor.
    self.coeffs_from_mean = build_coefficients(
        self.expected_mean_predictor_norm,
        temperature_forecast,
        desired_units="Celsius",
    )

    # Ensemble mean as the predictor with coefficients for each point: the
    # mean-predictor coefficients are repeated for all nine grid points.
    repeated_mean_coefficients = np.stack(
        [self.expected_mean_predictor_norm] * 9
    ).T.reshape(4, 3, 3)
    self.coeffs_from_mean_point_by_point = build_coefficients(
        repeated_mean_coefficients,
        temperature_forecast,
        point_by_point=True,
        desired_units="Celsius",
    )

    # Ensemble realizations as the predictor, with the coefficients having
    # been estimated using statsmodels.
    self.coeffs_from_statsmodels_realizations = build_coefficients(
        self.expected_realizations_norm_statsmodels,
        temperature_forecast,
        desired_units="Celsius",
        predictor="realizations",
    )

    # Ensemble realizations as the predictor, with the coefficients having
    # been estimated without using statsmodels.
    self.coeffs_from_no_statsmodels_realizations = build_coefficients(
        self.expected_realizations_norm_no_statsmodels,
        temperature_forecast,
        desired_units="Celsius",
        predictor="realizations",
    )

    # Ensemble realizations as the predictor with a separate set of
    # statsmodels coefficients for each site.
    site_coefficients = np.vstack(
        [*self.expected_realizations_each_site_statsmodels.values()]
    )
    self.coeffs_from_realizations_sites = build_coefficients(
        site_coefficients,
        self.historic_forecast_spot_cube,
        predictor="realizations",
        point_by_point=True,
    )

    # Some expected data that are used in various tests.
    self.expected_loc_param_mean = float32_array(
        [
            [273.7014, 274.6534, 275.4469],
            [276.9385, 277.7636, 278.5570],
            [279.6996, 280.1122, 281.2547],
        ]
    )
    self.expected_scale_param_mean = float32_array(
        [
            [0.2316, 0.2342, 0.0168],
            [0.0271, 0.0237, 0.0168],
            [0.0634, 0.1151, 0.0116],
        ]
    )
    self.expected_loc_param_statsmodels_realizations = float32_array(
        [
            [274.388, 275.3053, 275.4492],
            [277.1295, 277.3866, 278.4672],
            [280.2007, 280.3929, 281.2602],
        ]
    )
    self.expected_loc_param_no_statsmodels_realizations = float32_array(
        [
            [273.9595, 274.9872, 275.4302],
            [277.0191, 277.6373, 278.6069],
            [279.9651, 280.1437, 281.4046],
        ]
    )
    self.expected_loc_param_realizations_sites = float32_array(
        [277.7531, 277.4529, 277.553, 277.2528]
    )
    self.expected_scale_param_realizations_sites = float32_array([0, 0, 0, 0])

    # Create output cubes with the expected data.
    self.expected_loc_param_mean_cube = set_up_variable_cube(
        self.expected_loc_param_mean,
        name="location_parameter",
        units="K",
        attributes=MANDATORY_ATTRIBUTE_DEFAULTS,
    )
    self.expected_scale_param_mean_cube = set_up_variable_cube(
        self.expected_scale_param_mean,
        name="scale_parameter",
        units="Kelvin^2",
        attributes=MANDATORY_ATTRIBUTE_DEFAULTS,
    )
def process(
    *cubes: cli.inputcube,
    distribution,
    truth_attribute,
    cycletime,
    units=None,
    predictor="mean",
    tolerance: float = 0.01,
    max_iterations: int = 1000,
):
    """Estimate coefficients for Ensemble Model Output Statistics.

    Loads in arguments for estimating coefficients for Ensemble Model
    Output Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). Two sources of input data must be provided:
    historical forecasts and historical truth data (to use in calibration).
    The estimated coefficients are output as a cube.

    Args:
        cubes (list of iris.cube.Cube):
            A list of cubes containing the historical forecasts and
            corresponding truth used for calibration. They must have the
            same cube name and will be separated based on the truth
            attribute. Optionally this may also contain a single land-sea
            mask cube on the same domain as the historic forecasts and
            truth (where land points are set to one and sea points are set
            to zero).
        distribution (str):
            The distribution that will be used for calibration. This will
            be dependent upon the input phenomenon.
        truth_attribute (str):
            An attribute and its value in the format of "attribute=value",
            which must be present on historical truth cubes.
        cycletime (str):
            This denotes the cycle at which forecasts will be calibrated
            using the calculated EMOS coefficients. The validity time in
            the output coefficients cube will be calculated relative to
            this cycletime. This cycletime is in the format
            YYYYMMDDTHHMMZ.
        units (str):
            The units that calibration should be undertaken in. The
            historical forecast and truth will be converted as required.
        predictor (str):
            String to specify the form of the predictor used to calculate
            the location parameter when estimating the EMOS coefficients.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as options.
        tolerance (float):
            The tolerance for the Continuous Ranked Probability Score
            (CRPS) calculated by the minimisation. Once multiple iterations
            result in a CRPS equal to the same value within the specified
            tolerance, the minimisation will terminate.
        max_iterations (int):
            The maximum number of iterations allowed until the minimisation
            has converged to a stable solution. If the maximum number of
            iterations is reached but the minimisation has not yet
            converged to a stable solution, then the available solution is
            used anyway, and a warning is raised. If the predictor is
            "realizations", then the number of iterations may require
            increasing, as there will be more coefficients to solve.

    Returns:
        iris.cube.Cube:
            Cube containing the coefficients estimated using EMOS. The cube
            contains a coefficient_index dimension coordinate and a
            coefficient_name auxiliary coordinate.
    """
    from improver.calibration import split_forecasts_and_truth
    from improver.calibration.ensemble_calibration import (
        EstimateCoefficientsForEnsembleCalibration,
    )

    # Separate the inputs into forecasts, truths and the optional mask.
    historic_forecast, historic_truth, mask = split_forecasts_and_truth(
        cubes, truth_attribute
    )

    estimator_options = {
        "desired_units": units,
        "predictor": predictor,
        "tolerance": tolerance,
        "max_iterations": max_iterations,
    }
    estimator = EstimateCoefficientsForEnsembleCalibration(
        distribution, cycletime, **estimator_options
    )
    return estimator(historic_forecast, historic_truth, landsea_mask=mask)
def process(
    *cubes: cli.inputcube,
    distribution,
    truth_attribute,
    point_by_point=False,
    use_default_initial_guess=False,
    units=None,
    predictor="mean",
    tolerance: float = 0.02,
    max_iterations: int = 1000,
):
    """Estimate coefficients for Ensemble Model Output Statistics.

    Loads in arguments for estimating coefficients for Ensemble Model
    Output Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). Two sources of input data must be provided:
    historical forecasts and historical truth data (to use in calibration).
    The estimated coefficients are output as a CubeList.

    Args:
        cubes (list of iris.cube.Cube):
            A list of cubes containing the historical forecasts and
            corresponding truth used for calibration. They must have the
            same cube name and will be separated based on the truth
            attribute. Optionally this may also contain a single land-sea
            mask cube on the same domain as the historic forecasts and
            truth (where land points are set to one and sea points are set
            to zero).
        distribution (str):
            The distribution that will be used for minimising the
            Continuous Ranked Probability Score when estimating the EMOS
            coefficients. This will be dependent upon the input phenomenon.
        truth_attribute (str):
            An attribute and its value in the format of "attribute=value",
            which must be present on historical truth cubes.
        point_by_point (bool):
            If True, coefficients are calculated independently for each
            point within the input cube by creating an initial guess and
            minimising each grid point independently. If False, a single
            set of coefficients is calculated using all points.
            Warning: This option is memory intensive and is unsuitable for
            gridded input. Using a default initial guess may reduce the
            memory overhead option.
        use_default_initial_guess (bool):
            If True, use the default initial guess. The default initial
            guess assumes no adjustments are required to the initial choice
            of predictor to generate the calibrated distribution. This
            means coefficients of 1 for the multiplicative coefficients and
            0 for the additive coefficients. If False, the initial guess is
            computed.
        units (str):
            The units that calibration should be undertaken in. The
            historical forecast and truth will be converted as required.
        predictor (str):
            String to specify the form of the predictor used to calculate
            the location parameter when estimating the EMOS coefficients.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as options.
        tolerance (float):
            The tolerance for the Continuous Ranked Probability Score
            (CRPS) calculated by the minimisation. Once multiple iterations
            result in a CRPS equal to the same value within the specified
            tolerance, the minimisation will terminate.
        max_iterations (int):
            The maximum number of iterations allowed until the minimisation
            has converged to a stable solution. If the maximum number of
            iterations is reached but the minimisation has not yet
            converged to a stable solution, then the available solution is
            used anyway, and a warning is raised. If the predictor is
            "realizations", then the number of iterations may require
            increasing, as there will be more coefficients to solve.

    Returns:
        iris.cube.CubeList:
            CubeList containing the coefficients estimated using EMOS. Each
            coefficient is stored in a separate cube.
    """
    from improver.calibration import split_forecasts_and_truth
    from improver.calibration.ensemble_calibration import (
        EstimateCoefficientsForEnsembleCalibration,
    )

    # The inputs arrive as one flat collection; the truth attribute is
    # used to tell forecasts, truths and the optional mask apart.
    split_inputs = split_forecasts_and_truth(cubes, truth_attribute)
    historic_forecasts, truths, land_sea_mask_cube = split_inputs

    estimator = EstimateCoefficientsForEnsembleCalibration(
        distribution,
        point_by_point=point_by_point,
        use_default_initial_guess=use_default_initial_guess,
        desired_units=units,
        predictor=predictor,
        tolerance=tolerance,
        max_iterations=max_iterations,
    )
    coefficients = estimator(
        historic_forecasts, truths, landsea_mask=land_sea_mask_cube
    )
    return coefficients