def test_exception_for_missing_forecast_inputs(self):
    """Check that an IOError is raised when the inputs contain no
    historical forecast cubes."""
    # Remove every forecast so only truths and the mask are passed in.
    self.realization_forecasts = []
    expected_message = 'Missing historical forecast input.'
    input_cubes = (
        self.realization_forecasts
        + self.realization_truths
        + [self.landsea_mask]
    )
    with self.assertRaisesRegex(IOError, expected_message):
        split_forecasts_and_truth(input_cubes, self.truth_attribute)
def test_exception_for_multiple_land_sea_masks(self):
    """Check that an IOError is raised when more than one land-sea mask
    cube is included in the inputs."""
    expected_message = 'Expected one cube for land-sea mask.'
    # The same mask cube is deliberately supplied twice.
    duplicated_masks = [self.landsea_mask, self.landsea_mask]
    input_cubes = (
        self.realization_forecasts
        + self.realization_truths
        + duplicated_masks
    )
    with self.assertRaisesRegex(IOError, expected_message):
        split_forecasts_and_truth(input_cubes, self.truth_attribute)
def test_exception_for_unintended_cube_combination(self):
    """Check that a ValueError is raised when forecast and truth cubes
    carry different names, i.e. they describe different diagnostics."""
    # Rename one truth cube so that its name no longer matches the others.
    self.realization_truths[0].rename('kitten_density')
    expected_message = 'Must have cubes with 1 or 2 distinct names.'
    # NOTE(review): the land-sea mask is passed twice here; presumably a
    # single mask would exercise the same check - confirm before changing.
    masks = [self.landsea_mask, self.landsea_mask]
    input_cubes = (
        self.realization_forecasts
        + self.realization_truths
        + masks
    )
    with self.assertRaisesRegex(ValueError, expected_message):
        split_forecasts_and_truth(input_cubes, self.truth_attribute)
def process(
    *cubes: cli.inputcube,
    truth_attribute,
    n_probability_bins: int = 5,
    single_value_lower_limit: bool = False,
    single_value_upper_limit: bool = False,
    aggregate_coordinates: cli.comma_separated_list = None,
):
    """Populate reliability tables for use in reliability calibration.

    Historical forecasts and gridded truths are separated from the input
    cubes and compared to build reliability tables. The tables are returned
    as a cube with a leading threshold dimension that matches that of the
    forecast probability cubes and the thresholded truth.

    Args:
        cubes (list of iris.cube.Cube):
            A list of cubes containing the historical probability forecasts
            and corresponding truths used for calibration. These cubes must
            include the same diagnostic name in their names, and must both
            have equivalent threshold coordinates. The cubes will be
            distinguished using the user provided truth attribute.
        truth_attribute (str):
            An attribute and its value in the format of "attribute=value",
            which must be present on truth cubes.
        n_probability_bins (int):
            The total number of probability bins required in the
            reliability tables. If single value limits are turned on, these
            are included in this total. If using single_value_limits this
            value must be at least 3.
        single_value_lower_limit (bool):
            Mandates that the lowest bin should be single valued, with a
            small precision tolerance, defined as 1.0E-6. The bin is thus
            0 to 1.0E-6.
        single_value_upper_limit (bool):
            Mandates that the highest bin should be single valued, with a
            small precision tolerance, defined as 1.0E-6. The bin is thus
            (1 - 1.0E-6) to 1.
        aggregate_coordinates (List[str]):
            An optional list of coordinates over which to aggregate the
            reliability calibration table using summation. This is
            equivalent to constructing then using
            aggregate-reliability-tables but with reduced memory usage due
            to avoiding large intermediate data.

    Returns:
        iris.cube.Cube:
            Reliability tables for the forecast diagnostic with a leading
            threshold coordinate.
    """
    # Imports are kept local to the function, as in the original block.
    from improver.calibration import split_forecasts_and_truth
    from improver.calibration.reliability_calibration import (
        ConstructReliabilityCalibrationTables,
    )

    # The third item returned by the split is not needed here.
    historic_forecasts, truths, _ = split_forecasts_and_truth(
        cubes, truth_attribute
    )

    plugin_options = {
        "n_probability_bins": n_probability_bins,
        "single_value_lower_limit": single_value_lower_limit,
        "single_value_upper_limit": single_value_upper_limit,
    }
    table_builder = ConstructReliabilityCalibrationTables(**plugin_options)
    return table_builder(historic_forecasts, truths, aggregate_coordinates)
def test_realization_data(self):
    """Test that when multiple forecast cubes and truth cubes are provided,
    without a land-sea mask, the groups are created as expected."""
    forecast, truth, land_sea_mask = split_forecasts_and_truth(
        self.realization_forecasts + self.realization_truths,
        self.truth_attribute,
    )

    self.assertIsInstance(forecast, iris.cube.Cube)
    self.assertIsInstance(truth, iris.cube.Cube)
    # No mask cube was supplied, so none is expected back. assertIsNone
    # takes only the object under test (plus an optional failure message),
    # so no second operand is passed.
    self.assertIsNone(land_sea_mask)
    self.assertSequenceEqual((2, 4, 4), forecast.shape)
    self.assertSequenceEqual((2, 4, 4), truth.shape)
    # Every forecast value must be truthy and every truth value falsy.
    self.assertTrue(np.all(forecast.data))
    self.assertFalse(np.any(truth.data))
def process(
    *cubes: cli.inputcube,
    truth_attribute,
    n_probability_bins: int = 5,
    single_value_limits: bool = True,
):
    """Populate reliability tables for use in reliability calibration.

    Historical forecasts and gridded truths are separated from the input
    cubes and compared to build reliability tables. The tables are returned
    as a cube with a leading threshold dimension that matches that of the
    forecast probability cubes and the thresholded truth.

    Args:
        cubes (list of iris.cube.Cube):
            A list of cubes containing the historical probability forecasts
            and corresponding truths used for calibration. These cubes must
            include the same diagnostic name in their names, and must both
            have equivalent threshold coordinates. The cubes will be
            distinguished using the user provided truth attribute.
        truth_attribute (str):
            An attribute and its value in the format of "attribute=value",
            which must be present on truth cubes.
        n_probability_bins (int):
            The total number of probability bins required in the
            reliability tables. If single value limits are turned on, these
            are included in this total. If using single_value_limits this
            value must be at least 3.
        single_value_limits (bool):
            Mandates that the extrema bins (0 and 1) should be single
            valued, with a small precision tolerance of 1.0E-6, e.g. 0 to
            1.0E-6 for the lowest bin, and 1 - 1.0E-6 to 1 for the highest
            bin.

    Returns:
        iris.cube.Cube:
            Reliability tables for the forecast diagnostic with a leading
            threshold coordinate.
    """
    # Imports are kept local to the function, as in the original block.
    from improver.calibration import split_forecasts_and_truth
    from improver.calibration.reliability_calibration import (
        ConstructReliabilityCalibrationTables,
    )

    # The third item returned by the split is not needed here.
    historic_forecasts, truths, _ = split_forecasts_and_truth(
        cubes, truth_attribute
    )

    table_builder = ConstructReliabilityCalibrationTables(
        n_probability_bins=n_probability_bins,
        single_value_limits=single_value_limits,
    )
    return table_builder(historic_forecasts, truths)
def test_realization_data_with_land_sea_mask(self):
    """Check that forecasts, truths and a single land-sea mask are each
    separated out as expected when all three are supplied together."""
    input_cubes = (
        self.realization_forecasts
        + self.realization_truths
        + [self.landsea_mask]
    )
    forecast, truth, land_sea_mask = split_forecasts_and_truth(
        input_cubes, self.truth_attribute)

    # All three returned items should be cubes.
    for returned_item in (forecast, truth, land_sea_mask):
        self.assertIsInstance(returned_item, iris.cube.Cube)
    self.assertEqual('land_binary_mask', land_sea_mask.name())

    # Forecast and truth share the same expected shape.
    for merged_cube in (forecast, truth):
        self.assertSequenceEqual((2, 4, 4), merged_cube.shape)

    # Every forecast value must be truthy and every truth value falsy.
    self.assertTrue(np.all(forecast.data))
    self.assertFalse(np.any(truth.data))
    self.assertSequenceEqual((4, 4), land_sea_mask.shape)
def process(
    *cubes: cli.inputcube,
    distribution,
    truth_attribute,
    cycletime,
    units=None,
    predictor='mean',
    tolerance: float = 0.01,
    max_iterations: int = 1000
):
    """Estimate coefficients for Ensemble Model Output Statistics.

    Loads in arguments for estimating coefficients for Ensemble Model
    Output Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). Two sources of input data must be provided: historical
    forecasts and historical truth data (to use in calibration).
    The estimated coefficients are output as a cube.

    Args:
        cubes (list of iris.cube.Cube):
            A list of cubes containing the historical forecasts and
            corresponding truth used for calibration. They must have the
            same cube name and will be separated based on the truth
            attribute. Optionally this may also contain a single land-sea
            mask cube on the same domain as the historic forecasts and
            truth (where land points are set to one and sea points are set
            to zero).
        distribution (str):
            The distribution that will be used for calibration. This will
            be dependent upon the input phenomenon.
        truth_attribute (str):
            An attribute and its value in the format of "attribute=value",
            which must be present on historical truth cubes.
        cycletime (str):
            This denotes the cycle at which forecasts will be calibrated
            using the calculated EMOS coefficients. The validity time in
            the output coefficients cube will be calculated relative to
            this cycletime. This cycletime is in the format
            YYYYMMDDTHHMMZ.
        units (str):
            The units that calibration should be undertaken in. The
            historical forecast and truth will be converted as required.
        predictor (str):
            String to specify the form of the predictor used to calculate
            the location parameter when estimating the EMOS coefficients.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as options.
        tolerance (float):
            The tolerance for the Continuous Ranked Probability Score
            (CRPS) calculated by the minimisation. Once multiple iterations
            result in a CRPS equal to the same value within the specified
            tolerance, the minimisation will terminate.
        max_iterations (int):
            The maximum number of iterations allowed until the minimisation
            has converged to a stable solution. If the maximum number of
            iterations is reached but the minimisation has not yet
            converged to a stable solution, then the available solution is
            used anyway, and a warning is raised. If the predictor is
            "realizations", then the number of iterations may require
            increasing, as there will be more coefficients to solve.

    Returns:
        iris.cube.Cube:
            Cube containing the coefficients estimated using EMOS. The cube
            contains a coefficient_index dimension coordinate and a
            coefficient_name auxiliary coordinate.
    """
    # Imports are kept local to the function, as in the original block.
    from improver.calibration import split_forecasts_and_truth
    from improver.calibration.ensemble_calibration import (
        EstimateCoefficientsForEnsembleCalibration)

    historic_forecasts, truths, mask = split_forecasts_and_truth(
        cubes, truth_attribute)

    # distribution and cycletime stay positional; the rest go by keyword.
    estimator_options = {
        'desired_units': units,
        'predictor': predictor,
        'tolerance': tolerance,
        'max_iterations': max_iterations,
    }
    estimator = EstimateCoefficientsForEnsembleCalibration(
        distribution, cycletime, **estimator_options)
    return estimator(historic_forecasts, truths, landsea_mask=mask)
def process(
    *cubes: cli.inputcube,
    distribution,
    truth_attribute,
    point_by_point=False,
    use_default_initial_guess=False,
    units=None,
    predictor="mean",
    tolerance: float = 0.02,
    max_iterations: int = 1000,
):
    """Estimate coefficients for Ensemble Model Output Statistics.

    Loads in arguments for estimating coefficients for Ensemble Model
    Output Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). Two sources of input data must be provided: historical
    forecasts and historical truth data (to use in calibration).
    The estimated coefficients are output as a cube.

    Args:
        cubes (list of iris.cube.Cube):
            A list of cubes containing the historical forecasts and
            corresponding truth used for calibration. They must have the
            same cube name and will be separated based on the truth
            attribute. Optionally this may also contain a single land-sea
            mask cube on the same domain as the historic forecasts and
            truth (where land points are set to one and sea points are set
            to zero).
        distribution (str):
            The distribution that will be used for minimising the
            Continuous Ranked Probability Score when estimating the EMOS
            coefficients. This will be dependent upon the input phenomenon.
        truth_attribute (str):
            An attribute and its value in the format of "attribute=value",
            which must be present on historical truth cubes.
        point_by_point (bool):
            If True, coefficients are calculated independently for each
            point within the input cube by creating an initial guess and
            minimising each grid point independently. If False, a single
            set of coefficients is calculated using all points.
            Warning: This option is memory intensive and is unsuitable for
            gridded input. Using a default initial guess may reduce the
            memory overhead.
        use_default_initial_guess (bool):
            If True, use the default initial guess. The default initial
            guess assumes no adjustments are required to the initial choice
            of predictor to generate the calibrated distribution. This
            means coefficients of 1 for the multiplicative coefficients and
            0 for the additive coefficients. If False, the initial guess is
            computed.
        units (str):
            The units that calibration should be undertaken in. The
            historical forecast and truth will be converted as required.
        predictor (str):
            String to specify the form of the predictor used to calculate
            the location parameter when estimating the EMOS coefficients.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as options.
        tolerance (float):
            The tolerance for the Continuous Ranked Probability Score
            (CRPS) calculated by the minimisation. Once multiple iterations
            result in a CRPS equal to the same value within the specified
            tolerance, the minimisation will terminate.
        max_iterations (int):
            The maximum number of iterations allowed until the minimisation
            has converged to a stable solution. If the maximum number of
            iterations is reached but the minimisation has not yet
            converged to a stable solution, then the available solution is
            used anyway, and a warning is raised. If the predictor is
            "realizations", then the number of iterations may require
            increasing, as there will be more coefficients to solve.

    Returns:
        iris.cube.CubeList:
            CubeList containing the coefficients estimated using EMOS. Each
            coefficient is stored in a separate cube.
    """
    # Imports are kept local to the function, as in the original block.
    from improver.calibration import split_forecasts_and_truth
    from improver.calibration.ensemble_calibration import (
        EstimateCoefficientsForEnsembleCalibration,
    )

    historic_forecasts, truths, mask = split_forecasts_and_truth(
        cubes, truth_attribute
    )

    # distribution stays positional; all other options go by keyword.
    estimator_options = {
        "point_by_point": point_by_point,
        "use_default_initial_guess": use_default_initial_guess,
        "desired_units": units,
        "predictor": predictor,
        "tolerance": tolerance,
        "max_iterations": max_iterations,
    }
    estimator = EstimateCoefficientsForEnsembleCalibration(
        distribution, **estimator_options
    )
    return estimator(historic_forecasts, truths, landsea_mask=mask)