def test_mean_predictor_estimate_coefficients(self):
        """
        Check the initial guess for the calibration coefficients when the
        ensemble mean is the predictor and a linear model is used to
        estimate the coefficients.

        The training data have an offset of one between the truth and the
        forecast, so the linear regression is expected to give a gradient
        of 1 and an intercept of 1.
        """
        expected = np.array([0., 1., 1., 1.], dtype=np.float32)
        use_linear_model = True

        initial_guess = Plugin(
            self.distribution, self.desired_units).compute_initial_guess(
                self.truth, self.current_forecast_predictor_mean,
                self.predictor_of_mean_flag, use_linear_model)

        self.assertArrayAlmostEqual(initial_guess, expected)
예제 #2
0
    def test_coefficient_values_for_fake_distribution(self):
        """
        Ensure a KeyError whose message matches "Distribution requested" is
        raised when coefficients are estimated by a plugin that was created
        with the distribution name "fake".
        """
        forecast = self.current_temperature_forecast_cube
        training_forecasts = self.historic_temperature_forecast_cube
        observations = self.temperature_truth_cube

        plugin = Plugin("fake", "degreesC")
        with self.assertRaisesRegex(KeyError, "Distribution requested"):
            plugin.estimate_coefficients_for_ngr(
                forecast, training_forecasts, observations)
예제 #3
0
    def test_truth_data_is_none(self):
        """
        Ensure that a TypeError with the expected text is generated when
        the truth CubeList contains None rather than a cube.
        """
        forecast = self.current_temperature_forecast_cube
        training_forecasts = self.historic_temperature_forecast_cube
        # A CubeList whose only entry is None stands in for the truth.
        empty_truth = iris.cube.CubeList([None])

        plugin = Plugin("gaussian", "degreesC")
        with self.assertRaisesRegex(TypeError, "The input data within the"):
            plugin.estimate_coefficients_for_ngr(
                forecast, training_forecasts, empty_truth)
    def test_coefficients_truncated_gaussian_realizations_statsmodels(self):
        """Check the optimised coefficients and the coefficient names
        produced for a truncated Gaussian distribution when the
        realizations are used as the predictor of the mean."""
        plugin = Plugin("truncated gaussian",
                        self.current_cycle,
                        predictor_of_mean_flag="realizations")
        coefficients_cube = plugin.process(
            self.historic_wind_speed_forecast_cube,
            self.wind_speed_truth_cube)

        # Values first, then the names attached to the coefficient cube.
        self.assertEMOSCoefficientsAlmostEqual(
            coefficients_cube.data,
            self.expected_realizations_truncated_gaussian_statsmodels)
        names = coefficients_cube.coord("coefficient_name").points
        self.assertArrayEqual(names, self.coeff_names_realizations)
예제 #5
0
    def test_coefficient_values_for_gaussian_distribution_realizations(self):
        """
        Ensure that the values generated within optimised_coeffs match the
        expected values, and the coefficient names also match
        expected values.

        Two sets of expected values are held: one used when the statsmodels
        package can be located and one used when it cannot.
        """
        # importlib.util.find_spec replaces imp.find_module: the imp module
        # is deprecated and was removed in Python 3.12. find_spec returns
        # None (rather than raising ImportError) when the module is absent.
        import importlib.util
        statsmodels_found = (
            importlib.util.find_spec('statsmodels') is not None)

        if statsmodels_found:
            data = [-0.00114, -0.00006, 1.00037, -0.00196, 0.99999, -0.00315]
        else:
            data = [
                4.30804737e-02, 1.39042785e+00, 8.99047025e-04, 2.02661310e-01,
                9.27197381e-01, 3.17407626e-01
            ]

        current_forecast = self.current_temperature_forecast_cube

        historic_forecasts = self.historic_temperature_forecast_cube

        truth = self.temperature_truth_cube

        distribution = "gaussian"
        desired_units = "degreesC"
        predictor_of_mean_flag = "realizations"

        plugin = Plugin(distribution,
                        desired_units,
                        predictor_of_mean_flag=predictor_of_mean_flag)
        result = plugin.estimate_coefficients_for_ngr(current_forecast,
                                                      historic_forecasts,
                                                      truth)
        optimised_coeffs, coeff_names = result

        # Every entry of the returned mapping is compared with the same
        # expected coefficient values.
        for coeffs in optimised_coeffs.values():
            self.assertArrayAlmostEqual(coeffs, data, decimal=5)
        self.assertListEqual(coeff_names, ["gamma", "delta", "a", "beta"])
예제 #6
0
    def test_realizations_predictor_estimate_coefficients(self):
        """
        Check the initial guess for the calibration coefficients when the
        ensemble realizations are used as the predictor and the
        coefficients are estimated using a linear model.

        The expected result has an intercept of 0.333333 and differing
        weights for the realizations, because some of the realizations are
        closer to the truth in this instance.
        """
        expected = [0., 1., 0.333333, 0., 0.333333, 0.666667]
        use_linear_model = True

        plugin = Plugin(self.distribution, self.desired_units)
        initial_guess = plugin.compute_initial_guess(
            self.truth,
            self.current_forecast_predictor_realizations,
            "realizations",
            use_linear_model,
            no_of_realizations=self.no_of_realizations)
        self.assertArrayAlmostEqual(initial_guess, expected)
예제 #7
0
    def test_basic_mean_predictor(self):
        """
        Check that the initial guess for the calibration coefficients is
        returned as a list when the ensemble mean is the predictor and no
        linear model is used.
        """
        source_cube = self.cube

        # Predictor: realization mean; truth: realization maximum.
        mean_predictor = source_cube.collapsed("realization",
                                               iris.analysis.MEAN)
        max_truth = source_cube.collapsed("realization", iris.analysis.MAX)

        plugin = Plugin("gaussian", "degreesC")
        initial_guess = plugin.compute_initial_guess(
            max_truth, mean_predictor, "mean", False)
        self.assertIsInstance(initial_guess, list)
예제 #8
0
    def test_basic(self):
        """Check that the optimised coefficients come back as a dictionary
           and the coefficient names as a list, with every dictionary entry
           having as many values as there are coefficient names."""
        forecast = self.current_temperature_forecast_cube
        training_forecasts = self.historic_temperature_forecast_cube
        observations = self.temperature_truth_cube

        plugin = Plugin("gaussian", "degreesC")
        optimised_coeffs, coeff_names = plugin.estimate_coefficients_for_ngr(
            forecast, training_forecasts, observations)

        self.assertIsInstance(optimised_coeffs, dict)
        self.assertIsInstance(coeff_names, list)
        for coeffs in optimised_coeffs.values():
            self.assertEqual(len(coeffs), len(coeff_names))
    def test_coefficient_values_truncated_gaussian_distribution_realization(
            self):
        """Ensure that the values for the optimised_coefficients match the
        expected values, and the coefficient names also match
        expected values for a truncated Gaussian distribution where the
        realizations are used as the predictor of the mean.

        Two sets of expected values are held: one used when the statsmodels
        package can be located and one used when it cannot."""
        # importlib.util.find_spec replaces imp.find_module: the imp module
        # is deprecated and was removed in Python 3.12. find_spec returns
        # None (rather than raising ImportError) when the module is absent.
        import importlib.util
        statsmodels_found = (
            importlib.util.find_spec('statsmodels') is not None)

        if statsmodels_found:
            data = [
                0.11821805, -0.00474737, 0.17631301, 0.17178835, 0.66749225,
                0.72287342
            ]
        else:
            data = [
                2.05550997, 0.10577237, 0.00028531, 0.53208837, 0.67233013,
                0.53704241
            ]

        distribution = "truncated gaussian"
        current_cycle = "20171110T0000Z"
        predictor_of_mean_flag = "realizations"
        expected_coeff_names = ([
            'gamma', 'delta', 'alpha', 'beta0', 'beta1', 'beta2'
        ])

        plugin = Plugin(distribution,
                        current_cycle,
                        predictor_of_mean_flag=predictor_of_mean_flag)
        result = plugin.estimate_coefficients_for_ngr(
            self.historic_wind_speed_forecast_cube, self.wind_speed_truth_cube)

        self.assertArrayAlmostEqual(result.data, data)
        self.assertArrayEqual(
            result.coord("coefficient_name").points, expected_coeff_names)
    def test_basic_realizations_predictor(self):
        """
        Check that the initial guess for the calibration coefficients is
        returned as a numpy array when the individual ensemble realizations
        are used as predictors and no linear model is used.
        """
        # Predictor: a copy of the full cube; truth: realization maximum.
        realization_predictor = self.cube.copy()
        max_truth = self.cube.collapsed("realization", iris.analysis.MAX)
        realization_count = 3

        plugin = Plugin("gaussian", "degreesC")
        initial_guess = plugin.compute_initial_guess(
            max_truth,
            realization_predictor,
            "realizations",
            False,
            no_of_realizations=realization_count)
        self.assertIsInstance(initial_guess, np.ndarray)
    def test_basic_mean_predictor_value_check(self):
        """
        Check the values of the initial guess for the calibration
        coefficients when the ensemble mean is the predictor. No linear
        model is used, so the default initial guess values are expected.
        """
        expected = [1, 1, 0, 1]

        mean_predictor = self.cube.collapsed(
            "realization", iris.analysis.MEAN)
        max_truth = self.cube.collapsed("realization", iris.analysis.MAX)

        plugin = Plugin("gaussian", "degreesC")
        initial_guess = plugin.compute_initial_guess(
            max_truth, mean_predictor, "mean", False)
        self.assertArrayAlmostEqual(initial_guess, expected)
예제 #12
0
    def test_truth_data_is_fake_catch_warning(self, warning_list=None):
        """
        Ensure that exactly one warning is captured, that it is a
        UserWarning, and that its text contains "is not a Cube or CubeList"
        when the truth input is a string rather than a Cube or CubeList.

        Args:
            warning_list (list or None):
                The captured warnings that the assertions inspect.
                NOTE(review): presumably supplied by a warning-capturing
                decorator that is not visible here; the test errors if it
                is left as None - confirm.
        """
        current_forecast = self.current_temperature_forecast_cube

        historic_forecasts = self.historic_temperature_forecast_cube

        truth = "fake"

        distribution = "gaussian"
        desired_units = "degreesC"

        plugin = Plugin(distribution, desired_units)

        plugin.estimate_coefficients_for_ngr(current_forecast,
                                             historic_forecasts, truth)
        # assertEqual / assertIn report the offending values on failure,
        # unlike assertTrue on a pre-evaluated boolean.
        self.assertEqual(len(warning_list), 1)
        self.assertTrue(
            any(item.category == UserWarning for item in warning_list))
        self.assertIn("is not a Cube or CubeList", str(warning_list[0]))
    def test_coefficient_values_for_gaussian_distribution(self):
        """Check the optimised coefficients and the coefficient names
        produced for a Gaussian distribution."""
        expected = [
            4.55819380e-06, -8.02401974e-09, 1.66667055e+00, 1.00000011e+00
        ]

        plugin = Plugin("gaussian",
                        "20171110T0000Z",
                        desired_units="Celsius")
        coefficients_cube = plugin.estimate_coefficients_for_ngr(
            self.historic_temperature_forecast_cube,
            self.temperature_truth_cube)

        self.assertArrayAlmostEqual(coefficients_cube.data, expected)
        names = coefficients_cube.coord("coefficient_name").points
        self.assertArrayEqual(names, self.coeff_names)
    def test_truth_unit_conversion(self):
        """Check that the expected optimised coefficients are produced when
        the truth cube has been converted to Fahrenheit while calibration
        is requested in degreesC."""
        expected = [
            4.55819380e-06, -8.02401974e-09, 1.66667055e+00, 1.00000011e+00
        ]

        # The fixture cube itself is converted in place.
        fahrenheit_truth = self.temperature_truth_cube
        fahrenheit_truth.convert_units("Fahrenheit")

        plugin = Plugin("gaussian",
                        "20171110T0000Z",
                        desired_units="degreesC")
        coefficients_cube = plugin.estimate_coefficients_for_ngr(
            self.historic_temperature_forecast_cube, fahrenheit_truth)

        self.assertArrayAlmostEqual(
            coefficients_cube.data, expected, decimal=5)
    def test_mean_predictor_estimate_coefficients(self):
        """
        Check the values of the initial guess for the calibration
        coefficients when the ensemble mean is the predictor and the
        coefficients are estimated using a linear model.
        """
        expected = np.array([1, 1, 2.66663, 1], dtype=np.float32)

        mean_predictor = self.cube.collapsed(
            "realization", iris.analysis.MEAN)
        max_truth = self.cube.collapsed("realization", iris.analysis.MAX)

        plugin = Plugin("gaussian", "degreesC")
        initial_guess = plugin.compute_initial_guess(
            max_truth, mean_predictor, "mean", True)

        self.assertArrayAlmostEqual(initial_guess, expected, decimal=5)
    def test_coefficient_values_for_gaussian_distribution_realizations(self):
        """Ensure that the values for the optimised_coefficients match the
        expected values, and the coefficient names also match
        expected values for a Gaussian distribution where the
        realizations are used as the predictor of the mean.

        Two sets of expected values are held: one used when the statsmodels
        package can be located and one used when it cannot."""
        # importlib.util.find_spec replaces imp.find_module: the imp module
        # is deprecated and was removed in Python 3.12. find_spec returns
        # None (rather than raising ImportError) when the module is absent.
        import importlib.util
        statsmodels_found = (
            importlib.util.find_spec('statsmodels') is not None)

        if statsmodels_found:
            data = [-0.00114, -0.00006, 1.00037, -0.00196, 0.99999, -0.00315]
        else:
            data = [
                4.30804737e-02, 1.39042785e+00, 8.99047025e-04, 2.02661310e-01,
                9.27197381e-01, 3.17407626e-01
            ]

        distribution = "gaussian"
        current_cycle = "20171110T0000Z"
        desired_units = "Celsius"
        predictor_of_mean_flag = "realizations"
        expected_coeff_names = ([
            'gamma', 'delta', 'alpha', 'beta0', 'beta1', 'beta2'
        ])

        plugin = Plugin(distribution,
                        current_cycle,
                        desired_units=desired_units,
                        predictor_of_mean_flag=predictor_of_mean_flag)
        result = plugin.estimate_coefficients_for_ngr(
            self.historic_temperature_forecast_cube,
            self.temperature_truth_cube)
        self.assertArrayAlmostEqual(result.data, data, decimal=5)
        self.assertArrayEqual(
            result.coord("coefficient_name").points, expected_coeff_names)
예제 #17
0
    def test_truth_data_length_zero_catch_warning(self, warning_list=None):
        """
        Check that a UserWarning is captured, and that a captured warning
        mentions insufficient input data, when the truth is an empty
        CubeList.

        Args:
            warning_list (list or None):
                The captured warnings that the assertions inspect.
                NOTE(review): presumably supplied by a warning-capturing
                decorator that is not visible here - confirm.
        """
        forecast = self.current_temperature_forecast_cube
        training_forecasts = self.historic_temperature_forecast_cube
        empty_truth = iris.cube.CubeList([])

        plugin = Plugin("gaussian", "degreesC")
        plugin.estimate_coefficients_for_ngr(
            forecast, training_forecasts, empty_truth)

        expected_text = ("Insufficient input data present to estimate "
                         "coefficients using NGR.")
        has_user_warning = any(
            item.category == UserWarning for item in warning_list)
        self.assertTrue(has_user_warning)
        has_expected_text = any(
            expected_text in str(item) for item in warning_list)
        self.assertTrue(has_expected_text)
예제 #18
0
    def test_truth_data_has_wrong_time_catch_warning(self, warning_list=None):
        """
        Check that a UserWarning is captured, and that a captured warning
        mentions being unable to calibrate for the time points, when the
        forecast_reference_time points of the truth have been shifted.

        Args:
            warning_list (list or None):
                The captured warnings that the assertions inspect.
                NOTE(review): presumably supplied by a warning-capturing
                decorator that is not visible here - confirm.
        """
        forecast = self.current_temperature_forecast_cube
        training_forecasts = self.historic_temperature_forecast_cube

        # Shift the reference time of the truth fixture by 10.
        shifted_truth = self.temperature_truth_cube
        shifted_truth.coord("forecast_reference_time").points += 10

        plugin = Plugin("gaussian", "degreesC")
        plugin.estimate_coefficients_for_ngr(
            forecast, training_forecasts, shifted_truth)

        expected_text = "Unable to calibrate for the time points"
        has_user_warning = any(
            item.category == UserWarning for item in warning_list)
        self.assertTrue(has_user_warning)
        has_expected_text = any(
            expected_text in str(item) for item in warning_list)
        self.assertTrue(has_expected_text)
예제 #19
0
    def test_members_predictor_estimate_coefficients(self):
        """
        Test that the plugin returns the expected values for the initial guess
        for the calibration coefficients, when the ensemble members are used
        as the predictor. The coefficients are estimated using a linear model.

        Two sets of expected values are held: one used when the statsmodels
        package can be located and one used when it cannot.
        """
        # NOTE: this changes the process-wide warnings filter and is not
        # restored within this test.
        warnings.simplefilter("always")
        # importlib.util.find_spec replaces imp.find_module: the imp module
        # is deprecated and was removed in Python 3.12. find_spec returns
        # None (rather than raising ImportError) when the module is absent.
        import importlib.util
        statsmodels_found = (
            importlib.util.find_spec('statsmodels') is not None)

        if statsmodels_found:
            data = [1., 1., 0.13559322, -0.11864407, 0.42372881, 0.69491525]
        else:
            data = [1, 1, 0, 1, 1, 1]

        cube = self.cube

        current_forecast_predictor = cube
        truth = cube.collapsed("realization", iris.analysis.MAX)
        distribution = "gaussian"
        desired_units = "degreesC"
        predictor_of_mean_flag = "members"
        no_of_members = 3
        estimate_coefficients_from_linear_model_flag = True

        plugin = Plugin(distribution, desired_units)
        result = plugin.compute_initial_guess(
            truth,
            current_forecast_predictor,
            predictor_of_mean_flag,
            estimate_coefficients_from_linear_model_flag,
            no_of_members=no_of_members)
        self.assertArrayAlmostEqual(result, data)
예제 #20
0
 def test_mismatching(self):
     """Check the filtering when a historic forecast and a truth are
     missing at different validity times. The filtered historic forecasts
     and truths are expected to contain only the cubes at the three
     validity times that match."""
     matching_indices = (1, 3, 4)
     partial_truth = self.truth[1:].merge_cube()

     forecast_subset = [
         self.historic_forecasts[index] for index in matching_indices]
     expected_historical_forecasts = iris.cube.CubeList(
         forecast_subset).merge_cube()
     truth_subset = [self.truth[index] for index in matching_indices]
     expected_truth = iris.cube.CubeList(truth_subset).merge_cube()

     filtered = Plugin._filter_non_matching_cubes(
         self.partial_historic_forecasts, partial_truth)
     hf_result, truth_result = filtered
     self.assertEqual(hf_result, expected_historical_forecasts)
     self.assertEqual(truth_result, expected_truth)
예제 #21
0
    def setUp(self):
        """Build the coefficients cubes and expected arrays shared by the
        tests.

        Three coefficients cubes are created from the current temperature
        forecast cube: one for the ensemble mean as predictor, and two for
        the ensemble realizations as predictor (coefficients estimated with
        and without statsmodels). The coefficient values are the expected
        outputs of the coefficient-estimation tests (taken from the
        SetupExpectedCoefficients class), and all derive from the same
        underlying ensemble realizations, so applying them is expected to
        give similar results."""
        super().setUp()
        cycle = "20171110T0000Z"
        realization_kwargs = {
            "desired_units": "Celsius",
            "predictor_of_mean_flag": "realizations",
        }

        # Ensemble mean as the predictor.
        mean_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, desired_units="Celsius")
        self.coeffs_from_mean = mean_estimator.create_coefficients_cube(
            self.expected_mean_predictor_gaussian,
            self.current_temperature_forecast_cube)

        # Ensemble realizations as the predictor, coefficients estimated
        # using statsmodels.
        statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, **realization_kwargs)
        self.coeffs_from_statsmodels_realizations = (
            statsmodels_estimator.create_coefficients_cube(
                self.expected_realizations_gaussian_statsmodels,
                self.current_temperature_forecast_cube))

        # Ensemble realizations as the predictor, coefficients estimated
        # without using statsmodels.
        no_statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, **realization_kwargs)
        self.coeffs_from_no_statsmodels_realizations = (
            no_statsmodels_estimator.create_coefficients_cube(
                self.expected_realizations_gaussian_no_statsmodels,
                self.current_temperature_forecast_cube))

        # Expected calibrated predictor and variance arrays used by
        # various tests.
        self.expected_calibrated_predictor_mean = np.array([
            [273.7854, 274.6913, 275.4461],
            [276.8652, 277.6502, 278.405],
            [279.492, 280.1562, 280.9715],
        ])
        self.expected_calibrated_variance_mean = np.array([
            [0.1952, 0.1974, 0.0117],
            [0.0226, 0.0197, 0.0117],
            [0.0532, 0.0029, 0.0007],
        ])
        self.expected_calibrated_predictor_statsmodels_realizations = np.array([
            [274.1395, 275.0975, 275.258],
            [276.9771, 277.3487, 278.3144],
            [280.0085, 280.2506, 281.1632],
        ])
        self.expected_calibrated_variance_statsmodels_realizations = np.array([
            [0.8973, 0.9073, 0.0536],
            [0.1038, 0.0904, 0.0536],
            [0.2444, 0.0134, 0.0033],
        ])
        self.expected_calibrated_predictor_no_statsmodels_realizations = (
            np.array([
                [273.4695, 274.4673, 275.3034],
                [276.8648, 277.733, 278.5632],
                [279.7562, 280.4913, 281.3889],
            ]))
        self.expected_calibrated_variance_no_statsmodels_realizations = (
            np.array([
                [0.9344, 0.9448, 0.0558],
                [0.1081, 0.0941, 0.0558],
                [0.2545, 0.0139, 0.0035],
            ]))
예제 #22
0
    def setUp(self):
        """Build the coefficients cubes, expected arrays and expected
        output cubes shared by the tests.

        Three coefficients cubes are created from the current temperature
        forecast cube: one for the ensemble mean as predictor, and two for
        the ensemble realizations as predictor (coefficients estimated with
        and without statsmodels). The coefficient values are the expected
        outputs of the coefficient-estimation tests (taken from the
        SetupExpectedCoefficients class), and all derive from the same
        underlying ensemble realizations, so applying them is expected to
        give similar results."""
        super().setUp()
        cycle = "20171110T0000Z"
        realization_kwargs = {
            "desired_units": "Celsius",
            "predictor_of_mean_flag": "realizations",
        }

        # Ensemble mean as the predictor.
        mean_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, desired_units="Celsius")
        self.coeffs_from_mean = mean_estimator.create_coefficients_cube(
            self.expected_mean_predictor_gaussian,
            self.current_temperature_forecast_cube)

        # Ensemble realizations as the predictor, coefficients estimated
        # using statsmodels.
        statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, **realization_kwargs)
        self.coeffs_from_statsmodels_realizations = (
            statsmodels_estimator.create_coefficients_cube(
                self.expected_realizations_gaussian_statsmodels,
                self.current_temperature_forecast_cube))

        # Ensemble realizations as the predictor, coefficients estimated
        # without using statsmodels.
        no_statsmodels_estimator = EstimateCoefficientsForEnsembleCalibration(
            "gaussian", cycle, **realization_kwargs)
        self.coeffs_from_no_statsmodels_realizations = (
            no_statsmodels_estimator.create_coefficients_cube(
                self.expected_realizations_gaussian_no_statsmodels,
                self.current_temperature_forecast_cube))

        # Expected location and scale parameter arrays used by various
        # tests.
        self.expected_loc_param_mean = np.array(
            [
                [273.7854, 274.6913, 275.4461],
                [276.8652, 277.6502, 278.405],
                [279.492, 280.1562, 280.9715],
            ],
            dtype=np.float32)
        self.expected_scale_param_mean = np.array(
            [
                [0.1952, 0.1974, 0.0117],
                [0.0226, 0.0197, 0.0117],
                [0.0532, 0.0029, 0.0007],
            ],
            dtype=np.float32)
        self.expected_loc_param_statsmodels_realizations = np.array(
            [
                [274.1395, 275.0975, 275.258],
                [276.9771, 277.3487, 278.3144],
                [280.0085, 280.2506, 281.1632],
            ],
            dtype=np.float32)
        self.expected_loc_param_no_statsmodels_realizations = np.array(
            [
                [273.4695, 274.4673, 275.3034],
                [276.8648, 277.733, 278.5632],
                [279.7562, 280.4913, 281.3889],
            ],
            dtype=np.float32)

        # Output cubes wrapping the expected mean-predictor arrays.
        self.expected_loc_param_mean_cube = set_up_variable_cube(
            self.expected_loc_param_mean,
            name="location_parameter",
            units="K",
            attributes=MANDATORY_ATTRIBUTE_DEFAULTS)
        self.expected_scale_param_mean_cube = set_up_variable_cube(
            self.expected_scale_param_mean,
            name="scale_parameter",
            units="Kelvin^2",
            attributes=MANDATORY_ATTRIBUTE_DEFAULTS)
예제 #23
0
def process(*cubes: cli.inputcube,
            distribution,
            truth_attribute,
            cycletime,
            units=None,
            predictor_of_mean='mean',
            tolerance: float = 0.01,
            max_iterations: int = 1000):
    """Estimate coefficients for Ensemble Model Output Statistics.

    Loads in arguments for estimating coefficients for Ensemble Model
    Output Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). Two sources of input data must be provided: historical
    forecasts and historical truth data (to use in calibration).
    The estimated coefficients are output as a cube.

    Args:
        cubes (list of iris.cube.Cube):
            A list of cubes containing the historical forecasts and
            corresponding truth used for calibration. They must have the same
            cube name and will be separated based on the truth attribute.
            Optionally this may also contain a single land-sea mask cube on the
            same domain as the historic forecasts and truth (where land points
            are set to one and sea points are set to zero).
        distribution (str):
            The distribution that will be used for calibration. This will be
            dependent upon the input phenomenon.
        truth_attribute (str):
            An attribute and its value in the format of "attribute=value",
            which must be present on historical truth cubes. The string is
            split on the first "=" only, so the value itself may contain
            "=" characters.
        cycletime (str):
            This denotes the cycle at which forecasts will be calibrated using
            the calculated EMOS coefficients. The validity time in the output
            coefficients cube will be calculated relative to this cycletime.
            This cycletime is in the format YYYYMMDDTHHMMZ.
        units (str):
            The units that calibration should be undertaken in. The historical
            forecast and truth will be converted as required.
        predictor_of_mean (str):
            String to specify the input to calculate the calibrated mean.
            Currently the ensemble mean ("mean") and the ensemble realizations
            ("realizations") are supported as the predictors.
        tolerance (float):
            The tolerance for the Continuous Ranked Probability Score (CRPS)
            calculated by the minimisation. Once multiple iterations result in
            a CRPS equal to the same value within the specified tolerance, the
            minimisation will terminate.
        max_iterations (int):
            The maximum number of iterations allowed until the minimisation has
            converged to a stable solution. If the maximum number of iterations
            is reached but the minimisation has not yet converged to a stable
            solution, then the available solution is used anyway, and a warning
            is raised.
            If the predictor_of_mean is "realizations", then the number of
            iterations may require increasing, as there will be more
            coefficients to solve.

    Returns:
        iris.cube.Cube:
            Cube containing the coefficients estimated using EMOS. The cube
            contains a coefficient_index dimension coordinate and a
            coefficient_name auxiliary coordinate.

    Raises:
        RuntimeError:
            An unexpected number of distinct cube names were passed in.
        RuntimeError:
            More than one cube was identified as a land-sea mask.
        RuntimeError:
            Missing truth or historical forecast in input cubes.
        ValueError:
            The truth_attribute is not in the format "attribute=value".

    """

    from improver.utilities.cube_manipulation import MergeCubes
    from improver.ensemble_calibration.ensemble_calibration import (
        EstimateCoefficientsForEnsembleCalibration)

    # Group the inputs by cube name, preserving the order of first
    # appearance.
    cubes_by_name = {}
    for cube in cubes:
        cubes_by_name.setdefault(cube.name(), []).append(cube)

    if len(cubes_by_name) == 1:
        # Only one group - all forecast/truth cubes
        land_sea_mask = None
        diag_name = next(iter(cubes_by_name))
    elif len(cubes_by_name) == 2:
        # Two groups - the one with exactly one cube matching a name should
        # be the land_sea_mask, since we require more than 2 cubes in
        # the forecast/truth group. Sorting the names by group size puts
        # the land-sea mask name first (the sort is stable, so ties keep
        # the order of first appearance).
        landsea_name, diag_name = sorted(
            cubes_by_name, key=lambda name: len(cubes_by_name[name]))
        if len(cubes_by_name[landsea_name]) != 1:
            raise RuntimeError('Expected one cube for land-sea mask.')
        land_sea_mask = cubes_by_name[landsea_name][0]
    else:
        raise RuntimeError('Must have cubes with 1 or 2 distinct names.')

    # Split on the first "=" only so that attribute values containing "="
    # are supported, and report a malformed argument explicitly rather than
    # through a tuple-unpacking error.
    truth_key, separator, truth_value = truth_attribute.partition('=')
    if not separator:
        raise ValueError(
            'The truth_attribute must be in the format "attribute=value". '
            'Received: ' + repr(truth_attribute))

    # split non-land_sea_mask cubes on forecast vs truth
    split_cubes = {'truth': [], 'historical forecast': []}
    for cube in cubes_by_name[diag_name]:
        if cube.attributes.get(truth_key) == truth_value:
            split_cubes['truth'].append(cube)
        else:
            split_cubes['historical forecast'].append(cube)

    # Name every category for which no cubes were found.
    missing_inputs = ' and '.join(k for k, v in split_cubes.items() if not v)
    if missing_inputs:
        raise RuntimeError('Missing ' + missing_inputs + ' input.')

    truth = MergeCubes()(split_cubes['truth'])
    forecast = MergeCubes()(split_cubes['historical forecast'])

    return EstimateCoefficientsForEnsembleCalibration(
        distribution, cycletime, desired_units=units,
        predictor_of_mean_flag=predictor_of_mean,
        tolerance=tolerance, max_iterations=max_iterations).process(
            forecast, truth, landsea_mask=land_sea_mask)
예제 #24
0
def process(historic_forecast, truth, combined, historic_forecast_dict,
            truth_dict, distribution, cycletime, units=None,
            predictor_of_mean='mean', max_iterations=1000):
    """Estimate coefficients for Ensemble Model Output Statistics (EMOS).

    EMOS is otherwise known as Non-homogeneous Gaussian Regression (NGR).
    The training data can be supplied in one of two mutually exclusive
    ways: either as a separate historic forecast and truth, or as a
    combined input together with the two metadata dictionaries needed to
    split it into historic forecasts and truths. The estimated
    coefficients are returned as a cube.

    Args:
        historic_forecast (iris.cube.Cube):
            The cube containing the historical forecasts used for
            calibration.
        truth (iris.cube.Cube):
            The cube containing the truth used for calibration.
        combined (iris.cube.CubeList):
            A cubelist containing a combination of historic forecasts and
            associated truths.
        historic_forecast_dict (dict):
            Dictionary specifying the metadata that defines the historic
            forecast. For example:
            ::

                {
                    "attributes": {
                        "mosg__model_configuration": "uk_ens"
                    }
                }
        truth_dict (dict):
            Dictionary specifying the metadata that defines the truth.
            For example:
            ::

                {
                    "attributes": {
                        "mosg__model_configuration": "uk_det"
                    }
                }
        distribution (str):
            The distribution that will be used for calibration. This will
            be dependent upon the input phenomenon.
        cycletime (str):
            The cycle at which forecasts will be calibrated using the
            calculated EMOS coefficients. The validity time in the output
            coefficients cube will be calculated relative to this
            cycletime. The expected format is YYYYMMDDTHHMMZ.
        units (str):
            The units that calibration should be undertaken in. The
            historical forecast and truth will be converted as required.
            Default is None.
        predictor_of_mean (str):
            String to specify the input to calculate the calibrated mean.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the predictors.
            Default is 'mean'.
        max_iterations (int):
            The maximum number of iterations allowed until the
            minimisation has converged to a stable solution. If this
            limit is reached before convergence, the available solution
            is used anyway and a warning is raised. When
            predictor_of_mean is "realizations" there are more
            coefficients to solve for, so a larger value may be needed.
            Default is 1000.

    Returns:
        result (iris.cube.Cube or None):
            Cube containing the coefficients estimated using EMOS. The
            cube contains a coefficient_index dimension coordinate and a
            coefficient_name auxiliary coordinate. None is returned if
            splitting the combined input finds no historic forecasts or
            truths matching the supplied metadata.

    Raises:
        ValueError: If only one of historic_forecast and truth is
            supplied; they must be supplied together.
        ValueError: If historic_forecast and truth are both supplied
            along with any of combined, historic_forecast_dict or
            truth_dict.
        ValueError: If some, but not all, of combined,
            historic_forecast_dict and truth_dict are supplied.

    Warns:
        UserWarning: The metadata to identify the desired historic
            forecast or truth has found nothing matching the metadata
            information supplied.

    """
    # Inputs are judged as "supplied" by truthiness, so None and empty
    # containers both count as absent during validation.
    separate_inputs = (historic_forecast, truth)
    combined_inputs = (combined, historic_forecast_dict, truth_dict)

    if any(separate_inputs):
        # A forecast without a truth (or vice versa) cannot be calibrated.
        if not all(separate_inputs):
            raise ValueError(
                "Both the historic_filepath and truth_filepath arguments "
                "should be specified if one of these arguments are "
                "specified.")
        # The two ways of supplying training data are mutually exclusive.
        if any(combined_inputs):
            raise ValueError(
                "If the historic_filepath and truth_filepath arguments "
                "are specified then none of the the combined_filepath, "
                "historic_forecast_identifier and truth_identifier "
                "arguments should be specified.")

    # Splitting the combined input needs the cubelist and both metadata
    # dictionaries, so a partial set is rejected.
    if any(combined_inputs) and not all(combined_inputs):
        raise ValueError(
            "All of the combined_filepath, "
            "historic_forecast_identifier and truth_identifier "
            "arguments should be specified if one of the arguments are "
            "specified.")

    if combined is not None:
        try:
            splitter = SplitHistoricForecastAndTruth(
                historic_forecast_dict, truth_dict)
            historic_forecast, truth = splitter.process(combined)
        except ValueError as err:
            # Only the "nothing matched the metadata" failure is downgraded
            # to a warning; any other ValueError is a genuine error.
            reason = str(err)
            if not reason.startswith("The metadata to identify the desired"):
                raise
            warnings.warn(reason)
            return None

    estimator = EstimateCoefficientsForEnsembleCalibration(
        distribution, cycletime, desired_units=units,
        predictor_of_mean_flag=predictor_of_mean,
        max_iterations=max_iterations)
    return estimator.process(historic_forecast, truth)
예제 #25
0
def main(argv=None):
    """Command-line entry point for estimating EMOS coefficients.

    Parses the arguments for estimating coefficients for Ensemble Model
    Output Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). Two sources of input data must be provided:
    historical forecasts and historical truth data (to use in
    calibration). The estimated coefficients are written to a netCDF file.

    Args:
        argv (list of str or None):
            Command line arguments, handed to the parser as
            ``parse_args(args=argv)``. Presumably None makes the parser
            read sys.argv, as with the standard argparse module - confirm
            against ArgParser.
    """
    parser = ArgParser(
        description='Estimate coefficients for Ensemble Model Output '
                    'Statistics (EMOS), otherwise known as Non-homogeneous '
                    'Gaussian Regression (NGR)')
    parser.add_argument('distribution', metavar='DISTRIBUTION',
                        choices=['gaussian', 'truncated gaussian'],
                        help='The distribution that will be used for '
                             'calibration. This will be dependent upon the '
                             'input phenomenon. This has to be supported by '
                             'the minimisation functions in '
                             'ContinuousRankedProbabilityScoreMinimisers.')
    parser.add_argument('cycletime', metavar='CYCLETIME', type=str,
                        help='This denotes the cycle at which forecasts '
                             'will be calibrated using the calculated '
                             'EMOS coefficients. The validity time in the '
                             'output coefficients cube will be calculated '
                             'relative to this cycletime. '
                             'This cycletime is in the format '
                             'YYYYMMDDTHHMMZ.')
    # Filepaths for historic and truth data.
    parser.add_argument('historic_filepath', metavar='HISTORIC_FILEPATH',
                        help='A path to an input NetCDF file containing the '
                             'historic forecast(s) used for calibration.')
    parser.add_argument('truth_filepath', metavar='TRUTH_FILEPATH',
                        help='A path to an input NetCDF file containing the '
                             'historic truth analyses used for calibration.')
    parser.add_argument('output_filepath', metavar='OUTPUT_FILEPATH',
                        help='The output path for the processed NetCDF')
    # Optional arguments.
    parser.add_argument('--units', metavar='UNITS',
                        help='The units that calibration should be undertaken '
                             'in. The historical forecast and truth will be '
                             'converted as required.')
    parser.add_argument('--predictor_of_mean', metavar='PREDICTOR_OF_MEAN',
                        choices=['mean', 'realizations'], default='mean',
                        help='String to specify the predictor used to '
                             'calibrate the forecast mean. Currently the '
                             'ensemble mean ("mean") and the ensemble '
                             'realizations ("realizations") are supported as '
                             'options. Default: "mean".')
    # NOTE: the help text is built from adjacent string literals, so each
    # fragment must carry its own trailing space.
    parser.add_argument('--max_iterations', metavar='MAX_ITERATIONS',
                        type=np.int32, default=1000,
                        help='The maximum number of iterations allowed '
                             'until the minimisation has converged to a '
                             'stable solution. If the maximum number '
                             'of iterations is reached, but the '
                             'minimisation has not yet converged to a '
                             'stable solution, then the available solution '
                             'is used anyway, and a warning is raised. '
                             'This may be modified for testing purposes '
                             'but otherwise kept fixed. If the '
                             'predictor_of_mean is "realizations", '
                             'then the number of iterations may require '
                             'increasing, as there will be more coefficients '
                             'to solve for.')
    args = parser.parse_args(args=argv)

    # Load the training data from the two positional input paths.
    historic_forecast = load_cube(args.historic_filepath)
    truth = load_cube(args.truth_filepath)

    # Estimate coefficients using Ensemble Model Output Statistics (EMOS).
    estcoeffs = EstimateCoefficientsForEnsembleCalibration(
        args.distribution, args.cycletime, desired_units=args.units,
        predictor_of_mean_flag=args.predictor_of_mean,
        max_iterations=args.max_iterations)
    coefficients = (
        estcoeffs.process(historic_forecast, truth))

    save_netcdf(coefficients, args.output_filepath)