def test_check_data(self):
        """
        Check the percentile data generated from a mean and a variance.

        The mean and variance are obtained by collapsing the fixture cube
        over its realization coordinate. The plugin must return an
        iris.cube.Cube whose data match the expected values for the
        10th, 50th and 90th percentiles.
        """
        expected = np.array(
            [[[[225.568115, 236.818115, 248.068115],
               [259.318115, 270.568115, 281.818115],
               [293.068115, 304.318115, 315.568115]]],
             [[[229.483322, 240.733322, 251.983322],
               [263.233307, 274.483307, 285.733307],
               [296.983307, 308.233307, 319.483307]]],
             [[[233.398529, 244.648529, 255.898529],
               [267.148499, 278.398499, 289.648499],
               [300.898499, 312.148499, 323.398499]]]],
            dtype=np.float32)

        template = self.current_temperature_forecast_cube
        mean_cube = template.collapsed("realization", iris.analysis.MEAN)
        variance_cube = template.collapsed(
            "realization", iris.analysis.VARIANCE)

        requested = [10, 50, 90]
        result = Plugin()._mean_and_variance_to_percentiles(
            mean_cube, variance_cube, template, requested)

        self.assertIsInstance(result, Cube)
        self.assertArrayAlmostEqual(result.data, expected)
    def test_number_of_percentiles(self):
        """
        Check that asking for as many percentiles as there are realizations
        gives a cube with that many percentiles and the expected data.
        """
        template = self.current_temperature_forecast_cube
        mean_cube = template.collapsed("realization", iris.analysis.MEAN)
        variance_cube = template.collapsed(
            "realization", iris.analysis.VARIANCE)

        # The count is taken from an independent copy of the fixture cube,
        # so it cannot be affected by whatever the plugin does to its inputs.
        raw_forecast = template.copy()
        realization_count = len(raw_forecast.coord("realization").points)

        expected = np.array(
            [[[[227.42273, 238.67273, 249.92273],
               [261.1727, 272.4227, 283.6727],
               [294.9227, 306.1727, 317.4227]]],
             [[[229.48332, 240.73332, 251.98332],
               [263.2333, 274.4833, 285.7333],
               [296.9833, 308.2333, 319.4833]]],
             [[[231.54391, 242.79391, 254.04391],
               [265.2939, 276.5439, 287.7939],
               [299.0439, 310.2939, 321.5439]]]])

        result = Plugin().process(
            mean_cube, variance_cube, template,
            no_of_percentiles=realization_count)

        self.assertEqual(
            len(raw_forecast.coord("realization").points),
            len(result.coord("percentile").points))
        self.assertArrayAlmostEqual(expected, result.data, decimal=4)
    def test_list_of_percentiles(self):
        """
        Check that an explicit list of percentiles is honoured: the result
        carries exactly those percentile points and the expected data.
        """
        template = self.current_temperature_forecast_cube
        mean_cube = template.collapsed("realization", iris.analysis.MEAN)
        variance_cube = template.collapsed(
            "realization", iris.analysis.VARIANCE)

        requested = [10, 50, 90]
        expected = np.array(
            [[[[225.56812, 236.81812, 248.06812],
               [259.3181, 270.5681, 281.8181],
               [293.0681, 304.3181, 315.5681]]],
             [[[229.48332, 240.73332, 251.98332],
               [263.2333, 274.4833, 285.7333],
               [296.9833, 308.2333, 319.4833]]],
             [[[233.39853, 244.64853, 255.89853],
               [267.1485, 278.3985, 289.6485],
               [300.8985, 312.1485, 323.3985]]]])

        result = Plugin().process(
            mean_cube, variance_cube, template, percentiles=requested)

        self.assertEqual(
            len(requested), len(result.coord("percentile").points))
        self.assertArrayAlmostEqual(
            requested, result.coord("percentile").points)
        self.assertArrayAlmostEqual(expected, result.data, decimal=4)
    def test_if_nearly_identical_data(self):
        """
        Check the percentiles when almost every grid point has identical
        values across all realizations.

        Tied values are a problem because the underlying scipy function
        yields NaN for them by default. For this application any NaN is
        overwritten with the predicted mean for all probability thresholds,
        so only the single point that differs between realizations is
        expected to vary across the percentiles.
        """
        tied_input = np.array(
            [[[[1., 1., 1.], [4., 2., 2.], [3., 3., 3.]]],
             [[[1., 1., 1.], [2., 2., 2.], [3., 3., 3.]]],
             [[[1., 1., 1.], [2., 2., 2.], [3., 3., 3.]]]])

        expected = np.array(
            [[[[1., 1., 1.], [1.18685838, 2., 2.], [3., 3., 3.]]],
             [[[1., 1., 1.], [2.66666667, 2., 2.], [3., 3., 3.]]],
             [[[1., 1., 1.], [4.14647495, 2., 2.], [3., 3., 3.]]]])

        template = self.current_temperature_forecast_cube
        template.data = tied_input
        mean_cube = template.collapsed("realization", iris.analysis.MEAN)
        variance_cube = template.collapsed(
            "realization", iris.analysis.VARIANCE)

        requested = [10, 50, 90]
        result = Plugin()._mean_and_variance_to_percentiles(
            mean_cube, variance_cube, template, requested)

        self.assertArrayAlmostEqual(result.data, expected)
    def test_simple_data_truncnorm_distribution(self):
        """
        Check the percentiles generated with a truncated normal distribution.

        Each of the three realizations is a constant field (1, 2 and 3), so
        every grid point shares the same mean and variance and each expected
        percentile field is constant as well. The result must be an
        iris.cube.Cube.
        """
        field_shape = (1, 3, 3)
        # One constant integer field per realization -> shape (3, 1, 3, 3).
        realization_values = np.stack(
            [np.full(field_shape, value) for value in (1, 2, 3)])
        # One constant field per requested percentile -> shape (3, 1, 3, 3).
        expected = np.stack(
            [np.full(field_shape, value)
             for value in (0.827385, 2.028517, 3.2946239)])

        template = self.current_temperature_forecast_cube
        template.data = realization_values
        mean_cube = template.collapsed("realization", iris.analysis.MEAN)
        variance_cube = template.collapsed(
            "realization", iris.analysis.VARIANCE)

        requested = [10, 50, 90]
        truncnorm_plugin = Plugin(
            distribution="truncnorm", shape_parameters=[0, np.inf])
        result = truncnorm_plugin._mean_and_variance_to_percentiles(
            mean_cube, variance_cube, template, requested)

        self.assertIsInstance(result, Cube)
        self.assertArrayAlmostEqual(result.data, expected)
    def test_simple_data(self):
        """
        Check the percentiles generated from idealised data.

        Each of the three realizations is a constant field (1, 2 and 3), so
        every grid point shares the same mean and variance and each expected
        percentile field is constant as well.
        """
        field_shape = (1, 3, 3)
        # One constant integer field per realization -> shape (3, 1, 3, 3).
        realization_values = np.stack(
            [np.full(field_shape, value) for value in (1, 2, 3)])
        # One constant field per requested percentile -> shape (3, 1, 3, 3).
        expected = np.stack(
            [np.full(field_shape, value)
             for value in (0.71844843, 2., 3.28155157)])

        template = self.current_temperature_forecast_cube
        template.data = realization_values
        mean_cube = template.collapsed("realization", iris.analysis.MEAN)
        variance_cube = template.collapsed(
            "realization", iris.analysis.VARIANCE)

        requested = [10, 50, 90]
        result = Plugin()._mean_and_variance_to_percentiles(
            mean_cube, variance_cube, template, requested)

        self.assertArrayAlmostEqual(result.data, expected)
 def test_many_percentiles(self):
     """
     Check that an iris.cube.Cube is still returned when a large number
     of percentiles (1000 values evenly spaced from 1 to 99) is requested.
     """
     source = self.current_temperature_forecast_cube
     mean_cube = source.collapsed("realization", iris.analysis.MEAN)
     variance_cube = source.collapsed("realization", iris.analysis.VARIANCE)

     requested = np.linspace(1, 99, num=1000, endpoint=True)
     result = Plugin()._mean_and_variance_to_percentiles(
         mean_cube, variance_cube, requested)

     self.assertIsInstance(result, Cube)
    def test_basic(self):
        """Check that processing a mean/variance CubeList returns a Cube."""
        source = self.current_temperature_forecast_cube
        mean_cube = source.collapsed("realization", iris.analysis.MEAN)
        variance_cube = source.collapsed(
            "realization", iris.analysis.VARIANCE)

        # Request one percentile per realization in the raw forecast.
        raw_forecast = source.copy()
        realization_count = len(raw_forecast.coord("realization").points)

        result = Plugin().process(
            CubeList([mean_cube, variance_cube]), realization_count)

        self.assertIsInstance(result, Cube)
 def test_negative_percentiles(self):
     """
     Check that a ValueError reporting NaNs in the result is raised when
     a negative percentile is included in the request.
     """
     template = self.current_temperature_forecast_cube
     mean_cube = template.collapsed("realization", iris.analysis.MEAN)
     variance_cube = template.collapsed(
         "realization", iris.analysis.VARIANCE)

     requested = [-10, 10]
     plugin = Plugin()
     expected_message = "NaNs are present within the result for the"
     with self.assertRaisesRegex(ValueError, expected_message):
         plugin._mean_and_variance_to_percentiles(
             mean_cube, variance_cube, template, requested)
    def test_number_of_percentiles(self):
        """
        Check that the number of points on the result's
        "percentile_over_realization" coordinate equals the number of
        realizations in the raw forecast.
        """
        source = self.current_temperature_forecast_cube
        mean_cube = source.collapsed("realization", iris.analysis.MEAN)
        variance_cube = source.collapsed(
            "realization", iris.analysis.VARIANCE)

        # Request one percentile per realization in the raw forecast.
        raw_forecast = source.copy()
        realization_count = len(raw_forecast.coord("realization").points)

        result = Plugin().process(
            CubeList([mean_cube, variance_cube]), realization_count)

        self.assertEqual(
            len(raw_forecast.coord("realization").points),
            len(result.coord("percentile_over_realization").points))
예제 #11
0
    def test_multiple_keyword_arguments_error(self):
        """
        Check that a ValueError is raised when the no_of_percentiles and
        percentiles keyword arguments are supplied together.
        """
        source = self.current_temperature_forecast_cube
        mean_cube = source.collapsed("realization", iris.analysis.MEAN)
        variance_cube = source.collapsed(
            "realization", iris.analysis.VARIANCE)

        raw_forecast = source.copy()
        realization_count = len(raw_forecast.coord("realization").points)
        requested = [10, 25, 50, 75, 90]

        plugin = Plugin()
        expected_message = (
            "Please specify either the number of percentiles or")
        with self.assertRaisesRegex(ValueError, expected_message):
            plugin.process(
                mean_cube, variance_cube,
                no_of_percentiles=realization_count,
                percentiles=requested)
def main(argv=None):
    """Apply EMOS coefficients to a forecast and save the calibrated result.

    Parses the command line arguments for applying coefficients for
    Ensemble Model Output Statistics (EMOS), otherwise known as
    Non-homogeneous Gaussian Regression (NGR). The coefficients are applied
    to the supplied forecast so as to calibrate it. Forecasts supplied as
    probabilities or percentiles are first converted to realizations, and
    the calibrated mean and variance are converted back to the input format
    before being written to a netCDF file.

    Args:
        argv (list or None):
            Command line arguments, passed unchanged to
            ``parser.parse_args(args=argv)``.

    Raises:
        ValueError:
            If the forecast is supplied as probabilities or percentiles and
            no ``--num_realizations`` value is given.
    """
    parser = ArgParser(
        description='Apply coefficients for Ensemble Model Output '
        'Statistics (EMOS), otherwise known as Non-homogeneous '
        'Gaussian Regression (NGR). The supported input formats '
        'are realizations, probabilities and percentiles. '
        'The forecast will be converted to realizations before '
        'applying the coefficients and then converted back to '
        'match the input format.')
    # Filepaths for the forecast, EMOS coefficients and the output.
    parser.add_argument(
        'forecast_filepath',
        metavar='FORECAST_FILEPATH',
        help='A path to an input NetCDF file containing the forecast to be '
        'calibrated. The input format could be either realizations, '
        'probabilities or percentiles.')
    parser.add_argument('coefficients_filepath',
                        metavar='COEFFICIENTS_FILEPATH',
                        help='A path to an input NetCDF file containing the '
                        'coefficients used for calibration.')
    parser.add_argument('output_filepath',
                        metavar='OUTPUT_FILEPATH',
                        help='The output path for the processed NetCDF')
    # Optional arguments.
    parser.add_argument(
        '--num_realizations',
        metavar='NUMBER_OF_REALIZATIONS',
        default=None,
        type=np.int32,
        help='Optional argument to specify the number of '
        'ensemble realizations to produce. '
        'If the current forecast is input as probabilities or '
        'percentiles then this argument is used to create the requested '
        'number of realizations. In addition, this argument is used to '
        'construct the requested number of realizations from the mean '
        'and variance output after applying the EMOS coefficients. '
        'Default will be the number of realizations in the raw input '
        'file, if realizations are provided as input, otherwise if the '
        'input format is probabilities or percentiles, then an error '
        'will be raised if no value is provided.')
    parser.add_argument(
        '--random_ordering',
        default=False,
        action='store_true',
        help='Option to reorder the post-processed forecasts randomly. If not '
        'set, the ordering of the raw ensemble is used. This option is '
        'only valid when the input format is realizations.')
    # Without an explicit type the seed would reach EnsembleReordering as a
    # string; parse it as an integer so it is usable as a random seed.
    parser.add_argument(
        '--random_seed',
        metavar='RANDOM_SEED',
        default=None,
        type=int,
        help='Option to specify a value for the random seed for testing '
        'purposes, otherwise, the default random seed behaviour is '
        'utilised. The random seed is used in the generation of the '
        'random numbers used for either the random_ordering option to '
        'order the input percentiles randomly, rather than use the '
        'ordering from the raw ensemble, or for splitting tied values '
        'within the raw ensemble, so that the values from the input '
        'percentiles can be ordered to match the raw ensemble.')
    parser.add_argument(
        '--ecc_bounds_warning',
        default=False,
        action='store_true',
        help='If True, where the percentiles exceed the ECC bounds range, '
        'raise a warning rather than an exception. This occurs when the '
        'current forecast is in the form of probabilities and is '
        'converted to percentiles, as part of converting the input '
        'probabilities into realizations.')
    parser.add_argument(
        '--predictor_of_mean',
        metavar='PREDICTOR_OF_MEAN',
        choices=['mean', 'realizations'],
        default='mean',
        help='String to specify the predictor used to calibrate the forecast '
        'mean. Currently the ensemble mean ("mean") and the ensemble '
        'realizations ("realizations") are supported as options. '
        'Default: "mean".')

    args = parser.parse_args(args=argv)

    current_forecast = load_cube(args.forecast_filepath)
    coeffs = load_cube(args.coefficients_filepath)

    # Keep the forecast in its input format; it is needed later to convert
    # the calibrated output back to probabilities or percentiles.
    original_current_forecast = current_forecast.copy()

    msg = ("The current forecast has been provided as {0}. "
           "These {0} need to be converted to realizations "
           "for ensemble calibration. The args.num_realizations "
           "argument is used to define the number of realizations "
           "to construct from the input {0}, so if the "
           "current forecast is provided as {0} then "
           "args.num_realizations must be defined.")

    # A forecast without a percentile coordinate is treated as realizations
    # unless its name marks it as probabilities (checked below).
    try:
        find_percentile_coordinate(current_forecast)
        input_forecast_type = "percentiles"
    except CoordinateNotFoundError:
        input_forecast_type = "realizations"

    if current_forecast.name().startswith("probability_of"):
        input_forecast_type = "probabilities"
        # If probabilities, convert to percentiles.
        conversion_plugin = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=args.ecc_bounds_warning)
    elif input_forecast_type == "percentiles":
        # If percentiles, resample percentiles so that the percentiles are
        # evenly spaced.
        conversion_plugin = ResamplePercentiles(
            ecc_bounds_warning=args.ecc_bounds_warning)

    # If percentiles, resample percentiles and then rebadge.
    # If probabilities, generate percentiles and then rebadge.
    if input_forecast_type in ["percentiles", "probabilities"]:
        if not args.num_realizations:
            raise ValueError(msg.format(input_forecast_type))
        current_forecast = conversion_plugin.process(
            current_forecast, no_of_percentiles=args.num_realizations)
        current_forecast = (
            RebadgePercentilesAsRealizations().process(current_forecast))

    # Default number of ensemble realizations is the number in
    # the raw forecast.
    if not args.num_realizations:
        args.num_realizations = len(
            current_forecast.coord('realization').points)

    # Apply coefficients as part of Ensemble Model Output Statistics (EMOS).
    ac = ApplyCoefficientsFromEnsembleCalibration(
        current_forecast,
        coeffs,
        predictor_of_mean_flag=args.predictor_of_mean)
    calibrated_predictor, calibrated_variance = ac.process()

    # If input forecast is probabilities, convert output into probabilities.
    # If input forecast is percentiles, convert output into percentiles.
    # If input forecast is realizations, convert output into realizations.
    if input_forecast_type == "probabilities":
        result = GenerateProbabilitiesFromMeanAndVariance().process(
            calibrated_predictor, calibrated_variance,
            original_current_forecast)
    elif input_forecast_type == "percentiles":
        # Reproduce the percentile values that were present on the input.
        perc_coord = find_percentile_coordinate(original_current_forecast)
        result = GeneratePercentilesFromMeanAndVariance().process(
            calibrated_predictor,
            calibrated_variance,
            percentiles=perc_coord.points)
    elif input_forecast_type == "realizations":
        # Ensemble Copula Coupling to generate realizations
        # from mean and variance.
        percentiles = GeneratePercentilesFromMeanAndVariance().process(
            calibrated_predictor,
            calibrated_variance,
            no_of_percentiles=args.num_realizations)
        result = EnsembleReordering().process(
            percentiles,
            current_forecast,
            random_ordering=args.random_ordering,
            random_seed=args.random_seed)
    save_netcdf(result, args.output_filepath)
예제 #13
0
def process(current_forecast, coeffs, landsea_mask, num_realizations=None,
            random_ordering=False, random_seed=None,
            ecc_bounds_warning=False, predictor_of_mean='mean'):
    """Calibrate a forecast using EMOS coefficients.

    Applies coefficients for Ensemble Model Output Statistics (EMOS),
    otherwise known as Non-homogeneous Gaussian Regression (NGR), to the
    supplied forecast. Forecasts supplied as probabilities or percentiles
    are converted to realizations before calibration, and the calibrated
    mean and variance are converted back to the format of the input. If no
    coefficients are provided, a warning is issued and the input forecast is
    returned unchanged.

    Args:
        current_forecast (iris.cube.Cube):
            The forecast to be calibrated, as realizations, probabilities
            or percentiles.
        coeffs (iris.cube.Cube or None):
            The coefficients used for calibration. If None, the
            current_forecast is returned unchanged.
        landsea_mask (iris.cube.Cube or None):
            A land-sea mask on the same domain as the forecast that is to
            be calibrated. Land points are specified by ones and sea points
            are specified by zeros. If not None, this argument enables
            land-only calibration, in which sea points are returned without
            the application of calibration.
        num_realizations (numpy.int32):
            Number of ensemble realizations to produce. Used to create
            realizations from an input of probabilities or percentiles, and
            to construct realizations from the calibrated mean and
            variance. If not set, the number of realizations in the
            forecast is used.
            Default is None.
        random_ordering (bool):
            Option to reorder the post-processed forecasts randomly. If not
            set, the ordering of the raw ensemble is used. This option is
            only valid when the input format is realizations.
            Default is False.
        random_seed (int):
            Value for the random seed, intended for testing; otherwise the
            default random seed behaviour is used. The seed affects the
            random numbers used either for the random_ordering option or
            for splitting tied values within the raw ensemble, so that the
            values from the input percentiles can be ordered to match the
            raw ensemble.
            Default is None.
        ecc_bounds_warning (bool):
            If True, where the percentiles exceed the ECC bounds range, a
            warning is raised rather than an exception. This occurs when
            the current forecast is in the form of probabilities and is
            converted to percentiles, as part of converting the input
            probabilities into realizations.
            Default is False.
        predictor_of_mean (str):
            The predictor used to calibrate the forecast mean, e.g. the
            ensemble mean ("mean") or the ensemble realizations
            ("realizations").
            Default is 'mean'.

    Returns:
        iris.cube.Cube:
            The calibrated forecast cube, or the unmodified input forecast
            if coeffs is None.

    Raises:
        ValueError:
            If the coefficients cube is not named "emos_coefficients".
        ValueError:
            If the current forecast is a coefficients cube.
        ValueError:
            If the forecast type is 'percentiles' or 'probabilities' while
            no num_realizations are given.

    """
    # Nothing to calibrate with: warn and hand the forecast straight back.
    if coeffs is None:
        warnings.warn(
            "There are no coefficients provided for calibration. The "
            "uncalibrated forecast will be returned.")
        return current_forecast

    if coeffs.name() != 'emos_coefficients':
        raise ValueError(
            "The current coefficients cube does not have the "
            "name 'emos_coefficients'")

    if current_forecast.name() == 'emos_coefficients':
        raise ValueError(
            "The current forecast cube has the name 'emos_coefficients'")

    # Keep the forecast in its input format; it is needed later to convert
    # the calibrated output back to probabilities or percentiles.
    forecast_as_supplied = current_forecast.copy()

    # A forecast without a percentile coordinate is treated as realizations
    # unless its name marks it as probabilities.
    try:
        find_percentile_coordinate(current_forecast)
    except CoordinateNotFoundError:
        input_forecast_type = "realizations"
    else:
        input_forecast_type = "percentiles"

    if current_forecast.name().startswith("probability_of"):
        input_forecast_type = "probabilities"

    if input_forecast_type != "realizations":
        if input_forecast_type == "probabilities":
            # Probabilities are converted to percentiles.
            to_percentiles = GeneratePercentilesFromProbabilities(
                ecc_bounds_warning=ecc_bounds_warning)
        else:
            # Percentiles are resampled so that they are evenly spaced.
            to_percentiles = ResamplePercentiles(
                ecc_bounds_warning=ecc_bounds_warning)

        if not num_realizations:
            raise ValueError(
                "The current forecast has been provided as {0}. "
                "These {0} need to be converted to realizations "
                "for ensemble calibration. The num_realizations "
                "argument is used to define the number of realizations "
                "to construct from the input {0}, so if the "
                "current forecast is provided as {0} then "
                "num_realizations must be defined.".format(
                    input_forecast_type))

        # Generate the requested number of percentiles and re-badge them
        # as realizations.
        evenly_spaced = to_percentiles.process(
            current_forecast, no_of_percentiles=num_realizations)
        current_forecast = RebadgePercentilesAsRealizations().process(
            evenly_spaced)

    # Default number of ensemble realizations is the number in
    # the raw forecast.
    if not num_realizations:
        num_realizations = len(current_forecast.coord('realization').points)

    # Apply coefficients as part of Ensemble Model Output Statistics (EMOS).
    calibration = ApplyCoefficientsFromEnsembleCalibration(
        predictor_of_mean_flag=predictor_of_mean)
    calibrated_predictor, calibrated_variance = calibration.process(
        current_forecast, coeffs, landsea_mask=landsea_mask)

    # Convert the calibrated mean and variance back to the input format.
    if input_forecast_type == "probabilities":
        return GenerateProbabilitiesFromMeanAndVariance().process(
            calibrated_predictor, calibrated_variance, forecast_as_supplied)

    if input_forecast_type == "percentiles":
        # Reproduce the percentile values that were present on the input.
        input_percentiles = find_percentile_coordinate(forecast_as_supplied)
        return GeneratePercentilesFromMeanAndVariance().process(
            calibrated_predictor, calibrated_variance,
            percentiles=input_percentiles.points)

    # Realizations: Ensemble Copula Coupling to generate realizations
    # from mean and variance.
    calibrated_percentiles = GeneratePercentilesFromMeanAndVariance().process(
        calibrated_predictor, calibrated_variance,
        no_of_percentiles=num_realizations)
    return EnsembleReordering().process(
        calibrated_percentiles, current_forecast,
        random_ordering=random_ordering, random_seed=random_seed)
def main(argv=None):
    """Calibrate the current forecast using the EnsembleCalibration plugin.

    The command line is parsed, the current forecast, the historic
    forecasts and the truth are loaded, and the current forecast is
    converted into realizations if it was supplied as percentiles or
    probabilities. EnsembleCalibration is then used to compute the
    calibrated mean and variance, which are optionally saved. Finally,
    the mean and variance are converted back into the form of the input
    forecast (probabilities, percentiles or realizations) and the result
    is saved to the requested output path.

    Args:
        argv (list of str or None):
            Command line arguments that are handed to the parser's
            parse_args method. Default is None.

    Raises:
        ValueError:
            If the current forecast is in the form of percentiles or
            probabilities and no value has been provided for
            --num_realizations.
    """
    # NOTE: adjacent string literals are concatenated without any
    # separator, so every fragment that ends a sentence needs its own
    # trailing space.
    parser = ArgParser(
        description='Apply the requested ensemble calibration method using '
        'the current forecast (to be calibrated) in the form of '
        'realizations, probabilities, or percentiles, historical '
        'forecasts in the form of realizations and historical truth data '
        '(to use in calibration). The mean and variance output from the '
        'EnsembleCalibration plugin can be written to an output file '
        'if required. If the current forecast is supplied in the form of '
        'probabilities or percentiles, these are converted to realizations '
        'prior to calibration. After calibration, the mean and variance '
        'computed in the calibration are converted to match the format of the '
        'current forecast i.e. if realizations are input, realizations '
        'are output, if probabilities are input, probabilities are output, '
        'and if percentiles are input, percentiles are output. '
        'If realizations are input, realizations are regenerated using '
        'Ensemble Copula Coupling.')
    # Arguments for EnsembleCalibration
    parser.add_argument(
        'units',
        metavar='UNITS_TO_CALIBRATE_IN',
        help='The unit that calibration should be undertaken in. The current '
        'forecast, historical forecast and truth will be converted as '
        'required.')
    parser.add_argument(
        'distribution',
        metavar='DISTRIBUTION',
        choices=['gaussian', 'truncated gaussian'],
        help='The distribution that will be used for calibration. This will '
        'be dependent upon the input phenomenon. This has to be '
        'supported by the minimisation functions in '
        'ContinuousRankedProbabilityScoreMinimisers.')
    # Filepaths for current, historic and truth data.
    parser.add_argument(
        'input_filepath',
        metavar='INPUT_FILE',
        help='A path to an input NetCDF file containing the current forecast '
        'to be processed. The file provided could be in the form of '
        'realizations, probabilities or percentiles.')
    parser.add_argument(
        'historic_filepath',
        metavar='HISTORIC_DATA_FILE',
        help='A path to an input NetCDF file containing the historic '
        'forecast(s) used for calibration. The file provided must be in '
        'the form of realizations.')
    parser.add_argument(
        'truth_filepath',
        metavar='TRUTH_DATA_FILE',
        help='A path to an input NetCDF file containing the historic truth '
        'analyses used for calibration.')
    parser.add_argument('output_filepath',
                        metavar='OUTPUT_FILE',
                        help='The output path for the processed NetCDF')
    # Optional arguments.
    parser.add_argument(
        '--predictor_of_mean',
        metavar='CALIBRATE_MEAN_FLAG',
        choices=['mean', 'realizations'],
        default='mean',
        help='String to specify the input to calculate the calibrated mean. '
        'Currently the ensemble mean ("mean") and the ensemble '
        'realizations ("realizations") are supported as the predictors. '
        'Default: "mean".')
    parser.add_argument(
        '--save_mean',
        metavar='MEAN_FILE',
        default=False,
        help='Option to save the mean output from EnsembleCalibration plugin. '
        'If used, a path to save the output to must be provided.')
    parser.add_argument(
        '--save_variance',
        metavar='VARIANCE_FILE',
        default=False,
        help='Option to save the variance output from EnsembleCalibration '
        'plugin. If used, a path to save the output to must be provided.')
    parser.add_argument(
        '--num_realizations',
        metavar='NUMBER_OF_REALIZATIONS',
        default=None,
        type=np.int32,
        help='Optional argument to specify the number of '
        'ensemble realizations to produce. '
        'If the current forecast is input as probabilities or '
        'percentiles then this argument is used to create the requested '
        'number of realizations. In addition, this argument is used to '
        'construct the requested number of realizations from the mean '
        'and variance output after applying the EMOS coefficients. '
        'Default will be the number of realizations in the raw input '
        'file, if realizations are provided as input, otherwise if the '
        'input format is probabilities or percentiles, then an error '
        'will be raised if no value is provided.')
    parser.add_argument(
        '--random_ordering',
        default=False,
        action='store_true',
        help='Option to reorder the post-processed forecasts randomly. If not '
        'set, the ordering of the raw ensemble is used. This option is '
        'only valid when the input format is realizations.')
    parser.add_argument(
        '--random_seed',
        metavar='RANDOM_SEED',
        default=None,
        help='Option to specify a value for the random seed for testing '
        'purposes, otherwise, the default random seed behaviour is '
        'utilised. The random seed is used in the generation of the '
        'random numbers used for either the random_ordering option to '
        'order the input percentiles randomly, rather than use the '
        'ordering from the raw ensemble, or for splitting tied values '
        'within the raw ensemble, so that the values from the input '
        'percentiles can be ordered to match the raw ensemble.')
    parser.add_argument(
        '--ecc_bounds_warning',
        default=False,
        action='store_true',
        help='If True, where the percentiles exceed the ECC bounds range, '
        'raise a warning rather than an exception. This occurs when the '
        'current forecast is in the form of probabilities and is '
        'converted to percentiles, as part of converting the input '
        'probabilities into realizations.')
    parser.add_argument(
        '--max_iterations',
        metavar='MAX_ITERATIONS',
        type=np.int32,
        default=1000,
        help='The maximum number of iterations allowed until the minimisation '
        'has converged to a stable solution. If the maximum number of '
        'iterations is reached, but the minimisation has not yet '
        'converged to a stable solution, then the available solution is '
        'used anyway, and a warning is raised. This may be modified for '
        'testing purposes but otherwise kept fixed. If the '
        'predictor_of_mean is "realizations", then the number of '
        'iterations may require increasing, as there will be more '
        'coefficients to solve for.')
    args = parser.parse_args(args=argv)

    current_forecast = load_cube(args.input_filepath)
    historic_forecast = load_cube(args.historic_filepath)
    truth = load_cube(args.truth_filepath)

    # Keep an untouched copy of the input, as current_forecast may be
    # replaced by a realization cube below, whilst the original form is
    # needed to construct the output.
    original_current_forecast = current_forecast.copy()

    msg = ("The current forecast has been provided as {0}. "
           "These {0} need to be converted to realizations "
           "for ensemble calibration. The args.num_realizations "
           "argument is used to define the number of realizations "
           "to construct from the input {0}, so if the "
           "current forecast is provided as {0} then "
           "args.num_realizations must be defined.")

    # A forecast with a percentile coordinate is treated as percentiles,
    # otherwise realizations are assumed, unless the cube name identifies
    # the forecast as probabilities (checked below).
    try:
        find_percentile_coordinate(current_forecast)
        input_forecast_type = "percentiles"
    except CoordinateNotFoundError:
        input_forecast_type = "realizations"

    if current_forecast.name().startswith("probability_of"):
        input_forecast_type = "probabilities"
        # If probabilities, convert to percentiles.
        conversion_plugin = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=args.ecc_bounds_warning)
    elif input_forecast_type == "percentiles":
        # If percentiles, resample percentiles so that the percentiles are
        # evenly spaced.
        conversion_plugin = ResamplePercentiles(
            ecc_bounds_warning=args.ecc_bounds_warning)

    # If percentiles, resample percentiles and then rebadge.
    # If probabilities, generate percentiles and then rebadge.
    if input_forecast_type in ["percentiles", "probabilities"]:
        # The truthiness test means that a value of zero is rejected in
        # the same way as a missing value.
        if not args.num_realizations:
            raise ValueError(msg.format(input_forecast_type))
        current_forecast = conversion_plugin.process(
            current_forecast, no_of_percentiles=args.num_realizations)
        current_forecast = (
            RebadgePercentilesAsRealizations().process(current_forecast))

    # Default number of ensemble realizations is the number in
    # the raw forecast.
    if not args.num_realizations:
        args.num_realizations = len(
            current_forecast.coord('realization').points)

    # Ensemble-Calibration to calculate the mean and variance.
    forecast_predictor, forecast_variance = EnsembleCalibration(
        args.distribution,
        args.units,
        predictor_of_mean_flag=args.predictor_of_mean,
        max_iterations=args.max_iterations).process(current_forecast,
                                                    historic_forecast, truth)

    # If required, save the mean and variance. Both options default to
    # False, so nothing is written unless a path has been supplied.
    if args.save_mean:
        save_netcdf(forecast_predictor, args.save_mean)
    if args.save_variance:
        save_netcdf(forecast_variance, args.save_variance)

    # If input forecast is probabilities, convert output into probabilities.
    # If input forecast is percentiles, convert output into percentiles.
    # If input forecast is realizations, convert output into realizations.
    if input_forecast_type == "probabilities":
        result = GenerateProbabilitiesFromMeanAndVariance().process(
            forecast_predictor, forecast_variance, original_current_forecast)
    elif input_forecast_type == "percentiles":
        # Reproduce the percentile values of the original input.
        perc_coord = find_percentile_coordinate(original_current_forecast)
        result = GeneratePercentilesFromMeanAndVariance().process(
            forecast_predictor,
            forecast_variance,
            percentiles=perc_coord.points)
    elif input_forecast_type == "realizations":
        # Ensemble Copula Coupling to generate realizations
        # from mean and variance.
        percentiles = GeneratePercentilesFromMeanAndVariance().process(
            forecast_predictor,
            forecast_variance,
            no_of_percentiles=args.num_realizations)
        result = EnsembleReordering().process(
            percentiles,
            current_forecast,
            random_ordering=args.random_ordering,
            random_seed=args.random_seed)
    save_netcdf(result, args.output_filepath)
예제 #15
0
def process(cube: cli.inputcube,
            coefficients: cli.inputcube = None,
            land_sea_mask: cli.inputcube = None,
            *,
            distribution,
            realizations_count: int = None,
            randomise=False,
            random_seed: int = None,
            ignore_ecc_bounds=False,
            predictor_of_mean='mean',
            shape_parameters: cli.comma_separated_list = None):
    """Apply coefficients for Ensemble Model Output Statistics.

    Calibrate the supplied forecast by applying coefficients for Ensemble
    Model Output Statistics (EMOS), otherwise known as Non-homogeneous
    Gaussian Regression (NGR). The calibrated forecast is returned in the
    same form as the input forecast (realizations, probabilities or
    percentiles). If no coefficients are provided, a warning is raised
    and the input forecast is returned unchanged.

    Args:
        cube (iris.cube.Cube):
            A Cube containing the forecast to be calibrated. The input format
            could be either realizations, probabilities or percentiles.
        coefficients (iris.cube.Cube):
            A cube containing the coefficients used for calibration or None.
            If None then the input is returned unchanged.
        land_sea_mask (iris.cube.Cube):
            A cube containing the land-sea mask on the same domain as the
            forecast that is to be calibrated. Land points are
            specified by ones and sea points are specified by zeros.
            If not None this argument will enable land-only calibration, in
            which sea points are returned without the application of
            calibration.
        distribution (str):
            The distribution for constructing realizations, percentiles or
            probabilities. This should typically match the distribution used
            for minimising the Continuous Ranked Probability Score when
            estimating the EMOS coefficients. The distributions available are
            those supported by :data:`scipy.stats`.
        realizations_count (int):
            Optional argument to specify the number of ensemble realizations
            to produce. If the current forecast is input as probabilities or
            percentiles then this argument is used to create the requested
            number of realizations. In addition, this argument is used to
            construct the requested number of realizations from the mean and
            variance output after applying the EMOS coefficients. If not
            set and the input is realizations, the number of realizations
            in the input forecast is used.
        randomise (bool):
            Option to reorder the post-processed forecasts randomly. If not
            set, the ordering of the raw ensemble is used. This option is
            only valid when the input format is realizations.
        random_seed (int):
            Option to specify a value for the random seed for testing
            purposes, otherwise the default random seed behaviour is utilised.
            The random seed is used in the generation of the random numbers
            used for either the randomise option to order the input
            percentiles randomly, rather than use the ordering from the raw
            ensemble, or for splitting tied values within the raw ensemble,
            so that the values from the input percentiles can be ordered to
            match the raw ensemble.
        ignore_ecc_bounds (bool):
            If True, where the percentiles exceed the ECC bounds range,
            raises a warning rather than an exception. This occurs when the
            current forecast is in the form of probabilities and is
            converted to percentiles, as part of converting the input
            probabilities into realizations.
        predictor_of_mean (str):
            String to specify the predictor used to calibrate the forecast
            mean. Currently the ensemble mean "mean" and the ensemble
            realization "realization" are supported as options.
        shape_parameters (list):
            The shape parameters required for defining the distribution
            specified by the distribution argument. The shape parameters
            should either be a number or 'inf' or '-inf' to represent
            infinity; each entry is converted to a numpy.float32. Further
            details about appropriate shape parameters are available in
            scipy.stats. For the truncated normal distribution with a lower
            bound of zero, as available when estimating EMOS coefficients,
            the appropriate shape parameters are 0 and inf.

    Returns:
        iris.cube.Cube:
            The calibrated forecast cube.

    Raises:
        ValueError:
            If the current forecast is a coefficients cube.
        ValueError:
            If the coefficients cube does not have the right name of
            "emos_coefficients".
        ValueError:
            If the forecast type is 'percentiles' or 'probabilities' while no
            realizations_count are given.

    """
    import warnings

    import numpy as np
    from iris.exceptions import CoordinateNotFoundError

    from improver.ensemble_calibration.ensemble_calibration import (
        ApplyCoefficientsFromEnsembleCalibration)
    from improver.ensemble_copula_coupling.ensemble_copula_coupling import (
        EnsembleReordering, GeneratePercentilesFromMeanAndVariance,
        GeneratePercentilesFromProbabilities,
        GenerateProbabilitiesFromMeanAndVariance,
        RebadgePercentilesAsRealizations, ResamplePercentiles)
    from improver.metadata.probabilistic import find_percentile_coordinate

    # Guard clauses: nothing to apply, or the inputs have been mixed up.
    if coefficients is None:
        warnings.warn(
            "There are no coefficients provided for calibration. The "
            "uncalibrated forecast will be returned.")
        return cube

    if coefficients.name() != 'emos_coefficients':
        raise ValueError(
            "The current coefficients cube does not have the "
            "name 'emos_coefficients'")

    if cube.name() == 'emos_coefficients':
        raise ValueError(
            "The current forecast cube has the name 'emos_coefficients'")

    # Untouched copy of the input, used as the template for the output.
    raw_forecast = cube.copy()

    # Work out the form of the input forecast. A probability cube name
    # takes precedence over the presence of a percentile coordinate.
    try:
        find_percentile_coordinate(cube)
    except CoordinateNotFoundError:
        forecast_type = "realizations"
    else:
        forecast_type = "percentiles"

    if cube.name().startswith("probability_of"):
        forecast_type = "probabilities"
        # Probabilities are converted into percentiles.
        to_percentiles = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=ignore_ecc_bounds)
    elif forecast_type == "percentiles":
        # Percentiles are resampled so that they are evenly spaced.
        to_percentiles = ResamplePercentiles(
            ecc_bounds_warning=ignore_ecc_bounds)
    else:
        # Realizations are used as they are.
        to_percentiles = None

    # Percentiles and probabilities are turned into percentiles that are
    # then re-badged as realizations.
    ensemble = cube
    if to_percentiles is not None:
        if not realizations_count:
            template = (
                "The current forecast has been provided as {0}. "
                "These {0} need to be converted to realizations "
                "for ensemble calibration. The realizations_count "
                "argument is used to define the number of realizations "
                "to construct from the input {0}, so if the "
                "current forecast is provided as {0} then "
                "realizations_count must be defined.")
            raise ValueError(template.format(forecast_type))
        evenly_spaced = to_percentiles.process(
            cube, no_of_percentiles=realizations_count)
        ensemble = RebadgePercentilesAsRealizations().process(evenly_spaced)

    # Fall back to the number of realizations in the raw forecast.
    if not realizations_count:
        realizations_count = len(ensemble.coord('realization').points)

    # Apply coefficients as part of Ensemble Model Output Statistics (EMOS).
    calibrated_predictor, calibrated_variance = (
        ApplyCoefficientsFromEnsembleCalibration(
            predictor=predictor_of_mean).process(
                ensemble, coefficients, landsea_mask=land_sea_mask))

    if shape_parameters:
        shape_parameters = [np.float32(value) for value in shape_parameters]

    # Convert the calibrated mean and variance back into the form of the
    # input forecast.
    if forecast_type == "probabilities":
        return GenerateProbabilitiesFromMeanAndVariance(
            distribution=distribution,
            shape_parameters=shape_parameters).process(
                calibrated_predictor, calibrated_variance, raw_forecast)

    if forecast_type == "percentiles":
        # Reproduce the percentile values of the original input.
        perc_coord = find_percentile_coordinate(raw_forecast)
        return GeneratePercentilesFromMeanAndVariance(
            distribution=distribution,
            shape_parameters=shape_parameters).process(
                calibrated_predictor, calibrated_variance, raw_forecast,
                percentiles=perc_coord.points)

    # Only realizations remain: Ensemble Copula Coupling is used to
    # generate realizations from the mean and variance.
    calibrated_percentiles = GeneratePercentilesFromMeanAndVariance(
        distribution=distribution,
        shape_parameters=shape_parameters).process(
            calibrated_predictor, calibrated_variance, raw_forecast,
            no_of_percentiles=realizations_count)
    return EnsembleReordering().process(
        calibrated_percentiles, ensemble,
        random_ordering=randomise, random_seed=random_seed)