Esempio n. 1
0
    def test_2d_cube(self):
        """
        Check the data returned by rank_ecc for a 2d input cube.

        The raw data is in descending order while the calibrated data is
        ascending, so the calibrated values are expected to come back in
        descending order.
        """
        template = self.cube[:, :, 0, 0].copy()

        raw_cube = template.copy()
        raw_cube.data = np.array([[3], [2], [1]])

        calibrated_cube = template.copy()
        calibrated_cube.data = np.array([[1], [2], [3]])

        expected = np.array([[3], [2], [1]])

        result = Plugin().rank_ecc(calibrated_cube, raw_cube)
        self.assertArrayAlmostEqual(result.data, expected)
Esempio n. 2
0
 def test_realization_for_greater_than(self):
     """
     Check the realization coordinate when there are more percentiles
     than realizations.

     The raw forecast is sliced to two entries along its leading
     dimension and its realization points are set to 12 and 13; the
     recycled result is expected to be a Cube with realization points
     12, 13 and 14.
     """
     expected_points = [12, 13, 14]
     raw_realizations = self.realization_cube[:2, :, :, :]
     raw_realizations.coord("realization").points = [12, 13]
     result = Plugin()._recycle_raw_ensemble_realizations(
         self.percentile_cube, raw_realizations, self.perc_coord)
     self.assertIsInstance(result, Cube)
     self.assertArrayAlmostEqual(
         expected_points, result.coord("realization").points)
Esempio n. 3
0
 def test_realization_for_greater_than(self):
     """
     Check recycling when there are more percentiles than realizations.

     Both the realization coordinate points and the data of the result
     are compared against expected values; the expected data repeats
     its first slice as the third slice.
     """
     first = [[4.0, 4.625, 5.25], [5.875, 6.5, 7.125], [7.75, 8.375, 9.0]]
     second = [[6.0, 6.625, 7.25], [7.875, 8.5, 9.125], [9.75, 10.375, 11.0]]
     expected_data = np.array([first, second, first])

     raw_realizations = self.realization_cube[:2, :, :]
     raw_realizations.coord("realization").points = [12, 13]

     plugin = Plugin()
     result = plugin._recycle_raw_ensemble_realizations(
         self.percentile_cube, raw_realizations, self.perc_coord)

     self.assertIsInstance(result, Cube)
     self.assertArrayEqual(result.coord("realization").points, [12, 13, 14])
     self.assertArrayAlmostEqual(result.data, expected_data)
Esempio n. 4
0
 def test_realization_for_less_than_check_data(self):
     """
     Check the data returned when there are fewer percentiles than
     members.

     The percentile cube is sliced to two entries along its leading
     dimension before being passed in with the full member cube.
     """
     expected = np.array([[
         [[4., 4.625, 5.25], [5.875, 6.5, 7.125], [7.75, 8.375, 9.]],
         [[6., 6.625, 7.25], [7.875, 8.5, 9.125], [9.75, 10.375, 11.]],
     ]])
     percentiles = self.percentile_cube[:2, :, :, :]
     result = Plugin()._recycle_raw_ensemble_members(
         percentiles, self.realization_cube, self.perc_coord)
     self.assertArrayAlmostEqual(expected, result.data)
    def test_ordered_data(self):
        """
        Check that rank_ecc returns the calibrated data unchanged when the
        raw and calibrated cubes hold the same, already ordered, array.
        """
        # The same array object is deliberately used for both cubes.
        data = np.array([
            [[[1, 1, 1], [1, 1, 1], [1, 1, 1]]],
            [[[2, 2, 2], [2, 2, 2], [2, 2, 2]]],
            [[[3, 3, 3], [3, 3, 3], [3, 3, 3]]],
        ])

        raw_cube = self.cube.copy()
        raw_cube.data = data
        calibrated_cube = self.cube.copy()
        calibrated_cube.data = data

        result = Plugin().rank_ecc(calibrated_cube, raw_cube)
        self.assertArrayAlmostEqual(result.data, data)
Esempio n. 6
0
    def test_basic(self):
        """Check that rank_ecc returns an iris.cube.Cube."""
        low, high = 0.71844843, 3.28155157

        raw_cube = self.cube.copy()
        raw_cube.data = np.array([
            [[[1, 1, 1], [1, 1, 1], [1, 1, 1]]],
            [[[2, 2, 2], [2, 2, 2], [2, 2, 2]]],
            [[[3, 3, 3], [3, 3, 3], [3, 3, 3]]],
        ])

        calibrated_cube = self.cube.copy()
        calibrated_cube.data = np.array([
            [[[low, low, low], [low, low, low], [low, low, low]]],
            [[[2., 2., 2.], [2., 2., 2.], [2., 2., 2.]]],
            [[[high, high, high], [high, high, high], [high, high, high]]],
        ])

        result = Plugin().rank_ecc(calibrated_cube, raw_cube)
        self.assertIsInstance(result, Cube)
Esempio n. 7
0
 def test_basic_masked_input_data_not_nans(self):
     """
     Check process() when both inputs are masked and the values under
     the mask are not nans.

     The result should be a Cube whose realization coordinate equals
     that of the raw cube, and whose data and mask equal those of the
     masked raw cube.
     """
     # Mask both inputs in the same way: write 1000 into the [:, 0, 0]
     # slice, then mask every value equal to 1000.
     for cube in (self.raw_cube, self.post_processed_percentiles):
         cube.data[:, 0, 0] = 1000
         cube.data = np.ma.masked_equal(cube.data, 1000)

     expected = self.raw_cube.data.copy()
     result = Plugin().process(
         self.post_processed_percentiles, self.raw_cube)

     self.assertIsInstance(result, Cube)
     self.assertTrue(result.coords("realization"))
     self.assertEqual(
         result.coord("realization"), self.raw_cube.coord("realization"))
     self.assertArrayAlmostEqual(result.data, expected)
     self.assertArrayEqual(result.data.mask, expected.mask)
Esempio n. 8
0
    def test_realization_for_equal_check_data(self):
        """
        Check the data returned when the percentile and realization cubes
        are passed in unmodified (equal numbers of percentiles and
        realizations, going by the test name).
        """
        expected = np.array([
            [[[4.0, 4.625, 5.25], [5.875, 6.5, 7.125], [7.75, 8.375, 9.0]]],
            [[[6.0, 6.625, 7.25], [7.875, 8.5, 9.125], [9.75, 10.375, 11.0]]],
            [[[8.0, 8.625, 9.25], [9.875, 10.5, 11.125],
              [11.75, 12.375, 13.0]]],
        ])

        result = Plugin()._recycle_raw_ensemble_realizations(
            self.percentile_cube, self.realization_cube, self.perc_coord)
        self.assertArrayAlmostEqual(expected, result.data)
Esempio n. 9
0
    def test_3d_cube(self):
        """Check the data returned by rank_ecc for a 3d input cube."""
        template = self.cube[:, :, :2, 0].copy()

        raw_cube = template.copy()
        raw_cube.data = np.array([[[1, 1]], [[3, 2]], [[2, 3]]])

        calibrated_cube = template.copy()
        calibrated_cube.data = np.array([[[1, 1]], [[2, 2]], [[3, 3]]])

        # The calibrated values rearranged to follow the raw data ordering.
        expected = np.array([[[1, 1]], [[3, 2]], [[2, 3]]])

        result = Plugin().rank_ecc(calibrated_cube, raw_cube)
        self.assertArrayAlmostEqual(result.data, expected)
Esempio n. 10
0
    def test_unordered_data(self):
        """
        Check the data returned by rank_ecc when the raw data is not in
        monotonically-increasing order.

        ECC arranges the calibrated data to follow the ordering of the raw
        data, so the calibrated output may itself appear out of order.
        """
        raw_cube = self.cube.copy()
        raw_cube.data = np.array([
            [[[5, 5, 5], [7, 5, 5], [5, 5, 5]]],
            [[[4, 4, 4], [4, 4, 4], [4, 4, 4]]],
            [[[6, 6, 6], [6, 6, 6], [6, 6, 6]]],
        ])

        calibrated_cube = self.cube.copy()
        calibrated_cube.data = np.array([
            [[[4, 5, 4], [4, 5, 4], [4, 5, 4]]],
            [[[5, 6, 5], [5, 6, 5], [5, 6, 5]]],
            [[[6, 7, 6], [6, 7, 6], [6, 7, 6]]],
        ])

        # The rankings used for the sorting come exclusively from the raw
        # data; the values within the calibrated data play no part.
        expected = np.array([
            [[[5, 6, 5], [6, 6, 5], [5, 6, 5]]],
            [[[4, 5, 4], [4, 5, 4], [4, 5, 4]]],
            [[[6, 7, 6], [5, 7, 6], [6, 7, 6]]],
        ])

        result = Plugin().rank_ecc(calibrated_cube, raw_cube)
        self.assertArrayAlmostEqual(result.data, expected)
    def test_3d_cube_masked_nans(self):
        """Check the data, mask and dtype returned by rank_ecc for a 3d
        input cube whose calibrated data is masked, with nans under the
        mask."""
        mask = np.array([[[True, False]], [[True, False]], [[True, False]]])

        template = self.cube[:, :, :2, 0].copy()

        raw_cube = template.copy()
        raw_cube.data = np.array([[[1, 9]], [[3, 5]], [[2, 7]]])

        calibrated_cube = template.copy()
        calibrated_cube.data = np.ma.MaskedArray(
            [[[np.nan, 6]], [[np.nan, 8]], [[np.nan, 10]]],
            mask=mask, dtype=np.float32)

        # The calibrated values rearranged to follow the raw data ordering.
        expected = np.array(
            [[[np.nan, 10]], [[np.nan, 6]], [[np.nan, 8]]], dtype=np.float32)

        result = Plugin().rank_ecc(calibrated_cube, raw_cube)
        self.assertArrayAlmostEqual(result.data.data, expected)
        self.assertArrayEqual(result.data.mask, mask)
        self.assertEqual(result.data.dtype, np.float32)
Esempio n. 12
0
 def test_realization_for_greater_than_check_data(self):
     """
     Check the data returned when there are more percentiles than
     realizations.

     The expected data repeats its first slice as the third slice.
     """
     first = [[4.0, 4.625, 5.25], [5.875, 6.5, 7.125], [7.75, 8.375, 9.0]]
     second = [[6.0, 6.625, 7.25], [7.875, 8.5, 9.125], [9.75, 10.375, 11.0]]
     expected = np.array([[first, second, first]])

     # Slice the raw forecast so that there are fewer realizations than
     # percentiles.
     raw_realizations = self.realization_cube[:2, :, :, :]

     result = Plugin()._recycle_raw_ensemble_realizations(
         self.percentile_cube, raw_realizations, self.perc_coord)
     self.assertArrayAlmostEqual(expected, result.data)
Esempio n. 13
0
    def test_3d_cube_tied_values_random_seed(self):
        """
        Check the data returned by rank_ecc for a 3d input cube when the
        raw ensemble realizations contain tied values.

        A random seed is supplied so that only one of the two possible
        outcomes is returned.
        """
        template = self.cube.copy()[:, :, :2, 0]

        raw_cube = template.copy()
        raw_cube.data = np.array([[[1, 1]], [[3, 2]], [[2, 2]]])

        calibrated_cube = template.copy()
        calibrated_cube.data = np.array([[[1, 1]], [[2, 2]], [[3, 3]]])

        # The calibrated values rearranged to follow the raw data ordering.
        expected = np.array([[[1, 1]], [[3, 2]], [[2, 3]]])

        result = Plugin().rank_ecc(calibrated_cube, raw_cube, random_seed=0)
        # Swap the first two dimensions of the result before comparing.
        result.transpose([1, 0, 2])
        self.assertArrayAlmostEqual(result.data, expected)
Esempio n. 14
0
 def test_unmasked_data(self):
     """Check that the mask check runs without error on the fixture cubes."""
     percentiles, raw = self.post_processed_percentiles, self.raw_cube
     Plugin._check_input_cube_masks(percentiles, raw)
Esempio n. 15
0
    def test_2d_cube_recycling_raw_ensemble_realizations(self):
        """
        Check the data returned by process() for a 2d input cube when
        there are fewer raw ensemble realizations than percentiles, so
        that the raw realizations have to be recycled.

        Worked example used by this test:

        Raw ensemble realizations
        1,  2
        Post-processed percentiles
        1,  2,  3
        Raw ensemble realizations after recycling
        1,  2,  1

        The second realization (value 2) is the highest, so it receives
        the highest post-processed percentile (3). The first and third
        realizations are tied after recycling, and ties are ordered
        randomly, so the remaining percentile values 1 and 2 can be
        allocated to them either way round. Both outcomes are accepted.
        """
        raw_cube = self.raw_cube[:2, :, 0, 0]
        raw_cube.data = np.array([[1], [2]])

        percentiles = self.post_processed_percentiles[:, :, 0, 0]
        percentiles.data = np.array([[1], [2], [3]])

        # The two acceptable results, differing only in how the tie is split.
        acceptable = (
            np.array([[1], [3], [2]]),
            np.array([[2], [3], [1]]),
        )

        result = Plugin().process(percentiles, raw_cube)
        outcomes = [
            np.array_equal(option, result.data) for option in acceptable]
        self.assertIn(True, outcomes)
def main(argv=None):
    """Apply EMOS coefficients to a forecast and save the calibrated result.

    Load in arguments for applying coefficients for Ensemble Model Output
    Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). The coefficients are applied to the forecast
    that is supplied, so as to calibrate the forecast. The calibrated
    forecast is written to a netCDF file.

    The forecast may be supplied as realizations, probabilities or
    percentiles. Probabilities and percentiles are converted to
    realizations before the coefficients are applied, and the calibrated
    output is converted back to match the input format.

    Args:
        argv (list of str or None):
            Command line arguments, passed straight to the parser's
            parse_args.

    Raises:
        ValueError: If the forecast is supplied as probabilities or
            percentiles and --num_realizations has not been provided.
    """
    parser = ArgParser(
        description='Apply coefficients for Ensemble Model Output '
        'Statistics (EMOS), otherwise known as Non-homogeneous '
        'Gaussian Regression (NGR). The supported input formats '
        'are realizations, probabilities and percentiles. '
        'The forecast will be converted to realizations before '
        'applying the coefficients and then converted back to '
        'match the input format.')
    # Filepaths for the forecast, EMOS coefficients and the output.
    parser.add_argument(
        'forecast_filepath',
        metavar='FORECAST_FILEPATH',
        help='A path to an input NetCDF file containing the forecast to be '
        'calibrated. The input format could be either realizations, '
        'probabilities or percentiles.')
    parser.add_argument('coefficients_filepath',
                        metavar='COEFFICIENTS_FILEPATH',
                        help='A path to an input NetCDF file containing the '
                        'coefficients used for calibration.')
    parser.add_argument('output_filepath',
                        metavar='OUTPUT_FILEPATH',
                        help='The output path for the processed NetCDF')
    # Optional arguments.
    parser.add_argument(
        '--num_realizations',
        metavar='NUMBER_OF_REALIZATIONS',
        default=None,
        type=np.int32,
        help='Optional argument to specify the number of '
        'ensemble realizations to produce. '
        'If the current forecast is input as probabilities or '
        'percentiles then this argument is used to create the requested '
        'number of realizations. In addition, this argument is used to '
        'construct the requested number of realizations from the mean '
        'and variance output after applying the EMOS coefficients. '
        'Default will be the number of realizations in the raw input '
        'file, if realizations are provided as input, otherwise if the '
        'input format is probabilities or percentiles, then an error '
        'will be raised if no value is provided.')
    parser.add_argument(
        '--random_ordering',
        default=False,
        action='store_true',
        help='Option to reorder the post-processed forecasts randomly. If not '
        'set, the ordering of the raw ensemble is used. This option is '
        'only valid when the input format is realizations.')
    # Parse the seed as an integer so that a value given on the command
    # line is not handed on to the reordering step as a string.
    parser.add_argument(
        '--random_seed',
        metavar='RANDOM_SEED',
        default=None,
        type=int,
        help='Option to specify a value for the random seed for testing '
        'purposes, otherwise, the default random seed behaviour is '
        'utilised. The random seed is used in the generation of the '
        'random numbers used for either the random_ordering option to '
        'order the input percentiles randomly, rather than use the '
        'ordering from the raw ensemble, or for splitting tied values '
        'within the raw ensemble, so that the values from the input '
        'percentiles can be ordered to match the raw ensemble.')
    parser.add_argument(
        '--ecc_bounds_warning',
        default=False,
        action='store_true',
        help='If True, where the percentiles exceed the ECC bounds range, '
        'raise a warning rather than an exception. This occurs when the '
        'current forecast is in the form of probabilities and is '
        'converted to percentiles, as part of converting the input '
        'probabilities into realizations.')
    parser.add_argument(
        '--predictor_of_mean',
        metavar='PREDICTOR_OF_MEAN',
        choices=['mean', 'realizations'],
        default='mean',
        help='String to specify the predictor used to calibrate the forecast '
        'mean. Currently the ensemble mean ("mean") and the ensemble '
        'realizations ("realizations") are supported as options. '
        'Default: "mean".')

    args = parser.parse_args(args=argv)

    current_forecast = load_cube(args.forecast_filepath)
    coeffs = load_cube(args.coefficients_filepath)

    # Keep the forecast as loaded: it is needed later to convert the
    # calibrated output back into the input format.
    original_current_forecast = current_forecast.copy()

    msg = ("The current forecast has been provided as {0}. "
           "These {0} need to be converted to realizations "
           "for ensemble calibration. The args.num_realizations "
           "argument is used to define the number of realizations "
           "to construct from the input {0}, so if the "
           "current forecast is provided as {0} then "
           "args.num_realizations must be defined.")

    # A forecast with a percentile coordinate is treated as percentiles,
    # anything else as realizations, unless the name check below
    # identifies it as probabilities.
    try:
        find_percentile_coordinate(current_forecast)
        input_forecast_type = "percentiles"
    except CoordinateNotFoundError:
        input_forecast_type = "realizations"

    if current_forecast.name().startswith("probability_of"):
        input_forecast_type = "probabilities"
        # If probabilities, convert to percentiles.
        conversion_plugin = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=args.ecc_bounds_warning)
    elif input_forecast_type == "percentiles":
        # If percentiles, resample percentiles so that the percentiles are
        # evenly spaced.
        conversion_plugin = ResamplePercentiles(
            ecc_bounds_warning=args.ecc_bounds_warning)

    # If percentiles, resample percentiles and then rebadge.
    # If probabilities, generate percentiles and then rebadge.
    if input_forecast_type in ["percentiles", "probabilities"]:
        if not args.num_realizations:
            raise ValueError(msg.format(input_forecast_type))
        current_forecast = conversion_plugin.process(
            current_forecast, no_of_percentiles=args.num_realizations)
        current_forecast = (
            RebadgePercentilesAsRealizations().process(current_forecast))

    # Default number of ensemble realizations is the number in
    # the raw forecast.
    if not args.num_realizations:
        args.num_realizations = len(
            current_forecast.coord('realization').points)

    # Apply coefficients as part of Ensemble Model Output Statistics (EMOS).
    ac = ApplyCoefficientsFromEnsembleCalibration(
        current_forecast,
        coeffs,
        predictor_of_mean_flag=args.predictor_of_mean)
    calibrated_predictor, calibrated_variance = ac.process()

    # If input forecast is probabilities, convert output into probabilities.
    # If input forecast is percentiles, convert output into percentiles.
    # If input forecast is realizations, convert output into realizations.
    if input_forecast_type == "probabilities":
        result = GenerateProbabilitiesFromMeanAndVariance().process(
            calibrated_predictor, calibrated_variance,
            original_current_forecast)
    elif input_forecast_type == "percentiles":
        perc_coord = find_percentile_coordinate(original_current_forecast)
        result = GeneratePercentilesFromMeanAndVariance().process(
            calibrated_predictor,
            calibrated_variance,
            percentiles=perc_coord.points)
    elif input_forecast_type == "realizations":
        # Ensemble Copula Coupling to generate realizations
        # from mean and variance.
        percentiles = GeneratePercentilesFromMeanAndVariance().process(
            calibrated_predictor,
            calibrated_variance,
            no_of_percentiles=args.num_realizations)
        result = EnsembleReordering().process(
            percentiles,
            current_forecast,
            random_ordering=args.random_ordering,
            random_seed=args.random_seed)
    save_netcdf(result, args.output_filepath)
Esempio n. 17
0
def process(cube: cli.inputcube,
            raw_cube: cli.inputcube = None,
            *,
            realizations_count: int = None,
            sampling_method='quantile',
            ignore_ecc_bounds=False,
            randomise=False,
            random_seed: int = None,
            realizations: cli.comma_separated_list = None):
    """Convert percentiles to ensemble realizations using Ensemble Copula
    Coupling.

    The percentiles are first resampled. They are then reordered against
    the raw_cube if one is given, or otherwise rebadged as realizations.

    Args:
        cube (iris.cube.Cube):
            Cube expected to contain a percentiles coordinate.
        raw_cube (iris.cube.Cube):
            Cube of raw (not post processed) weather data.
            If this argument is given, ensemble realizations will be created
            from percentiles by reshuffling them in correspondence to the
            rank order of the raw ensemble. Otherwise, the percentiles are
            rebadged as realizations.
        realizations_count (int):
            The number of percentiles to be generated. This is also equal to
            the number of ensemble realizations that will be generated.
        sampling_method (str):
            Method to be used for generating the list of percentiles with
            forecasts generated at each percentile. The options are "quantile"
            and "random".
            The quantile option produces equally spaced percentiles which is
            the preferred option for full ensemble copula coupling with
            reorder enabled.
        ignore_ecc_bounds (bool):
            If True, where percentiles (calculated as an intermediate output
            before realization) exceed the ECC bounds range, raises a
            warning rather than an exception.
        randomise (bool):
            Reorder randomly, rather than using the rank order of the raw_cube.
        random_seed (int):
            Option to specify a value for the random seed for testing purposes,
            otherwise, the default random seed behaviour is utilised.
            The random seed is used in the generation of the random numbers
            used for either the randomise option to order the input
            percentiles randomly, rather than use the ordering from the
            raw ensemble, or for splitting tied values within the raw ensemble
            so that the values from the input percentiles can be ordered to
            match the raw ensemble.
        realizations (list of ints):
            A list of ensemble realization numbers to use when rebadging the
            percentiles into realizations.

    Returns:
        iris.cube.Cube:
            The processed Cube.
    """
    from improver.ensemble_copula_coupling.ensemble_copula_coupling import (
        EnsembleReordering,
        RebadgePercentilesAsRealizations,
        ResamplePercentiles,
    )

    # The realization numbers arrive as a comma separated list, so they
    # are converted to integers here.
    if realizations:
        realizations = list(map(int, realizations))

    resampler = ResamplePercentiles(ecc_bounds_warning=ignore_ecc_bounds)
    percentiles = resampler.process(
        cube, no_of_percentiles=realizations_count,
        sampling=sampling_method)

    # Without a raw cube there is nothing to reorder against, so the
    # percentiles are simply rebadged.
    if not raw_cube:
        return RebadgePercentilesAsRealizations().process(
            percentiles, ensemble_realization_numbers=realizations)

    return EnsembleReordering().process(
        percentiles, raw_cube, random_ordering=randomise,
        random_seed=random_seed)
def main(argv=None):
    """Calibrate a forecast using the EnsembleCalibration plugin.

    Parses the command line, loads the current forecast, the historic
    forecasts and the truth, converts a percentile or probability input into
    realizations, runs EnsembleCalibration to obtain a calibrated mean and
    variance, converts these back into the form of the input forecast and
    saves the result to the requested output file.

    Args:
        argv (list of str or None):
            Command line arguments, handed unchanged to the parser's
            ``parse_args`` call.

    Raises:
        ValueError:
            If the current forecast is provided as percentiles or
            probabilities and --num_realizations has not been set.
    """
    parser = ArgParser(
        description='Apply the requested ensemble calibration method using '
        'the current forecast (to be calibrated) in the form of '
        'realizations, probabilities, or percentiles, historical '
        'forecasts in the form of realizations and historical truth data '
        '(to use in calibration). The mean and variance output from the '
        'EnsembleCalibration plugin can be written to an output file '
        'if required. If the current forecast is supplied in the form of '
        'probabilities or percentiles, these are converted to realizations '
        'prior to calibration. After calibration, the mean and variance '
        'computed in the calibration are converted to match the format of the '
        'current forecast i.e. if realizations are input, realizations '
        'are output, if probabilities are input, probabilities are output, '
        'and if percentiles are input, percentiles are output. '
        'If realizations are input, realizations are regenerated using '
        'Ensemble Copula Coupling.')
    # Arguments for EnsembleCalibration
    parser.add_argument(
        'units',
        metavar='UNITS_TO_CALIBRATE_IN',
        help='The unit that calibration should be undertaken in. The current '
        'forecast, historical forecast and truth will be converted as '
        'required.')
    parser.add_argument(
        'distribution',
        metavar='DISTRIBUTION',
        choices=['gaussian', 'truncated gaussian'],
        help='The distribution that will be used for calibration. This will '
        'be dependent upon the input phenomenon. This has to be '
        'supported by the minimisation functions in '
        'ContinuousRankedProbabilityScoreMinimisers.')
    # Filepaths for current, historic and truth data.
    parser.add_argument(
        'input_filepath',
        metavar='INPUT_FILE',
        help='A path to an input NetCDF file containing the current forecast '
        'to be processed. The file provided could be in the form of '
        'realizations, probabilities or percentiles.')
    parser.add_argument(
        'historic_filepath',
        metavar='HISTORIC_DATA_FILE',
        help='A path to an input NetCDF file containing the historic '
        'forecast(s) used for calibration. The file provided must be in '
        'the form of realizations.')
    parser.add_argument(
        'truth_filepath',
        metavar='TRUTH_DATA_FILE',
        help='A path to an input NetCDF file containing the historic truth '
        'analyses used for calibration.')
    parser.add_argument('output_filepath',
                        metavar='OUTPUT_FILE',
                        help='The output path for the processed NetCDF')
    # Optional arguments.
    parser.add_argument(
        '--predictor_of_mean',
        metavar='CALIBRATE_MEAN_FLAG',
        choices=['mean', 'realizations'],
        default='mean',
        help='String to specify the input to calculate the calibrated mean. '
        'Currently the ensemble mean ("mean") and the ensemble '
        'realizations ("realizations") are supported as the predictors. '
        'Default: "mean".')
    parser.add_argument(
        '--save_mean',
        metavar='MEAN_FILE',
        default=False,
        help='Option to save the mean output from EnsembleCalibration plugin. '
        'If used, a path to save the output to must be provided.')
    parser.add_argument(
        '--save_variance',
        metavar='VARIANCE_FILE',
        default=False,
        help='Option to save the variance output from EnsembleCalibration '
        'plugin. If used, a path to save the output to must be provided.')
    parser.add_argument(
        '--num_realizations',
        metavar='NUMBER_OF_REALIZATIONS',
        default=None,
        type=np.int32,
        help='Optional argument to specify the number of '
        'ensemble realizations to produce. '
        'If the current forecast is input as probabilities or '
        'percentiles then this argument is used to create the requested '
        'number of realizations. In addition, this argument is used to '
        'construct the requested number of realizations from the mean '
        'and variance output after applying the EMOS coefficients. '
        'Default will be the number of realizations in the raw input '
        'file, if realizations are provided as input, otherwise if the '
        'input format is probabilities or percentiles, then an error '
        'will be raised if no value is provided.')
    parser.add_argument(
        '--random_ordering',
        default=False,
        action='store_true',
        help='Option to reorder the post-processed forecasts randomly. If not '
        'set, the ordering of the raw ensemble is used. This option is '
        'only valid when the input format is realizations.')
    # The seed is parsed as an integer so that a non-numeric value is
    # rejected on the command line rather than being forwarded as a string.
    parser.add_argument(
        '--random_seed',
        metavar='RANDOM_SEED',
        default=None,
        type=int,
        help='Option to specify a value for the random seed for testing '
        'purposes, otherwise, the default random seed behaviour is '
        'utilised. The random seed is used in the generation of the '
        'random numbers used for either the random_ordering option to '
        'order the input percentiles randomly, rather than use the '
        'ordering from the raw ensemble, or for splitting tied values '
        'within the raw ensemble, so that the values from the input '
        'percentiles can be ordered to match the raw ensemble.')
    parser.add_argument(
        '--ecc_bounds_warning',
        default=False,
        action='store_true',
        help='If True, where the percentiles exceed the ECC bounds range, '
        'raise a warning rather than an exception. This occurs when the '
        'current forecast is in the form of probabilities and is '
        'converted to percentiles, as part of converting the input '
        'probabilities into realizations.')
    parser.add_argument(
        '--max_iterations',
        metavar='MAX_ITERATIONS',
        type=np.int32,
        default=1000,
        help='The maximum number of iterations allowed until the minimisation '
        'has converged to a stable solution. If the maximum number of '
        'iterations is reached, but the minimisation has not yet '
        'converged to a stable solution, then the available solution is '
        'used anyway, and a warning is raised. This may be modified for '
        'testing purposes but otherwise kept fixed. If the '
        'predictor_of_mean is "realizations", then the number of '
        'iterations may require increasing, as there will be more '
        'coefficients to solve for.')
    args = parser.parse_args(args=argv)

    current_forecast = load_cube(args.input_filepath)
    historic_forecast = load_cube(args.historic_filepath)
    truth = load_cube(args.truth_filepath)

    # Keep the forecast as loaded: it is used as the template when the
    # calibrated output is converted back to probabilities or percentiles.
    original_current_forecast = current_forecast.copy()

    msg = ("The current forecast has been provided as {0}. "
           "These {0} need to be converted to realizations "
           "for ensemble calibration. The args.num_realizations "
           "argument is used to define the number of realizations "
           "to construct from the input {0}, so if the "
           "current forecast is provided as {0} then "
           "args.num_realizations must be defined.")

    # A forecast with a percentile coordinate is treated as percentiles,
    # anything else is provisionally treated as realizations.
    try:
        find_percentile_coordinate(current_forecast)
        input_forecast_type = "percentiles"
    except CoordinateNotFoundError:
        input_forecast_type = "realizations"

    # The cube name takes precedence when identifying probabilities.
    if current_forecast.name().startswith("probability_of"):
        input_forecast_type = "probabilities"
        # If probabilities, convert to percentiles.
        conversion_plugin = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=args.ecc_bounds_warning)
    elif input_forecast_type == "percentiles":
        # If percentiles, resample percentiles so that the percentiles are
        # evenly spaced.
        conversion_plugin = ResamplePercentiles(
            ecc_bounds_warning=args.ecc_bounds_warning)

    # If percentiles, resample percentiles and then rebadge.
    # If probabilities, generate percentiles and then rebadge.
    if input_forecast_type in ["percentiles", "probabilities"]:
        if not args.num_realizations:
            raise ValueError(msg.format(input_forecast_type))
        current_forecast = conversion_plugin.process(
            current_forecast, no_of_percentiles=args.num_realizations)
        current_forecast = (
            RebadgePercentilesAsRealizations().process(current_forecast))

    # Default number of ensemble realizations is the number in
    # the raw forecast.
    if not args.num_realizations:
        args.num_realizations = len(
            current_forecast.coord('realization').points)

    # Ensemble-Calibration to calculate the mean and variance.
    forecast_predictor, forecast_variance = EnsembleCalibration(
        args.distribution,
        args.units,
        predictor_of_mean_flag=args.predictor_of_mean,
        max_iterations=args.max_iterations).process(current_forecast,
                                                    historic_forecast, truth)

    # If required, save the mean and variance.
    if args.save_mean:
        save_netcdf(forecast_predictor, args.save_mean)
    if args.save_variance:
        save_netcdf(forecast_variance, args.save_variance)

    # If input forecast is probabilities, convert output into probabilities.
    # If input forecast is percentiles, convert output into percentiles.
    # If input forecast is realizations, convert output into realizations.
    if input_forecast_type == "probabilities":
        result = GenerateProbabilitiesFromMeanAndVariance().process(
            forecast_predictor, forecast_variance, original_current_forecast)
    elif input_forecast_type == "percentiles":
        # Reproduce the percentiles of the forecast as it was loaded.
        perc_coord = find_percentile_coordinate(original_current_forecast)
        result = GeneratePercentilesFromMeanAndVariance().process(
            forecast_predictor,
            forecast_variance,
            percentiles=perc_coord.points)
    elif input_forecast_type == "realizations":
        # Ensemble Copula Coupling to generate realizations
        # from mean and variance.
        percentiles = GeneratePercentilesFromMeanAndVariance().process(
            forecast_predictor,
            forecast_variance,
            no_of_percentiles=args.num_realizations)
        result = EnsembleReordering().process(
            percentiles,
            current_forecast,
            random_ordering=args.random_ordering,
            random_seed=args.random_seed)
    save_netcdf(result, args.output_filepath)
Esempio n. 19
0
def process(current_forecast, coeffs, landsea_mask, num_realizations=None,
            random_ordering=False, random_seed=None,
            ecc_bounds_warning=False, predictor_of_mean='mean'):
    """Apply Ensemble Model Output Statistics (EMOS) coefficients.

    The EMOS coefficients, otherwise known as Non-homogeneous Gaussian
    Regression (NGR) coefficients, are applied to the supplied forecast in
    order to calibrate it. The calibrated forecast is returned in the same
    form (realizations, probabilities or percentiles) as the input. If no
    coefficients are provided, a warning is issued and the input forecast
    is returned unchanged.

    Args:
        current_forecast (iris.cube.Cube):
            A Cube containing the forecast to be calibrated. The input format
            could be either realizations, probabilities or percentiles.
        coeffs (iris.cube.Cube or None):
            A cube containing the coefficients used for calibration or None.
            If None then the current_forecast is returned unchanged.
        landsea_mask (iris.cube.Cube or None):
            A cube containing the land-sea mask on the same domain as the
            forecast that is to be calibrated. Land points are specified by
            ones and sea points are specified by zeros. If not None this
            argument will enable land-only calibration, in which sea points
            are returned without the application of calibration.
        num_realizations (numpy.int32):
            Optional argument to specify the number of ensemble realizations
            to produce. If the current forecast is input as probabilities or
            percentiles then this argument is used to create the requested
            number of realizations. In addition, this argument is used to
            construct the requested number of realizations from the mean and
            variance output after applying the EMOS coefficients.
            Default is None, in which case the number of realizations in
            the current forecast is used.
        random_ordering (bool):
            Option to reorder the post-processed forecasts randomly. If not
            set, the ordering of the raw ensemble is used. This option is
            only valid when the input format is realizations.
            Default is False.
        random_seed (int):
            Option to specify a value for the random seed for testing
            purposes, otherwise the default random seed behaviour is utilised.
            The random seed is used in the generation of the random numbers
            used for either the random_ordering option to order the input
            percentiles randomly, rather than use the ordering from the raw
            ensemble, or for splitting tied values within the raw ensemble,
            so that the values from the input percentiles can be ordered to
            match the raw ensemble.
            Default is None.
        ecc_bounds_warning (bool):
            If True, where the percentiles exceed the ECC bounds range,
            raises a warning rather than an exception. This occurs when the
            current forecast is in the form of probabilities and is
            converted to percentiles, as part of converting the input
            probabilities into realizations.
            Default is False.
        predictor_of_mean (str):
            String to specify the predictor used to calibrate the forecast
            mean. It is passed to ApplyCoefficientsFromEnsembleCalibration
            as predictor_of_mean_flag.
            Default is 'mean'.

    Returns:
        iris.cube.Cube:
            The calibrated forecast cube.

    Raises:
        ValueError:
            If the coefficients cube does not have the right name of
            "emos_coefficients".
        ValueError:
            If the current forecast is a coefficients cube.
        ValueError:
            If the forecast type is 'percentiles' or 'probabilities' while no
            num_realizations are given.

    """
    # Without coefficients there is nothing to apply.
    if coeffs is None:
        warnings.warn(
            "There are no coefficients provided for calibration. The "
            "uncalibrated forecast will be returned.")
        return current_forecast

    if coeffs.name() != 'emos_coefficients':
        raise ValueError(
            "The current coefficients cube does not have the "
            "name 'emos_coefficients'")

    if current_forecast.name() == 'emos_coefficients':
        raise ValueError(
            "The current forecast cube has the name 'emos_coefficients'")

    # The forecast as supplied is needed later as the template for
    # probability and percentile outputs.
    raw_forecast = current_forecast.copy()

    # A percentile coordinate marks a percentile forecast; otherwise the
    # forecast is provisionally treated as realizations.
    try:
        find_percentile_coordinate(current_forecast)
    except CoordinateNotFoundError:
        forecast_type = "realizations"
    else:
        forecast_type = "percentiles"

    # Select the plugin that turns the input into evenly spaced percentiles.
    # The cube name takes precedence when identifying probabilities.
    to_percentiles = None
    if current_forecast.name().startswith("probability_of"):
        forecast_type = "probabilities"
        to_percentiles = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=ecc_bounds_warning)
    elif forecast_type == "percentiles":
        to_percentiles = ResamplePercentiles(
            ecc_bounds_warning=ecc_bounds_warning)

    # Percentile and probability inputs are converted into percentiles and
    # then re-badged as realizations prior to calibration.
    if to_percentiles is not None:
        if not num_realizations:
            template = (
                "The current forecast has been provided as {0}. "
                "These {0} need to be converted to realizations "
                "for ensemble calibration. The num_realizations "
                "argument is used to define the number of realizations "
                "to construct from the input {0}, so if the "
                "current forecast is provided as {0} then "
                "num_realizations must be defined.")
            raise ValueError(template.format(forecast_type))
        resampled = to_percentiles.process(
            current_forecast, no_of_percentiles=num_realizations)
        current_forecast = RebadgePercentilesAsRealizations().process(
            resampled)

    # Fall back on the size of the raw ensemble.
    if not num_realizations:
        realization_points = current_forecast.coord('realization').points
        num_realizations = len(realization_points)

    # Apply coefficients as part of Ensemble Model Output Statistics (EMOS).
    applier = ApplyCoefficientsFromEnsembleCalibration(
        predictor_of_mean_flag=predictor_of_mean)
    calibrated_predictor, calibrated_variance = applier.process(
        current_forecast, coeffs, landsea_mask=landsea_mask)

    # Return the calibrated forecast in the same form as the input.
    if forecast_type == "probabilities":
        return GenerateProbabilitiesFromMeanAndVariance().process(
            calibrated_predictor, calibrated_variance, raw_forecast)

    if forecast_type == "percentiles":
        perc_coord = find_percentile_coordinate(raw_forecast)
        return GeneratePercentilesFromMeanAndVariance().process(
            calibrated_predictor, calibrated_variance,
            percentiles=perc_coord.points)

    # Realizations: Ensemble Copula Coupling to generate realizations
    # from the calibrated mean and variance.
    calibrated_percentiles = GeneratePercentilesFromMeanAndVariance().process(
        calibrated_predictor, calibrated_variance,
        no_of_percentiles=num_realizations)
    return EnsembleReordering().process(
        calibrated_percentiles, current_forecast,
        random_ordering=random_ordering, random_seed=random_seed)
Esempio n. 20
0
def process(cube: cli.inputcube,
            coefficients: cli.inputcube = None,
            land_sea_mask: cli.inputcube = None,
            *,
            distribution,
            realizations_count: int = None,
            randomise=False,
            random_seed: int = None,
            ignore_ecc_bounds=False,
            predictor='mean',
            shape_parameters: cli.comma_separated_list = None):
    """Apply coefficients for Ensemble Model Output Statistics.

    The coefficients for Ensemble Model Output Statistics (EMOS), otherwise
    known as Non-homogeneous Gaussian Regression (NGR), are applied to the
    supplied forecast in order to calibrate it. The calibrated forecast is
    returned in the same form (realizations, probabilities or percentiles)
    as the input. If no coefficients are provided, a warning is issued and
    the input forecast is returned unchanged.

    Args:
        cube (iris.cube.Cube):
            A Cube containing the forecast to be calibrated. The input format
            could be either realizations, probabilities or percentiles.
        coefficients (iris.cube.Cube):
            A cube containing the coefficients used for calibration or None.
            If None then the input is returned unchanged.
        land_sea_mask (iris.cube.Cube):
            A cube containing the land-sea mask on the same domain as the
            forecast that is to be calibrated. Land points are specified by
            ones and sea points are specified by zeros. If not None this
            argument will enable land-only calibration, in which sea points
            are returned without the application of calibration.
        distribution (str):
            The distribution for constructing realizations, percentiles or
            probabilities. This should typically match the distribution used
            for minimising the Continuous Ranked Probability Score when
            estimating the EMOS coefficients. The distributions available are
            those supported by :data:`scipy.stats`.
        realizations_count (int):
            Option to specify the number of ensemble realizations that will be
            created from probabilities or percentiles for input into EMOS.
        randomise (bool):
            Option to reorder the post-processed forecasts randomly. If not
            set, the ordering of the raw ensemble is used. This option is
            only valid when the input format is realizations.
        random_seed (int):
            Option to specify a value for the random seed for testing
            purposes, otherwise the default random seed behaviour is utilised.
            The random seed is used in the generation of the random numbers
            used for either the randomise option to order the input
            percentiles randomly, rather than use the ordering from the raw
            ensemble, or for splitting tied values within the raw ensemble,
            so that the values from the input percentiles can be ordered to
            match the raw ensemble.
        ignore_ecc_bounds (bool):
            If True, where the percentiles exceed the ECC bounds range,
            raises a warning rather than an exception. This occurs when the
            current forecast is in the form of probabilities and is
            converted to percentiles, as part of converting the input
            probabilities into realizations.
        predictor (str):
            String to specify the form of the predictor used to calculate
            the location parameter when estimating the EMOS coefficients.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the predictors.
        shape_parameters (float or str):
            The shape parameters required for defining the distribution
            specified by the distribution argument. The shape parameters
            should either be a number or 'inf' or '-inf' to represent
            infinity. Further details about appropriate shape parameters
            are available in scipy.stats. For the truncated normal
            distribution with a lower bound of zero, as available when
            estimating EMOS coefficients, the appropriate shape parameters
            are 0 and inf.

    Returns:
        iris.cube.Cube:
            The calibrated forecast cube.

    Raises:
        ValueError:
            If the current forecast is a coefficients cube or a land-sea
            mask cube.
        ValueError:
            If the coefficients cube does not have the right name of
            "emos_coefficients".
        ValueError:
            If the land_sea_mask cube does not have the right name of
            "land_binary_mask".
        ValueError:
            If the forecast type is 'percentiles' or 'probabilities' and the
            realizations_count argument is not provided.
    """
    import warnings

    import numpy as np
    from iris.exceptions import CoordinateNotFoundError

    from improver.calibration.ensemble_calibration import (
        ApplyCoefficientsFromEnsembleCalibration)
    from improver.ensemble_copula_coupling.ensemble_copula_coupling import (
        EnsembleReordering, ConvertLocationAndScaleParametersToPercentiles,
        ConvertLocationAndScaleParametersToProbabilities,
        ConvertProbabilitiesToPercentiles, RebadgePercentilesAsRealizations,
        ResamplePercentiles)
    from improver.calibration.utilities import merge_land_and_sea
    from improver.metadata.probabilistic import find_percentile_coordinate

    forecast = cube

    # Guard against the inputs having been supplied in the wrong order.
    forecast_name = forecast.name()
    if forecast_name in ['emos_coefficients', 'land_binary_mask']:
        raise ValueError(
            "The current forecast cube has the name {}".format(forecast_name))

    # Without coefficients there is nothing to apply.
    if coefficients is None:
        warnings.warn(
            "There are no coefficients provided for calibration. The "
            "uncalibrated forecast will be returned.")
        return forecast

    if coefficients.name() != 'emos_coefficients':
        raise ValueError(
            "The current coefficients cube does not have the "
            "name 'emos_coefficients'")

    if land_sea_mask and land_sea_mask.name() != 'land_binary_mask':
        raise ValueError(
            "The land_sea_mask cube does not have the "
            "name 'land_binary_mask'")

    # The forecast as supplied is used later as the template for the output
    # and to fill in the sea points when a land-sea mask is given.
    raw_forecast = forecast.copy()

    # A percentile coordinate marks a percentile forecast; otherwise the
    # forecast is provisionally treated as realizations.
    try:
        find_percentile_coordinate(forecast)
    except CoordinateNotFoundError:
        forecast_type = "realizations"
    else:
        forecast_type = "percentiles"

    # Select the plugin that turns the input into evenly spaced percentiles.
    # The cube name takes precedence when identifying probabilities.
    to_percentiles = None
    if forecast.name().startswith("probability_of"):
        forecast_type = "probabilities"
        to_percentiles = ConvertProbabilitiesToPercentiles(
            ecc_bounds_warning=ignore_ecc_bounds)
    elif forecast_type == "percentiles":
        to_percentiles = ResamplePercentiles(
            ecc_bounds_warning=ignore_ecc_bounds)

    # Percentile and probability inputs are converted into percentiles and
    # then re-badged as realizations prior to calibration.
    if to_percentiles is not None:
        if not realizations_count:
            template = (
                "The current forecast has been provided as {0}. "
                "These {0} need to be converted to realizations "
                "for ensemble calibration. The realizations_count "
                "argument is used to define the number of realizations "
                "to construct from the input {0}, so if the "
                "current forecast is provided as {0} then "
                "realizations_count must be defined.")
            raise ValueError(template.format(forecast_type))
        resampled = to_percentiles.process(
            forecast, no_of_percentiles=realizations_count)
        forecast = RebadgePercentilesAsRealizations().process(resampled)

    # Apply coefficients as part of Ensemble Model Output Statistics (EMOS).
    applier = ApplyCoefficientsFromEnsembleCalibration(predictor=predictor)
    location_parameter, scale_parameter = applier.process(
        forecast, coefficients, landsea_mask=land_sea_mask)

    if shape_parameters:
        shape_parameters = [np.float32(value) for value in shape_parameters]

    # Keyword arguments shared by the plugins that sample the distribution.
    distribution_kwargs = {
        "distribution": distribution,
        "shape_parameters": shape_parameters,
    }

    # Convert the output forecast type (i.e. realizations, percentiles,
    # probabilities) to match the input forecast type.
    if forecast_type == "probabilities":
        to_probabilities = ConvertLocationAndScaleParametersToProbabilities(
            **distribution_kwargs)
        result = to_probabilities.process(
            location_parameter, scale_parameter, raw_forecast)
    elif forecast_type == "percentiles":
        # Reproduce the percentiles of the forecast as it was supplied.
        perc_coord = find_percentile_coordinate(raw_forecast)
        to_output_percentiles = ConvertLocationAndScaleParametersToPercentiles(
            **distribution_kwargs)
        result = to_output_percentiles.process(
            location_parameter,
            scale_parameter,
            raw_forecast,
            percentiles=perc_coord.points)
    else:
        # Realizations: Ensemble Copula Coupling to generate realizations
        # from the location and scale parameter, one percentile for each
        # realization of the input.
        no_of_percentiles = len(forecast.coord('realization').points)
        to_output_percentiles = ConvertLocationAndScaleParametersToPercentiles(
            **distribution_kwargs)
        calibrated_percentiles = to_output_percentiles.process(
            location_parameter,
            scale_parameter,
            raw_forecast,
            no_of_percentiles=no_of_percentiles)
        result = EnsembleReordering().process(
            calibrated_percentiles,
            forecast,
            random_ordering=randomise,
            random_seed=random_seed)

    if land_sea_mask:
        # Fill in masked sea points with uncalibrated data.
        merge_land_and_sea(result, raw_forecast)
    return result
def main(argv=None):
    """Parse CLI arguments and convert percentiles into realizations.

    The input cube is loaded from INPUT_FILE and resampled with
    ResamplePercentiles. The resampled percentiles are then turned into
    ensemble realizations using exactly one of two Ensemble Copula Coupling
    variants, selected by a mandatory, mutually exclusive pair of flags:

    * --reordering: EnsembleReordering, using the raw forecast loaded from
      --raw_forecast_filepath.
    * --rebadging: RebadgePercentilesAsRealizations, optionally using the
      numbers given by --realization_numbers.

    The result is written to OUTPUT_FILE with save_netcdf.

    Args:
        argv (list of str or None):
            Command line arguments, passed straight to parser.parse_args.
            Default is None.

    Raises:
        ValueError:
            If --reordering is selected without --raw_forecast_filepath.
    """
    # The input handled here is percentile data (see INPUT_FILE help and the
    # ResamplePercentiles call below), so the description says so.
    parser = ArgParser(description='Convert a dataset containing '
                       'percentiles into one containing '
                       'ensemble realizations using Ensemble Copula Coupling.')

    # General options:
    parser.add_argument('input_filepath',
                        metavar='INPUT_FILE',
                        help='A path to an input NetCDF file to be processed.'
                        ' Must contain a percentile dimension.')
    parser.add_argument('output_filepath',
                        metavar='OUTPUT_FILE',
                        help='The output path for the processed NetCDF.')
    parser.add_argument('--no_of_percentiles',
                        default=None,
                        type=int,
                        metavar='NUMBER_OF_PERCENTILES',
                        help='The number of percentiles to be generated. '
                        'This is also equal to the number of ensemble '
                        'realizations that will be generated.')
    parser.add_argument('--sampling_method',
                        default='quantile',
                        const='quantile',
                        nargs='?',
                        choices=['quantile', 'random'],
                        metavar='PERCENTILE_SAMPLING_METHOD',
                        help='Method to be used for generating the list of '
                        'percentiles with forecasts generated at each '
                        'percentile. The options are "quantile" and '
                        '"random". "quantile" is the default option. '
                        'The "quantile" option produces equally spaced '
                        'percentiles which is the preferred '
                        'option for full Ensemble Copula Coupling with '
                        'reordering enabled.')
    parser.add_argument(
        '--ecc_bounds_warning',
        default=False,
        action='store_true',
        help='If True, where percentiles (calculated as an intermediate '
        'output before realizations) exceed the ECC bounds range, raise '
        'a warning rather than an exception.')

    # Different use cases:
    # (We can either reorder OR rebadge)
    group = parser.add_mutually_exclusive_group(required=True)

    group.add_argument('--reordering',
                       default=False,
                       action='store_true',
                       help='The option used to create ensemble realizations '
                       'from percentiles by reordering the input '
                       'percentiles based on the order of the '
                       'raw ensemble forecast.')
    group.add_argument('--rebadging',
                       default=False,
                       action='store_true',
                       help='The option used to create ensemble realizations '
                       'from percentiles by rebadging the input '
                       'percentiles.')

    # If reordering, can do so either based on original realizations,
    # or randomly.
    reordering = parser.add_argument_group(
        'Reordering options', 'Options for reordering the input percentiles '
        'using the raw ensemble forecast as required to create ensemble '
        'realizations.')
    reordering.add_argument('--raw_forecast_filepath',
                            metavar='RAW_FORECAST_FILE',
                            help='A path to an raw forecast NetCDF file to be '
                            'processed. This option is compulsory, if the '
                            'reordering option is selected.')
    reordering.add_argument('--random_ordering',
                            default=False,
                            action='store_true',
                            help='Decide whether or not to use random '
                            'ordering within the ensemble reordering step.')
    reordering.add_argument(
        '--random_seed',
        default=None,
        help='Option to specify a value for the random seed for testing '
        'purposes, otherwise, the default random seed behaviour is '
        'utilised. The random seed is used in the generation of the '
        'random numbers used for either the random_ordering option to '
        'order the input percentiles randomly, rather than use the '
        'ordering from the raw ensemble, or for splitting tied values '
        'within the raw ensemble, so that the values from the input '
        'percentiles can be ordered to match the raw ensemble.')

    rebadging = parser.add_argument_group(
        'Rebadging options', 'Options for rebadging the input percentiles '
        'as ensemble realizations.')
    rebadging.add_argument('--realization_numbers',
                           default=None,
                           metavar='REALIZATION_NUMBERS',
                           nargs="+",
                           help='A list of ensemble realization numbers to '
                           'use when rebadging the percentiles '
                           'into realizations.')

    args = parser.parse_args(args=argv)

    # CLI argument checking:
    # Can only do one of reordering or rebadging: if options are passed which
    # correspond to the opposite method, raise an exception.
    # Note: Shouldn't need to check that both/none are set, since they are
    # defined as mandatory, but mutually exclusive, options.
    if args.reordering:
        if args.realization_numbers is not None:
            parser.wrong_args_error('realization_numbers', 'reordering')
        # Reordering cannot work without the raw ensemble, so fail with a
        # clear message before any file is read instead of handing None to
        # load_cube further down.
        if args.raw_forecast_filepath is None:
            message = ("You must supply a raw forecast filepath if using the "
                       "reordering option.")
            raise ValueError(message)
    if args.rebadging:
        if ((args.raw_forecast_filepath is not None)
                or (args.random_ordering is not False)):
            parser.wrong_args_error('raw_forecast_filepath, random_ordering',
                                    'rebadging')

    # Safe to now actually do the work...
    cube = load_cube(args.input_filepath)

    result_cube = ResamplePercentiles(
        ecc_bounds_warning=args.ecc_bounds_warning).process(
            cube,
            no_of_percentiles=args.no_of_percentiles,
            sampling=args.sampling_method)

    if args.reordering:
        raw_forecast = load_cube(args.raw_forecast_filepath)
        result_cube = EnsembleReordering().process(
            result_cube,
            raw_forecast,
            random_ordering=args.random_ordering,
            random_seed=args.random_seed)
    elif args.rebadging:
        # --realization_numbers arrives as a list of strings (no `type` is
        # declared on the option), so convert to integers here.
        realization_numbers = args.realization_numbers
        if realization_numbers is not None:
            realization_numbers = [int(num) for num in realization_numbers]
        result_cube = RebadgePercentilesAsRealizations().process(
            result_cube, ensemble_realization_numbers=realization_numbers)

    save_netcdf(result_cube, args.output_filepath)
def process(cube,
            raw_forecast=None,
            no_of_realizations=None,
            reordering=False,
            rebadging=False,
            random_seed=None,
            ecc_bounds_warning=False):
    """Convert from probabilities to ensemble realizations.

    The probabilities are first converted to percentiles with
    GeneratePercentilesFromProbabilities. The percentiles are then either
    reordered against the raw ensemble (EnsembleReordering) or rebadged
    (RebadgePercentilesAsRealizations).

    Args:
        cube (iris.cube.Cube):
            Cube to be processed.
        raw_forecast (iris.cube.Cube):
            Cube of raw (not post processed) weather data.
            This option is compulsory, if the reordering option is selected.
        no_of_realizations (int):
            Optional definition of the number of ensemble realizations to
            be generated. These are generated through an intermediate
            percentile representation. If the reordering option is
            specified and the number of realizations is not given, the
            number is taken from the length of the realization coordinate
            of the raw forecast cube.
            Default is None.
        reordering (bool):
            The option used to create ensemble realizations from percentiles
            by reordering the input percentiles based on the order of the
            raw ensemble.
            Default is False.
        rebadging (bool):
            The option used to create ensemble realizations from percentiles
            by rebadging the input percentiles.
            Default is False.
        random_seed (int):
            Option to specify a value for the random seed for testing
            purposes. It is passed on to EnsembleReordering and is only
            accepted together with the reordering option.
            Default is None.
        ecc_bounds_warning (bool):
            Passed to GeneratePercentilesFromProbabilities. If True, where
            percentiles (calculated as an intermediate output before
            realization) exceed the ECC bounds range, a warning is raised
            rather than an exception.
            Default is False.

    Returns:
        iris.cube.Cube:
            Processed result Cube.

    Raises:
        TypeError:
            If rebadging is used with raw_forecast.
        TypeError:
            If rebadging is used with random_seed.
        ValueError:
            If neither reordering nor rebadging is selected.
        ValueError:
            If raw_forecast isn't supplied when using reordering.
    """
    if rebadging:
        if raw_forecast is not None:
            raise TypeError('rebadging cannot be used with raw_forecast.')
        if random_seed is not None:
            raise TypeError('rebadging cannot be used with random_seed.')

    if not (reordering or rebadging):
        # Without one of the two methods there is nothing to return; fail
        # explicitly rather than hitting an unbound local at the end.
        raise ValueError("Either the reordering or the rebadging option "
                         "must be selected.")

    if reordering:
        # Validate before touching raw_forecast: the cube is needed both for
        # the default realization count and for the reordering itself.
        if raw_forecast is None:
            message = ("You must supply a raw forecast cube if using the "
                       "reordering option.")
            raise ValueError(message)
        # If no_of_realizations is not given, take the number from the raw
        # ensemble cube.
        if no_of_realizations is None:
            no_of_realizations = len(raw_forecast.coord("realization").points)

    percentiles = GeneratePercentilesFromProbabilities(
        ecc_bounds_warning=ecc_bounds_warning).process(
            cube, no_of_percentiles=no_of_realizations)

    if reordering:
        return EnsembleReordering().process(percentiles,
                                            raw_forecast,
                                            random_ordering=False,
                                            random_seed=random_seed)
    return RebadgePercentilesAsRealizations().process(percentiles)
def process(cube: cli.inputcube,
            raw_cube: cli.inputcube = None,
            *,
            realizations_count: int = None,
            random_seed: int = None,
            ignore_ecc_bounds=False):
    """Turn a probability cube into ensemble realizations with Ensemble
    Copula Coupling.

    The probabilities are converted to percentiles first. When raw_cube is
    supplied the percentiles are reordered against it; otherwise they are
    rebadged as realizations.

    Args:
        cube (iris.cube.Cube):
            Cube to be processed.
        raw_cube (iris.cube.Cube):
            Cube of raw (not post processed) weather data.
            If this argument is given, ensemble realizations are created
            from the percentiles by reordering them with
            EnsembleReordering. Otherwise, the percentiles are rebadged
            as realizations.
        realizations_count (int):
            Optional number of ensemble realizations to generate; it is
            used as the number of intermediate percentiles. If it is not
            given and raw_cube is given, the length of the realization
            coordinate of raw_cube is used instead.
        random_seed (int):
            Random seed passed to EnsembleReordering, intended for testing
            purposes. Only used when raw_cube is given.
        ignore_ecc_bounds (bool):
            Passed to ConvertProbabilitiesToPercentiles as
            ecc_bounds_warning. If True, where percentiles (calculated as
            an intermediate output before realization) exceed the ECC
            bounds range, a warning is raised rather than an exception.

    Returns:
        iris.cube.Cube:
            Processed result Cube.
    """
    from improver.ensemble_copula_coupling.ensemble_copula_coupling import (
        ConvertProbabilitiesToPercentiles, EnsembleReordering,
        RebadgePercentilesAsRealizations)

    if realizations_count is None:
        if raw_cube:
            # Default the ensemble size to that of the raw forecast.
            realization_coord = raw_cube.coord("realization")
            realizations_count = len(realization_coord.points)

    to_percentiles = ConvertProbabilitiesToPercentiles(
        ecc_bounds_warning=ignore_ecc_bounds)
    percentiles = to_percentiles(cube, no_of_percentiles=realizations_count)

    if not raw_cube:
        # No raw ensemble to borrow an ordering from: rebadge instead.
        return RebadgePercentilesAsRealizations()(percentiles)

    return EnsembleReordering()(percentiles,
                                raw_cube,
                                random_ordering=False,
                                random_seed=random_seed)
Esempio n. 24
0
def main(argv=None):
    """Command-line entry point converting probabilities to realizations.

    Builds the argument parser, validates the combination of options, loads
    the input cube, converts it to percentiles with
    GeneratePercentilesFromProbabilities and then either reorders the
    percentiles against a raw forecast (--reordering) or rebadges them
    (--rebadging). The resulting cube is written with save_netcdf.

    Args:
        argv (list of str or None):
            Command line arguments, passed straight to parser.parse_args.
            Default is None.

    Raises:
        ValueError:
            If --reordering is selected without --raw_forecast_filepath.
        ValueError:
            If the raw forecast cube has no realization coordinate.
    """
    realizations_option = (['--no_of_realizations'], dict(
        metavar='NUMBER_OF_REALIZATIONS',
        default=None,
        type=int,
        help=("Optional definition of the number of ensemble realizations to "
              "be generated. These are generated through an intermediate "
              "percentile representation. These percentiles will be "
              "distributed regularly with the aim of dividing into blocks of "
              "equal probability. If the reordering option is specified and "
              "the number of realizations is not given then the number of "
              "realizations is taken from the number of realizations in the "
              "raw forecast NetCDF file.")))

    parser = ArgParser(
        central_arguments=('input_file', 'output_file'),
        specific_arguments=[realizations_option],
        description=('Convert a dataset containing '
                     'probabilities into one containing '
                     'ensemble realizations.'))

    # Exactly one conversion method has to be chosen on the command line.
    method = parser.add_mutually_exclusive_group(required=True)
    method.add_argument(
        '--reordering',
        default=False,
        action='store_true',
        help='The option used to create ensemble realizations '
        'from percentiles by reordering the input '
        'percentiles based on the order of the '
        'raw ensemble forecast.')
    method.add_argument(
        '--rebadging',
        default=False,
        action='store_true',
        help='The option used to create ensemble realizations '
        'from percentiles by rebadging the input '
        'percentiles.')

    # Options listed under the reordering heading of the help output.
    reorder_opts = parser.add_argument_group(
        'Reordering options', 'Options for reordering the input percentiles '
        'using the raw ensemble forecast as required to create ensemble '
        'realizations.')
    reorder_opts.add_argument(
        '--raw_forecast_filepath',
        metavar='RAW_FORECAST_FILE',
        help='A path to an raw forecast NetCDF file to be '
        'processed. This option is compulsory, if the '
        'reordering option is selected.')
    reorder_opts.add_argument(
        '--random_seed',
        default=None,
        help='Option to specify a value for the random seed for testing '
        'purposes, otherwise, the default random seed behaviour is '
        'utilised. The random seed is used in the generation of the '
        'random numbers used for splitting tied values '
        'within the raw ensemble, so that the values from the input '
        'percentiles can be ordered to match the raw ensemble.')
    reorder_opts.add_argument(
        '--ecc_bounds_warning',
        default=False,
        action='store_true',
        help='If True, where percentiles (calculated as an intermediate '
        'output before realizations) exceed the ECC bounds range, raise '
        'a warning rather than an exception.')

    args = parser.parse_args(args=argv)

    # Options that only make sense for reordering are reported through the
    # parser when rebadging was requested. Both/none of the two methods
    # cannot occur because the group is mandatory and mutually exclusive.
    reordering_only_given = (args.raw_forecast_filepath is not None
                             or args.random_seed is not None)
    if args.rebadging and reordering_only_given:
        parser.wrong_args_error('raw_forecast_filepath, random_seed',
                                'rebadging')

    cube = load_cube(args.input_filepath)

    if args.reordering:
        if args.raw_forecast_filepath is None:
            raise ValueError(
                "You must supply a raw forecast filepath if using the "
                "reordering option.")

        raw_forecast = load_cube(args.raw_forecast_filepath)
        try:
            raw_forecast.coord("realization")
        except CoordinateNotFoundError:
            raise ValueError(
                "The netCDF file from the raw_forecast_filepath "
                "must have a realization coordinate.")

        # Fall back to the size of the raw ensemble when no explicit
        # number of realizations was requested.
        realization_total = args.no_of_realizations
        if realization_total is None:
            realization_total = len(
                raw_forecast.coord("realization").points)

        percentile_plugin = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=args.ecc_bounds_warning)
        percentiles = percentile_plugin.process(
            cube, no_of_percentiles=realization_total)
        cube = EnsembleReordering().process(
            percentiles,
            raw_forecast,
            random_ordering=False,
            random_seed=args.random_seed)
    elif args.rebadging:
        percentile_plugin = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=args.ecc_bounds_warning)
        percentiles = percentile_plugin.process(
            cube, no_of_percentiles=args.no_of_realizations)
        cube = RebadgePercentilesAsRealizations().process(percentiles)

    save_netcdf(cube, args.output_filepath)
def process(cube, raw_forecast=None, no_of_percentiles=None,
            sampling_method='quantile', ecc_bounds_warning=False,
            reordering=False, rebadging=False, random_ordering=False,
            random_seed=None, realization_numbers=None):
    """Runs Ensemble Copula Coupling processing.

    Converts a dataset containing percentiles into one containing ensemble
    realizations using Ensemble Copula Coupling. The percentiles are first
    resampled with ResamplePercentiles, then reordered (EnsembleReordering)
    or rebadged (RebadgePercentilesAsRealizations). If neither reordering
    nor rebadging is selected, the resampled percentiles are returned.

    Args:
        cube (iris.cube.Cube):
            Cube expected to contain a percentiles coordinate.
        raw_forecast (iris.cube.Cube):
            Cube of raw (not post processed) weather data.
            This option is compulsory, if the reordering option is selected.
        no_of_percentiles (int):
            The number of percentiles to be generated. This is also equal to
            the number of ensemble realizations that will be generated.
            Default is None.
        sampling_method (str):
            Method to be used for generating the list of percentiles with
            forecasts generated at each percentile. The options are "quantile"
            and "random".
            The quantile option produces equally spaced percentiles which is
            the preferred option for full Ensemble Copula Coupling with
            reordering enabled.
            Default is 'quantile'.
        ecc_bounds_warning (bool):
            If True where percentiles (calculated as an intermediate output
            before realization) exceed the ECC bounds range, raises a
            warning rather than an exception.
            Default is False.
        reordering (bool):
            The option used to create ensemble realizations from percentiles
            by reordering the input percentiles based on the order of the
            raw ensemble forecast.
            Default is False.
        rebadging (bool):
            The option used to create ensemble realizations from percentiles
            by rebadging the input percentiles.
            Default is False.
        random_ordering (bool):
            If random_ordering is True, the post-processed forecasts are
            reordered randomly, rather than using the ordering of the
            raw ensemble.
            Default is False.
        random_seed (int):
            Option to specify a value for the random seed for testing purposes,
            otherwise, the default random seed behaviour is utilised.
            It is passed on to EnsembleReordering when reordering.
            Default is None.
        realization_numbers (list of ints):
            A list of ensemble realization numbers to use when rebadging the
            percentiles into realizations.
            Default is None.

    Returns:
        iris.cube.Cube:
            The processed Cube.

    Raises:
        TypeError:
            If reordering is used with realization_numbers.
        TypeError:
            If rebadging is used with raw_forecast.
        TypeError:
            If rebadging is used with random_ordering.
        ValueError:
            If raw_forecast isn't supplied when using reordering.
    """
    if reordering and realization_numbers is not None:
        raise TypeError('realization_numbers cannot be used with '
                        'reordering.')
    if rebadging:
        if raw_forecast is not None:
            raise TypeError('rebadging cannot be used with raw_forecast.')
        if random_ordering is not False:
            raise TypeError('rebadging cannot be used with random_ordering.')
    # Checked after the option-conflict errors so those keep precedence, but
    # before any resampling so a missing raw ensemble fails fast and clearly
    # instead of surfacing inside the reordering step.
    if reordering and raw_forecast is None:
        raise ValueError('You must supply a raw forecast cube if using the '
                         'reordering option.')

    result = ResamplePercentiles(
        ecc_bounds_warning=ecc_bounds_warning).process(
            cube, no_of_percentiles=no_of_percentiles,
            sampling=sampling_method)
    if reordering:
        result = EnsembleReordering().process(
            result, raw_forecast, random_ordering=random_ordering,
            random_seed=random_seed)
    elif rebadging:
        result = RebadgePercentilesAsRealizations().process(
            result, ensemble_realization_numbers=realization_numbers)
    return result
Esempio n. 26
0
def process(
    cube: cli.inputcube,
    raw_cube: cli.inputcube = None,
    *,
    realizations_count: int = None,
    random_seed: int = None,
    ignore_ecc_bounds=False,
):
    """Converts an incoming cube into one containing realizations.

    A cube that already has a realization coordinate is returned unchanged.
    Otherwise the cube must hold percentiles or probabilities; these are
    converted to the requested number of percentiles, which are then
    reordered against raw_cube if it is given, or rebadged as realizations.

    Args:
        cube (iris.cube.Cube):
            A cube to be processed.
        raw_cube (iris.cube.Cube):
            Cube of raw (not post processed) weather data.
            If this argument is given, ensemble realizations are created
            from the percentiles by reordering them with
            EnsembleReordering. Otherwise, the percentiles are rebadged
            as realizations.
        realizations_count (int):
            The number of ensemble realizations in the output. If not given,
            the length of the realization coordinate of raw_cube is used.
        random_seed (int):
            Option to specify a value for the random seed when reordering percentiles.
            This value is for testing purposes only, to ensure reproducible outputs.
            It should not be used in real time operations as it may introduce a bias
            into the reordered forecasts.
        ignore_ecc_bounds (bool):
            If True where percentiles (calculated as an intermediate output
            before realization) exceed the ECC bounds range, raises a
            warning rather than an exception.

    Returns:
        iris.cube.Cube:
            The processed cube.

    Raises:
        ValueError:
            If the cube holds neither realizations, percentiles nor
            probabilities.
        ValueError:
            If neither realizations_count nor raw_cube is provided.
    """
    from improver.ensemble_copula_coupling.ensemble_copula_coupling import (
        ConvertProbabilitiesToPercentiles,
        EnsembleReordering,
        RebadgePercentilesAsRealizations,
        ResamplePercentiles,
    )
    from improver.metadata.probabilistic import is_probability

    # Nothing to do when the cube is already expressed as realizations.
    if cube.coords("realization"):
        return cube

    has_percentiles = bool(cube.coords("percentile"))
    if not (has_percentiles or is_probability(cube)):
        raise ValueError("Unable to convert to realizations:\n" + str(cube))

    if realizations_count is None:
        try:
            realization_coord = raw_cube.coord("realization")
            realizations_count = len(realization_coord.points)
        except AttributeError:
            # raw_cube is None (no "coord" attribute), so there is no way
            # to work out how many realizations to produce.
            msg = "Either realizations_count or raw_cube must be provided"
            raise ValueError(msg)

    # Percentile input is resampled; probability input is converted.
    if has_percentiles:
        plugin_class = ResamplePercentiles
    else:
        plugin_class = ConvertProbabilitiesToPercentiles
    to_percentiles = plugin_class(ecc_bounds_warning=ignore_ecc_bounds)
    percentiles = to_percentiles(cube, no_of_percentiles=realizations_count)

    if not raw_cube:
        return RebadgePercentilesAsRealizations()(percentiles)

    return EnsembleReordering()(percentiles,
                                raw_cube,
                                random_seed=random_seed)