예제 #1
0
    def test_exception_for_missing_forecast_inputs(self):
        """Check that an IOError is raised when no forecast cubes are given."""

        # Replace the forecast fixture with an empty list so that only the
        # truths and the land-sea mask are passed in.
        self.realization_forecasts = []
        inputs = (
            self.realization_forecasts
            + self.realization_truths
            + [self.landsea_mask]
        )

        expected_message = 'Missing historical forecast input.'
        with self.assertRaisesRegex(IOError, expected_message):
            split_forecasts_and_truth(inputs, self.truth_attribute)
예제 #2
0
    def test_exception_for_multiple_land_sea_masks(self):
        """Check that an IOError is raised when more than one land-sea mask
        is included in the inputs."""

        # The same mask cube is supplied twice to trigger the error.
        duplicated_masks = [self.landsea_mask, self.landsea_mask]
        inputs = (
            self.realization_forecasts
            + self.realization_truths
            + duplicated_masks
        )

        expected_message = 'Expected one cube for land-sea mask.'
        with self.assertRaisesRegex(IOError, expected_message):
            split_forecasts_and_truth(inputs, self.truth_attribute)
예제 #3
0
    def test_exception_for_unintended_cube_combination(self):
        """Check that a ValueError is raised when a truth cube carries a
        different name from the forecasts, indicating a different
        diagnostic."""

        # Renaming a single truth cube introduces an extra distinct cube name.
        self.realization_truths[0].rename('kitten_density')

        # NOTE(review): two land-sea masks are passed here, exactly as in the
        # multiple-mask test. Presumably the distinct-name check fires before
        # any mask-count check - confirm against split_forecasts_and_truth.
        masks = [self.landsea_mask, self.landsea_mask]
        inputs = self.realization_forecasts + self.realization_truths + masks

        expected_message = 'Must have cubes with 1 or 2 distinct names.'
        with self.assertRaisesRegex(ValueError, expected_message):
            split_forecasts_and_truth(inputs, self.truth_attribute)
def process(
    *cubes: cli.inputcube,
    truth_attribute,
    n_probability_bins: int = 5,
    single_value_lower_limit: bool = False,
    single_value_upper_limit: bool = False,
    aggregate_coordinates: cli.comma_separated_list = None,
):
    """Construct reliability tables for use in reliability calibration.

    Historical forecasts and gridded truths are separated and then compared
    to build reliability tables. The tables are returned as a cube whose
    leading threshold dimension matches that of the forecast probability
    cubes and the thresholded truth.

    Args:
        cubes (list of iris.cube.Cube):
            The historical probability forecasts and the corresponding truths
            used for calibration. These cubes must include the same diagnostic
            name in their names, and must have equivalent threshold
            coordinates. Forecasts and truths are told apart using the user
            provided truth attribute.
        truth_attribute (str):
            An attribute and its value in the format of "attribute=value",
            which must be present on truth cubes.
        n_probability_bins (int):
            The total number of probability bins required in the reliability
            tables. Any single value limit bins that are turned on are
            included in this total. If single value limits are used this
            value must be at least 3.
        single_value_lower_limit (bool):
            Mandates that the lowest bin should be single valued, with a small
            precision tolerance, defined as 1.0E-6. The bin is thus
            0 to 1.0E-6.
        single_value_upper_limit (bool):
            Mandates that the highest bin should be single valued, with a
            small precision tolerance, defined as 1.0E-6. The bin is thus
            (1 - 1.0E-6) to 1.
        aggregate_coordinates (List[str]):
            An optional list of coordinates over which to aggregate the
            reliability calibration table using summation. This is equivalent
            to constructing then using aggregate-reliability-tables but with
            reduced memory usage due to avoiding large intermediate data.

    Returns:
        iris.cube.Cube:
            Reliability tables for the forecast diagnostic with a leading
            threshold coordinate.
    """
    from improver.calibration import split_forecasts_and_truth
    from improver.calibration.reliability_calibration import (
        ConstructReliabilityCalibrationTables,
    )

    # The third returned item is not needed for reliability tables.
    historic_forecasts, truths, _ = split_forecasts_and_truth(
        cubes, truth_attribute
    )

    table_builder = ConstructReliabilityCalibrationTables(
        n_probability_bins=n_probability_bins,
        single_value_lower_limit=single_value_lower_limit,
        single_value_upper_limit=single_value_upper_limit,
    )
    return table_builder(historic_forecasts, truths, aggregate_coordinates)
예제 #5
0
    def test_realization_data(self):
        """Test that when multiple forecast cubes and truth cubes are provided
        without a land-sea mask, the groups are created as expected and no
        land-sea mask is returned."""

        forecast, truth, land_sea_mask = split_forecasts_and_truth(
            self.realization_forecasts + self.realization_truths, self.truth_attribute
        )

        self.assertIsInstance(forecast, iris.cube.Cube)
        self.assertIsInstance(truth, iris.cube.Cube)
        # assertIsNone takes (expr, msg=None): the second positional argument
        # is a failure message, not an expected value, so none is passed.
        self.assertIsNone(land_sea_mask)
        self.assertSequenceEqual((2, 4, 4), forecast.shape)
        self.assertSequenceEqual((2, 4, 4), truth.shape)
        # Every forecast value must be truthy and every truth value falsy.
        self.assertTrue(np.all(forecast.data))
        self.assertFalse(np.any(truth.data))
def process(
    *cubes: cli.inputcube,
    truth_attribute,
    n_probability_bins: int = 5,
    single_value_limits: bool = True,
):
    """Construct reliability tables for use in reliability calibration.

    Historical forecasts and gridded truths are separated and then compared
    to build reliability tables. The tables are returned as a cube whose
    leading threshold dimension matches that of the forecast probability
    cubes and the thresholded truth.

    Args:
        cubes (list of iris.cube.Cube):
            The historical probability forecasts and the corresponding truths
            used for calibration. These cubes must include the same diagnostic
            name in their names, and must have equivalent threshold
            coordinates. Forecasts and truths are told apart using the user
            provided truth attribute.
        truth_attribute (str):
            An attribute and its value in the format of "attribute=value",
            which must be present on truth cubes.
        n_probability_bins (int):
            The total number of probability bins required in the reliability
            tables. If single value limits are turned on, these are included
            in this total. If using single_value_limits this value must be at
            least 3.
        single_value_limits (bool):
            Mandates that the extrema bins (0 and 1) should be single valued,
            with a small precision tolerance of 1.0E-6, e.g. 0 to 1.0E-6 for
            the lowest bin, and 1 - 1.0E-6 to 1 for the highest bin.

    Returns:
        iris.cube.Cube:
            Reliability tables for the forecast diagnostic with a leading
            threshold coordinate.
    """
    from improver.calibration import split_forecasts_and_truth
    from improver.calibration.reliability_calibration import (
        ConstructReliabilityCalibrationTables,
    )

    # The third returned item is not needed for reliability tables.
    historic_forecasts, truths, _ = split_forecasts_and_truth(
        cubes, truth_attribute
    )

    table_builder = ConstructReliabilityCalibrationTables(
        n_probability_bins=n_probability_bins,
        single_value_limits=single_value_limits,
    )
    return table_builder(historic_forecasts, truths)
예제 #7
0
    def test_realization_data_with_land_sea_mask(self):
        """Check that forecasts, truths and a single land-sea mask supplied
        together are separated into the expected three outputs."""

        inputs = (
            self.realization_forecasts
            + self.realization_truths
            + [self.landsea_mask]
        )
        historic_forecast, historic_truth, mask = split_forecasts_and_truth(
            inputs, self.truth_attribute
        )

        # All three outputs should be cubes.
        for cube in (historic_forecast, historic_truth, mask):
            self.assertIsInstance(cube, iris.cube.Cube)
        self.assertEqual('land_binary_mask', mask.name())

        # Forecast and truth share a shape with a leading dimension of 2.
        for cube in (historic_forecast, historic_truth):
            self.assertSequenceEqual((2, 4, 4), cube.shape)

        # Every forecast value must be truthy and every truth value falsy.
        self.assertTrue(np.all(historic_forecast.data))
        self.assertFalse(np.any(historic_truth.data))
        self.assertSequenceEqual((4, 4), mask.shape)
def process(
    *cubes: cli.inputcube,
    distribution,
    truth_attribute,
    cycletime,
    units=None,
    predictor='mean',
    tolerance: float = 0.01,
    max_iterations: int = 1000,
):
    """Estimate coefficients for Ensemble Model Output Statistics.

    Gathers the arguments needed to estimate coefficients for Ensemble Model
    Output Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). Two sources of input data must be provided: historical
    forecasts and historical truth data (to use in calibration).
    The estimated coefficients are output as a cube.

    Args:
        cubes (list of iris.cube.Cube):
            The historical forecasts and the corresponding truth used for
            calibration. They must have the same cube name and will be
            separated based on the truth attribute. Optionally this may also
            contain a single land-sea mask cube on the same domain as the
            historic forecasts and truth (where land points are set to one
            and sea points are set to zero).
        distribution (str):
            The distribution that will be used for calibration. This will be
            dependent upon the input phenomenon.
        truth_attribute (str):
            An attribute and its value in the format of "attribute=value",
            which must be present on historical truth cubes.
        cycletime (str):
            The cycle at which forecasts will be calibrated using the
            calculated EMOS coefficients. The validity time in the output
            coefficients cube will be calculated relative to this cycletime.
            This cycletime is in the format YYYYMMDDTHHMMZ.
        units (str):
            The units that calibration should be undertaken in. The historical
            forecast and truth will be converted as required.
        predictor (str):
            The form of the predictor used to calculate the location
            parameter when estimating the EMOS coefficients. Currently the
            ensemble mean ("mean") and the ensemble realizations
            ("realizations") are supported as options.
        tolerance (float):
            The tolerance for the Continuous Ranked Probability Score (CRPS)
            calculated by the minimisation. Once multiple iterations result in
            a CRPS equal to the same value within the specified tolerance, the
            minimisation will terminate.
        max_iterations (int):
            The maximum number of iterations allowed until the minimisation
            has converged to a stable solution. If the maximum number of
            iterations is reached but the minimisation has not yet converged
            to a stable solution, then the available solution is used anyway,
            and a warning is raised. If the predictor is "realizations", then
            the number of iterations may require increasing, as there will be
            more coefficients to solve.

    Returns:
        iris.cube.Cube:
            Cube containing the coefficients estimated using EMOS. The cube
            contains a coefficient_index dimension coordinate and a
            coefficient_name auxiliary coordinate.
    """
    from improver.calibration import split_forecasts_and_truth
    from improver.calibration.ensemble_calibration import (
        EstimateCoefficientsForEnsembleCalibration,
    )

    # The mask is None-or-cube as returned by the splitter; it is forwarded
    # unchanged to the plugin.
    historic_forecast, historic_truth, mask = split_forecasts_and_truth(
        cubes, truth_attribute
    )

    estimator_options = dict(
        desired_units=units,
        predictor=predictor,
        tolerance=tolerance,
        max_iterations=max_iterations,
    )
    estimator = EstimateCoefficientsForEnsembleCalibration(
        distribution, cycletime, **estimator_options
    )

    return estimator(historic_forecast, historic_truth, landsea_mask=mask)
예제 #9
0
def process(
    *cubes: cli.inputcube,
    distribution,
    truth_attribute,
    point_by_point=False,
    use_default_initial_guess=False,
    units=None,
    predictor="mean",
    tolerance: float = 0.02,
    max_iterations: int = 1000,
):
    """Estimate coefficients for Ensemble Model Output Statistics.

    Gathers the arguments needed to estimate coefficients for Ensemble Model
    Output Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). Two sources of input data must be provided: historical
    forecasts and historical truth data (to use in calibration).
    The estimated coefficients are output as a cube list.

    Args:
        cubes (list of iris.cube.Cube):
            The historical forecasts and the corresponding truth used for
            calibration. They must have the same cube name and will be
            separated based on the truth attribute. Optionally this may also
            contain a single land-sea mask cube on the same domain as the
            historic forecasts and truth (where land points are set to one
            and sea points are set to zero).
        distribution (str):
            The distribution that will be used for minimising the
            Continuous Ranked Probability Score when estimating the EMOS
            coefficients. This will be dependent upon the input phenomenon.
        truth_attribute (str):
            An attribute and its value in the format of "attribute=value",
            which must be present on historical truth cubes.
        point_by_point (bool):
            If True, coefficients are calculated independently for each point
            within the input cube by creating an initial guess and minimising
            each grid point independently. If False, a single set of
            coefficients is calculated using all points.
            Warning: This option is memory intensive and is unsuitable for
            gridded input. Using a default initial guess may reduce the
            memory overhead.
        use_default_initial_guess (bool):
            If True, use the default initial guess. The default initial guess
            assumes no adjustments are required to the initial choice of
            predictor to generate the calibrated distribution. This means
            coefficients of 1 for the multiplicative coefficients and 0 for
            the additive coefficients. If False, the initial guess is
            computed.
        units (str):
            The units that calibration should be undertaken in. The historical
            forecast and truth will be converted as required.
        predictor (str):
            The form of the predictor used to calculate the location
            parameter when estimating the EMOS coefficients. Currently the
            ensemble mean ("mean") and the ensemble realizations
            ("realizations") are supported as options.
        tolerance (float):
            The tolerance for the Continuous Ranked Probability Score (CRPS)
            calculated by the minimisation. Once multiple iterations result in
            a CRPS equal to the same value within the specified tolerance, the
            minimisation will terminate.
        max_iterations (int):
            The maximum number of iterations allowed until the minimisation
            has converged to a stable solution. If the maximum number of
            iterations is reached but the minimisation has not yet converged
            to a stable solution, then the available solution is used anyway,
            and a warning is raised. If the predictor is "realizations", then
            the number of iterations may require increasing, as there will be
            more coefficients to solve.

    Returns:
        iris.cube.CubeList:
            CubeList containing the coefficients estimated using EMOS. Each
            coefficient is stored in a separate cube.
    """
    from improver.calibration import split_forecasts_and_truth
    from improver.calibration.ensemble_calibration import (
        EstimateCoefficientsForEnsembleCalibration,
    )

    # The mask is None-or-cube as returned by the splitter; it is forwarded
    # unchanged to the plugin.
    historic_forecast, historic_truth, mask = split_forecasts_and_truth(
        cubes, truth_attribute
    )

    estimator_options = dict(
        point_by_point=point_by_point,
        use_default_initial_guess=use_default_initial_guess,
        desired_units=units,
        predictor=predictor,
        tolerance=tolerance,
        max_iterations=max_iterations,
    )
    estimator = EstimateCoefficientsForEnsembleCalibration(
        distribution, **estimator_options
    )

    return estimator(historic_forecast, historic_truth, landsea_mask=mask)