예제 #1
0
 def test_fails_if_data_is_not_cube(self):
     """Check that a TypeError is raised when the input is not a cube."""
     bad_input = self.wg_perc
     # The reported type of the offending input forms part of the
     # expected error message.
     expected = (
         'Expecting data to be an instance of '
         'iris.cube.Cube but is'
         ' {}.'.format(type(bad_input))
     )
     with self.assertRaisesRegex(TypeError, expected):
         find_percentile_coordinate(bad_input)
예제 #2
0
 def test_fails_if_no_perc_coord(self):
     """Check that an error is raised when no percentile coord exists."""
     # Strip the percentile coordinate so that there is nothing to find.
     self.cube_wg.remove_coord("percentile")
     with self.assertRaisesRegex(
         CoordinateNotFoundError, "No percentile coord found on"
     ):
         find_percentile_coordinate(self.cube_wg)
예제 #3
0
 def test_fails_if_too_many_perc_coord(self):
     """Check that a ValueError is raised for multiple percentile coords."""
     # Attach an additional coordinate whose name also contains
     # "percentile" so that more than one candidate exists.
     extra_coord = iris.coords.AuxCoord(
         1, long_name="percentile", units="no_unit"
     )
     self.cube_wg.add_aux_coord(extra_coord)
     with self.assertRaisesRegex(
         ValueError, "Too many percentile coords found"
     ):
         find_percentile_coordinate(self.cube_wg)
예제 #4
0
    def check_percentile_coord(cube):
        """
        Look for a usable percentile dimension coordinate on the cube that
        is to be blended.

        Args:
            cube (iris.cube.Cube):
                The cube to be checked for a percentile coordinate.
        Returns:
            iris.coords.DimCoord or None:
                The percentile coordinate if one is found, otherwise None.
        Raises:
            ValueError : If a percentile coord exists but does not map to
                a dimension of the cube.
            ValueError : If the percentile coord has fewer than two points,
                as at least two are needed in order to do the blending.
        """
        try:
            percentile_coord = find_percentile_coordinate(cube)
            # An empty dimension mapping means the coordinate is not
            # associated with any dimension of the cube.
            if not cube.coord_dims(percentile_coord.name()):
                raise ValueError(
                    "The percentile coord must be a dimension of the cube.")
            # Blending cannot be done with a single percentile.
            n_points = len(percentile_coord.points)
            if n_points < 2:
                raise ValueError(
                    "Percentile coordinate does not have enough points"
                    " in order to blend. Must have at least 2 percentiles.")
        except CoordinateNotFoundError:
            return None
        return percentile_coord
예제 #5
0
    def extract_percentile_data(cube: Cube, req_percentile: float,
                                standard_name: str) -> Tuple[Cube, Coord]:
        """Extract percentile data from cube.

        Args:
            cube:
                Cube containing one or more percentiles.
            req_percentile:
                Required percentile value.
            standard_name:
                Standard name of the data.

        Returns:
            - Cube containing the required percentile data
            - Percentile coordinate.

        Raises:
            TypeError:
                If cube is not an instance of iris.cube.Cube.
            ValueError:
                If the required percentile cannot be extracted from the cube.

        Warns:
            UserWarning:
                If the standard name of the cube differs from standard_name.
        """
        if not isinstance(cube, iris.cube.Cube):
            msg = ("Expecting {0} data to be an instance of "
                   "iris.cube.Cube but is"
                   " {1}.".format(standard_name, type(cube)))
            raise TypeError(msg)
        # Any exception raised by find_percentile_coordinate is deliberately
        # left to propagate to the caller.
        perc_coord = find_percentile_coordinate(cube)
        if cube.standard_name != standard_name:
            # Plain "{}" fields are used rather than "{:s}": the cube's
            # standard_name may be None, and None rejects the "s" format
            # spec with a TypeError, which would turn this warning into a
            # crash.
            msg = ("Warning mismatching name for data expecting"
                   " {0} but found {1}".format(standard_name,
                                               cube.standard_name))
            warnings.warn(msg)
        constraint = iris.Constraint(
            coord_values={perc_coord.name(): req_percentile})
        result = cube.extract(constraint)
        # A result of None means that no data matched the constraint.
        if result is None:
            msg = "Could not find required percentile " "{0:3.1f} in cube".format(
                req_percentile)
            raise ValueError(msg)
        return result, perc_coord
예제 #6
0
 def setUp(self):
     """Create the percentiles cube, plugin instance and orography cube."""
     percentiles_cube = set_up_percentiles_cube()
     output_name = "probability"

     self.percentiles_cube = percentiles_cube
     self.percentile_coordinate = find_percentile_coordinate(
         percentiles_cube)
     self.new_name = output_name
     self.plugin_instance = ProbabilitiesFromPercentiles2D(
         percentiles_cube, output_name)
     self.orography_cube = set_up_threshold_cube()
예제 #7
0
    def process(self, cube):
        """
        Collapse the configured coordinate(s) of the cube into percentiles,
        which are returned on a new percentile dimension.

        What's generated by default is:
            * 15 percentiles - (0%, 5%, 10%, 20%, 25%, 30%, 40%, 50%, 60%,
              70%, 75%, 80%, 90%, 95%, 100%)

        Args:
            cube (iris.cube.Cube):
                Given the collapse coordinate, convert the set of values
                along that coordinate into a PDF and extract percentiles.

        Returns:
            iris.cube.Cube:
                A single merged cube of all the cubes produced by each
                percentile collapse.

        Raises:
            CoordinateNotFoundError:
                If the number of matches between the cube's coordinate names
                and the collapse coordinates differs from the number of
                collapse coordinates.

        """
        # Remember the input data type so it can be enforced on the output.
        original_dtype = cube.dtype

        # Count the matches between the requested collapse coordinates and
        # the names of the coordinates present on the cube.
        n_expected = len(self.collapse_coord)
        n_matches = sum(
            wanted == present.name()
            for present in cube.coords()
            for wanted in self.collapse_coord
        )
        if n_matches != n_expected:
            raise CoordinateNotFoundError(
                "Coordinate '{}' not found in cube passed to {}.".format(
                    self.collapse_coord, self.__class__.__name__))

        result = collapsed(
            cube,
            self.collapse_coord,
            iris.analysis.PERCENTILE,
            percent=self.percentiles,
            fast_percentile_method=self.fast_percentile_method,
        )
        result.data = result.data.astype(original_dtype)

        for collapsed_name in self.collapse_coord:
            result.remove_coord(collapsed_name)

        # Rename the percentile coordinate to "percentile" and make sure
        # that the associated unit is %.
        percentile_coord = find_percentile_coordinate(result)
        result.coord(percentile_coord).rename("percentile")
        result.coord(percentile_coord).units = "%"
        return result
예제 #8
0
    def __init__(self, percentiles_cube, output_name):
        """
        Initialise class. An inverse_ordering (bool) switch is set to True
        for cases where the percentiled data increases in the opposite sense
        to the percentile coordinate:

                e.g.  0th Percentile - Value = 10
                     10th Percentile - Value = 5
                     20th Percentile - Value = 0

        Args:
            percentiles_cube (iris.cube.Cube):
                The percentiled field from which probabilities will be obtained
                using the input cube. This cube should contain a percentiles
                dimension, with fields of values that correspond to these
                percentiles. The cube passed to the process method will contain
                values of the same diagnostic (e.g. height) as this reference
                cube.
            output_name (str):
                The name of the cube being created,
                e.g.'probability_of_snow_falling_level_below_ground_level'

        Raises:
            ValueError:
                If the percentile coordinate has fewer than two points.
        """
        perc_coord = find_percentile_coordinate(percentiles_cube)
        self.percentile_coordinate = perc_coord
        if perc_coord.points.shape[0] < 2:
            raise ValueError(
                "Percentile coordinate has only one value. Interpolation "
                "using ProbabilitiesFromPercentiles2D requires multiple "
                "values are provided.")
        self.percentiles_cube = percentiles_cube
        self.output_name = output_name

        # Compare the data of the first and last percentile slices. The
        # ordering is inverse if the first slice is greater than or equal
        # to the last slice at every point.
        slices = percentiles_cube.slices_over(perc_coord)
        first_data = next(slices).data
        for remaining_slice in slices:
            last_data = remaining_slice.data
        self.inverse_ordering = bool((first_data - last_data >= 0).all())
예제 #9
0
    def extract_percentile_data(cube, req_percentile, standard_name):
        """Extract percentile data from cube.

        Args:
            cube (iris.cube.Cube):
                Cube containing one or more percentiles.
            req_percentile (float):
                Required percentile value.
            standard_name (str):
                Standard name of the data.

        Returns:
            (tuple): tuple containing:
                **result** (iris.cube.Cube):
                    Cube containing the required percentile data
                **perc_coord** (iris.coords.Coord):
                    Percentile coordinate.

        Raises:
            TypeError:
                If cube is not an instance of iris.cube.Cube.
            ValueError:
                If the required percentile cannot be extracted from the cube.

        Warns:
            UserWarning:
                If the standard name of the cube differs from standard_name.

        """
        if not isinstance(cube, iris.cube.Cube):
            msg = ('Expecting {0} data to be an instance of '
                   'iris.cube.Cube but is'
                   ' {1}.'.format(standard_name, type(cube)))
            raise TypeError(msg)
        # Any exception raised by find_percentile_coordinate is deliberately
        # left to propagate to the caller.
        perc_coord = find_percentile_coordinate(cube)
        if cube.standard_name != standard_name:
            # Plain '{}' fields are used rather than '{:s}': the cube's
            # standard_name may be None, and None rejects the 's' format
            # spec with a TypeError, which would turn this warning into a
            # crash.
            msg = ('Warning mismatching name for data expecting'
                   ' {0} but found {1}'.format(standard_name,
                                               cube.standard_name))
            warnings.warn(msg)
        constraint = (iris.Constraint(
            coord_values={perc_coord.name(): req_percentile}))
        result = cube.extract(constraint)
        # A result of None means that no data matched the constraint.
        if result is None:
            msg = ('Could not find required percentile '
                   '{0:3.1f} in cube'.format(req_percentile))
            raise ValueError(msg)
        return result, perc_coord
예제 #10
0
 def test_fails_if_data_is_not_cube(self):
     """Check that a TypeError is raised when the input is not a cube."""
     not_a_cube = 50.0
     with self.assertRaisesRegex(
         TypeError, "Expecting data to be an instance of iris.cube.Cube "
     ):
         find_percentile_coordinate(not_a_cube)
예제 #11
0
 def test_basic(self):
     """Check that a coordinate named "percentile" is returned."""
     found_coord = find_percentile_coordinate(self.cube_wg)
     self.assertIsInstance(found_coord, iris.coords.Coord)
     self.assertEqual(found_coord.name(), "percentile")
예제 #12
0
def process(
    neighbour_cube: cli.inputcube,
    cube: cli.inputcube,
    lapse_rate: cli.inputcube = None,
    *,
    apply_lapse_rate_correction=False,
    land_constraint=False,
    similar_altitude=False,
    extract_percentiles: cli.comma_separated_list = None,
    ignore_ecc_bounds=False,
    new_title: str = None,
    suppress_warnings=False,
):
    """Module to run spot data extraction.

    Extract diagnostic data from gridded fields for spot data sites. It is
    possible to apply a temperature lapse rate adjustment to temperature data
    that helps to account for differences between the spot site's real altitude
    and that of the grid point from which the temperature data is extracted.

    Args:
        neighbour_cube (iris.cube.Cube):
            Cube of spot-data neighbours and the spot site information.
        cube (iris.cube.Cube):
            Cube containing the diagnostic data to be extracted.
        lapse_rate (iris.cube.Cube):
            Optional cube containing temperature lapse rates. If this cube is
            provided and a screen temperature cube is being processed, the
            lapse rates will be used to adjust the temperature to better
            represent each spot's site-altitude.
        apply_lapse_rate_correction (bool):
            Use to apply a lapse-rate correction to screen temperature data so
            that the data are a better match to the altitude of the spot site
            for which they have been extracted.
        land_constraint (bool):
            Use to select the nearest-with-land-constraint neighbour-selection
            method from the neighbour_cube. This means that the grid points
            should be land points except for sites where none were found within
            the search radius when the neighbour cube was created. May be used
            with similar_altitude.
        similar_altitude (bool):
            Use to select the nearest-with-height-constraint
            neighbour-selection method from the neighbour_cube. These are grid
            points that were found to be the closest in altitude to the spot
            site within the search radius defined when the neighbour cube was
            created. May be used with land_constraint.
        extract_percentiles (list or int):
            If set to a percentile value or a list of percentile values,
            data corresponding to those percentiles will be returned. For
            example "25, 50, 75" will result in the 25th, 50th and 75th
            percentiles being returned from a cube of probabilities,
            percentiles or realizations. Deterministic input data will raise
            a warning message.
            Note that for percentiles inputs, the desired percentile(s) must
            exist in the input cube.
        ignore_ecc_bounds (bool):
            Demotes exceptions where calculated percentiles are outside the ECC
            bounds range to warnings.
        new_title (str):
            New title for the spot-extracted data.  If None, this attribute is
            removed from the output cube since it has no prescribed standard
            and may therefore contain grid information that is no longer
            correct after spot-extraction.
        suppress_warnings (bool):
            Suppress warning output. This option should only be used if it
            is known that warnings will be generated but they are not required.

    Returns:
        iris.cube.Cube:
           Cube of spot data.

    Raises:
        ValueError:
            If the percentile diagnostic cube does not contain the requested
            percentile value.
        ValueError:
            If the lapse rate cube was provided but the diagnostic being
            processed is not air temperature.
        ValueError:
            If the lapse rate cube provided does not have the name
            "air_temperature_lapse_rate"
        ValueError:
            If the lapse rate cube does not contain a single valued height
            coordinate.

    Warns:
        warning:
           If diagnostic cube is not a known probabilistic type.
        warning:
            If a lapse rate cube was provided, but the height of the
            temperature does not match that of the data used.
        warning:
            If a lapse rate cube was not provided, but the option to apply
            the lapse rate correction was enabled.

    """

    import warnings

    import iris
    import numpy as np
    from iris.exceptions import CoordinateNotFoundError

    from improver.ensemble_copula_coupling.ensemble_copula_coupling import (
        ConvertProbabilitiesToPercentiles, )
    from improver.metadata.probabilistic import find_percentile_coordinate
    from improver.percentile import PercentileConverter
    from improver.spotdata.apply_lapse_rate import SpotLapseRateAdjust
    from improver.spotdata.neighbour_finding import NeighbourSelection
    from improver.spotdata.spot_extraction import SpotExtraction
    from improver.utilities.cube_extraction import extract_subcube

    # Work out which neighbour-selection method the two constraint flags
    # correspond to, then extract the spot data with that method.
    selector = NeighbourSelection(
        land_constraint=land_constraint, minimum_dz=similar_altitude)
    neighbour_selection_method = selector.neighbour_finding_method_name()
    extractor = SpotExtraction(
        neighbour_selection_method=neighbour_selection_method)
    result = extractor(neighbour_cube, cube, new_title=new_title)

    # Percentiles are extracted after the spot-extraction to minimise
    # processing time; usually there are far fewer spot sites than grid
    # points.
    if extract_percentiles:
        extract_percentiles = [np.float32(x) for x in extract_percentiles]
        try:
            perc_coordinate = find_percentile_coordinate(result)
        except CoordinateNotFoundError:
            # No percentile coordinate: try to generate the percentiles from
            # probabilities or realizations instead.
            if "probability_of_" in result.name():
                converter = ConvertProbabilitiesToPercentiles(
                    ecc_bounds_warning=ignore_ecc_bounds)
                result = iris.util.squeeze(
                    converter(result, percentiles=extract_percentiles))
            elif result.coords("realization", dim_coords=True):
                # The fast percentile method is only requested for data
                # that is not a masked array.
                use_fast_method = not np.ma.isMaskedArray(result.data)
                converter = PercentileConverter(
                    "realization",
                    percentiles=extract_percentiles,
                    fast_percentile_method=use_fast_method,
                )
                result = converter(result)
            elif not suppress_warnings:
                warnings.warn(
                    "Diagnostic cube is not a known probabilistic type. "
                    "The {} percentile could not be extracted. Extracting "
                    "data from the cube including any leading "
                    "dimensions.".format(extract_percentiles))
        else:
            # A percentile coordinate exists, so the requested values must
            # already be present on it.
            constraint = "{}={}".format(
                perc_coordinate.name(), extract_percentiles)
            perc_result = extract_subcube(result, [constraint])
            if perc_result is None:
                raise ValueError(
                    "The percentile diagnostic cube does not contain the "
                    "requested percentile value. Requested {}, available "
                    "{}".format(extract_percentiles, perc_coordinate.points))
            result = perc_result

    # The lapse rate adjustment requires both the option to be enabled and a
    # lapse rate cube to be provided, and applies to temperature data only.
    if apply_lapse_rate_correction and lapse_rate:
        if result.name() != "air_temperature":
            raise ValueError(
                "A lapse rate cube was provided, but the diagnostic being "
                "processed is not air temperature and cannot be adjusted.")

        if lapse_rate.name() != "air_temperature_lapse_rate":
            raise ValueError(
                "A cube has been provided as a lapse rate cube but does "
                "not have the expected name air_temperature_lapse_rate: "
                "{}".format(lapse_rate.name()))

        try:
            lapse_rate_height_coord = lapse_rate.coord("height")
        except (ValueError, CoordinateNotFoundError):
            raise ValueError(
                "Lapse rate cube does not contain a single valued height "
                "coordinate. This is required to ensure it is applied to "
                "equivalent temperature data.")

        # Temperatures are only adjusted when the height of the temperature
        # data matches that used to calculate the lapse rates.
        if cube.coord("height") == lapse_rate_height_coord:
            adjuster = SpotLapseRateAdjust(
                neighbour_selection_method=neighbour_selection_method)
            result = adjuster(result, neighbour_cube, lapse_rate)
        elif not suppress_warnings:
            warnings.warn(
                "A lapse rate cube was provided, but the height of the "
                "temperature data does not match that of the data used "
                "to calculate the lapse rates. As such the temperatures "
                "were not adjusted with the lapse rates.")

    elif apply_lapse_rate_correction and not suppress_warnings:
        # Reaching this branch with the option enabled means that no usable
        # lapse rate cube was supplied.
        warnings.warn(
            "A lapse rate cube was not provided, but the option to "
            "apply the lapse rate correction was enabled. No lapse rate "
            "correction could be applied.")

    # Remove the internal model_grid_hash attribute if present.
    result.attributes.pop("model_grid_hash", None)
    return result
예제 #13
0
    def run(self, cube: Cube) -> None:
        """Populates self-consistent interpreted parameters, or raises collated errors
        describing (as far as possible) how the metadata are a) not self-consistent,
        and / or b) not consistent with the Met Office IMPROVER standard.

        Although every effort has been made to return as much information as possible,
        collated errors may not be complete if the issue is fundamental. The developer
        is advised to rerun this tool after each fix, until no further problems are
        raised.

        Args:
            cube:
                Cube whose name, cell methods, coordinates and attributes are
                to be interpreted.

        Raises:
            ValueError:
                If any errors were collected in self.errors. The message is
                the collected errors joined by newlines.
        """

        # 1) Interpret diagnostic and type-specific metadata, including cell methods
        if cube.name() in ANCILLARIES:
            self.field_type = self.ANCIL
            self.diagnostic = cube.name()
            # Any cell method at all is reported as an error for ancillaries.
            if cube.cell_methods:
                self.errors.append(f"Unexpected cell methods {cube.cell_methods}")

        elif cube.name() in SPECIAL_CASES:
            self.field_type = self.diagnostic = cube.name()
            if cube.name() == "weather_code":
                # A cell method is accepted only if it equals WXCODE_MODE_CM
                # and the cube name is in WXCODE_NAMES. Every other cell
                # method appends an error, which lists all of the cube's cell
                # methods each time.
                for cm in cube.cell_methods:
                    if cm == WXCODE_MODE_CM and cube.name() in WXCODE_NAMES:
                        pass
                    else:
                        self.errors.append(
                            f"Unexpected cell methods {cube.cell_methods}"
                        )
            elif cube.name() == "wind_from_direction":
                # If cell methods are present, the only accepted form is a
                # single mean over realization.
                if cube.cell_methods:
                    expected = CellMethod(method="mean", coords="realization")
                    if len(cube.cell_methods) > 1 or cube.cell_methods[0] != expected:
                        self.errors.append(
                            f"Unexpected cell methods {cube.cell_methods}"
                        )
            else:
                # Any other special case is flagged as unhandled. Returning
                # here skips all remaining checks, including the raising of
                # collated errors in step 7.
                self.unhandled = True
                return

        else:
            if "probability" in cube.name() and "threshold" in cube.name():
                self.field_type = self.PROB
                self.check_probability_cube_metadata(cube)
            else:
                self.diagnostic = cube.name()
                try:
                    perc_coord = find_percentile_coordinate(cube)
                except CoordinateNotFoundError:
                    # Without a percentile coordinate, a coordinate with a
                    # var_name of "threshold" marks the cube as a probability
                    # field; otherwise it is treated as a plain diagnostic.
                    coords = get_coord_names(cube)
                    if any(
                        [cube.coord(coord).var_name == "threshold" for coord in coords]
                    ):
                        self.field_type = self.PROB
                        self.check_probability_cube_metadata(cube)
                    else:
                        self.field_type = self.DIAG
                else:
                    # A percentile coordinate was found: check its name and
                    # units.
                    self.field_type = self.PERC
                    if perc_coord.name() != PERC_COORD:
                        self.errors.append(
                            f"Percentile coordinate should have name {PERC_COORD}, "
                            f"has {perc_coord.name()}"
                        )

                    if perc_coord.units != "%":
                        self.errors.append(
                            "Percentile coordinate should have units of %, "
                            f"has {perc_coord.units}"
                        )

            # Cell methods are checked for every cube handled by this branch.
            self.check_cell_methods(cube)

        # 2) Interpret model and blend information from cube attributes
        self.check_attributes(cube.attributes)

        # 3) Check whether expected coordinates are present
        coords = get_coord_names(cube)
        if "spot_index" in coords:
            self.check_spot_data(cube, coords)

        if self.field_type == self.ANCIL:
            # there is no definitive standard for time coordinates on static ancillaries
            pass
        elif cube.coords("time_in_local_timezone"):
            # For data on local timezones, the time coordinate will match the horizontal
            # dimensions and there will be no forecast period.
            expected_coords = set(LOCAL_TIME_COORDS + UNBLENDED_TIME_COORDS)
            expected_coords.discard("forecast_period")
            self._check_coords_present(coords, expected_coords)
            self._check_coords_are_horizontal(cube, ["time"])
        elif self.blended:
            self._check_coords_present(coords, BLENDED_TIME_COORDS)
        else:
            self._check_coords_present(coords, UNBLENDED_TIME_COORDS)

        # 4) Check points are equal to upper bounds for bounded time coordinates
        for coord in ["time", "forecast_period"]:
            if coord in get_coord_names(cube):
                self._check_coord_bounds(cube, coord)

        # 5) Check datatypes on data and coordinates
        # A failure here is recorded alongside the other errors rather than
        # being raised immediately.
        try:
            check_mandatory_standards(cube)
        except ValueError as cause:
            self.errors.append(str(cause))

        # 6) Check multiple realizations only exist for ensemble models
        if self.field_type == self.DIAG:
            try:
                realization_coord = cube.coord("realization")
            except CoordinateNotFoundError:
                pass
            else:
                # A missing model ID attribute defaults to "ens", in which
                # case the condition below is never met.
                model_id = cube.attributes.get(self.model_id_attr, "ens")
                if "ens" not in model_id and len(realization_coord.points) > 1:
                    self.errors.append(
                        f"Deterministic model should not have {len(realization_coord.points)} "
                        "realizations"
                    )

        # 7) Raise collated errors if present
        if self.errors:
            raise ValueError("\n".join(self.errors))
예제 #14
0
def process(cube: cli.inputcube,
            coefficients: cli.inputcube = None,
            land_sea_mask: cli.inputcube = None,
            *,
            distribution,
            realizations_count: int = None,
            randomise=False,
            random_seed: int = None,
            ignore_ecc_bounds=False,
            predictor='mean',
            shape_parameters: cli.comma_separated_list = None):
    """Applying coefficients for Ensemble Model Output Statistics.

    Load in arguments for applying coefficients for Ensemble Model Output
    Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). The coefficients are applied to the forecast
    that is supplied, so as to calibrate the forecast. The calibrated
    forecast is written to a cube. If no coefficients are provided the input
    forecast is returned unchanged.

    Args:
        cube (iris.cube.Cube):
            A Cube containing the forecast to be calibrated. The input format
            could be either realizations, probabilities or percentiles.
        coefficients (iris.cube.Cube):
            A cube containing the coefficients used for calibration or None.
            If none then then input is returned unchanged.
        land_sea_mask (iris.cube.Cube):
            A cube containing the land-sea mask on the same domain as the
            forecast that is to be calibrated. Land points are "
            "specified by ones and sea points are specified by zeros. "
            "If not None this argument will enable land-only calibration, in "
            "which sea points are returned without the application of "
            "calibration."
        distribution (str):
            The distribution for constructing realizations, percentiles or
            probabilities. This should typically match the distribution used
            for minimising the Continuous Ranked Probability Score when
            estimating the EMOS coefficients. The distributions available are
            those supported by :data:`scipy.stats`.
        realizations_count (int):
            Option to specify the number of ensemble realizations that will be
            created from probabilities or percentiles for input into EMOS.
        randomise (bool):
            Option to reorder the post-processed forecasts randomly. If not
            set, the ordering of the raw ensemble is used. This option is
            only valid when the input format is realizations.
        random_seed (int):
            Option to specify a value for the random seed for testing
            purposes, otherwise the default random seen behaviour is utilised.
            The random seed is used in the generation of the random numbers
            used for either the randomise option to order the input
            percentiles randomly, rather than use the ordering from the raw
            ensemble, or for splitting tied values within the raw ensemble,
            so that the values from the input percentiles can be ordered to
            match the raw ensemble.
        ignore_ecc_bounds (bool):
            If True, where the percentiles exceed the ECC bounds range,
            raises a warning rather than an exception. This occurs when the
            current forecasts is in the form of probabilities and is
            converted to percentiles, as part of converting the input
            probabilities into realizations.
        predictor (str):
            String to specify the form of the predictor used to calculate
            the location parameter when estimating the EMOS coefficients.
            Currently the ensemble mean ("mean") and the ensemble
            realizations ("realizations") are supported as the predictors.
        shape_parameters (float or str):
            The shape parameters required for defining the distribution
            specified by the distribution argument. The shape parameters
            should either be a number or 'inf' or '-inf' to represent
            infinity. Further details about appropriate shape parameters
            are available in scipy.stats. For the truncated normal
            distribution with a lower bound of zero, as available when
            estimating EMOS coefficients, the appropriate shape parameters
            are 0 and inf.

    Returns:
        iris.cube.Cube:
            The calibrated forecast cube.

    Raises:
        ValueError:
            If the current forecast is a coefficients cube.
        ValueError:
            If the coefficients cube does not have the right name of
            "emos_coefficients".
        ValueError:
            If the forecast type is 'percentiles' or 'probabilities' and the
            realizations_count argument is not provided.
    """
    import warnings

    import numpy as np
    from iris.exceptions import CoordinateNotFoundError

    from improver.calibration.ensemble_calibration import (
        ApplyCoefficientsFromEnsembleCalibration)
    from improver.ensemble_copula_coupling.ensemble_copula_coupling import (
        EnsembleReordering, ConvertLocationAndScaleParametersToPercentiles,
        ConvertLocationAndScaleParametersToProbabilities,
        ConvertProbabilitiesToPercentiles, RebadgePercentilesAsRealizations,
        ResamplePercentiles)
    from improver.calibration.utilities import merge_land_and_sea
    from improver.metadata.probabilistic import find_percentile_coordinate

    current_forecast = cube

    if current_forecast.name() in ['emos_coefficients', 'land_binary_mask']:
        msg = "The current forecast cube has the name {}"
        raise ValueError(msg.format(current_forecast.name()))

    if coefficients is None:
        msg = ("There are no coefficients provided for calibration. The "
               "uncalibrated forecast will be returned.")
        warnings.warn(msg)
        return current_forecast

    if coefficients.name() != 'emos_coefficients':
        msg = ("The current coefficients cube does not have the "
               "name 'emos_coefficients'")
        raise ValueError(msg)

    if land_sea_mask and land_sea_mask.name() != 'land_binary_mask':
        msg = ("The land_sea_mask cube does not have the "
               "name 'land_binary_mask'")
        raise ValueError(msg)

    original_current_forecast = current_forecast.copy()
    try:
        find_percentile_coordinate(current_forecast)
        input_forecast_type = "percentiles"
    except CoordinateNotFoundError:
        input_forecast_type = "realizations"

    if current_forecast.name().startswith("probability_of"):
        input_forecast_type = "probabilities"
        conversion_plugin = ConvertProbabilitiesToPercentiles(
            ecc_bounds_warning=ignore_ecc_bounds)
    elif input_forecast_type == "percentiles":
        # Initialise plugin to resample percentiles so that the percentiles are
        # evenly spaced.
        conversion_plugin = ResamplePercentiles(
            ecc_bounds_warning=ignore_ecc_bounds)

    if input_forecast_type in ["percentiles", "probabilities"]:
        if not realizations_count:
            raise ValueError(
                "The current forecast has been provided as {0}. "
                "These {0} need to be converted to realizations "
                "for ensemble calibration. The realizations_count "
                "argument is used to define the number of realizations "
                "to construct from the input {0}, so if the "
                "current forecast is provided as {0} then "
                "realizations_count must be defined.".format(
                    input_forecast_type))
        current_forecast = conversion_plugin.process(
            current_forecast, no_of_percentiles=realizations_count)
        current_forecast = (
            RebadgePercentilesAsRealizations().process(current_forecast))

    # Apply coefficients as part of Ensemble Model Output Statistics (EMOS).
    ac = ApplyCoefficientsFromEnsembleCalibration(predictor=predictor)
    location_parameter, scale_parameter = ac.process(
        current_forecast, coefficients, landsea_mask=land_sea_mask)

    if shape_parameters:
        shape_parameters = [np.float32(x) for x in shape_parameters]

    # Convert the output forecast type (i.e. realizations, percentiles,
    # probabilities) to match the input forecast type.
    if input_forecast_type == "probabilities":
        result = ConvertLocationAndScaleParametersToProbabilities(
            distribution=distribution,
            shape_parameters=shape_parameters).process(
                location_parameter, scale_parameter, original_current_forecast)
    elif input_forecast_type == "percentiles":
        perc_coord = find_percentile_coordinate(original_current_forecast)
        result = ConvertLocationAndScaleParametersToPercentiles(
            distribution=distribution,
            shape_parameters=shape_parameters).process(
                location_parameter,
                scale_parameter,
                original_current_forecast,
                percentiles=perc_coord.points)
    elif input_forecast_type == "realizations":
        # Ensemble Copula Coupling to generate realizations
        # from the location and scale parameter.
        no_of_percentiles = len(current_forecast.coord('realization').points)
        percentiles = ConvertLocationAndScaleParametersToPercentiles(
            distribution=distribution,
            shape_parameters=shape_parameters).process(
                location_parameter,
                scale_parameter,
                original_current_forecast,
                no_of_percentiles=no_of_percentiles)
        result = EnsembleReordering().process(percentiles,
                                              current_forecast,
                                              random_ordering=randomise,
                                              random_seed=random_seed)
    if land_sea_mask:
        # Fill in masked sea points with uncalibrated data.
        merge_land_and_sea(result, original_current_forecast)
    return result
예제 #15
0
def process(current_forecast, coeffs, landsea_mask, num_realizations=None,
            random_ordering=False, random_seed=None,
            ecc_bounds_warning=False, predictor_of_mean='mean'):
    """Calibrate a forecast by applying EMOS coefficients.

    Ensemble Model Output Statistics (EMOS), also known as Non-homogeneous
    Gaussian Regression (NGR), coefficients are applied to the supplied
    forecast. Percentile and probability inputs are first converted into
    realizations; after calibration the result is converted back so that
    the output has the same form as the input. When no coefficients are
    supplied a warning is issued and the input forecast is returned as is.

    Args:
        current_forecast (iris.cube.Cube):
            The forecast to be calibrated, provided as realizations,
            probabilities or percentiles.
        coeffs (iris.cube.Cube or None):
            The coefficients used for calibration. If None, the
            current_forecast is returned unchanged.
        landsea_mask (iris.cube.Cube or None):
            A land-sea mask on the same domain as the forecast that is to
            be calibrated. Land points are specified by ones and sea points
            are specified by zeros. If not None this argument enables
            land-only calibration, in which sea points are returned without
            the application of calibration.
        num_realizations (numpy.int32):
            The number of ensemble realizations to produce. Required when
            the forecast is supplied as probabilities or percentiles, where
            it sets how many realizations are constructed from the input.
            It is also the number of realizations built from the mean and
            variance obtained after applying the EMOS coefficients.
            Default is None, in which case the number of realizations in
            the forecast is used.
        random_ordering (bool):
            If True, the post-processed forecasts are reordered randomly
            rather than following the ordering of the raw ensemble. Only
            valid when the input format is realizations.
            Default is False.
        random_seed (int):
            A value for the random seed, intended for testing; otherwise
            the default random seed behaviour is used. The seed affects the
            random numbers used either by the random_ordering option, or
            when splitting tied values within the raw ensemble so that the
            input percentiles can be ordered to match the raw ensemble.
            Default is None.
        ecc_bounds_warning (bool):
            If True, percentiles exceeding the ECC bounds range raise a
            warning rather than an exception. This is passed to the plugin
            that converts the input probabilities or percentiles into
            realizations.
            Default is False.
        predictor_of_mean (str):
            The predictor used to calibrate the forecast mean. The ensemble
            mean "mean" and the ensemble realizations "realization" are
            the supported options.
            Default is 'mean'

    Returns:
        iris.cube.Cube:
            The calibrated forecast cube.

    Raises:
        ValueError:
            If the coefficients cube does not have the name
            "emos_coefficients".
        ValueError:
            If the current forecast is a coefficients cube.
        ValueError:
            If the forecast type is 'percentiles' or 'probabilities' while
            no num_realizations are given.

    """
    # Nothing to apply: warn and hand the forecast back untouched.
    if coeffs is None:
        warnings.warn(
            "There are no coefficients provided for calibration. The "
            "uncalibrated forecast will be returned.")
        return current_forecast

    if coeffs.name() != 'emos_coefficients':
        raise ValueError(
            "The current coefficients cube does not have the "
            "name 'emos_coefficients'")

    if current_forecast.name() == 'emos_coefficients':
        raise ValueError(
            "The current forecast cube has the name 'emos_coefficients'")

    # Keep the forecast as supplied; it is needed as a template when the
    # calibrated output is converted back to the input form.
    raw_forecast = current_forecast.copy()

    # A percentile coordinate marks a percentile forecast; anything else is
    # treated as realizations unless the cube name says otherwise below.
    try:
        find_percentile_coordinate(current_forecast)
    except CoordinateNotFoundError:
        input_forecast_type = "realizations"
    else:
        input_forecast_type = "percentiles"

    if current_forecast.name().startswith("probability_of"):
        # Probabilities are converted into percentiles.
        input_forecast_type = "probabilities"
        to_percentiles = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=ecc_bounds_warning)
    elif input_forecast_type == "percentiles":
        # Percentiles are resampled so that they are evenly spaced.
        to_percentiles = ResamplePercentiles(
            ecc_bounds_warning=ecc_bounds_warning)

    # Non-realization inputs are converted to percentiles and then
    # re-badged as realizations ahead of calibration.
    if input_forecast_type != "realizations":
        if not num_realizations:
            template = (
                "The current forecast has been provided as {0}. "
                "These {0} need to be converted to realizations "
                "for ensemble calibration. The num_realizations "
                "argument is used to define the number of realizations "
                "to construct from the input {0}, so if the "
                "current forecast is provided as {0} then "
                "num_realizations must be defined.")
            raise ValueError(template.format(input_forecast_type))
        as_percentiles = to_percentiles.process(
            current_forecast, no_of_percentiles=num_realizations)
        current_forecast = RebadgePercentilesAsRealizations().process(
            as_percentiles)

    # Fall back to the size of the ensemble being calibrated.
    if not num_realizations:
        realization_coord = current_forecast.coord('realization')
        num_realizations = len(realization_coord.points)

    # Apply coefficients as part of Ensemble Model Output Statistics (EMOS).
    calibrator = ApplyCoefficientsFromEnsembleCalibration(
        predictor_of_mean_flag=predictor_of_mean)
    calibrated_predictor, calibrated_variance = calibrator.process(
        current_forecast, coeffs, landsea_mask=landsea_mask)

    # Return the calibrated forecast in the same form as the input.
    if input_forecast_type == "probabilities":
        return GenerateProbabilitiesFromMeanAndVariance().process(
            calibrated_predictor, calibrated_variance, raw_forecast)

    if input_forecast_type == "percentiles":
        perc_coord = find_percentile_coordinate(raw_forecast)
        return GeneratePercentilesFromMeanAndVariance().process(
            calibrated_predictor, calibrated_variance,
            percentiles=perc_coord.points)

    # Realizations: Ensemble Copula Coupling generates realizations from the
    # mean and variance, ordered using the forecast being calibrated.
    calibrated_percentiles = GeneratePercentilesFromMeanAndVariance().process(
        calibrated_predictor, calibrated_variance,
        no_of_percentiles=num_realizations)
    return EnsembleReordering().process(
        calibrated_percentiles, current_forecast,
        random_ordering=random_ordering, random_seed=random_seed)
예제 #16
0
def process(neighbour_cube,
            diagnostic_cube,
            lapse_rate_cube=None,
            apply_lapse_rate_correction=False,
            land_constraint=False,
            minimum_dz=False,
            extract_percentiles=None,
            ecc_bounds_warning=False,
            attributes_dict=None,
            suppress_warnings=False):
    """Run spot data extraction.

    Diagnostic data are extracted from gridded fields at the spot sites
    described by the neighbour cube. Optionally, requested percentiles are
    extracted or derived from the spot data, and a temperature lapse rate
    adjustment is applied to help account for the difference between a
    site's real altitude and that of the grid point the data came from.

    Args:
        neighbour_cube (iris.cube.Cube):
            Cube of spot-data neighbours and the spot site information.
        diagnostic_cube (iris.cube.Cube):
            Cube containing the diagnostic data to be extracted.
        lapse_rate_cube (iris.cube.Cube):
            Cube containing temperature lapse rates. When provided together
            with apply_lapse_rate_correction, and the extracted diagnostic
            is air temperature, the lapse rates are used to adjust the
            temperatures to better represent each spot site's altitude.
        apply_lapse_rate_correction (bool):
            If True, and a lapse rate cube has been provided, the extracted
            temperatures are adjusted to better match the altitude of the
            spot sites.
            Default is False.
        land_constraint (bool):
            If True, the neighbour cube is interrogated for grid point
            neighbours that were identified using a land constraint, i.e.
            land points, except for sites where none were found within the
            search radius when the neighbour cube was created. May be used
            with minimum_dz.
            Default is False.
        minimum_dz (bool):
            If True, the neighbour cube is interrogated for grid point
            neighbours that were identified using the minimum height
            difference constraint, i.e. the grid points closest in altitude
            to the spot site within the search radius defined when the
            neighbour cube was created. May be used with land_constraint.
            Default is False.
        extract_percentiles (list or int):
            A percentile value, or list of percentile values, to return.
            For example [25, 50, 75] returns the 25th, 50th and 75th
            percentiles from a cube of probabilities, percentiles or
            realizations. For percentile inputs the requested percentile(s)
            must exist in the input cube.
            Default is None.
        ecc_bounds_warning (bool):
            If True, calculated percentiles outside the ECC bounds range
            raise a warning rather than an exception.
            Default is False.
        attributes_dict (dict):
            If provided, this dictionary is used to modify the attributes
            of the returned cube.
            Default is None.
        suppress_warnings (bool):
            Suppress warning output. Only use this when warnings are known
            to be generated but are not required.
            Default is False.

    Returns:
        iris.cube.Cube:
           The processed cube.

    Raises:
        ValueError:
            If the percentile diagnostic cube does not contain the requested
            percentile value.
        ValueError:
            If the lapse rate cube was provided but the diagnostic being
            processed is not air temperature.
        ValueError:
            If the lapse rate cube provided does not have the name
            "air_temperature_lapse_rate"
        ValueError:
            If the lapse rate cube does not contain a single valued height
            coordinate.

    Warns:
        warning:
           If diagnostic cube is not a known probabilistic type.
        warning:
            If a lapse rate cube was provided, but the height of the
            temperature does not match that of the data used.
        warning:
            If a lapse rate cube was not provided, but the option to apply
            the lapse rate correction was enabled.

    """
    selector = NeighbourSelection(
        land_constraint=land_constraint, minimum_dz=minimum_dz)
    neighbour_selection_method = selector.neighbour_finding_method_name()
    result = SpotExtraction(
        neighbour_selection_method=neighbour_selection_method).process(
            neighbour_cube, diagnostic_cube)

    # Percentiles are handled after the spot extraction to minimise
    # processing time; usually there are far fewer spot sites than grid
    # points.
    if extract_percentiles is not None:
        try:
            perc_coordinate = find_percentile_coordinate(result)
        except CoordinateNotFoundError:
            # No percentile coordinate: derive the percentiles from
            # probabilities or realizations where possible.
            if 'probability_of_' in result.name():
                converter = GeneratePercentilesFromProbabilities(
                    ecc_bounds_warning=ecc_bounds_warning)
                result = iris.util.squeeze(
                    converter.process(result, percentiles=extract_percentiles))
            elif result.coords('realization', dim_coords=True):
                # The fast percentile method is only used for unmasked data.
                use_fast_method = not np.ma.isMaskedArray(result.data)
                converter = PercentileConverter(
                    'realization',
                    percentiles=extract_percentiles,
                    fast_percentile_method=use_fast_method)
                result = converter.process(result)
            else:
                msg = ('Diagnostic cube is not a known probabilistic type. '
                       'The {} percentile could not be extracted. Extracting '
                       'data from the cube including any leading '
                       'dimensions.'.format(extract_percentiles))
                if not suppress_warnings:
                    warnings.warn(msg)
        else:
            # Percentile input: the requested value(s) must already exist.
            constraint = [
                '{}={}'.format(perc_coordinate.name(), extract_percentiles)
            ]
            subcube = extract_subcube(result, constraint)
            if subcube is None:
                raise ValueError(
                    'The percentile diagnostic cube does not contain the '
                    'requested percentile value. Requested {}, available '
                    '{}'.format(extract_percentiles, perc_coordinate.points))
            result = subcube

    if apply_lapse_rate_correction:
        if lapse_rate_cube:
            if result.name() != "air_temperature":
                raise ValueError(
                    "A lapse rate cube was provided, but the diagnostic being "
                    "processed is not air temperature and cannot be adjusted.")

            if lapse_rate_cube.name() != 'air_temperature_lapse_rate':
                raise ValueError(
                    "A cube has been provided as a lapse rate cube but does "
                    "not have the expected name air_temperature_lapse_rate: "
                    "{}".format(lapse_rate_cube.name()))

            try:
                lapse_rate_height_coord = lapse_rate_cube.coord("height")
            except (ValueError, CoordinateNotFoundError):
                raise ValueError(
                    "Lapse rate cube does not contain a single valued height "
                    "coordinate. This is required to ensure it is applied to "
                    "equivalent temperature data.")

            # Only adjust when the temperature data are at the same height
            # as the data used to calculate the lapse rates.
            if diagnostic_cube.coord("height") == lapse_rate_height_coord:
                adjuster = SpotLapseRateAdjust(
                    neighbour_selection_method=neighbour_selection_method)
                result = adjuster.process(
                    result, neighbour_cube, lapse_rate_cube)
            elif not suppress_warnings:
                warnings.warn(
                    "A lapse rate cube was provided, but the height of the "
                    "temperature data does not match that of the data used "
                    "to calculate the lapse rates. As such the temperatures "
                    "were not adjusted with the lapse rates.")
        elif not suppress_warnings:
            # Correction requested but there are no lapse rates to apply.
            warnings.warn(
                "A lapse rate cube was not provided, but the option to "
                "apply the lapse rate correction was enabled. No lapse rate "
                "correction could be applied.")

    # Modify the final attributes as described by the supplied dictionary.
    if attributes_dict:
        amend_attributes(result, attributes_dict)
    # Remove the internal model_grid_hash attribute if present.
    result.attributes.pop('model_grid_hash', None)
    return result