Example #1
 def test_fails_if_data_is_not_cube(self):
     """Test it raises a Type Error if cube is not a cube."""
     msg = ('Expecting data to be an instance of '
            'iris.cube.Cube but is'
            ' {}.'.format(type(self.wg_perc)))
     with self.assertRaisesRegex(TypeError, msg):
         find_percentile_coordinate(self.wg_perc)
Example #2
 def test_fails_if_no_perc_coord(self):
     """Test it raises an Error if there is no percentile coord."""
     msg = ('No percentile coord found on')
     cube = self.cube_wg
     cube.remove_coord("percentile_over_dummy")
     with self.assertRaisesRegex(CoordinateNotFoundError, msg):
         find_percentile_coordinate(cube)
Example #3
 def test_fails_if_too_many_perc_coord(self):
     """Test it raises a Value Error if there are too many perc coords."""
     msg = ('Too many percentile coords found')
     cube = self.cube_wg
     new_perc_coord = (iris.coords.AuxCoord(
         1, long_name='percentile_over_realization', units='no_unit'))
     cube.add_aux_coord(new_perc_coord)
     with self.assertRaisesRegex(ValueError, msg):
         find_percentile_coordinate(cube)
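Taken together, these three tests pin down the contract of find_percentile_coordinate itself, which is not shown on this page. A minimal sketch consistent with the tests above (not the actual IMPROVER implementation) might be:

import iris
from iris.exceptions import CoordinateNotFoundError


def find_percentile_coordinate(cube):
    """Return the single percentile coordinate found on the cube."""
    if not isinstance(cube, iris.cube.Cube):
        msg = ('Expecting data to be an instance of '
               'iris.cube.Cube but is {}.'.format(type(cube)))
        raise TypeError(msg)
    perc_coords = [coord for coord in cube.coords()
                   if 'percentile' in coord.name()]
    if not perc_coords:
        raise CoordinateNotFoundError(
            'No percentile coord found on {}.'.format(cube.name()))
    if len(perc_coords) > 1:
        raise ValueError(
            'Too many percentile coords found on {}.'.format(cube.name()))
    return perc_coords[0]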
Example #4
    def check_percentile_coord(cube):
        """
        Determines if the cube to be blended has a percentile dimension
        coordinate.

        Args:
            cube (iris.cube.Cube):
                The cube to be checked for a percentile coordinate.
        Returns:
            perc_coord (iris.coords.DimCoord) or None:
                The percentile coordinate if one is found, otherwise None.
        Raises:
            ValueError : If there is a percentile coord and it is not a
                dimension coord in the cube.
            ValueError : If there is a percentile dimension with only one
                point; at least two points are needed for blending.
        """
        try:
            perc_coord = find_percentile_coordinate(cube)
            perc_dim = cube.coord_dims(perc_coord.name())
            if not perc_dim:
                msg = ('The percentile coord must be a dimension '
                       'of the cube.')
                raise ValueError(msg)
            # Check the percentile coordinate has more than one point,
            # otherwise raise an error as we won't be able to blend.
            if len(perc_coord.points) < 2:
                msg = ('Percentile coordinate does not have enough points'
                       ' in order to blend. Must have at least 2 percentiles.')
                raise ValueError(msg)
            return perc_coord
        except CoordinateNotFoundError:
            return None
Example #5
    def process(cube, ensemble_member_numbers=None):
        """
        Rebadge percentiles as ensemble members. The ensemble member numbering
        will depend upon the number of percentiles in the input cube i.e.
        0, 1, 2, 3, ..., n-1, if there are n percentiles.

        Args:
            cube (iris.cube.Cube):
                Cube containing a percentile coordinate, which will be
                rebadged as ensemble member.
            ensemble_member_numbers (numpy.ndarray or None):
                An optional array of member numbers to use. If None, the
                members are numbered 0, 1, 2, ..., n-1.

        """
        percentile_coord = (
            find_percentile_coordinate(cube).name())

        if ensemble_member_numbers is None:
            ensemble_member_numbers = (
                np.arange(
                    len(cube.coord(percentile_coord).points)))

        cube.coord(percentile_coord).points = (
            ensemble_member_numbers)
        cube.coord(percentile_coord).rename("realization")
        cube.coord("realization").units = "1"

        return cube
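A hypothetical usage sketch for the rebadging function above, assuming it can be called as a plain function (e.g. a staticmethod); the cube construction, names and shapes are illustrative only.

import numpy as np
from iris.coords import DimCoord
from iris.cube import Cube

# Build a toy cube with a percentile coordinate on the leading dimension.
perc = DimCoord(np.array([25., 50., 75.]),
                long_name='percentile_over_realization', units='%')
cube = Cube(np.zeros((3, 4), dtype=np.float32), long_name='wind_speed',
            units='m s-1', dim_coords_and_dims=[(perc, 0)])

rebadged = process(cube)
print(rebadged.coord('realization').points)  # -> [0 1 2]
print(rebadged.coord('realization').units)   # -> 1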
Example #6
    def process(self, forecast_at_percentiles, no_of_percentiles=None,
                sampling="quantile"):
        """
        1. Concatenates cubes with a percentile coordinate.
        2. Creates a list of percentiles.
        3. Accesses the lower and upper bound pair of the forecast values,
           in order to specify lower and upper bounds for the percentiles.
        4. Interpolates the percentile coordinate into an alternative
           set of percentiles using linear interpolation.

        Args:
            forecast_at_percentiles (Iris CubeList or Iris Cube):
                Cube or CubeList expected to contain a percentile coordinate.
            no_of_percentiles (Integer or None):
                Number of percentiles.
                If None, the number of percentiles within the input
                forecast_at_percentiles cube is used as the
                number of percentiles.
            sampling (String):
                Type of sampling of the distribution to produce a set of
                percentiles e.g. quantile or random.

                Accepted options for sampling are:

                * Quantile: A regular set of equally-spaced percentiles aimed
                     at dividing a Cumulative Distribution Function into
                     blocks of equal probability.
                * Random: A random set of ordered percentiles.

        Returns:
            forecast_at_percentiles (iris.cube.Cube):
                Cube with forecast values at the desired set of percentiles.
                The percentile coordinate is always the zeroth dimension.

        """
        forecast_at_percentiles = concatenate_cubes(forecast_at_percentiles)

        percentile_coord = (
            find_percentile_coordinate(forecast_at_percentiles).name())

        if no_of_percentiles is None:
            no_of_percentiles = (
                len(forecast_at_percentiles.coord(
                    percentile_coord).points))

        percentiles = choose_set_of_percentiles(
            no_of_percentiles, sampling=sampling)

        cube_units = forecast_at_percentiles.units
        bounds_pairing = (
            get_bounds_of_distribution(
                forecast_at_percentiles.name(), cube_units))

        forecast_at_percentiles = self._interpolate_percentiles(
            forecast_at_percentiles, percentiles, bounds_pairing,
            percentile_coord)
        return forecast_at_percentiles
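Step 2 above delegates to choose_set_of_percentiles. For sampling="quantile" the docstring describes equally-spaced percentiles that divide the CDF into blocks of equal probability; the sketch below shows one common convention (i/(n+1)), although the exact spacing used by the real helper may differ.

import numpy as np


def quantile_percentiles(no_of_percentiles):
    """Equally-spaced percentiles dividing the CDF into equal blocks."""
    n = no_of_percentiles
    return (100.0 * np.arange(1, n + 1) / (n + 1)).tolist()


print(quantile_percentiles(4))  # [20.0, 40.0, 60.0, 80.0]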
Example #7
    def process(
            self, post_processed_forecast, raw_forecast,
            random_ordering=False, random_seed=None):
        """
        Reorder post-processed forecast using the ordering of the
        raw ensemble.

        Args:
            post_processed_forecast (iris.cube.Cube):
                The cube containing the post-processed
                forecast realizations.
            raw_forecast (iris.cube.Cube):
                The cube containing the raw (not post-processed)
                forecast.

        Keyword Args:
            random_ordering (bool):
                If random_ordering is True, the post-processed forecasts are
                reordered randomly, rather than using the ordering of the
                raw ensemble.
            random_seed (int):
                If random_seed is an integer, the integer value is used for
                the random seed.
                If random_seed is None, no random seed is set, so the random
                values generated are not reproducible.

        Returns:
            post_processed_forecast_realizations (iris.cube.Cube):
                Cube containing the new ensemble realizations where all points
                within the dataset have been reordered in comparison to the
                input percentiles.
        """

        percentile_coord_name = (
            find_percentile_coordinate(post_processed_forecast).name())

        post_processed_forecast_percentiles = (
            enforce_coordinate_ordering(
                post_processed_forecast, percentile_coord_name))
        raw_forecast_realizations = enforce_coordinate_ordering(
            raw_forecast, "realization")
        raw_forecast_realizations = (
            self._recycle_raw_ensemble_realizations(
                post_processed_forecast_percentiles, raw_forecast_realizations,
                percentile_coord_name))
        post_processed_forecast_realizations = self.rank_ecc(
            post_processed_forecast_percentiles, raw_forecast_realizations,
            random_ordering=random_ordering,
            random_seed=random_seed)
        post_processed_forecast_realizations = (
            RebadgePercentilesAsRealizations.process(
                post_processed_forecast_realizations))

        post_processed_forecast_realizations = (
            enforce_coordinate_ordering(
                post_processed_forecast_realizations, "realization"))
        return post_processed_forecast_realizations
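The rank_ecc step above applies the Ensemble Copula Coupling idea: at each point, the sorted post-processed values are placed into the rank order of the raw ensemble. A toy, purely illustrative numpy version:

import numpy as np

raw = np.array([2.3, 0.1, 1.7])      # raw ensemble members at one point
post = np.array([5.0, 1.0, 3.0])     # post-processed percentile values

ranks = np.argsort(np.argsort(raw))  # rank of each raw member (0 = smallest)
reordered = np.sort(post)[ranks]
print(reordered)                     # [5. 1. 3.] -- mirrors the raw ordering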
Example #8
 def setUp(self):
     """ Set up a percentiles cube, plugin instance and orography cube """
     self.percentiles_cube = set_up_percentiles_cube()
     self.percentile_coordinate = find_percentile_coordinate(
         self.percentiles_cube)
     self.new_name = "probability"
     self.plugin_instance = ProbabilitiesFromPercentiles2D(
         self.percentiles_cube, self.new_name)
     self.orography_cube = set_up_threshold_cube()
Example #9
    def process(cube, ensemble_realization_numbers=None):
        """
        Rebadge percentiles as ensemble realizations. The ensemble
        realization numbering will depend upon the number of percentiles in
        the input cube i.e. 0, 1, 2, 3, ..., n-1, if there are n percentiles.

        Args:
            cube (iris.cube.Cube):
                Cube containing a percentile coordinate, which will be
                rebadged as ensemble realization.

        Keyword Args:
            ensemble_realization_numbers (numpy.ndarray):
                An array containing the ensemble numbers required in the output
                realization coordinate. Default is None, meaning the
                realization coordinate will be numbered 0, 1, 2 ... n-1 for n
                percentiles on the input cube.
        Raises:
            InvalidCubeError:
                If the realization coordinate already exists on the cube.
        """
        percentile_coord_name = (
            find_percentile_coordinate(cube).name())

        # We can't rebadge if the realization coordinate already exists, so
        # check for that before modifying the cube.
        try:
            realization_coord = cube.coord('realization')
        except CoordinateNotFoundError:
            realization_coord = None

        if realization_coord:
            raise InvalidCubeError(
                "Cannot rebadge percentile coordinate to realization "
                "coordinate because a realization coordinate already exists.")

        if ensemble_realization_numbers is None:
            ensemble_realization_numbers = (
                np.arange(
                    len(cube.coord(percentile_coord_name).points)))

        cube.coord(percentile_coord_name).points = (
            ensemble_realization_numbers)

        cube.coord(percentile_coord_name).rename("realization")
        cube.coord("realization").units = "1"

        return cube
Example #10
    def __init__(self, percentiles_cube, output_name=None):
        """
        Initialise class. Sets an inverse_ordering (bool) switch to true for
        cases where the percentiled data increases in the opposite sense to the
        percentile coordinate:

                e.g.  0th Percentile - Value = 10
                     10th Percentile - Value = 5
                     20th Percentile - Value = 0

        Args:
            percentiles_cube (iris.cube.Cube):
                The percentiled field from which probabilities will be obtained
                using the input cube. This cube should contain a percentiles
                dimension, with fields of values that correspond to these
                percentiles. The cube passed to the process method will contain
                values of the same diagnostic (e.g. height) as this reference
                cube.
            output_name (str):
                The name of the cube being created,
                e.g.'probability_of_snowfall'.
        """
        self.percentile_coordinate = find_percentile_coordinate(
            percentiles_cube)
        if self.percentile_coordinate.points.shape[0] < 2:
            msg = ("Percentile coordinate has only one value. Interpolation "
                   "using ProbabilitiesFromPercentiles2D requires multiple "
                   "values are provided.")
            raise ValueError(msg)
        self.percentiles_cube = percentiles_cube

        if output_name is not None:
            self.output_name = output_name
        else:
            self.output_name = "probability_of_{}".format(
                percentiles_cube.name())

        # Set inverse_ordering switch
        percentile_slices = percentiles_cube.slices_over(
            self.percentile_coordinate)
        self.inverse_ordering = False
        first_percentile = next(percentile_slices).data
        for percentile_values in percentile_slices:
            last_percentile = percentile_values.data
        if (first_percentile - last_percentile >= 0).all():
            self.inverse_ordering = True
Example #11
    def process(self, cube):
        """
        Create a cube containing the percentiles as a new dimension.

        What's generated by default is:
            * 15 percentiles - (0%, 5%, 10%, 20%, 25%, 30%, 40%, 50%, 60%,
              70%, 75%, 80%, 90%, 95%, 100%)

        Args:
            cube (iris.cube.Cube):
                Given the collapse coordinate, convert the set of values
                along that coordinate into a PDF and extract percentiles.

        Returns:
            cube (iris.cube.Cube):
                A single merged cube of all the cubes produced by each
                percentile collapse.

        """
        # Store data type and enforce the same type on return.
        data_type = cube.dtype
        # Test that collapse coords are present in cube before proceeding.
        n_collapse_coords = len(self.collapse_coord)
        n_valid_coords = sum([
            test_coord == coord.name() for coord in cube.coords()
            for test_coord in self.collapse_coord
        ])
        if n_valid_coords == n_collapse_coords:
            result = cube.collapsed(
                self.collapse_coord,
                iris.analysis.PERCENTILE,
                percent=self.percentiles,
                fast_percentile_method=self.fast_percentile_method)
            result.data = result.data.astype(data_type)
            for coord in self.collapse_coord:
                result.remove_coord(coord)
            # Rename the percentile coordinate to "percentile" and make
            # sure that the associated unit is %.
            percentile_coord = find_percentile_coordinate(result)
            result.coord(percentile_coord).rename('percentile')
            result.coord(percentile_coord).units = '%'
            return result

        raise CoordinateNotFoundError(
            "Coordinate '{}' not found in cube passed to {}.".format(
                self.collapse_coord, self.__class__.__name__))
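The collapse in this example is plain iris functionality. A self-contained illustration, assuming an older iris version (as the percentile_over_* coordinate names throughout this page suggest) in which the PERCENTILE aggregator names the new coordinate percentile_over_<coord>:

import numpy as np
import iris.analysis
from iris.coords import DimCoord
from iris.cube import Cube

realization = DimCoord(np.arange(5), standard_name='realization', units='1')
cube = Cube(np.random.rand(5, 3).astype(np.float32), long_name='wind_speed',
            units='m s-1', dim_coords_and_dims=[(realization, 0)])

result = cube.collapsed('realization', iris.analysis.PERCENTILE,
                        percent=[25, 50, 75])
print(result.coord('percentile_over_realization').points)  # [25 50 75]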
Example #12
    def extract_percentile_data(cube, req_percentile, standard_name):
        """Extract percentile data from cube.

        Args:
            cube (iris.cube.Cube):
                Cube containing one or more percentiles.
            req_percentile (float):
                Required percentile value
            standard_name (str):
                Standard name of the data.

        Returns:
            (tuple) : tuple containing:
                **result** (iris.cube.Cube):
                    Cube containing the required percentile data
                **perc_coord** (iris.coords.Coord):
                    Percentile coordinate.

        """
        if not isinstance(cube, iris.cube.Cube):
            msg = ('Expecting {0:s} data to be an instance of '
                   'iris.cube.Cube but is'
                   ' {1}.'.format(standard_name, type(cube)))
            raise TypeError(msg)
        perc_coord = find_percentile_coordinate(cube)
        if cube.standard_name != standard_name:
            msg = ('Warning: mismatching name for data, expecting'
                   ' {0:s} but found {1}'.format(standard_name,
                                                 cube.standard_name))
            warnings.warn(msg)
        constraint = (iris.Constraint(
            coord_values={perc_coord.name(): req_percentile}))
        result = cube.extract(constraint)
        if result is None:
            msg = ('Could not find required percentile '
                   '{0:3.1f} in cube'.format(req_percentile))
            raise ValueError(msg)
        return result, perc_coord
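A hypothetical call, assuming cube is a percentile cube like the toy ones sketched earlier but created with standard_name='wind_speed'; the requested percentile is illustrative:

median_cube, perc_coord = extract_percentile_data(
    cube, req_percentile=50.0, standard_name='wind_speed')
print(perc_coord.name(), median_cube.shape)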
Example #13
    def process(self, cube, weights=None):
        """Calculate weighted blend across the chosen coord, for either
           probabilistic or percentile data. If there is a percentile
           coordinate on the cube, it will blend using the
           PercentileBlendingAggregator but the percentile coordinate must
           have at least two points.

        Args:
            cube (iris.cube.Cube):
                Cube to blend across the coord.
            weights (list, numpy.ndarray, or None):
                Weights to use in the blend; None is equivalent to equal
                weights.

        Returns:
            result (iris.cube.Cube):
                Cube containing the weighted blend across the chosen coord.

        Raises:
            TypeError : If the first argument is not a cube.
            ValueError : If there is a percentile coord and it is not a
                dimension coord in the cube.
            ValueError : If there is a percentile dimension with only one
                point; at least two points are needed for blending.
            ValueError : If there is more than one percentile coord
                in the cube.
            ValueError : If there is a percentile dimension on the cube and
                the mode for blending is 'weighted_maximum'.
            ValueError : If the shape of the weights does not match the
                dimension of the coord we are blending over.
        Warns:
            Warning : If trying to blend across a scalar coordinate with only
                one value. Returns the original cube in this case.

        """
        if not isinstance(cube, iris.cube.Cube):
            msg = ('The first argument must be an instance of '
                   'iris.cube.Cube but is'
                   ' {0}.'.format(type(cube)))
            raise TypeError(msg)

        # Check to see if the data is percentile data

        try:
            perc_coord = find_percentile_coordinate(cube)
            perc_dim = cube.coord_dims(perc_coord.name())
            if not perc_dim:
                msg = ('The percentile coord must be a dimension '
                       'of the cube.')
                raise ValueError(msg)
            # Check the percentile coordinate has more than one point,
            # otherwise raise an error as we won't be able to blend.
            if len(perc_coord.points) < 2:
                msg = ('Percentile coordinate does not have enough points'
                       ' in order to blend. Must have at least 2 percentiles.')
                raise ValueError(msg)
        except CoordinateNotFoundError:
            perc_coord = None
            perc_dim = None

        # If we have a percentile dimension and the mode is 'max' raise an
        # exception.
        if perc_coord and self.mode == 'weighted_maximum':
            msg = ('The "weighted_maximum" mode cannot be used with'
                   ' percentile data.')
            raise ValueError(msg)

        # check weights array matches coordinate shape if not None
        if weights is not None:
            if np.array(weights).shape != cube.coord(self.coord).points.shape:
                msg = ('The weights array must match the shape '
                       'of the coordinate in the input cube; '
                       'weight shape is '
                       '{0}'.format(np.array(weights).shape) +
                       ', cube shape is '
                       '{0}'.format(cube.coord(self.coord).points.shape))
                raise ValueError(msg)

        # If coord to blend over is a scalar_coord warn
        # and return original cube.
        coord_dim = cube.coord_dims(self.coord)
        if not coord_dim:
            msg = ('Trying to blend across a scalar coordinate with only one'
                   ' value. Returning original cube')
            warnings.warn(msg)
            result = cube
        else:
            try:
                cube.coord('threshold')
            except iris.exceptions.CoordinateNotFoundError:
                slices_over_threshold = [cube]
            else:
                if self.coord != 'threshold':
                    slices_over_threshold = cube.slices_over('threshold')
                else:
                    slices_over_threshold = [cube]

            cubelist = iris.cube.CubeList([])
            for cube_thres in slices_over_threshold:
                # Blend the cube across the coordinate
                # Use percentile Aggregator if required
                if perc_coord and self.mode == "weighted_mean":
                    percentiles = np.array(perc_coord.points, dtype=float)
                    perc_dim, = cube_thres.coord_dims(perc_coord.name())
                    # Set equal weights if none are provided
                    if weights is None:
                        num = len(cube_thres.coord(self.coord).points)
                        weights = np.ones(num) / float(num)
                    # Set up aggregator
                    PERCENTILE_BLEND = (Aggregator(
                        'weighted_mean',
                        PercentileBlendingAggregator.aggregate))

                    cube_new = cube_thres.collapsed(self.coord,
                                                    PERCENTILE_BLEND,
                                                    arr_percent=percentiles,
                                                    arr_weights=weights,
                                                    perc_dim=perc_dim)

                # Else do a simple weighted average
                elif self.mode == "weighted_mean":
                    # Equal weights are used as default.
                    weights_array = None
                    # Else broadcast the weights to be used by the aggregator.
                    coord_dim_thres = cube_thres.coord_dims(self.coord)
                    if weights is not None:
                        weights_array = (iris.util.broadcast_to_shape(
                            np.array(weights), cube_thres.shape,
                            coord_dim_thres))
                    orig_cell_methods = cube_thres.cell_methods
                    # Calculate the weighted average.
                    cube_new = cube_thres.collapsed(self.coord,
                                                    iris.analysis.MEAN,
                                                    weights=weights_array)
                    # Update the name of the cell_method created by Iris to
                    # 'weighted_mean' to be consistent.
                    new_cell_methods = cube_new.cell_methods
                    extra_cm = (set(new_cell_methods) -
                                set(orig_cell_methods)).pop()
                    add_renamed_cell_method(cube_new, extra_cm,
                                            'weighted_mean')

                # Else use the maximum probability aggregator.
                elif self.mode == "weighted_maximum":
                    # Set equal weights if none are provided
                    if weights is None:
                        num = len(cube_thres.coord(self.coord).points)
                        weights = np.ones(num) / float(num)
                    # Set up aggregator
                    MAX_PROBABILITY = (Aggregator(
                        'weighted_maximum',
                        MaxProbabilityAggregator.aggregate))

                    cube_new = cube_thres.collapsed(self.coord,
                                                    MAX_PROBABILITY,
                                                    arr_weights=weights)
                cubelist.append(cube_new)
            result = cubelist.merge_cube()
            if isinstance(cubelist[0].data, np.ma.core.MaskedArray):
                result.data = np.ma.array(result.data)
        # If set adjust values of collapsed coordinates.
        if self.coord_adjust is not None:
            for crd in result.coords():
                if cube.coord_dims(crd.name()) == coord_dim:
                    pnts = cube.coord(crd.name()).points
                    crd.points = np.array(self.coord_adjust(pnts),
                                          dtype=crd.points.dtype)

        return result
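The Aggregator objects built in this example follow a general iris pattern: wrap a custom aggregation function so that cube.collapsed can dispatch to it. A minimal self-contained sketch with a trivial weighted mean (not IMPROVER's PercentileBlendingAggregator):

import numpy as np
from iris.analysis import Aggregator


def simple_weighted_mean(data, axis, arr_weights=None):
    # iris passes the array being aggregated and the axis to collapse.
    return np.average(data, axis=axis, weights=arr_weights)


SIMPLE_BLEND = Aggregator('weighted_mean', simple_weighted_mean)
# Usage (cube and weights assumed to exist):
# result = cube.collapsed('model_id', SIMPLE_BLEND, arr_weights=weights)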
Example #14
def main(argv=None):
    """Load in arguments for applying coefficients for Ensemble Model Output
       Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
       Regression (NGR). The coefficients are applied to the forecast
       that is supplied, so as to calibrate the forecast. The calibrated
       forecast is written to a netCDF file.
    """
    parser = ArgParser(
        description='Apply coefficients for Ensemble Model Output '
        'Statistics (EMOS), otherwise known as Non-homogeneous '
        'Gaussian Regression (NGR). The supported input formats '
        'are realizations, probabilities and percentiles. '
        'The forecast will be converted to realizations before '
        'applying the coefficients and then converted back to '
        'match the input format.')
    # Filepaths for the forecast, EMOS coefficients and the output.
    parser.add_argument(
        'forecast_filepath',
        metavar='FORECAST_FILEPATH',
        help='A path to an input NetCDF file containing the forecast to be '
        'calibrated. The input format could be either realizations, '
        'probabilities or percentiles.')
    parser.add_argument('coefficients_filepath',
                        metavar='COEFFICIENTS_FILEPATH',
                        help='A path to an input NetCDF file containing the '
                        'coefficients used for calibration.')
    parser.add_argument('output_filepath',
                        metavar='OUTPUT_FILEPATH',
                        help='The output path for the processed NetCDF')
    # Optional arguments.
    parser.add_argument(
        '--num_realizations',
        metavar='NUMBER_OF_REALIZATIONS',
        default=None,
        type=np.int32,
        help='Optional argument to specify the number of '
        'ensemble realizations to produce. '
        'If the current forecast is input as probabilities or '
        'percentiles then this argument is used to create the requested '
        'number of realizations. In addition, this argument is used to '
        'construct the requested number of realizations from the mean '
        'and variance output after applying the EMOS coefficients. '
        'Default will be the number of realizations in the raw input '
        'file, if realizations are provided as input, otherwise if the '
        'input format is probabilities or percentiles, then an error '
        'will be raised if no value is provided.')
    parser.add_argument(
        '--random_ordering',
        default=False,
        action='store_true',
        help='Option to reorder the post-processed forecasts randomly. If not '
        'set, the ordering of the raw ensemble is used. This option is '
        'only valid when the input format is realizations.')
    parser.add_argument(
        '--random_seed',
        metavar='RANDOM_SEED',
        default=None,
        help='Option to specify a value for the random seed for testing '
        'purposes, otherwise, the default random seed behaviour is '
        'utilised. The random seed is used in the generation of the '
        'random numbers used for either the random_ordering option to '
        'order the input percentiles randomly, rather than use the '
        'ordering from the raw ensemble, or for splitting tied values '
        'within the raw ensemble, so that the values from the input '
        'percentiles can be ordered to match the raw ensemble.')
    parser.add_argument(
        '--ecc_bounds_warning',
        default=False,
        action='store_true',
        help='If True, where the percentiles exceed the ECC bounds range, '
        'raise a warning rather than an exception. This occurs when the '
        'current forecast is in the form of probabilities and is '
        'converted to percentiles, as part of converting the input '
        'probabilities into realizations.')
    parser.add_argument(
        '--predictor_of_mean',
        metavar='PREDICTOR_OF_MEAN',
        choices=['mean', 'realizations'],
        default='mean',
        help='String to specify the predictor used to calibrate the forecast '
        'mean. Currently the ensemble mean ("mean") and the ensemble '
        'realizations ("realizations") are supported as options. '
        'Default: "mean".')

    args = parser.parse_args(args=argv)

    current_forecast = load_cube(args.forecast_filepath)
    coeffs = load_cube(args.coefficients_filepath)

    original_current_forecast = current_forecast.copy()

    msg = ("The current forecast has been provided as {0}. "
           "These {0} need to be converted to realizations "
           "for ensemble calibration. The args.num_realizations "
           "argument is used to define the number of realizations "
           "to construct from the input {0}, so if the "
           "current forecast is provided as {0} then "
           "args.num_realizations must be defined.")

    try:
        find_percentile_coordinate(current_forecast)
        input_forecast_type = "percentiles"
    except CoordinateNotFoundError:
        input_forecast_type = "realizations"

    if current_forecast.name().startswith("probability_of"):
        input_forecast_type = "probabilities"
        # If probabilities, convert to percentiles.
        conversion_plugin = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=args.ecc_bounds_warning)
    elif input_forecast_type == "percentiles":
        # If percentiles, resample percentiles so that the percentiles are
        # evenly spaced.
        conversion_plugin = ResamplePercentiles(
            ecc_bounds_warning=args.ecc_bounds_warning)

    # If percentiles, resample percentiles and then rebadge.
    # If probabilities, generate percentiles and then rebadge.
    if input_forecast_type in ["percentiles", "probabilities"]:
        if not args.num_realizations:
            raise ValueError(msg.format(input_forecast_type))
        current_forecast = conversion_plugin.process(
            current_forecast, no_of_percentiles=args.num_realizations)
        current_forecast = (
            RebadgePercentilesAsRealizations().process(current_forecast))

    # Default number of ensemble realizations is the number in
    # the raw forecast.
    if not args.num_realizations:
        args.num_realizations = len(
            current_forecast.coord('realization').points)

    # Apply coefficients as part of Ensemble Model Output Statistics (EMOS).
    ac = ApplyCoefficientsFromEnsembleCalibration(
        current_forecast,
        coeffs,
        predictor_of_mean_flag=args.predictor_of_mean)
    calibrated_predictor, calibrated_variance = ac.process()

    # If input forecast is probabilities, convert output into probabilities.
    # If input forecast is percentiles, convert output into percentiles.
    # If input forecast is realizations, convert output into realizations.
    if input_forecast_type == "probabilities":
        result = GenerateProbabilitiesFromMeanAndVariance().process(
            calibrated_predictor, calibrated_variance,
            original_current_forecast)
    elif input_forecast_type == "percentiles":
        perc_coord = find_percentile_coordinate(original_current_forecast)
        result = GeneratePercentilesFromMeanAndVariance().process(
            calibrated_predictor,
            calibrated_variance,
            percentiles=perc_coord.points)
    elif input_forecast_type == "realizations":
        # Ensemble Copula Coupling to generate realizations
        # from mean and variance.
        percentiles = GeneratePercentilesFromMeanAndVariance().process(
            calibrated_predictor,
            calibrated_variance,
            no_of_percentiles=args.num_realizations)
        result = EnsembleReordering().process(
            percentiles,
            current_forecast,
            random_ordering=args.random_ordering,
            random_seed=args.random_seed)
    save_netcdf(result, args.output_filepath)
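A hypothetical programmatic invocation of this entry point, with made-up file names; the flags match the parser defined above.

main(argv=['forecast.nc', 'coefficients.nc', 'calibrated.nc',
           '--num_realizations', '12', '--random_seed', '0'])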
Example #15
def process(current_forecast,
            coeffs,
            num_realizations=None,
            random_ordering=False,
            random_seed=None,
            ecc_bounds_warning=False,
            predictor_of_mean='mean'):
    """Applying coefficients for Ensemble Model Output Statistics.

    Load in arguments for applying coefficients for Ensemble Model Output
    Statistics (EMOS), otherwise known as Non-homogeneous Gaussian
    Regression (NGR). The coefficients are applied to the forecast
    that is supplied, so as to calibrate the forecast. The calibrated
    forecast is written to a cube. If no coefficients are provided the input
    forecast is returned unchanged.

    Args:
        current_forecast (iris.cube.Cube):
            A Cube containing the forecast to be calibrated. The input format
            could be either realizations, probabilities or percentiles.
        coeffs (iris.cube.Cube or None):
            A cube containing the coefficients used for calibration or None.
            If None, then current_forecast is returned unchanged.
        num_realizations (numpy.int32):
            Optional argument to specify the number of ensemble realizations
            to produce. If the current forecast is input as probabilities or
            percentiles then this argument is used to create the requested
            number of realizations. In addition, this argument is used to
            construct the requested number of realizations from the mean and
            variance output after applying the EMOS coefficients.
            Default is None.
        random_ordering (bool):
            Option to reorder the post-processed forecasts randomly. If not
            set, the ordering of the raw ensemble is used. This option is
            only valid when the input format is realizations.
            Default is False.
        random_seed (int):
            Option to specify a value for the random seed for testing
            purposes, otherwise the default random seed behaviour is utilised.
            The random seed is used in the generation of the random numbers
            used for either the random_ordering option to order the input
            percentiles randomly, rather than use the ordering from the raw
            ensemble, or for splitting tied values within the raw ensemble,
            so that the values from the input percentiles can be ordered to
            match the raw ensemble.
            Default is None.
        ecc_bounds_warning (bool):
            If True, where the percentiles exceed the ECC bounds range,
            raises a warning rather than an exception. This occurs when the
            current forecast is in the form of probabilities and is
            converted to percentiles, as part of converting the input
            probabilities into realizations.
            Default is False.
        predictor_of_mean (str):
            String to specify the predictor used to calibrate the forecast
            mean. Currently the ensemble mean "mean" as the ensemble
            realization "realization" are supported as options.
            Default is 'mean'

    Returns:
        result (iris.cube.Cube):
            The calibrated forecast cube.

    Raises:
        ValueError:
            If the current forecast is a coefficients cube.
        ValueError:
            If the coefficients cube does not have the right name of
            "emos_coefficients".
        ValueError:
            If the forecast type is 'percentiles' or 'probabilities' and no
            num_realizations value is given.

    """
    if coeffs is None:
        msg = ("There are no coefficients provided for calibration. The "
               "uncalibrated forecast will be returned.")
        warnings.warn(msg)
        return current_forecast

    elif coeffs.name() != 'emos_coefficients':
        msg = ("The current coefficients cube does not have the "
               "name 'emos_coefficients'")
        raise ValueError(msg)

    if current_forecast.name() == 'emos_coefficients':
        msg = "The current forecast cube has the name 'emos_coefficients'"
        raise ValueError(msg)

    original_current_forecast = current_forecast.copy()
    try:
        find_percentile_coordinate(current_forecast)
        input_forecast_type = "percentiles"
    except CoordinateNotFoundError:
        input_forecast_type = "realizations"

    if current_forecast.name().startswith("probability_of"):
        input_forecast_type = "probabilities"
        # If probabilities, convert to percentiles.
        conversion_plugin = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=ecc_bounds_warning)
    elif input_forecast_type == "percentiles":
        # If percentiles, resample percentiles so that the percentiles are
        # evenly spaced.
        conversion_plugin = ResamplePercentiles(
            ecc_bounds_warning=ecc_bounds_warning)

    # If percentiles, re-sample percentiles and then re-badge.
    # If probabilities, generate percentiles and then re-badge.
    if input_forecast_type in ["percentiles", "probabilities"]:
        if not num_realizations:
            raise ValueError(
                "The current forecast has been provided as {0}. "
                "These {0} need to be converted to realizations "
                "for ensemble calibration. The num_realizations "
                "argument is used to define the number of realizations "
                "to construct from the input {0}, so if the "
                "current forecast is provided as {0} then "
                "num_realizations must be defined.".format(
                    input_forecast_type))
        current_forecast = conversion_plugin.process(
            current_forecast, no_of_percentiles=num_realizations)
        current_forecast = (
            RebadgePercentilesAsRealizations().process(current_forecast))

    # Default number of ensemble realizations is the number in
    # the raw forecast.
    if not num_realizations:
        num_realizations = len(current_forecast.coord('realization').points)

    # Apply coefficients as part of Ensemble Model Output Statistics (EMOS).
    ac = ApplyCoefficientsFromEnsembleCalibration(
        current_forecast, coeffs, predictor_of_mean_flag=predictor_of_mean)
    calibrated_predictor, calibrated_variance = ac.process()

    # If input forecast is probabilities, convert output into probabilities.
    # If input forecast is percentiles, convert output into percentiles.
    # If input forecast is realizations, convert output into realizations.
    if input_forecast_type == "probabilities":
        result = GenerateProbabilitiesFromMeanAndVariance().process(
            calibrated_predictor, calibrated_variance,
            original_current_forecast)
    elif input_forecast_type == "percentiles":
        perc_coord = find_percentile_coordinate(original_current_forecast)
        result = GeneratePercentilesFromMeanAndVariance().process(
            calibrated_predictor,
            calibrated_variance,
            percentiles=perc_coord.points)
    elif input_forecast_type == "realizations":
        # Ensemble Copula Coupling to generate realizations
        # from mean and variance.
        percentiles = GeneratePercentilesFromMeanAndVariance().process(
            calibrated_predictor,
            calibrated_variance,
            no_of_percentiles=num_realizations)
        result = EnsembleReordering().process(percentiles,
                                              current_forecast,
                                              random_ordering=random_ordering,
                                              random_seed=random_seed)
    return result
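A hypothetical call of this function form, mirroring the CLI above but with cubes already loaded; current_forecast and coeffs are assumed to exist.

result = process(current_forecast, coeffs, num_realizations=12,
                 random_seed=0)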
Example #16
 def test_basic(self):
     """Test that the function returns a Coord."""
     perc_coord = find_percentile_coordinate(self.cube_wg)
     self.assertIsInstance(perc_coord, iris.coords.Coord)
     self.assertEqual(perc_coord.name(), "percentile_over_dummy")
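The set-up helpers behind self.cube_wg are not shown on this page. A minimal fixture consistent with these tests might look like the sketch below; the diagnostic name, shape and data are assumptions.

import numpy as np
from iris.coords import DimCoord
from iris.cube import Cube

perc = DimCoord(np.array([25., 50., 75.]),
                long_name='percentile_over_dummy', units='%')
cube_wg = Cube(np.zeros((3, 2), dtype=np.float32),
               long_name='wind_speed_of_gust', units='m s-1',
               dim_coords_and_dims=[(perc, 0)])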
Example #17
def main(argv=None):
    """Load in arguments and start spotdata extraction process."""
    parser = ArgParser(
        description="Extract diagnostic data from gridded fields for spot data"
        " sites. It is possible to apply a temperature lapse rate adjustment"
        " to temperature data that helps to account for differences between"
        " the spot sites real altitude and that of the grid point from which"
        " the temperature data is extracted.")

    # Input and output files required.
    parser.add_argument("neighbour_filepath", metavar="NEIGHBOUR_FILEPATH",
                        help="Path to a NetCDF file of spot-data neighbours. "
                        "This file also contains the spot site information.")
    parser.add_argument("diagnostic_filepath", metavar="DIAGNOSTIC_FILEPATH",
                        help="Path to a NetCDF file containing the diagnostic "
                             "data to be extracted.")
    parser.add_argument("temperature_lapse_rate_filepath",
                        metavar="LAPSE_RATE_FILEPATH", nargs='?',
                        help="(Optional) Filepath to a NetCDF file containing"
                        " temperature lapse rates. If this cube is provided,"
                        " and a screen temperature cube is being processed,"
                        " the lapse rates will be used to adjust the"
                        " temperatures to better represent each spot's"
                        " site-altitude.")
    parser.add_argument("output_filepath", metavar="OUTPUT_FILEPATH",
                        help="The output path for the resulting NetCDF")

    parser.add_argument(
        "--apply_lapse_rate_correction",
        default=False, action="store_true",
        help="If the option is set and a lapse rate cube has been "
        "provided, extracted screen temperatures will be adjusted to "
        "better match the altitude of the spot site for which they have "
        "been extracted.")

    method_group = parser.add_argument_group(
        title="Neighbour finding method",
        description="If none of these options are set, the nearest grid point "
        "to a spot site will be used without any other constraints.")
    method_group.add_argument(
        "--land_constraint", default=False, action='store_true',
        help="If set the neighbour cube will be interrogated for grid point"
        " neighbours that were identified using a land constraint. This means"
        " that the grid points should be land points except for sites where"
        " none were found within the search radius when the neighbour cube was"
        " created. May be used with minimum_dz.")
    method_group.add_argument(
        "--minimum_dz", default=False, action='store_true',
        help="If set the neighbour cube will be interrogated for grid point"
        " neighbours that were identified using a minimum height difference"
        " constraint. These are grid points that were found to be the closest"
        " in altitude to the spot site within the search radius defined when"
        " the neighbour cube was created. May be used with land_constraint.")

    percentile_group = parser.add_argument_group(
        title="Extract percentiles",
        description="Extract particular percentiles from probabilistic, "
        "percentile, or realization inputs. If deterministic input is "
        "provided a warning is raised and all leading dimensions are included "
        "in the returned spot-data cube.")
    percentile_group.add_argument(
        "--extract_percentiles", default=None, nargs='+', type=int,
        help="If set to a percentile value or a list of percentile values, "
        "data corresponding to those percentiles will be returned. For "
        "example setting '--extract_percentiles 25 50 75' will result in the "
        "25th, 50th, and 75th percentiles being returned from a cube of "
        "probabilities, percentiles, or realizations. Note that for "
        "percentile inputs, the desired percentile(s) must exist in the input "
        "cube.")
    parser.add_argument(
        "--ecc_bounds_warning", default=False, action="store_true",
        help="If True, where calculated percentiles are outside the ECC "
        "bounds range, raise a warning rather than an exception.")

    meta_group = parser.add_argument_group("Metadata")
    meta_group.add_argument(
        "--metadata_json", metavar="METADATA_JSON", default=None,
        help="If provided, this JSON file can be used to modify the metadata "
        "of the returned netCDF file. Defaults to None.")

    output_group = parser.add_argument_group("Suppress Verbose output")
    # This CLI may be used to prepare data for verification without knowing the
    # form of the input, be it deterministic, realizations or probabilistic.
    # A warning is normally raised when attempting to extract a percentile from
    # deterministic data as this is not possible; the spot-extraction of the
    # entire cube is returned. When preparing data for verification we know
    # that we will produce a large number of these warnings when passing in
    # deterministic data. This option to suppress warnings is provided to
    # reduce the amount of unneeded logging information that is written out.

    output_group.add_argument(
        "--suppress_warnings", default=False, action="store_true",
        help="Suppress warning output. This option should only be used if "
        "it is known that warnings will be generated but they are not "
        "required.")

    args = parser.parse_args(args=argv)
    neighbour_cube = load_cube(args.neighbour_filepath)
    diagnostic_cube = load_cube(args.diagnostic_filepath)

    neighbour_selection_method = NeighbourSelection(
        land_constraint=args.land_constraint,
        minimum_dz=args.minimum_dz).neighbour_finding_method_name()

    plugin = SpotExtraction(
        neighbour_selection_method=neighbour_selection_method)
    result = plugin.process(neighbour_cube, diagnostic_cube)

    # If a probability or percentile diagnostic cube is provided, extract
    # the given percentile if available. This is done after the spot-extraction
    # to minimise processing time; usually there are far fewer spot sites than
    # grid points.
    if args.extract_percentiles:
        try:
            perc_coordinate = find_percentile_coordinate(result)
        except CoordinateNotFoundError:
            if 'probability_of_' in result.name():
                result = GeneratePercentilesFromProbabilities(
                    ecc_bounds_warning=args.ecc_bounds_warning).process(
                        result, percentiles=args.extract_percentiles)
                result = iris.util.squeeze(result)
            elif result.coords('realization', dim_coords=True):
                fast_percentile_method = not np.ma.isMaskedArray(result.data)
                result = PercentileConverter(
                    'realization', percentiles=args.extract_percentiles,
                    fast_percentile_method=fast_percentile_method).process(
                        result)
            else:
                msg = ('Diagnostic cube is not a known probabilistic type. '
                       'The {} percentile could not be extracted. Extracting '
                       'data from the cube including any leading '
                       'dimensions.'.format(
                           args.extract_percentiles))
                if not args.suppress_warnings:
                    warnings.warn(msg)
        else:
            constraint = ['{}={}'.format(perc_coordinate.name(),
                                         args.extract_percentiles)]
            perc_result = extract_subcube(result, constraint)
            if perc_result is not None:
                result = perc_result
            else:
                msg = ('The percentile diagnostic cube does not contain the '
                       'requested percentile value. Requested {}, available '
                       '{}'.format(args.extract_percentiles,
                                   perc_coordinate.points))
                raise ValueError(msg)

    # Check whether a lapse rate cube has been provided and we are dealing with
    # temperature data and the lapse-rate option is enabled.
    if (args.temperature_lapse_rate_filepath and
            args.apply_lapse_rate_correction):

        if not result.name() == "air_temperature":
            msg = ("A lapse rate cube was provided, but the diagnostic being "
                   "processed is not air temperature and cannot be adjusted.")
            raise ValueError(msg)

        lapse_rate_cube = load_cube(args.temperature_lapse_rate_filepath)
        if not lapse_rate_cube.name() == 'air_temperature_lapse_rate':
            msg = ("A cube has been provided as a lapse rate cube but does "
                   "not have the expected name air_temperature_lapse_rate: "
                   "{}".format(lapse_rate_cube.name()))
            raise ValueError(msg)

        try:
            lapse_rate_height_coord = lapse_rate_cube.coord("height")
        except (ValueError, CoordinateNotFoundError):
            msg = ("Lapse rate cube does not contain a single valued height "
                   "coordinate. This is required to ensure it is applied to "
                   "equivalent temperature data.")
            raise ValueError(msg)

        # Check the height of the temperature data matches that used to
        # calculate the lapse rates. If so, adjust temperatures using the lapse
        # rate values.
        if diagnostic_cube.coord("height") == lapse_rate_height_coord:
            plugin = SpotLapseRateAdjust(
                neighbour_selection_method=neighbour_selection_method)
            result = plugin.process(result, neighbour_cube, lapse_rate_cube)
        else:
            msg = ("A lapse rate cube was provided, but the height of "
                   "the temperature data does not match that of the data used "
                   "to calculate the lapse rates. As such the temperatures "
                   "were not adjusted with the lapse rates.")
            if not args.suppress_warnings:
                warnings.warn(msg)
    elif (args.apply_lapse_rate_correction and
          not args.temperature_lapse_rate_filepath):
        msg = ("A lapse rate cube was not provided, but the option to "
               "apply the lapse rate correction was enabled. No lapse rate "
               "correction could be applied.")
        if not args.suppress_warnings:
            warnings.warn(msg)

    # Modify final metadata as described by provided JSON file.
    if args.metadata_json:
        with open(args.metadata_json, 'r') as input_file:
            metadata_dict = json.load(input_file)
        result = amend_metadata(result, **metadata_dict)

    # Remove the internal model_grid_hash attribute if present.
    result.attributes.pop('model_grid_hash', None)

    # Save the spot data cube.
    save_netcdf(result, args.output_filepath)
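A hypothetical programmatic invocation with made-up file names; the options match the parser above.

main(argv=['neighbours.nc', 'air_temperature.nc', 'lapse_rate.nc', 'spot.nc',
           '--apply_lapse_rate_correction', '--extract_percentiles', '50'])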
Example #18
def main(argv=None):
    """Do ensemble calibration using the EnsembleCalibration plugin.
    """
    parser = ArgParser(
        description='Apply the requested ensemble calibration method using '
        'the current forecast (to be calibrated) in the form of '
        'realizations, probabilities, or percentiles, historical '
        'forecasts in the form of realizations and historical truth data '
        '(to use in calibration). The mean and variance output from the '
        'EnsembleCalibration plugin can be written to an output file '
        'if required. If the current forecast is supplied in the form of '
        'probabilities or percentiles, these are converted to realizations '
        'prior to calibration. After calibration, the mean and variance '
        'computed in the calibration are converted to match the format of the '
        'current forecast i.e. if realizations are input, realizations '
        'are output, if probabilities are input, probabilities are output, '
        'and if percentiles are input, percentiles are output. '
        'If realizations are input, realizations are regenerated using '
        'Ensemble Copula Coupling.')
    # Arguments for EnsembleCalibration
    parser.add_argument(
        'units',
        metavar='UNITS_TO_CALIBRATE_IN',
        help='The unit that calibration should be undertaken in. The current '
        'forecast, historical forecast and truth will be converted as '
        'required.')
    parser.add_argument(
        'distribution',
        metavar='DISTRIBUTION',
        choices=['gaussian', 'truncated gaussian'],
        help='The distribution that will be used for calibration. This will '
        'be dependent upon the input phenomenon. This has to be '
        'supported by the minimisation functions in '
        'ContinuousRankedProbabilityScoreMinimisers.')
    # Filepaths for current, historic and truth data.
    parser.add_argument(
        'input_filepath',
        metavar='INPUT_FILE',
        help='A path to an input NetCDF file containing the current forecast '
        'to be processed. The file provided could be in the form of '
        'realizations, probabilities or percentiles.')
    parser.add_argument(
        'historic_filepath',
        metavar='HISTORIC_DATA_FILE',
        help='A path to an input NetCDF file containing the historic '
        'forecast(s) used for calibration. The file provided must be in '
        'the form of realizations.')
    parser.add_argument(
        'truth_filepath',
        metavar='TRUTH_DATA_FILE',
        help='A path to an input NetCDF file containing the historic truth '
        'analyses used for calibration.')
    parser.add_argument('output_filepath',
                        metavar='OUTPUT_FILE',
                        help='The output path for the processed NetCDF')
    # Optional arguments.
    parser.add_argument(
        '--predictor_of_mean',
        metavar='CALIBRATE_MEAN_FLAG',
        choices=['mean', 'realizations'],
        default='mean',
        help='String to specify the input to calculate the calibrated mean. '
        'Currently the ensemble mean ("mean") and the ensemble '
        'realizations ("realizations") are supported as the predictors. '
        'Default: "mean".')
    parser.add_argument(
        '--save_mean',
        metavar='MEAN_FILE',
        default=False,
        help='Option to save the mean output from EnsembleCalibration plugin. '
        'If used, a path to save the output to must be provided.')
    parser.add_argument(
        '--save_variance',
        metavar='VARIANCE_FILE',
        default=False,
        help='Option to save the variance output from EnsembleCalibration '
        'plugin. If used, a path to save the output to must be provided.')
    parser.add_argument(
        '--num_realizations',
        metavar='NUMBER_OF_REALIZATIONS',
        default=None,
        type=np.int32,
        help='Optional argument to specify the number of '
        'ensemble realizations to produce. '
        'If the current forecast is input as probabilities or '
        'percentiles then this argument is used to create the requested '
        'number of realizations. In addition, this argument is used to '
        'construct the requested number of realizations from the mean '
        'and variance output after applying the EMOS coefficients. '
        'Default will be the number of realizations in the raw input '
        'file, if realizations are provided as input, otherwise if the '
        'input format is probabilities or percentiles, then an error '
        'will be raised if no value is provided.')
    parser.add_argument(
        '--random_ordering',
        default=False,
        action='store_true',
        help='Option to reorder the post-processed forecasts randomly. If not '
        'set, the ordering of the raw ensemble is used. This option is '
        'only valid when the input format is realizations.')
    parser.add_argument(
        '--random_seed',
        metavar='RANDOM_SEED',
        default=None,
        help='Option to specify a value for the random seed for testing '
        'purposes; otherwise, the default random seed behaviour is '
        'utilised. The random seed is used to generate the random '
        'numbers needed either by the random_ordering option, to order '
        'the input percentiles randomly rather than use the ordering '
        'from the raw ensemble, or to split tied values within the raw '
        'ensemble, so that the values from the input percentiles can be '
        'ordered to match the raw ensemble.')
    parser.add_argument(
        '--ecc_bounds_warning',
        default=False,
        action='store_true',
        help='If True, where the percentiles exceed the ECC bounds range, '
        'raise a warning rather than an exception. This occurs when the '
        'current forecast is in the form of probabilities and is '
        'converted to percentiles, as part of converting the input '
        'probabilities into realizations.')
    parser.add_argument(
        '--max_iterations',
        metavar='MAX_ITERATIONS',
        type=np.int32,
        default=1000,
        help='The maximum number of iterations allowed until the minimisation '
        'has converged to a stable solution. If the maximum number of '
        'iterations is reached, but the minimisation has not yet '
        'converged to a stable solution, then the available solution is '
        'used anyway, and a warning is raised. This may be modified for '
        'testing purposes but otherwise kept fixed. If the '
        'predictor_of_mean is "realizations", then the number of '
        'iterations may require increasing, as there will be more '
        'coefficients to solve for.')
    args = parser.parse_args(args=argv)

    current_forecast = load_cube(args.input_filepath)
    historic_forecast = load_cube(args.historic_filepath)
    truth = load_cube(args.truth_filepath)

    original_current_forecast = current_forecast.copy()

    msg = ("The current forecast has been provided as {0}. "
           "These {0} need to be converted to realizations "
           "for ensemble calibration. The args.num_realizations "
           "argument is used to define the number of realizations "
           "to construct from the input {0}, so if the "
           "current forecast is provided as {0} then "
           "args.num_realizations must be defined.")

    try:
        find_percentile_coordinate(current_forecast)
        input_forecast_type = "percentiles"
    except CoordinateNotFoundError:
        input_forecast_type = "realizations"

    if current_forecast.name().startswith("probability_of"):
        input_forecast_type = "probabilities"
        # If probabilities, convert to percentiles.
        conversion_plugin = GeneratePercentilesFromProbabilities(
            ecc_bounds_warning=args.ecc_bounds_warning)
    elif input_forecast_type == "percentiles":
        # If percentiles, resample percentiles so that the percentiles are
        # evenly spaced.
        conversion_plugin = ResamplePercentiles(
            ecc_bounds_warning=args.ecc_bounds_warning)

    # If percentiles, resample percentiles and then rebadge.
    # If probabilities, generate percentiles and then rebadge.
    if input_forecast_type in ["percentiles", "probabilities"]:
        if not args.num_realizations:
            raise ValueError(msg.format(input_forecast_type))
        current_forecast = conversion_plugin.process(
            current_forecast, no_of_percentiles=args.num_realizations)
        current_forecast = (
            RebadgePercentilesAsRealizations().process(current_forecast))

    # Default number of ensemble realizations is the number in
    # the raw forecast.
    if not args.num_realizations:
        args.num_realizations = len(
            current_forecast.coord('realization').points)

    # Ensemble-Calibration to calculate the mean and variance.
    forecast_predictor, forecast_variance = EnsembleCalibration(
        args.distribution,
        args.units,
        predictor_of_mean_flag=args.predictor_of_mean,
        max_iterations=args.max_iterations).process(current_forecast,
                                                    historic_forecast, truth)

    # If required, save the mean and variance.
    if args.save_mean:
        save_netcdf(forecast_predictor, args.save_mean)
    if args.save_variance:
        save_netcdf(forecast_variance, args.save_variance)

    # If input forecast is probabilities, convert output into probabilities.
    # If input forecast is percentiles, convert output into percentiles.
    # If input forecast is realizations, convert output into realizations.
    if input_forecast_type == "probabilities":
        result = GenerateProbabilitiesFromMeanAndVariance().process(
            forecast_predictor, forecast_variance, original_current_forecast)
    elif input_forecast_type == "percentiles":
        perc_coord = find_percentile_coordinate(original_current_forecast)
        result = GeneratePercentilesFromMeanAndVariance().process(
            forecast_predictor,
            forecast_variance,
            percentiles=perc_coord.points)
    elif input_forecast_type == "realizations":
        # Ensemble Copula Coupling to generate realizations
        # from mean and variance.
        percentiles = GeneratePercentilesFromMeanAndVariance().process(
            forecast_predictor,
            forecast_variance,
            no_of_percentiles=args.num_realizations)
        result = EnsembleReordering().process(
            percentiles,
            current_forecast,
            random_ordering=args.random_ordering,
            random_seed=args.random_seed)
    save_netcdf(result, args.output_filepath)
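# A minimal invocation sketch (hypothetical script and file names; the parser
# also defines a units positional argument, whose help text is truncated at
# the top of this example, assumed here to come first with the value "K"):
#
#     python ensemble_calibration.py K gaussian current_forecast.nc \
#         historic_forecasts.nc truth_analyses.nc calibrated_output.nc \
#         --num_realizations 12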
Example #19
    def process(self,
                post_processed_forecast,
                raw_forecast,
                random_ordering=False,
                random_seed=None):
        """
        Reorder post-processed forecast using the ordering of the
        raw ensemble.

        Args:
            post_processed_forecast (iris.cube.Cube or iris.cube.CubeList):
                The cube or cubelist containing the post-processed
                forecast realizations.
            raw_forecast (iris.cube.Cube or iris.cube.CubeList):
                The cube or cubelist containing the raw (not post-processed)
                forecast.
            random_ordering (bool):
                If random_ordering is True, the post-processed forecasts are
                reordered randomly, rather than using the ordering of the
                raw ensemble.
            random_seed (int or None):
                If random_seed is an integer, the integer value is used for
                the random seed.
                If random_seed is None, no random seed is set, so the random
                values generated are not reproducible.

        Returns:
            post_processed_forecast_realizations (iris.cube.Cube):
                Cube containing the new ensemble realizations where all points
                within the dataset have been reordered in comparison to the
                input percentiles.
        """
        if isinstance(post_processed_forecast, iris.cube.CubeList):
            percentile_coord = (find_percentile_coordinate(
                post_processed_forecast[0]).name())
        else:
            percentile_coord = (
                find_percentile_coordinate(post_processed_forecast).name())

        post_processed_forecast_percentiles = concatenate_cubes(
            post_processed_forecast, coords_to_slice_over=[percentile_coord])
        post_processed_forecast_percentiles = (enforce_coordinate_ordering(
            post_processed_forecast_percentiles, percentile_coord))
        raw_forecast_realizations = concatenate_cubes(raw_forecast)
        raw_forecast_realizations = enforce_coordinate_ordering(
            raw_forecast_realizations, "realization")
        raw_forecast_realizations = (self._recycle_raw_ensemble_realizations(
            post_processed_forecast_percentiles, raw_forecast_realizations,
            percentile_coord))
        post_processed_forecast_realizations = self.rank_ecc(
            post_processed_forecast_percentiles,
            raw_forecast_realizations,
            random_ordering=random_ordering,
            random_seed=random_seed)
        post_processed_forecast_realizations = (
            RebadgePercentilesAsRealizations.process(
                post_processed_forecast_realizations))

        post_processed_forecast_realizations = (enforce_coordinate_ordering(
            post_processed_forecast_realizations, "realization"))
        return post_processed_forecast_realizations
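# Illustrative, standalone sketch of the rank-reordering idea behind rank_ecc
# (plain numpy, not the plugin API): at each point the sorted post-processed
# values are assigned the rank order of the raw ensemble, so the calibrated
# ensemble inherits the rank structure of the raw members.
import numpy as np

raw_members = np.array([281.2, 280.4, 282.0])   # raw realizations at one point
post_sorted = np.array([280.0, 281.0, 282.5])   # sorted post-processed values
ranks = raw_members.argsort().argsort()         # rank of each raw member
reordered = post_sorted[ranks]                  # -> [281.0, 280.0, 282.5]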
def process(neighbour_cube,
            diagnostic_cube,
            lapse_rate_cube=None,
            apply_lapse_rate_correction=False,
            land_constraint=False,
            minimum_dz=False,
            extract_percentiles=None,
            ecc_bounds_warning=False,
            metadata_dict=None,
            suppress_warnings=False):
    """Module to run spot data extraction.

    Extract diagnostic data from gridded fields for spot data sites. It is
    possible to apply a temperature lapse rate adjustment to temperature data
    that helps to account for differences between the spot site's real altitude
    and that of the grid point from which the temperature data is extracted.

    Args:
        neighbour_cube (iris.cube.Cube):
            Cube of spot-data neighbours and the spot site information.
        diagnostic_cube (iris.cube.Cube):
            Cube containing the diagnostic data to be extracted.
        lapse_rate_cube (iris.cube.Cube):
            Cube containing temperature lapse rates. If this cube is provided
            and a screen temperature cube is being processed, the lapse rates
            will be used to adjust the temperature to better represent each
            spot's site-altitude.
        apply_lapse_rate_correction (bool):
            If True, and a lapse rate cube has been provided, extracted
            screen temperatures will be adjusted to better match the
            altitude of the spot sites for which they have been extracted.
            Default is False.
        land_constraint (bool):
            If True, the neighbour cube will be interrogated for grid point
            neighbours that were identified using a land constraint. This means
            that the grid points should be land points except for sites where
            none were found within the search radius when the neighbour cube
            was created. May be used with minimum_dz.
            Default is False.
        minimum_dz (bool):
            If True, the neighbour cube will be interrogated for grid point
            neighbours that were identified using the minimum height
            difference constraint. These are grid points that were found to be
            the closest in altitude to the spot site within the search radius
            defined when the neighbour cube was created. May be used with
            land_constraint.
            Default is False.
        extract_percentiles (list or int):
            If set to a percentile value or a list of percentile values,
            data corresponding to those percentiles will be returned. For
            example [25, 50, 75] will result in the 25th, 50th and 75th
            percentiles being returned from a cube of probabilities,
            percentiles or realizations.
            Note that for percentile inputs, the desired percentile(s) must
            exist in the input cube.
            Default is None.
        ecc_bounds_warning (bool):
            If True, where calculated percentiles are outside the ECC bounds
            range, raises a warning rather than an exception.
            Default is False.
        metadata_dict (dict):
            If provided, this dictionary can be used to modify the metadata
            of the returned cube.
            Default is None.
        suppress_warnings (bool):
            Suppress warning output. This option should only be used if it
            is known that warnings will be generated but they are not required.
            Default is False.

    Returns:
        result (iris.cube.Cube):
            The processed cube.

    Raises:
        ValueError:
            If the percentile diagnostic cube does not contain the requested
            percentile value.
        ValueError:
            If the lapse rate cube was provided but the diagnostic being
            processed is not air temperature.
        ValueError:
            If the lapse rate cube provided does not have the name
            "air_temperature_lapse_rate"
        ValueError:
            If the lapse rate cube does not contain a single valued height
            coordinate.

    Warns:
        warning:
            If the diagnostic cube is not a known probabilistic type.
        warning:
            If a lapse rate cube was provided, but the height of the
            temperature data does not match that of the data used to
            calculate the lapse rates.
        warning:
            If a lapse rate cube was not provided, but the option to apply
            the lapse rate correction was enabled.

    """
    neighbour_selection_method = NeighbourSelection(
        land_constraint=land_constraint,
        minimum_dz=minimum_dz).neighbour_finding_method_name()
    plugin = SpotExtraction(
        neighbour_selection_method=neighbour_selection_method)
    result = plugin.process(neighbour_cube, diagnostic_cube)

    # If a probability or percentile diagnostic cube is provided, extract
    # the given percentile if available. This is done after the spot-extraction
    # to minimise processing time; usually there are far fewer spot sites than
    # grid points.
    if extract_percentiles is not None:
        try:
            perc_coordinate = find_percentile_coordinate(result)
        except CoordinateNotFoundError:
            if 'probability_of_' in result.name():
                result = GeneratePercentilesFromProbabilities(
                    ecc_bounds_warning=ecc_bounds_warning).process(
                        result, percentiles=extract_percentiles)
                result = iris.util.squeeze(result)
            elif result.coords('realization', dim_coords=True):
                fast_percentile_method = not np.ma.isMaskedArray(result.data)
                result = PercentileConverter(
                    'realization',
                    percentiles=extract_percentiles,
                    fast_percentile_method=fast_percentile_method).process(
                        result)
            else:
                msg = ('Diagnostic cube is not a known probabilistic type. '
                       'The {} percentile could not be extracted. Extracting '
                       'data from the cube including any leading '
                       'dimensions.'.format(extract_percentiles))
                if not suppress_warnings:
                    warnings.warn(msg)
        else:
            constraint = [
                '{}={}'.format(perc_coordinate.name(), extract_percentiles)
            ]
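            # e.g. extract_percentiles=[25, 50, 75] with a coordinate named
            # "percentile" yields the constraint string
            # 'percentile=[25, 50, 75]'.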
            perc_result = extract_subcube(result, constraint)
            if perc_result is not None:
                result = perc_result
            else:
                msg = ('The percentile diagnostic cube does not contain the '
                       'requested percentile value. Requested {}, available '
                       '{}'.format(extract_percentiles,
                                   perc_coordinate.points))
                raise ValueError(msg)
    # Check whether a lapse rate cube has been provided and we are dealing with
    # temperature data and the lapse-rate option is enabled.
    if apply_lapse_rate_correction and lapse_rate_cube:
        if result.name() != "air_temperature":
            msg = ("A lapse rate cube was provided, but the diagnostic being "
                   "processed is not air temperature and cannot be adjusted.")
            raise ValueError(msg)

        if lapse_rate_cube.name() != 'air_temperature_lapse_rate':
            msg = ("A cube has been provided as a lapse rate cube but does "
                   "not have the expected name air_temperature_lapse_rate: "
                   "{}".format(lapse_rate_cube.name()))
            raise ValueError(msg)

        try:
            lapse_rate_height_coord = lapse_rate_cube.coord("height")
        except (ValueError, CoordinateNotFoundError):
            msg = ("Lapse rate cube does not contain a single valued height "
                   "coordinate. This is required to ensure it is applied to "
                   "equivalent temperature data.")
            raise ValueError(msg)

        # Check the height of the temperature data matches that used to
        # calculate the lapse rates. If so, adjust temperatures using the lapse
        # rate values.
        if diagnostic_cube.coord("height") == lapse_rate_height_coord:
            plugin = SpotLapseRateAdjust(
                neighbour_selection_method=neighbour_selection_method)
            result = plugin.process(result, neighbour_cube, lapse_rate_cube)
        elif not suppress_warnings:
            warnings.warn(
                "A lapse rate cube was provided, but the height of the "
                "temperature data does not match that of the data used "
                "to calculate the lapse rates. As such the temperatures "
                "were not adjusted with the lapse rates.")

    elif apply_lapse_rate_correction and not lapse_rate_cube:
        if not suppress_warnings:
            warnings.warn(
                "A lapse rate cube was not provided, but the option to "
                "apply the lapse rate correction was enabled. No lapse rate "
                "correction could be applied.")

    # Modify final metadata as described by provided JSON file.
    if metadata_dict:
        result = amend_metadata(result, **metadata_dict)
    # Remove the internal model_grid_hash attribute if present.
    result.attributes.pop('model_grid_hash', None)
    return result
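# A minimal usage sketch (hypothetical, pre-loaded cubes): extract the
# quartiles at spot sites and apply the lapse-rate adjustment to screen
# temperatures.
#
#     spot_result = process(
#         neighbour_cube, temperature_cube,
#         lapse_rate_cube=lapse_rate_cube,
#         apply_lapse_rate_correction=True,
#         extract_percentiles=[25, 50, 75])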