Esempio n. 1
0
    def test_5d_cube(self):
        """
        Check the data values returned by the utility for a 5d cube.

        Two temperature cubes that differ only in their scalar height
        coordinate (5 and 10) are merged into one cube. The expected 2d
        array is the block of values for one height, repeated for the
        second height.
        """
        single_height_cubes = []
        for height in (5, 10):
            single_height_cube = set_up_temperature_cube()
            single_height_cube.add_aux_coord(
                iris.coords.AuxCoord([height], standard_name="height"))
            single_height_cubes.append(single_height_cube)
        merged_cube = iris.cube.CubeList(single_height_cubes).merge_cube()

        # Rows expected for a single height level.
        rows_per_height = [
            [226.15, 230.15, 232.15],
            [237.4, 241.4, 243.4],
            [248.65, 252.65, 254.65],
            [259.9, 263.9, 265.9],
            [271.15, 275.15, 277.15],
            [282.4, 286.4, 288.4],
            [293.65, 297.65, 299.65],
            [304.9, 308.9, 310.9],
            [316.15, 320.15, 322.15],
        ]
        expected = np.array(rows_per_height + rows_per_height)

        result = convert_cube_data_to_2d(merged_cube)
        self.assertArrayAlmostEqual(result, expected, decimal=5)
Esempio n. 2
0
    def test_basic_realizations_predictor(self):
        """
        Test that the truncated normal CRPS minimiser returns the expected
        np.float64 value when the ensemble realizations are the predictor.
        """
        cube = set_up_wind_speed_cube()

        predictor_cube = cube.copy()
        variance_cube = cube.collapsed("realization", iris.analysis.VARIANCE)
        truth_cube = cube.collapsed("realization", iris.analysis.MAX)

        # Every input handed to the minimiser is cast to float32.
        coefficients = np.array([5, 1, 0, 1, 1, 1], dtype=np.float32)
        predictor_data = convert_cube_data_to_2d(predictor_cube)
        predictor_data = predictor_data.astype(np.float32)
        variance_data = variance_cube.data.flatten().astype(np.float32)
        truth_data = truth_cube.data.flatten().astype(np.float32)
        sqrt_pi = np.sqrt(np.pi).astype(np.float32)

        result = Plugin().truncated_normal_crps_minimiser(
            coefficients, predictor_data, truth_data, variance_data,
            sqrt_pi, "realizations")

        self.assertIsInstance(result, np.float64)
        self.assertAlmostEqual(result, 533.487612959)
Esempio n. 3
0
    def test_basic_members_predictor(self):
        """
        Test that the normal CRPS minimiser returns the expected np.float64
        value when the "members" predictor flag is used.
        """
        cube = set_up_temperature_cube()

        predictor_cube = cube.copy()
        variance_cube = cube.collapsed("realization", iris.analysis.VARIANCE)
        truth_cube = cube.collapsed("realization", iris.analysis.MAX)

        # Every input handed to the minimiser is cast to float32.
        coefficients = np.array([5, 1, 0, 1, 1, 1], dtype=np.float32)
        predictor_data = convert_cube_data_to_2d(predictor_cube)
        predictor_data = predictor_data.astype(np.float32)
        variance_data = variance_cube.data.flatten().astype(np.float32)
        truth_data = truth_cube.data.flatten().astype(np.float32)
        sqrt_pi = np.sqrt(np.pi).astype(np.float32)

        result = Plugin().normal_crps_minimiser(
            coefficients, predictor_data, truth_data, variance_data,
            sqrt_pi, "members")

        self.assertIsInstance(result, np.float64)
        self.assertAlmostEqual(result, 4886.94724835)
Esempio n. 4
0
    def test_change_coordinate(self):
        """
        Check the data values returned by the utility when
        coord="longitude" is requested.
        """
        # Rearrange the reference values: view them as 9 rows of 3,
        # transpose, then read the result back out as 9 rows of 3.
        nine_by_three = self.data.flatten().reshape(9, 3)
        expected = nine_by_three.T.reshape(9, 3)

        result = convert_cube_data_to_2d(self.cube, coord="longitude")
        self.assertArrayAlmostEqual(result, expected)
Esempio n. 5
0
    def test_no_transpose(self):
        """
        Check the data values returned by the utility when
        transpose=False is requested: the result is expected to equal the
        transpose of the reference data.
        """
        expected = self.data.T
        untransposed = convert_cube_data_to_2d(self.cube, transpose=False)
        self.assertArrayAlmostEqual(untransposed, expected)
Esempio n. 6
0
    def test_1d_cube(self):
        """
        Check the data values returned by the utility for a 1d cube,
        obtained by indexing away the three leading dimensions.
        """
        cube_1d = set_up_temperature_cube()[0, 0, 0, :]
        # A single column holding the three remaining values.
        expected = np.array([[226.15], [237.4], [248.65]])

        result = convert_cube_data_to_2d(cube_1d)
        self.assertArrayAlmostEqual(result, expected, decimal=5)
Esempio n. 7
0
    def test_3d_cube(self):
        """
        Check the data values returned by the utility for a 3d cube,
        obtained by taking the first slice along the leading dimension.
        """
        cube_3d = set_up_temperature_cube()[0]
        values = [226.15, 237.4, 248.65, 259.9, 271.15,
                  282.4, 293.65, 304.9, 316.15]
        # The expected output is a single column of nine values.
        expected = np.array(values)[:, np.newaxis]

        result = convert_cube_data_to_2d(cube_3d)
        self.assertArrayAlmostEqual(result, expected)
Esempio n. 8
0
 def setUp(self):
     """
     Prepare the cubes and flattened float64 arrays used by the tests.

     Everything is derived from self.historic_temperature_forecast_cube,
     which is made available by the parent class setUp.
     """
     super().setUp()
     historic_forecast = self.historic_temperature_forecast_cube

     # Cubes: realization mean, a copy of the realizations, the
     # realization variance, and the realization maximum (used as truth).
     self.forecast_predictor_mean = historic_forecast.collapsed(
         "realization", iris.analysis.MEAN)
     self.forecast_predictor_realizations = historic_forecast.copy()
     self.forecast_variance = historic_forecast.collapsed(
         "realization", iris.analysis.VARIANCE)
     self.truth = historic_forecast.collapsed(
         "realization", iris.analysis.MAX)

     # Data arrays matching the cubes above, all cast to float64.
     mean_values = self.forecast_predictor_mean.data.flatten()
     self.forecast_predictor_data = mean_values.astype(np.float64)
     realization_values = convert_cube_data_to_2d(historic_forecast.copy())
     self.forecast_predictor_data_realizations = (
         realization_values.astype(np.float64))
     variance_values = self.forecast_variance.data.flatten()
     self.forecast_variance_data = variance_values.astype(np.float64)
     self.truth_data = self.truth.data.flatten().astype(np.float64)
Esempio n. 9
0
 def test_check_values(self):
     """Check that converting self.cube reproduces the reference data."""
     converted = convert_cube_data_to_2d(self.cube)
     self.assertArrayAlmostEqual(converted, self.data)
Esempio n. 10
0
 def test_basic(self):
     """Test that the utility returns a numpy.ndarray."""
     converted = convert_cube_data_to_2d(self.cube)
     self.assertIsInstance(converted, np.ndarray)
Esempio n. 11
0
    def crps_minimiser_wrapper(self, initial_guess, forecast_predictor, truth,
                               forecast_var, predictor_of_mean_flag,
                               distribution):
        """
        Function to pass a given minimisation function to the scipy minimize
        function to estimate optimised values for the coefficients.

        The minimisation function is looked up in self.minimisation_dict
        using the requested distribution and minimised with the Nelder-Mead
        method, with all inputs cast to float32.

        Args:
            initial_guess (List):
                List of coefficients used as the starting point of the
                minimisation. Order of coefficients is [c, d, a, b].
            forecast_predictor (iris.cube.Cube):
                Cube containing the fields to be used as the predictor,
                either the ensemble mean or the ensemble realizations.
            truth (iris.cube.Cube):
                Cube containing the field, which will be used as truth.
            forecast_var (iris.cube.Cube):
                Cube containing the field containing the ensemble variance.
            predictor_of_mean_flag (String):
                String to specify the input to calculate the calibrated mean.
                Currently the ensemble mean ("mean") and the ensemble
                realizations ("realizations") are supported as the predictors.
            distribution (String):
                String used to access the appropriate minimisation function
                within self.minimisation_dict.

        Returns:
            optimised_coeffs (numpy.ndarray):
                Array of optimised coefficients.
                Order of coefficients is [c, d, a, b].

        Raises:
            KeyError: If the distribution is not a key of
                self.minimisation_dict.

        Warns:
            Warning: If the minimisation did not converge, or if the final
                iteration changed a coefficient by more than
                self.TOLERATED_PERCENTAGE_CHANGE percent.

        """
        def calculate_percentage_change_in_last_iteration(allvecs):
            """
            Calculate the percentage change that has occurred within
            the last iteration of the minimisation. If the percentage change
            between the last iteration and the last-but-one iteration exceeds
            the threshold, a warning message is printed.

            Args:
                allvecs : List
                    List of numpy arrays containing the optimised coefficients,
                    after each iteration.
            """
            if len(allvecs) < 2:
                # Fewer than two recorded vectors means there is no
                # last-but-one iteration to compare against.
                return
            last_iteration_percentage_change = np.absolute(
                (allvecs[-1] - allvecs[-2]) / allvecs[-2]) * 100
            if (np.any(last_iteration_percentage_change >
                       self.TOLERATED_PERCENTAGE_CHANGE)):
                template = (
                    "\nThe final iteration resulted in a percentage change "
                    "that is greater than the accepted threshold of 5% "
                    "i.e. {}. "
                    "\nA satisfactory minimisation has not been achieved. "
                    "\nLast iteration: {}, "
                    "\nLast-but-one iteration: {}"
                    "\nAbsolute difference: {}\n")
                # Suppress scientific notation only while the arrays are
                # formatted, so the global numpy print options are left
                # as the caller set them.
                with np.printoptions(suppress=True):
                    msg = template.format(
                        last_iteration_percentage_change, allvecs[-1],
                        allvecs[-2], np.absolute(allvecs[-2] - allvecs[-1]))
                warnings.warn(msg)

        try:
            minimisation_function = self.minimisation_dict[distribution]
        except KeyError as err:
            msg = ("Distribution requested {} is not supported in {}. "
                   "Error message is {}".format(distribution,
                                                self.minimisation_dict, err))
            raise KeyError(msg) from err

        # Ensure predictor_of_mean_flag is valid.
        check_predictor_of_mean_flag(predictor_of_mean_flag)

        if predictor_of_mean_flag.lower() == "mean":
            forecast_predictor_data = forecast_predictor.data.flatten()
            truth_data = truth.data.flatten()
            forecast_var_data = forecast_var.data.flatten()
        elif predictor_of_mean_flag.lower() == "realizations":
            truth_data = truth.data.flatten()
            # The realization coordinate is moved to the front before the
            # cube data is converted to a 2d array.
            forecast_predictor = (enforce_coordinate_ordering(
                forecast_predictor, "realization"))
            forecast_predictor_data = convert_cube_data_to_2d(
                forecast_predictor)
            forecast_var_data = forecast_var.data.flatten()

        initial_guess = np.array(initial_guess, dtype=np.float32)
        forecast_predictor_data = forecast_predictor_data.astype(np.float32)
        forecast_var_data = forecast_var_data.astype(np.float32)
        truth_data = truth_data.astype(np.float32)
        sqrt_pi = np.sqrt(np.pi).astype(np.float32)

        # return_all=True makes scipy record the best vector after each
        # iteration in allvecs, which the percentage-change check needs.
        optimised_coeffs = minimize(minimisation_function,
                                    initial_guess,
                                    args=(forecast_predictor_data, truth_data,
                                          forecast_var_data, sqrt_pi,
                                          predictor_of_mean_flag),
                                    method="Nelder-Mead",
                                    options={
                                        "maxiter": self.MAX_ITERATIONS,
                                        "return_all": True
                                    })
        if not optimised_coeffs.success:
            msg = ("Minimisation did not result in convergence after "
                   "{} iterations. \n{}".format(self.MAX_ITERATIONS,
                                                optimised_coeffs.message))
            warnings.warn(msg)
        calculate_percentage_change_in_last_iteration(optimised_coeffs.allvecs)
        return optimised_coeffs.x
    def process(self, initial_guess, forecast_predictor, truth, forecast_var,
                predictor_of_mean_flag, distribution):
        """
        Function to pass a given function to the scipy minimize
        function to estimate optimised values for the coefficients.

        If the predictor_of_mean_flag is the ensemble mean, this function
        estimates values for alpha, beta, gamma and delta based on the
        equation:
        N(alpha + beta * ensemble_mean, gamma + delta * ensemble_variance),
        where N is a chosen distribution.

        If the predictor_of_mean_flag is the ensemble realizations, this
        function estimates values for alpha, beta, gamma and delta based on the
        equation:

        .. math::
          N(alpha + beta0 * realization0 + beta1 * realization1,

          gamma + delta * ensemble\\_variance)

        where N is a chosen distribution and the number of beta terms
        depends on the number of realizations provided.

        Args:
            initial_guess (list):
                List of coefficients used as the starting point of the
                minimisation.
                Order of coefficients is [gamma, delta, alpha, beta].
            forecast_predictor (iris.cube.Cube):
                Cube containing the fields to be used as the predictor,
                either the ensemble mean or the ensemble realizations.
            truth (iris.cube.Cube):
                Cube containing the field, which will be used as truth.
            forecast_var (iris.cube.Cube):
                Cube containing the field containing the ensemble variance.
            predictor_of_mean_flag (str):
                String to specify the input to calculate the calibrated mean.
                Currently the ensemble mean ("mean") and the ensemble
                realizations ("realizations") are supported as the predictors.
            distribution (str):
                String used to access the appropriate function for use in the
                minimisation within self.minimisation_dict.

        Returns:
            optimised_coeffs (numpy.ndarray):
                Array of optimised coefficients, cast to float32.
                Order of coefficients is [gamma, delta, alpha, beta].

        Raises:
            KeyError: If the distribution is not supported.

        Warns:
            Warning: If the minimisation did not converge.

        """
        def calculate_percentage_change_in_last_iteration(allvecs):
            """
            Calculate the percentage change that has occurred within
            the last iteration of the minimisation. If the percentage change
            between the last iteration and the last-but-one iteration exceeds
            the threshold, a warning message is printed.

            Args:
                allvecs (list):
                    List of numpy arrays containing the optimised coefficients,
                    after each iteration.

            Warns:
                Warning: If a satisfactory minimisation has not been achieved.
            """
            if len(allvecs) < 2:
                # Fewer than two recorded vectors means there is no
                # last-but-one iteration to compare against.
                return
            last_iteration_percentage_change = np.absolute(
                (allvecs[-1] - allvecs[-2]) / allvecs[-2]) * 100
            if (np.any(last_iteration_percentage_change >
                       self.TOLERATED_PERCENTAGE_CHANGE)):
                template = (
                    "The final iteration resulted in a percentage change "
                    "that is greater than the accepted threshold of 5% "
                    "i.e. {}. "
                    "\nA satisfactory minimisation has not been achieved. "
                    "\nLast iteration: {}, "
                    "\nLast-but-one iteration: {}"
                    "\nAbsolute difference: {}\n")
                # Suppress scientific notation only while the arrays are
                # formatted, so the global numpy print options are left
                # as the caller set them.
                with np.printoptions(suppress=True):
                    msg = template.format(
                        last_iteration_percentage_change, allvecs[-1],
                        allvecs[-2], np.absolute(allvecs[-2] - allvecs[-1]))
                warnings.warn(msg)

        try:
            minimisation_function = self.minimisation_dict[distribution]
        except KeyError as err:
            msg = ("Distribution requested {} is not supported in {}. "
                   "Error message is {}".format(distribution,
                                                self.minimisation_dict, err))
            raise KeyError(msg) from err

        # Ensure predictor_of_mean_flag is valid.
        check_predictor_of_mean_flag(predictor_of_mean_flag)

        if predictor_of_mean_flag.lower() == "mean":
            forecast_predictor_data = forecast_predictor.data.flatten()
            truth_data = truth.data.flatten()
            forecast_var_data = forecast_var.data.flatten()
        elif predictor_of_mean_flag.lower() == "realizations":
            truth_data = truth.data.flatten()
            # The realization coordinate is moved to the front before the
            # cube data is converted to a 2d array.
            forecast_predictor = (enforce_coordinate_ordering(
                forecast_predictor, "realization"))
            forecast_predictor_data = convert_cube_data_to_2d(
                forecast_predictor)
            forecast_var_data = forecast_var.data.flatten()

        # Increased precision is needed for stable coefficient calculation.
        # The resulting coefficients are cast to float32 prior to output.
        initial_guess = np.array(initial_guess, dtype=np.float64)
        forecast_predictor_data = forecast_predictor_data.astype(np.float64)
        forecast_var_data = forecast_var_data.astype(np.float64)
        truth_data = truth_data.astype(np.float64)
        sqrt_pi = np.sqrt(np.pi).astype(np.float64)

        # return_all=True makes scipy record the best vector after each
        # iteration in allvecs, which the percentage-change check needs.
        optimised_coeffs = minimize(minimisation_function,
                                    initial_guess,
                                    args=(forecast_predictor_data, truth_data,
                                          forecast_var_data, sqrt_pi,
                                          predictor_of_mean_flag),
                                    method="Nelder-Mead",
                                    options={
                                        "maxiter": self.max_iterations,
                                        "return_all": True
                                    })

        if not optimised_coeffs.success:
            msg = ("Minimisation did not result in convergence after "
                   "{} iterations. \n{}".format(self.max_iterations,
                                                optimised_coeffs.message))
            warnings.warn(msg)
        calculate_percentage_change_in_last_iteration(optimised_coeffs.allvecs)
        return optimised_coeffs.x.astype(np.float32)
Esempio n. 13
0
    def _interpolate_percentiles(self, forecast_at_percentiles,
                                 desired_percentiles, bounds_pairing,
                                 percentile_coord):
        """
        Linearly interpolate a forecast from the percentiles it is defined
        at onto a new set of percentiles.

        Args:
            forecast_at_percentiles (Iris CubeList or Iris Cube):
                Cube or CubeList expected to contain a percentile coordinate.
            desired_percentiles (Numpy array):
                Array of the desired percentiles.
            bounds_pairing (Tuple):
                Lower and upper bound to be used as the ends of the
                cumulative distribution function.
            percentile_coord (String):
                Name of required percentile coordinate.
        Returns:
            percentile_cube (iris cube.Cube):
                Cube containing values for the required diagnostic e.g.
                air_temperature at the required percentiles.

        """
        source_percentiles = (
            forecast_at_percentiles.coord(percentile_coord).points)

        # Put the percentile dimension first, so that the conversion to a
        # 2d array produces data in the desired order.
        ordered_forecast = enforce_coordinate_ordering(
            forecast_at_percentiles, percentile_coord)
        forecast_2d = convert_cube_data_to_2d(
            ordered_forecast, coord=percentile_coord)

        source_percentiles, forecast_2d = (
            self._add_bounds_to_percentiles_and_forecast_at_percentiles(
                source_percentiles, forecast_2d, bounds_pairing))

        # One np.interp call per row of the 2d forecast; each result fills
        # one column of the float32 output array.
        row_count = forecast_2d.shape[0]
        interpolated = np.empty(
            (len(desired_percentiles), row_count), dtype=np.float32)
        for row in range(row_count):
            interpolated[:, row] = np.interp(
                desired_percentiles, source_percentiles, forecast_2d[row, :])

        # Reshape the interpolated data, so the percentiles dimension is
        # first, and any other dimension coordinates follow.
        interpolated_data = restore_non_probabilistic_dimensions(
            interpolated, ordered_forecast, percentile_coord,
            len(desired_percentiles))

        # Only the first slice is needed, as a template without the
        # percentile coordinate.
        for template_cube in ordered_forecast.slices_over(percentile_coord):
            template_cube.remove_coord(percentile_coord)
            break
        return create_cube_with_percentiles(
            desired_percentiles,
            template_cube,
            interpolated_data,
            custom_name=percentile_coord)
Esempio n. 14
0
    def _apply_params(self, forecast_predictors, forecast_vars):
        """
        Apply the EMOS coefficients held in self.coefficients_cube to the
        forecast predictor and the forecast variance.

        Note that the data of forecast_vars is overwritten in place, and
        so is the data of forecast_predictors when the predictor is the
        ensemble mean; the returned cubes are those same objects.

        Args:
            forecast_predictors (iris.cube.Cube):
                Cube containing the forecast predictor e.g. ensemble mean
                or ensemble realizations.
            forecast_vars (iris.cube.Cube):
                Cube containing the forecast variance e.g. ensemble variance.

        Returns:
            (tuple) : tuple containing:
                **calibrated_forecast_predictor** (iris.cube.Cube):
                    Cube containing the calibrated mean. For the
                    realizations predictor this is the realization mean
                    cube with its data replaced by the predicted mean.
                **calibrated_forecast_variance** (iris.cube.Cube):
                    Cube containing the calibrated version of the
                    ensemble variance.
        """
        coefficient_names = (
            self.coefficients_cube.coord("coefficient_name").points)
        coefficients = dict(
            zip(coefficient_names, self.coefficients_cube.data))

        predictor_flag = self.predictor_of_mean_flag.lower()
        if predictor_flag == "mean":
            # Predicted mean = a + b*X, where X is the raw ensemble mean
            # and b = beta.
            weights = [coefficients["alpha"], coefficients["beta"]]
            predictor_values = forecast_predictors.data.flatten()
            intercept_column = np.ones(
                predictor_values.shape, dtype=np.float32)
            design_matrix = np.column_stack(
                (intercept_column, predictor_values))
            predicted_mean = np.dot(design_matrix, weights)
            calibrated_forecast_predictor = forecast_predictors
        elif predictor_flag == "realizations":
            # Predicted mean = a + sum(b_i * X_i), where the X_i are the
            # columns of the 2d predictor array and b_i = beta_i^2.
            beta_values = np.array([], dtype=np.float32)
            for name, value in coefficients.items():
                if name.startswith("beta"):
                    beta_values = np.append(beta_values, value)
            weights = np.append(coefficients["alpha"], beta_values**2)
            predictor_values = convert_cube_data_to_2d(forecast_predictors)
            variance_values = forecast_vars.data.flatten()
            intercept_column = np.ones(
                variance_values.shape, dtype=np.float32)
            design_matrix = np.column_stack(
                (intercept_column, predictor_values))
            predicted_mean = np.dot(design_matrix, weights)
            # Calculate mean of ensemble realizations, as only the
            # calibrated ensemble mean will be returned.
            calibrated_forecast_predictor = forecast_predictors.collapsed(
                "realization", iris.analysis.MEAN)

        # The predicted mean is reshaped onto the (y, x) grid of the
        # input predictor cube.
        x_size = len(forecast_predictors.coord(axis="x").points)
        y_size = len(forecast_predictors.coord(axis="y").points)
        calibrated_forecast_predictor.data = np.reshape(
            predicted_mean, (y_size, x_size))

        # Predicted variance = c + d*S^2, where S^2 is the raw variance,
        # c = (gamma)^2 and d = (delta)^2.
        calibrated_forecast_var = forecast_vars
        calibrated_forecast_var.data = (
            coefficients["gamma"]**2 +
            coefficients["delta"]**2 * forecast_vars.data)

        return calibrated_forecast_predictor, calibrated_forecast_var
Esempio n. 15
0
    def _interpolate_percentiles(
            self, forecast_at_percentiles, desired_percentiles,
            bounds_pairing):
        """
        Linearly interpolate a forecast from the percentiles it is defined
        at onto a new set of percentiles. The percentiles are read from
        the "percentile_over_realization" coordinate.

        Parameters
        ----------
        forecast_at_percentiles : Iris CubeList or Iris Cube
            Cube or CubeList expected to contain a percentile coordinate.
        desired_percentiles : Numpy array
            Array of the desired percentiles.
        bounds_pairing : Tuple
            Lower and upper bound to be used as the ends of the
            cumulative distribution function.

        Returns
        -------
        percentile_cube : Iris cube
            Cube containing values for the required diagnostic e.g.
            air_temperature at the required percentiles.

        """
        percentile_coord = "percentile_over_realization"
        source_percentiles = (
            forecast_at_percentiles.coord(percentile_coord).points)

        # Put the percentile dimension first, so that the conversion to a
        # 2d array produces data in the desired order.
        ordered_forecast = ensure_dimension_is_the_zeroth_dimension(
            forecast_at_percentiles, percentile_coord)
        forecast_2d = convert_cube_data_to_2d(
            ordered_forecast, coord=percentile_coord)

        source_percentiles, forecast_2d = (
            self._add_bounds_to_percentiles_and_forecast_at_percentiles(
                source_percentiles, forecast_2d, bounds_pairing))

        # One np.interp call per row of the 2d forecast; each result fills
        # one column of the output array.
        row_count = forecast_2d.shape[0]
        interpolated = np.empty((len(desired_percentiles), row_count))
        for row in range(row_count):
            interpolated[:, row] = np.interp(
                desired_percentiles, source_percentiles, forecast_2d[row, :])

        # Reshape the interpolated data, so the percentiles dimension is
        # first, and any other dimension coordinates follow.
        interpolated_data = restore_non_probabilistic_dimensions(
            interpolated, ordered_forecast, percentile_coord,
            len(desired_percentiles))

        # Only the first slice is needed, as a template without the
        # percentile coordinate.
        for template_cube in ordered_forecast.slices_over(percentile_coord):
            template_cube.remove_coord(percentile_coord)
            break
        return create_cube_with_percentiles(
            desired_percentiles, template_cube, interpolated_data)
    def compute_initial_guess(self,
                              truth,
                              forecast_predictor,
                              predictor_of_mean_flag,
                              estimate_coefficients_from_linear_model_flag,
                              no_of_realizations=None):
        """
        Function to compute initial guess of the alpha, beta, gamma
        and delta components of the EMOS coefficients by linear regression
        of the forecast predictor and the truth, if requested. Otherwise,
        default values for the coefficients will be used.

        If the predictor_of_mean_flag is "mean", then the order of
        the initial_guess is [gamma, delta, alpha, beta]. Otherwise, if the
        predictor_of_mean_flag is "realizations" then the order of the
        initial_guess is [gamma, delta, alpha, beta0, beta1, beta2], where
        the number of beta variables will correspond to the number of
        realizations. In this example initial guess with three beta
        variables, there will correspondingly be three realizations.

        The coefficients relate to adjustments to the ensemble mean or the
        ensemble realizations, and adjustments to the ensemble variance:

        .. math::
            alpha + beta * ensemble\\_mean

        or

        .. math::
            alpha + beta0 * realization1 + beta1 * realization2

        .. math::
            gamma + delta * ensemble\\_variance

        The default values for the initial guesses are in
        [gamma, delta, alpha, beta] ordering:
        * For the ensemble mean, the default initial guess: [0, 1, 0, 1]
        assumes that the raw forecast is skilful and the expected adjustments
        are small.
        * For the ensemble realizations, the default initial guess is
        effectively: [0, 1, 0, 1/3., 1/3., 1/3.], such that
        each realization is assumed to have equal weight.

        If linear regression is enabled, the alpha and beta coefficients
        associated with the ensemble mean or ensemble realizations are
        modified based on the results from the linear regression fit.

        Args:
            truth (iris.cube.Cube):
                Cube containing the field, which will be used as truth.
            forecast_predictor (iris.cube.Cube):
                Cube containing the fields to be used as the predictor,
                either the ensemble mean or the ensemble realizations.
            predictor_of_mean_flag (str):
                String to specify the input to calculate the calibrated mean.
                Currently the ensemble mean ("mean") and the ensemble
                realizations ("realizations") are supported as the predictors.
            estimate_coefficients_from_linear_model_flag (bool):
                Flag whether coefficients should be estimated from
                the linear regression, or static estimates should be used.
            no_of_realizations (int):
                Number of realizations, if ensemble realizations are to be
                used as predictors. Default is None.

        Returns:
            initial_guess (list):
                List of coefficients to be used as initial guess.
                Order of coefficients is [gamma, delta, alpha, beta].

        """

        if (predictor_of_mean_flag.lower() == "mean"
                and not estimate_coefficients_from_linear_model_flag):
            initial_guess = [0, 1, 0, 1]
        elif (predictor_of_mean_flag.lower() == "realizations"
              and not estimate_coefficients_from_linear_model_flag):
            initial_guess = [0, 1, 0] + np.repeat(
                np.sqrt(1. / no_of_realizations), no_of_realizations).tolist()
        elif estimate_coefficients_from_linear_model_flag:
            if predictor_of_mean_flag.lower() == "mean":
                # Find all values that are not NaN.
                truth_not_nan = ~np.isnan(truth.data.flatten())
                forecast_not_nan = ~np.isnan(forecast_predictor.data.flatten())
                combined_not_nan = (np.all(np.row_stack(
                    [truth_not_nan, forecast_not_nan]),
                                           axis=0))
                if not any(combined_not_nan):
                    gradient, intercept = ([np.nan, np.nan])
                else:
                    gradient, intercept, _, _, _ = (stats.linregress(
                        forecast_predictor.data.flatten()[combined_not_nan],
                        truth.data.flatten()[combined_not_nan]))
                initial_guess = [0, 1, intercept, gradient]
            elif predictor_of_mean_flag.lower() == "realizations":
                if self.statsmodels_found:
                    truth_data = truth.data.flatten()
                    forecast_predictor = (enforce_coordinate_ordering(
                        forecast_predictor, "realization"))
                    forecast_data = np.array(convert_cube_data_to_2d(
                        forecast_predictor, transpose=False),
                                             dtype=np.float32)
                    # Find all values that are not NaN.
                    truth_not_nan = ~np.isnan(truth_data)
                    forecast_not_nan = ~np.isnan(forecast_data)
                    combined_not_nan = (np.all(np.row_stack(
                        [truth_not_nan, forecast_not_nan]),
                                               axis=0))
                    val = self.sm.add_constant(
                        forecast_data[:, combined_not_nan].T)
                    est = self.sm.OLS(truth_data[combined_not_nan], val).fit()
                    intercept = est.params[0]
                    gradient = est.params[1:]
                    initial_guess = [0, 1, intercept] + gradient.tolist()
                else:
                    initial_guess = (
                        [0, 1, 0] + np.repeat(np.sqrt(1. / no_of_realizations),
                                              no_of_realizations).tolist())
        return np.array(initial_guess, dtype=np.float32)
Esempio n. 17
0
    def compute_initial_guess(self,
                              truth,
                              forecast_predictor,
                              predictor_of_mean_flag,
                              estimate_coefficients_from_linear_model_flag,
                              no_of_realizations=None):
        """
        Function to compute initial guess of the a and b components of the
        EMOS coefficients by linear regression of the forecast predictor
        and the truth, if requested. Otherwise, default values for a and b
        will be used.

        Default values have been chosen based on Figure 8 in the
        2017 ensemble calibration report available on the Science Plugin
        Documents Confluence page.

        Args:
            truth (Iris cube):
                Cube containing the field, which will be used as truth.
                Only read when linear regression is requested.
            forecast_predictor (Iris cube):
                Cube containing the fields to be used as the predictor,
                either the ensemble mean or the ensemble realizations.
                Only read when linear regression is requested.
            predictor_of_mean_flag (String):
                String to specify the input to calculate the calibrated mean.
                Currently the ensemble mean ("mean") and the ensemble
                realizations ("realizations") are supported as the predictors.
                The comparison is case-insensitive.
            estimate_coefficients_from_linear_model_flag (Logical):
                Flag whether coefficients should be estimated from
                the linear regression, or static estimates should be used.
            no_of_realizations (Int):
                Number of realizations, if ensemble realizations are to be
                used as predictors. Default is None.

        Returns:
            initial_guess (List):
                List of coefficients to be used as initial guess.
                Order of coefficients is [c, d, a, b]. When the
                realizations are the predictor there is one b value per
                realization.

        Raises:
            ValueError: If predictor_of_mean_flag is neither "mean" nor
                "realizations".

        """
        predictor = predictor_of_mean_flag.lower()
        if predictor not in ("mean", "realizations"):
            # Previously this fell through every branch and failed with an
            # UnboundLocalError on the return statement.
            msg = ("Unsupported predictor_of_mean_flag: {!r}. "
                   "Expected 'mean' or 'realizations'.".format(
                       predictor_of_mean_flag))
            raise ValueError(msg)

        if not estimate_coefficients_from_linear_model_flag:
            # Static defaults: no regression requested.
            if predictor == "mean":
                return [1, 1, 0, 1]
            return [1, 1, 0] + np.repeat(1, no_of_realizations).tolist()

        if predictor == "mean":
            truth_data = truth.data.flatten()
            forecast_data = forecast_predictor.data.flatten()
            # Keep only the points where both truth and forecast are
            # not NaN.
            combined_not_nan = np.all(
                np.vstack([~np.isnan(truth_data), ~np.isnan(forecast_data)]),
                axis=0)
            if not combined_not_nan.any():
                # Nothing to regress on; propagate NaN coefficients.
                gradient, intercept = np.nan, np.nan
            else:
                gradient, intercept = stats.linregress(
                    forecast_data[combined_not_nan],
                    truth_data[combined_not_nan])[:2]
            return [1, 1, intercept, gradient]

        # The ensemble realizations are the predictor.
        if not self.statsmodels_found:
            # Without statsmodels the multiple regression cannot be fitted,
            # so fall back to the static defaults.
            return [1, 1, 0] + np.repeat(1, no_of_realizations).tolist()

        truth_data = truth.data.flatten()
        forecast_predictor = enforce_coordinate_ordering(
            forecast_predictor, "realization")
        forecast_data = np.array(
            convert_cube_data_to_2d(forecast_predictor, transpose=False))
        # A point is used only if the truth and every row of the forecast
        # data are not NaN at that point.
        combined_not_nan = np.all(
            np.vstack([~np.isnan(truth_data), ~np.isnan(forecast_data)]),
            axis=0)
        val = self.sm.add_constant(forecast_data[:, combined_not_nan].T)
        est = self.sm.OLS(truth_data[combined_not_nan], val).fit()
        # The first fitted parameter belongs to the constant column added
        # above; the remainder are the per-realization gradients.
        intercept = est.params[0]
        gradient = est.params[1:]
        return [1, 1, intercept] + gradient.tolist()
Esempio n. 18
0
    def _apply_params(self, forecast_predictors, forecast_vars,
                      optimised_coeffs, coeff_names, predictor_of_mean_flag):
        """
        Function to apply EMOS coefficients to all required dates.

        Each time slice of the predictor and variance cubes is calibrated
        with the coefficients stored for that date. When no coefficients
        are stored for a date, a warning is issued and copies of the raw
        slices are returned for that date.

        Args:
            forecast_predictors (Iris cube):
                Cube containing the forecast predictor e.g. ensemble mean
                or ensemble realizations.
            forecast_vars (Iris cube.):
                Cube containing the forecast variance e.g. ensemble variance.
            optimised_coeffs (Dictionary):
                Coefficients for all dates, keyed by the date of the
                forecast. For each date without an entry, an array of NaNs
                (one per coefficient name) is inserted into this dictionary
                as a side effect.
            coeff_names (List):
                Coefficient names. The names "gamma", "delta", "a" and
                "beta" are looked up by this method.
            predictor_of_mean_flag (String):
                String to specify the input to calculate the calibrated mean.
                Currently the ensemble mean ("mean") and the ensemble
                realizations ("realizations") are supported as the predictors.

        Returns:
            (tuple) : tuple containing:
                **calibrated_forecast_predictor_all_dates** (CubeList):
                    List of cubes containing the calibrated forecast predictor.
                **calibrated_forecast_var_all_dates** (CubeList):
                    List of cubes containing the calibrated forecast variance.
                **calibrated_forecast_coefficients_all_dates** (CubeList):
                    List of cubes containing the coefficients used for
                    calibration.

        Raises:
            ValueError: If fewer coefficients than coefficient names are
                available for a date.
            ValueError: If predictor_of_mean_flag is neither "mean" nor
                "realizations" and coefficients are available for a date.

        """
        calibrated_forecast_predictor_all_dates = iris.cube.CubeList()
        calibrated_forecast_var_all_dates = iris.cube.CubeList()
        calibrated_forecast_coefficients_all_dates = iris.cube.CubeList()

        for forecast_predictor, forecast_var in zip(
                forecast_predictors.slices_over("time"),
                forecast_vars.slices_over("time")):

            date = iris_time_to_datetime(
                forecast_predictor.coord("time").copy())[0]
            constr = iris.Constraint(time=date)
            forecast_predictor_at_date = forecast_predictor.extract(constr)
            forecast_var_at_date = forecast_var.extract(constr)

            # If the coefficients are not available for the date, use the
            # raw ensemble forecast as the calibrated ensemble forecast.
            if date not in optimised_coeffs:
                msg = ("Ensemble calibration not available "
                       "for forecasts with start time of {}. "
                       "Coefficients not available".format(
                           date.strftime("%Y%m%d%H%M")))
                warnings.warn(msg)
                calibrated_forecast_predictor_at_date = (
                    forecast_predictor_at_date.copy())
                calibrated_forecast_var_at_date = forecast_var_at_date.copy()
                optimised_coeffs[date] = np.full(len(coeff_names), np.nan)
                # Pass the coefficients for this date only, matching the
                # call made when coefficients are available.
                coeff_cubes = self._create_coefficient_cube(
                    forecast_predictor_at_date,
                    optimised_coeffs[date], coeff_names)
            else:
                optimised_coeffs_at_date = optimised_coeffs[date]
                no_of_coeffs = len(optimised_coeffs_at_date)
                no_of_names = len(coeff_names)

                # Assigning coefficients to coefficient names.
                if no_of_coeffs == no_of_names:
                    optimised_coeffs_at_date = dict(
                        zip(coeff_names, optimised_coeffs_at_date))
                elif no_of_coeffs > no_of_names:
                    # The surplus trailing coefficients are additional
                    # beta values; gather them under the "beta" name.
                    excess_beta = (
                        optimised_coeffs_at_date[no_of_names:].tolist())
                    optimised_coeffs_at_date = dict(
                        zip(coeff_names, optimised_coeffs_at_date))
                    optimised_coeffs_at_date["beta"] = np.array(
                        [optimised_coeffs_at_date["beta"]] + excess_beta)
                else:
                    msg = ("Number of coefficient names {} with names {} "
                           "is not equal to the number of "
                           "optimised_coeffs_at_date values {} "
                           "with values {} or the number of "
                           "coefficients is not greater than the "
                           "number of coefficient names. Can not continue "
                           "if the number of coefficient names out number "
                           "the number of coefficients".format(
                               no_of_names, coeff_names,
                               no_of_coeffs,
                               optimised_coeffs_at_date))
                    raise ValueError(msg)

                predictor = predictor_of_mean_flag.lower()
                if predictor == "mean":
                    # Calculate predicted mean = a + b*X, where X is the
                    # raw ensemble mean. In this case, b = beta.
                    beta = [
                        optimised_coeffs_at_date["a"],
                        optimised_coeffs_at_date["beta"]
                    ]
                    forecast_predictor_flat = (
                        forecast_predictor_at_date.data.flatten())
                    new_col = np.ones(forecast_predictor_flat.shape)
                    all_data = np.column_stack(
                        (new_col, forecast_predictor_flat))
                    predicted_mean = np.dot(all_data, beta)
                    calibrated_forecast_predictor_at_date = (
                        forecast_predictor_at_date)
                elif predictor == "realizations":
                    # Calculate predicted mean = a + sum(b_i * X_i), where
                    # X_i are the raw ensemble realizations. In this case,
                    # b_i = beta_i^2. np.atleast_1d keeps this working when
                    # only a single beta value is present.
                    beta = np.concatenate([
                        [optimised_coeffs_at_date["a"]],
                        np.atleast_1d(optimised_coeffs_at_date["beta"])**2
                    ])
                    # Order the cube that is actually converted below;
                    # the original reordered the unused loop variable.
                    forecast_predictor_at_date = (
                        enforce_coordinate_ordering(
                            forecast_predictor_at_date, "realization"))
                    forecast_predictor_flat = (
                        convert_cube_data_to_2d(forecast_predictor_at_date))
                    forecast_var_flat = forecast_var_at_date.data.flatten()

                    new_col = np.ones(forecast_var_flat.shape)
                    all_data = np.column_stack(
                        (new_col, forecast_predictor_flat))
                    predicted_mean = np.dot(all_data, beta)
                    # Calculate mean of ensemble realizations, as only the
                    # calibrated ensemble mean will be returned.
                    calibrated_forecast_predictor_at_date = (
                        forecast_predictor_at_date.collapsed(
                            "realization", iris.analysis.MEAN))
                else:
                    # Previously this fell through and failed with a
                    # NameError on the undefined predicted_mean.
                    msg = ("Unsupported predictor_of_mean_flag: {!r}. "
                           "Expected 'mean' or 'realizations'.".format(
                               predictor_of_mean_flag))
                    raise ValueError(msg)

                xlen = len(forecast_predictor_at_date.coord(axis="x").points)
                ylen = len(forecast_predictor_at_date.coord(axis="y").points)
                predicted_mean = np.reshape(predicted_mean, (ylen, xlen))
                calibrated_forecast_predictor_at_date.data = predicted_mean

                # Calculating the predicted variance, based on the
                # raw variance S^2, where predicted variance = c + dS^2,
                # where c = (gamma)^2 and d = (delta)^2
                predicted_var = (optimised_coeffs_at_date["gamma"]**2 +
                                 optimised_coeffs_at_date["delta"]**2 *
                                 forecast_var_at_date.data)

                calibrated_forecast_var_at_date = forecast_var_at_date
                calibrated_forecast_var_at_date.data = predicted_var

                coeff_cubes = self._create_coefficient_cube(
                    calibrated_forecast_predictor_at_date,
                    optimised_coeffs[date], coeff_names)

            calibrated_forecast_predictor_all_dates.append(
                calibrated_forecast_predictor_at_date)
            calibrated_forecast_var_all_dates.append(
                calibrated_forecast_var_at_date)
            calibrated_forecast_coefficients_all_dates.extend(coeff_cubes)

        return (calibrated_forecast_predictor_all_dates,
                calibrated_forecast_var_all_dates,
                calibrated_forecast_coefficients_all_dates)
Esempio n. 19
0
    def _probabilities_to_percentiles(self, forecast_probabilities,
                                      percentiles, bounds_pairing):
        """
        Conversion of probabilities to percentiles through the construction
        of a cumulative distribution function. This is effectively
        constructed by linear interpolation from the probabilities associated
        with each threshold to a set of percentiles.

        Args:
            forecast_probabilities (Iris cube):
                Cube with a threshold coordinate and a
                'relative_to_threshold' attribute of 'above' or 'below'.
            percentiles (Numpy array):
                Array of percentiles, at which the corresponding values will be
                calculated.
            bounds_pairing (Tuple):
                Lower and upper bound to be used as the ends of the
                cumulative distribution function.
        Returns:
            percentile_cube (Iris cube):
                Cube containing values for the required diagnostic e.g.
                air_temperature at the required percentiles.

        Raises:
            NotImplementedError: If the 'relative_to_threshold' attribute
                is neither 'above' nor 'below'.

        Warns:
            UserWarning: If the probabilities used to build the CDF are
                not ascending.

        """
        threshold_coord = forecast_probabilities.coord("threshold")
        threshold_name = threshold_coord.name()
        threshold_unit = threshold_coord.units
        threshold_points = threshold_coord.points

        # Ensure that the threshold dimension is first, so that the
        # conversion to a 2d array produces data in the desired order.
        forecast_probabilities = (enforce_coordinate_ordering(
            forecast_probabilities, threshold_name))
        prob_slices = convert_cube_data_to_2d(forecast_probabilities,
                                              coord=threshold_name)

        # The requirement below for a monotonically changing probability
        # across thresholds can be thwarted by precision errors of order 1E-10,
        # as such, here we round to a precision of 9 decimal places.
        prob_slices = np.around(prob_slices, 9)

        # Invert probabilities for data thresholded above thresholds.
        relation = forecast_probabilities.attributes['relative_to_threshold']
        if relation == 'above':
            probabilities_for_cdf = 1 - prob_slices
        elif relation == 'below':
            probabilities_for_cdf = prob_slices
        else:
            msg = ("Probabilities to percentiles only implemented for "
                   "thresholds above or below a given value. "
                   "The relation to threshold is given as {}".format(relation))
            raise NotImplementedError(msg)

        threshold_points, probabilities_for_cdf = (
            self._add_bounds_to_thresholds_and_probabilities(
                threshold_points, probabilities_for_cdf, bounds_pairing))

        # A non-ascending CDF only triggers a warning here; the
        # interpolation below is still attempted.
        if np.any(np.diff(probabilities_for_cdf) < 0):
            msg = ("The probability values used to construct the "
                   "Cumulative Distribution Function (CDF) "
                   "must be ascending i.e. in order to yield "
                   "a monotonically increasing CDF. "
                   "The probabilities are {}".format(probabilities_for_cdf))
            warnings.warn(msg)

        # Convert percentiles into fractions for the interpolation. The
        # requested percentiles are kept separately, so that the values
        # placed on the output cube are not subject to the rounding of a
        # divide-then-multiply round trip.
        percentile_fractions = np.array([x / 100.0 for x in percentiles],
                                        dtype=np.float32)
        percentiles = np.array(percentiles, dtype=np.float32)

        forecast_at_percentiles = (np.empty(
            (len(percentiles), probabilities_for_cdf.shape[0]),
            dtype=np.float32))
        # One interpolation per row of the 2d probability array.
        for index in range(probabilities_for_cdf.shape[0]):
            forecast_at_percentiles[:, index] = np.interp(
                percentile_fractions, probabilities_for_cdf[index, :],
                threshold_points)

        # Reshape forecast_at_percentiles, so the percentiles dimension is
        # first, and any other dimension coordinates follow.
        forecast_at_percentiles = (restore_non_probabilistic_dimensions(
            forecast_at_percentiles, forecast_probabilities,
            threshold_name, len(percentiles)))

        # The first threshold slice is used as the template for the output
        # cube, once the probability-specific metadata has been removed.
        template_cube = next(iter(
            forecast_probabilities.slices_over(threshold_name)))
        template_cube.rename(template_cube.name().replace(
            "probability_of_", ""))
        template_cube.remove_coord(threshold_name)
        template_cube.attributes.pop('relative_to_threshold')

        percentile_cube = create_cube_with_percentiles(
            percentiles,
            template_cube,
            forecast_at_percentiles,
            custom_name='percentile',
            cube_unit=threshold_unit)
        return percentile_cube
Esempio n. 20
0
    def _probabilities_to_percentiles(self, forecast_probabilities,
                                      percentiles, bounds_pairing):
        """
        Conversion of probabilities to percentiles through the construction
        of a cumulative distribution function. This is effectively
        constructed by linear interpolation from the probabilities associated
        with each threshold to a set of percentiles.

        Parameters
        ----------
        forecast_probabilities : Iris cube
            Cube with a threshold coordinate. The probabilities are always
            inverted (1 - p) before the cumulative distribution function
            is built.
        percentiles : Numpy array
            Array of percentiles, at which the corresponding values will be
            calculated.
        bounds_pairing : Tuple
            Lower and upper bound to be used as the ends of the
            cumulative distribution function.

        Returns
        -------
        percentile_cube : Iris cube
            Cube containing values for the required diagnostic e.g.
            air_temperature at the required percentiles.

        Raises
        ------
        ValueError
            If the probabilities used to build the cumulative distribution
            function are not ascending.

        """
        threshold_coord = forecast_probabilities.coord("threshold")
        threshold_name = threshold_coord.name()
        threshold_points = threshold_coord.points

        # Ensure that the threshold dimension is first, so that the
        # conversion to a 2d array produces data in the desired order.
        forecast_probabilities = (ensure_dimension_is_the_zeroth_dimension(
            forecast_probabilities, threshold_name))
        prob_slices = convert_cube_data_to_2d(forecast_probabilities,
                                              coord=threshold_name)

        # Invert probabilities
        probabilities_for_cdf = 1 - prob_slices

        threshold_points, probabilities_for_cdf = (
            self._add_bounds_to_thresholds_and_probabilities(
                threshold_points, probabilities_for_cdf, bounds_pairing))

        if np.any(np.diff(probabilities_for_cdf) < 0):
            msg = ("The probability values used to construct the "
                   "Cumulative Distribution Function (CDF) "
                   "must be ascending i.e. in order to yield "
                   "a monotonically increasing CDF. "
                   "The probabilities are {}".format(probabilities_for_cdf))
            raise ValueError(msg)

        # Convert percentiles into fractions for the interpolation. The
        # requested percentiles are kept separately: dividing by 100 and
        # multiplying back does not always return the original value
        # (e.g. 7 / 100.0 * 100.0 == 7.000000000000001).
        percentile_fractions = [x / 100.0 for x in percentiles]
        percentiles = [float(x) for x in percentiles]

        forecast_at_percentiles = (np.empty(
            (len(percentiles), probabilities_for_cdf.shape[0])))
        # One interpolation per row of the 2d probability array.
        for index in range(probabilities_for_cdf.shape[0]):
            forecast_at_percentiles[:, index] = np.interp(
                percentile_fractions, probabilities_for_cdf[index, :],
                threshold_points)

        # Reshape forecast_at_percentiles, so the percentiles dimension is
        # first, and any other dimension coordinates follow.
        forecast_at_percentiles = (restore_non_probabilistic_dimensions(
            forecast_at_percentiles, forecast_probabilities,
            threshold_name, len(percentiles)))

        # The first threshold slice, without its threshold coordinate, is
        # used as the template for the output cube.
        template_cube = next(iter(
            forecast_probabilities.slices_over(threshold_name)))
        template_cube.remove_coord(threshold_name)

        percentile_cube = create_cube_with_percentiles(
            percentiles, template_cube, forecast_at_percentiles)
        return percentile_cube