Beispiel #1
0
 def test_inconsistent_mask_along_leading_dim(self):
     """Check that a mask which differs between the slices along the
     leading dimension raises a ValueError when that dimension is
     preserved."""
     # Each slice along the leading dimension carries a different mask.
     first_slice = [[True, False, False, True], [True, False, True, True]]
     second_slice = [[True, False, True, True], [True, False, True, True]]
     third_slice = [[True, False, True, True], [True, False, True, False]]
     inconsistent_mask = np.array([first_slice, second_slice, third_slice])
     masked_input = np.ma.MaskedArray(self.data_array, inconsistent_mask)
     with self.assertRaisesRegex(
             ValueError, "The mask on the input array is not the same"):
         flatten_ignoring_masked_data(
             masked_input, preserve_leading_dimension=True)
Beispiel #2
0
 def test_1D_input(self):
     """Check that a one-dimensional input comes back with the same
     values and a float32 dtype."""
     flat_input = self.data_array.flatten()
     # Snapshot the values before the call so the comparison does not
     # depend on the input object itself.
     reference = flat_input.copy()
     flattened = flatten_ignoring_masked_data(flat_input)
     self.assertArrayAlmostEqual(flattened, reference)
     self.assertEqual(flattened.dtype, np.float32)
Beispiel #3
0
 def test_basic_not_masked_preserver_leading_dim(self):
     """Check the result for a basic unmasked array when the leading
     dimension is preserved."""
     flattened = flatten_ignoring_masked_data(
         self.data_array, preserve_leading_dimension=True)
     expected = self.expected_result_preserve_leading_dim
     self.assertArrayAlmostEqual(flattened, expected)
     self.assertEqual(flattened.dtype, np.float32)
Beispiel #4
0
 def test_basic_masked(self):
     """Check the flattened result for a basic masked array."""
     expected_values = [1.0, 5.0, 9.0, 13.0, 17.0, 21.0]
     expected = np.array(expected_values, dtype=np.float32)
     masked_input = np.ma.MaskedArray(self.data_array, self.mask)
     flattened = flatten_ignoring_masked_data(masked_input)
     self.assertArrayAlmostEqual(flattened, expected)
     self.assertEqual(flattened.dtype, np.float32)
Beispiel #5
0
 def test_all_masked(self):
     """Test that an empty float32 array is returned when every point
     of the input is masked."""
     # Build a genuine boolean mask. The previous ``np.ones(...) * True``
     # produced a float64 array of ones that only worked as a mask
     # because MaskedArray casts it to bool.
     mask = np.ones((3, 2, 4), dtype=bool)
     masked_data_array = np.ma.MaskedArray(self.data_array, mask)
     expected_result = np.array([], dtype=np.float32)
     result = flatten_ignoring_masked_data(masked_data_array)
     self.assertArrayAlmostEqual(result, expected_result)
     self.assertEqual(result.dtype, np.float32)
Beispiel #6
0
 def test_4D_input_not_masked_preserve_leading_dim(self):
     """Check the result for an unmasked 4D array when the leading
     dimension is preserved.

     The result should be the same as for the corresponding 3D array."""
     four_dim_input = self.data_array.reshape((3, 2, 2, 2))
     flattened = flatten_ignoring_masked_data(
         four_dim_input, preserve_leading_dimension=True)
     expected = self.expected_result_preserve_leading_dim
     self.assertArrayAlmostEqual(flattened, expected)
     self.assertEqual(flattened.dtype, np.float32)
Beispiel #7
0
    def test_basic_masked_preserver_leading_dim(self):
        """Check the result for a basic masked array when the leading
        dimension is preserved."""
        expected_rows = [[1.0, 5.0], [9.0, 13.0], [17.0, 21.0]]
        expected = np.array(expected_rows, dtype=np.float32)
        masked_input = np.ma.MaskedArray(self.data_array, self.mask)
        flattened = flatten_ignoring_masked_data(
            masked_input, preserve_leading_dimension=True)
        self.assertArrayAlmostEqual(flattened, expected)
        self.assertEqual(flattened.dtype, np.float32)
Beispiel #8
0
 def test_basic_not_masked(self):
     """Check the flattened result for a basic unmasked array."""
     flattened = flatten_ignoring_masked_data(self.data_array)
     # The expected values are the integers 0 to 23 as float32.
     expected = np.arange(24, dtype=np.float32)
     self.assertArrayAlmostEqual(flattened, expected)
     self.assertEqual(flattened.dtype, np.float32)
    def compute_initial_guess(
            self, truth, forecast_predictor, predictor,
            estimate_coefficients_from_linear_model_flag,
            no_of_realizations=None):
        """
        Function to compute the initial guess of the gamma, delta, alpha
        and beta components of the EMOS coefficients, either from default
        values or, if requested, from a linear regression of the forecast
        predictor against the truth.

        If the predictor is "mean", then the order of the initial_guess is
        [gamma, delta, alpha, beta]. Otherwise, if the predictor is
        "realizations" then the order of the initial_guess is
        [gamma, delta, alpha, beta0, beta1, beta2], where the number of beta
        variables corresponds to the number of realizations (three in this
        example).

        The default values for the initial guesses are in
        [gamma, delta, alpha, beta] ordering:

        * For the ensemble mean, the default initial guess is [0, 1, 0, 1].

        * For the ensemble realizations, the default initial guess is
          [0, 1, 0] followed by no_of_realizations beta values, each set to
          sqrt(1 / no_of_realizations), so that every realization starts
          with the same value.
          NOTE(review): the previous docstring described this as
          "effectively" 1/no_of_realizations per realization, presumably
          because the betas are squared where they are applied - confirm.

        If linear regression is enabled, the alpha and beta coefficients
        are replaced by the intercept and gradient(s) of the fit:

        * For the ensemble mean, scipy's linregress is used. If either
          flattened array is empty, alpha and beta are set to NaN.

        * For the ensemble realizations, an ordinary least squares fit from
          statsmodels is used if self.statsmodels_found is set. Otherwise
          the default initial guess for realizations is returned.

        Args:
            truth (iris.cube.Cube):
                Cube containing the field, which will be used as truth.
            forecast_predictor (iris.cube.Cube):
                Cube containing the fields to be used as the predictor,
                either the ensemble mean or the ensemble realizations.
            predictor (str):
                String to specify the form of the predictor used to calculate
                the location parameter when estimating the EMOS coefficients.
                Currently the ensemble mean ("mean") and the ensemble
                realizations ("realizations") are supported as the
                predictors. The comparison is case-insensitive.
            estimate_coefficients_from_linear_model_flag (bool):
                Flag whether coefficients should be estimated from
                the linear regression, or static estimates should be used.
            no_of_realizations (int):
                Number of realizations, if ensemble realizations are to be
                used as predictors. Default is None.

        Returns:
            numpy.ndarray:
                Float32 array of coefficients to be used as initial guess.
                Order of coefficients is [gamma, delta, alpha, beta].

        Raises:
            ValueError: If the predictor is neither "mean" nor
                "realizations".

        """
        predictor_name = predictor.lower()
        if predictor_name not in ("mean", "realizations"):
            # Fail with a clear message rather than reaching the return
            # statement with initial_guess never having been assigned.
            msg = ("The predictor {} is not supported when computing the "
                   "initial guess. Supported predictors are 'mean' and "
                   "'realizations'.".format(predictor))
            raise ValueError(msg)

        use_linear_model = estimate_coefficients_from_linear_model_flag

        if predictor_name == "mean":
            if use_linear_model:
                truth_flattened = flatten_ignoring_masked_data(truth.data)
                forecast_predictor_flattened = flatten_ignoring_masked_data(
                    forecast_predictor.data)
                if (truth_flattened.size == 0) or (
                        forecast_predictor_flattened.size == 0):
                    # No data points are available to regress on.
                    gradient, intercept = np.nan, np.nan
                else:
                    gradient, intercept, _, _, _ = stats.linregress(
                        forecast_predictor_flattened, truth_flattened)
                initial_guess = [0, 1, intercept, gradient]
            else:
                initial_guess = [0, 1, 0, 1]
        elif use_linear_model and self.statsmodels_found:
            truth_flattened = flatten_ignoring_masked_data(truth.data)
            # The realization coordinate must lead so that preserving the
            # leading dimension keeps one row per realization.
            enforce_coordinate_ordering(forecast_predictor, "realization")
            forecast_predictor_flattened = flatten_ignoring_masked_data(
                forecast_predictor.data, preserve_leading_dimension=True)
            # Transpose so each realization is a column, then prepend the
            # constant column used for the intercept.
            val = self.sm.add_constant(forecast_predictor_flattened.T)
            est = self.sm.OLS(truth_flattened, val).fit()
            intercept = est.params[0]
            gradient = est.params[1:]
            initial_guess = [0, 1, intercept] + gradient.tolist()
        else:
            # Default guess for realizations: used when no regression was
            # requested, or when statsmodels is not available.
            initial_guess = [0, 1, 0] + np.repeat(
                np.sqrt(1. / no_of_realizations), no_of_realizations).tolist()
        return np.array(initial_guess, dtype=np.float32)
    def process(
            self, initial_guess, forecast_predictor, truth, forecast_var,
            predictor, distribution):
        """
        Function to pass a given function to the scipy minimize
        function to estimate optimised values for the coefficients.

        Further information is available in the :mod:`module level docstring \
<improver.calibration.ensemble_calibration>`.

        Args:
            initial_guess (list):
                List of optimised coefficients.
                Order of coefficients is [gamma, delta, alpha, beta].
            forecast_predictor (iris.cube.Cube):
                Cube containing the fields to be used as the predictor,
                either the ensemble mean or the ensemble realizations.
            truth (iris.cube.Cube):
                Cube containing the field, which will be used as truth.
            forecast_var (iris.cube.Cube):
                Cube containing the field containing the ensemble variance.
            predictor (str):
                String to specify the form of the predictor used to calculate
                the location parameter when estimating the EMOS coefficients.
                Currently the ensemble mean ("mean") and the ensemble
                realizations ("realizations") are supported as the predictors.
            distribution (str):
                String used to access the appropriate function for use in the
                minimisation within self.minimisation_dict.

        Returns:
            list of float:
                List of optimised coefficients.
                Order of coefficients is [gamma, delta, alpha, beta].

        Raises:
            KeyError: If the distribution is not supported.

        Warns:
            Warning: If the minimisation did not converge.

        """
        def calculate_percentage_change_in_last_iteration(allvecs):
            """
            Calculate the percentage change that has occurred within
            the last iteration of the minimisation. If the percentage change
            between the last iteration and the last-but-one iteration exceeds
            the threshold, a warning message is printed.

            Args:
                allvecs (list):
                    List of numpy arrays containing the optimised coefficients,
                    after each iteration.

            Warns:
                Warning: If a satisfactory minimisation has not been achieved.
            """
            last_iteration_percentage_change = np.absolute(
                (allvecs[-1] - allvecs[-2]) / allvecs[-2])*100
            if (np.any(last_iteration_percentage_change >
                       self.TOLERATED_PERCENTAGE_CHANGE)):
                np.set_printoptions(suppress=True)
                msg = ("The final iteration resulted in a percentage change "
                       "that is greater than the accepted threshold of 5% "
                       "i.e. {}. "
                       "\nA satisfactory minimisation has not been achieved. "
                       "\nLast iteration: {}, "
                       "\nLast-but-one iteration: {}"
                       "\nAbsolute difference: {}\n").format(
                           last_iteration_percentage_change, allvecs[-1],
                           allvecs[-2], np.absolute(allvecs[-2]-allvecs[-1]))
                warnings.warn(msg)

        try:
            minimisation_function = self.minimisation_dict[distribution]
        except KeyError as err:
            msg = ("Distribution requested {} is not supported in {}"
                   "Error message is {}".format(
                       distribution, self.minimisation_dict, err))
            raise KeyError(msg)

        # Ensure predictor is valid.
        check_predictor(predictor)

        # Flatten the data arrays and remove any missing data.
        truth_data = flatten_ignoring_masked_data(truth.data)
        forecast_var_data = flatten_ignoring_masked_data(forecast_var.data)
        if predictor.lower() == "mean":
            forecast_predictor_data = flatten_ignoring_masked_data(
                forecast_predictor.data)
        elif predictor.lower() == "realizations":
            enforce_coordinate_ordering(forecast_predictor, "realization")
            # Need to transpose this array so there are columns for each
            # ensemble member rather than rows.
            forecast_predictor_data = flatten_ignoring_masked_data(
                forecast_predictor.data, preserve_leading_dimension=True).T

        # Increased precision is needed for stable coefficient calculation.
        # The resulting coefficients are cast to float32 prior to output.
        initial_guess = np.array(initial_guess, dtype=np.float64)
        forecast_predictor_data = forecast_predictor_data.astype(np.float64)
        forecast_var_data = forecast_var_data.astype(np.float64)
        truth_data = truth_data.astype(np.float64)
        sqrt_pi = np.sqrt(np.pi).astype(np.float64)
        optimised_coeffs = minimize(
            minimisation_function, initial_guess,
            args=(forecast_predictor_data, truth_data,
                  forecast_var_data, sqrt_pi, predictor),
            method="Nelder-Mead", tol=self.tolerance,
            options={"maxiter": self.max_iterations, "return_all": True})

        if not optimised_coeffs.success:
            msg = ("Minimisation did not result in convergence after "
                   "{} iterations. \n{}".format(
                       self.max_iterations, optimised_coeffs.message))
            warnings.warn(msg)
        calculate_percentage_change_in_last_iteration(optimised_coeffs.allvecs)
        return optimised_coeffs.x.astype(np.float32)