def test_emcam_lowest_bin_non_monotonic(reliability_table_slice):
    """Check the output when the lowest probability bin is non-monotonic.

    The input table has an observation count of 1000 in the first bin,
    followed by lower counts in the remaining bins. The expected result has
    four bins, the lowest of which spans the bounds 0.0 to 0.4.
    """
    # Overwrite the fixture's data with the non-monotonic case under test.
    reliability_table_slice.data = np.array([
        [1000, 0, 250, 500, 750],  # Observation count
        [0, 250, 500, 750, 1000],  # Sum of forecast probability
        [1000, 1000, 1000, 1000, 1000],  # Forecast count
    ])

    want_data = np.array([
        [1000, 500, 500, 750],  # Observation count
        [250, 500, 750, 1000],  # Sum of forecast probability
        [2000, 1000, 1000, 1000],  # Forecast count
    ])
    want_points = np.array([0.2, 0.5, 0.7, 0.9], dtype=np.float32)
    want_bounds = np.array(
        [[0.0, 0.4], [0.4, 0.6], [0.6, 0.8], [0.8, 1.0]],
        dtype=np.float32,
    )

    # The plugin is handed a copy of the fixture rather than the fixture.
    table_copy = reliability_table_slice.copy()
    result = Plugin()._enforce_min_count_and_montonicity(table_copy)

    assert_array_equal(result.data, want_data)
    bin_coord = result.coord("probability_bin")
    assert_allclose(bin_coord.points, want_points)
    assert_allclose(bin_coord.bounds, want_bounds)
def test_emcam_combine_undersampled_bins_non_monotonic(
        reliability_table_slice):
    """Check the output for an under-sampled bin in a non-monotonic table.

    The input has one bin with a forecast count of 100 alongside bins with
    counts of 500 and 1000, and its observation counts do not increase
    monotonically. The expected result has three bins with bounds
    0.0-0.4, 0.4-0.8 and 0.8-1.0.
    """
    # Overwrite the fixture's data with the case under test.
    reliability_table_slice.data = np.array(
        [
            [750, 250, 50, 375, 1000],  # Observation count
            [750, 250, 50, 375, 1000],  # Sum of forecast probability
            [1000, 1000, 100, 500, 1000],  # Forecast count
        ],
        dtype=np.float32,
    )

    want_data = np.array([
        [1000, 425, 1000],  # Observation count
        [1000, 425, 1000],  # Sum of forecast probability
        [2000, 600, 1000],  # Forecast count
    ])
    want_points = np.array([0.2, 0.6, 0.9], dtype=np.float32)
    want_bounds = np.array(
        [[0.0, 0.4], [0.4, 0.8], [0.8, 1.0]],
        dtype=np.float32,
    )

    # The plugin is handed a copy of the fixture rather than the fixture.
    table_copy = reliability_table_slice.copy()
    result = Plugin()._enforce_min_count_and_montonicity(table_copy)

    assert_array_equal(result.data, want_data)
    bin_coord = result.coord("probability_bin")
    assert_allclose(bin_coord.points, want_points)
    assert_allclose(bin_coord.bounds, want_bounds)
 def setUp(self):
     """Create the default monotonic inputs and the plugin under test.

     Runs the parent class set-up first, then stores a monotonically
     increasing observation count array, a forecast probability sum array
     with the same values, and a Plugin instance on the test case.
     """
     super().setUp()
     monotonic_values = [0, 250, 500, 750, 1000]
     # Two separate arrays are built so the attributes never share memory.
     self.obs_count = np.array(monotonic_values, dtype=np.float32)
     self.forecast_probability_sum = np.array(
         monotonic_values, dtype=np.float32)
     self.plugin = Plugin()
Example #4
0
def process(
    reliability_table: cli.inputcube,
    *,
    minimum_forecast_count: int = 200,
):
    """
    Manipulate a reliability table so that as many bins as possible have a
    sufficient sample count, by combining bins with low sample counts.
    A monotonic observation frequency is also enforced.

    Args:
        reliability_table (iris.cube.Cube):
            The reliability table to manipulate, after its spatial
            dimensions have been aggregated.
        minimum_forecast_count (int):
            The minimum number of forecast counts a forecast probability
            bin must contain for it to be used in calibration.
            The default value of 200 is that used in Flowerdew 2014.

    Returns:
        iris.cube.CubeList:
            The manipulated reliability table, with sufficient sample
            counts in each probability bin and a monotonic observation
            frequency.
            The cubelist contains a separate cube for each threshold in
            the original reliability table.
    """
    # The plugin is imported inside the function body rather than at module
    # level.
    from improver.calibration.reliability_calibration import (
        ManipulateReliabilityTable as _ManipulateReliabilityTable,
    )

    # Build the plugin and apply it to the table in a single expression.
    return _ManipulateReliabilityTable(
        minimum_forecast_count=minimum_forecast_count
    )(reliability_table)
class Test__combine_undersampled_bins(Test_setup):
    """Test the _combine_undersampled_bins method.

    Each test passes observation counts, forecast probability sums, forecast
    counts and a probability bin coordinate to the method. It then checks
    the first three returned items against the expected counts and the
    fourth returned item against the expected bin coordinate.

    NOTE(review): self.probability_bin_coord is not assigned in this class;
    presumably it is provided by the Test_setup base class - confirm.
    """

    def setUp(self):
        """Set up monotonic bins as default and plugin for testing."""
        super().setUp()
        self.obs_count = np.array([0, 250, 500, 750, 1000], dtype=np.float32)
        self.forecast_probability_sum = np.array([0, 250, 500, 750, 1000],
                                                 dtype=np.float32)
        self.plugin = Plugin()

    def _assert_bin_coord(self, bin_coord, expected_points, expected_bounds):
        """Assert that a bin coordinate has the expected points and bounds.

        Args:
            bin_coord:
                The coordinate returned by the plugin; its points and
                bounds attributes are compared.
            expected_points (list):
                The expected coordinate points.
            expected_bounds (list):
                The expected coordinate bounds, one [lower, upper] pair
                per bin.
        """
        # numpy's signature is assert_allclose(actual, desired): the relative
        # tolerance is scaled by the desired values and failure reports label
        # the arrays accordingly, so the plugin output must come first.
        assert_allclose(
            bin_coord.points, np.array(expected_points, dtype=np.float32))
        assert_allclose(
            bin_coord.bounds, np.array(expected_bounds, dtype=np.float32))

    def test_no_undersampled_bins(self):
        """Test no bins are combined when no bins are under-sampled."""
        forecast_count = np.array([1000, 1000, 1000, 1000, 1000],
                                  dtype=np.float32)

        result = self.plugin._combine_undersampled_bins(
            self.obs_count,
            self.forecast_probability_sum,
            forecast_count,
            self.probability_bin_coord,
        )

        assert_array_equal(
            result[:3],
            [self.obs_count, self.forecast_probability_sum, forecast_count])
        # The coordinate is expected to come back unchanged.
        self.assertEqual(result[3], self.probability_bin_coord)

    def test_poorly_sampled_bins(self):
        """Test when all bins are poorly sampled and the minimum forecast count
        cannot be reached."""
        obs_count = np.array([0, 2, 5, 8, 10], dtype=np.float32)
        forecast_probability_sum = np.array([0, 2, 5, 8, 10], dtype=np.float32)
        forecast_count = np.array([10, 10, 10, 10, 10], dtype=np.float32)

        # All five bins are expected to collapse into a single bin.
        expected = np.array([
            [25],  # Observation count
            [25],  # Sum of forecast probability
            [50],  # Forecast count
        ])

        result = self.plugin._combine_undersampled_bins(
            obs_count,
            forecast_probability_sum,
            forecast_count,
            self.probability_bin_coord,
        )

        assert_array_equal(result[:3], expected)
        self._assert_bin_coord(
            result[3],
            [0.5],
            [[0.0, 1.0]],
        )

    def test_one_undersampled_bin_at_top(self):
        """Test when the highest probability bin is under-sampled."""
        obs_count = np.array([0, 250, 500, 750, 100], dtype=np.float32)
        forecast_probability_sum = np.array([0, 250, 500, 750, 100],
                                            dtype=np.float32)
        forecast_count = np.array([1000, 1000, 1000, 1000, 100],
                                  dtype=np.float32)

        expected = np.array([
            [0, 250, 500, 850],  # Observation count
            [0, 250, 500, 850],  # Sum of forecast probability
            [1000, 1000, 1000, 1100],  # Forecast count
        ])
        result = self.plugin._combine_undersampled_bins(
            obs_count,
            forecast_probability_sum,
            forecast_count,
            self.probability_bin_coord,
        )

        assert_array_equal(result[:3], expected)
        self._assert_bin_coord(
            result[3],
            [0.1, 0.3, 0.5, 0.8],
            [[0.0, 0.2], [0.2, 0.4], [0.4, 0.6], [0.6, 1.0]],
        )

    def test_one_undersampled_bin_at_bottom(self):
        """Test when the lowest probability bin is under-sampled."""
        forecast_count = np.array([100, 1000, 1000, 1000, 1000],
                                  dtype=np.float32)

        expected = np.array([
            [250, 500, 750, 1000],  # Observation count
            [250, 500, 750, 1000],  # Sum of forecast probability
            [1100, 1000, 1000, 1000],  # Forecast count
        ])
        result = self.plugin._combine_undersampled_bins(
            self.obs_count,
            self.forecast_probability_sum,
            forecast_count,
            self.probability_bin_coord,
        )

        assert_array_equal(result[:3], expected)
        self._assert_bin_coord(
            result[3],
            [0.2, 0.5, 0.7, 0.9],
            [[0.0, 0.4], [0.4, 0.6], [0.6, 0.8], [0.8, 1.0]],
        )

    def test_one_undersampled_bin_lower_neighbour(self):
        """Test for one under-sampled bin that is combined with its lower
        neighbour."""
        obs_count = np.array([0, 250, 50, 1500, 1000], dtype=np.float32)
        forecast_probability_sum = np.array([0, 250, 50, 1500, 1000],
                                            dtype=np.float32)
        forecast_count = np.array([1000, 1000, 100, 2000, 1000],
                                  dtype=np.float32)

        expected = np.array([
            [0, 300, 1500, 1000],  # Observation count
            [0, 300, 1500, 1000],  # Sum of forecast probability
            [1000, 1100, 2000, 1000],  # Forecast count
        ])
        result = self.plugin._combine_undersampled_bins(
            obs_count,
            forecast_probability_sum,
            forecast_count,
            self.probability_bin_coord,
        )

        assert_array_equal(result[:3], expected)
        self._assert_bin_coord(
            result[3],
            [0.1, 0.4, 0.7, 0.9],
            [[0.0, 0.2], [0.2, 0.6], [0.6, 0.8], [0.8, 1.0]],
        )

    def test_one_undersampled_bin_upper_neighbour(self):
        """Test for one under-sampled bin that is combined with its upper
        neighbour."""
        obs_count = np.array([0, 500, 50, 750, 1000], dtype=np.float32)
        forecast_probability_sum = np.array([0, 500, 50, 750, 1000],
                                            dtype=np.float32)
        forecast_count = np.array([1000, 2000, 100, 1000, 1000],
                                  dtype=np.float32)

        expected = np.array([
            [0, 500, 800, 1000],  # Observation count
            [0, 500, 800, 1000],  # Sum of forecast probability
            [1000, 2000, 1100, 1000],  # Forecast count
        ])
        result = self.plugin._combine_undersampled_bins(
            obs_count,
            forecast_probability_sum,
            forecast_count,
            self.probability_bin_coord,
        )

        assert_array_equal(result[:3], expected)
        self._assert_bin_coord(
            result[3],
            [0.1, 0.3, 0.6, 0.9],
            [[0.0, 0.2], [0.2, 0.4], [0.4, 0.8], [0.8, 1.0]],
        )

    def test_two_undersampled_bins(self):
        """Test when two bins are under-sampled."""
        obs_count = np.array([0, 12, 250, 75, 250], dtype=np.float32)
        forecast_probability_sum = np.array([0, 12, 250, 75, 250],
                                            dtype=np.float32)
        forecast_count = np.array([1000, 50, 500, 100, 250], dtype=np.float32)

        expected = np.array([
            [0, 262, 325],  # Observation count
            [0, 262, 325],  # Sum of forecast probability
            [1000, 550, 350],  # Forecast count
        ])
        result = self.plugin._combine_undersampled_bins(
            obs_count,
            forecast_probability_sum,
            forecast_count,
            self.probability_bin_coord,
        )

        assert_array_equal(result[:3], expected)
        self._assert_bin_coord(
            result[3],
            [0.1, 0.4, 0.8],
            [[0.0, 0.2], [0.2, 0.6], [0.6, 1.0]],
        )

    def test_two_equal_undersampled_bins(self):
        """Test when two bins are under-sampled and the under-sampled bins have
        an equal forecast count."""
        obs_count = np.array([0, 25, 250, 75, 250], dtype=np.float32)
        forecast_probability_sum = np.array([0, 25, 250, 75, 250],
                                            dtype=np.float32)
        forecast_count = np.array([1000, 100, 500, 100, 250], dtype=np.float32)

        expected = np.array([
            [0, 275, 325],  # Observation count
            [0, 275, 325],  # Sum of forecast probability
            [1000, 600, 350],  # Forecast count
        ])

        result = self.plugin._combine_undersampled_bins(
            obs_count,
            forecast_probability_sum,
            forecast_count,
            self.probability_bin_coord,
        )

        assert_array_equal(result[:3], expected)
        self._assert_bin_coord(
            result[3],
            [0.1, 0.4, 0.8],
            [[0.0, 0.2], [0.2, 0.6], [0.6, 1.0]],
        )

    def test_three_equal_undersampled_bin_neighbours(self):
        """Test when three neighbouring bins are under-sampled."""
        obs_count = np.array([0, 25, 50, 75, 250], dtype=np.float32)
        forecast_probability_sum = np.array([0, 25, 50, 75, 250],
                                            dtype=np.float32)
        forecast_count = np.array([1000, 100, 100, 100, 250], dtype=np.float32)

        expected = np.array([
            [0, 150, 250],  # Observation count
            [0, 150, 250],  # Sum of forecast probability
            [1000, 300, 250],  # Forecast count
        ])

        result = self.plugin._combine_undersampled_bins(
            obs_count,
            forecast_probability_sum,
            forecast_count,
            self.probability_bin_coord,
        )

        assert_array_equal(result[:3], expected)
        self._assert_bin_coord(
            result[3],
            [0.1, 0.5, 0.9],
            [[0.0, 0.2], [0.2, 0.8], [0.8, 1.0]],
        )