def test_alt_implemenation_consistency(self):
    """The optimized implementation must agree with the vectorized
    reference, both on clean data and on data riddled with NaNs."""
    obs = np.random.RandomState(123).randn(100)
    forecasts = np.random.RandomState(456).randn(100, 100)
    thresholds = np.linspace(-2, 2, num=10)

    def check_agreement():
        # Both code paths should match to within floating-point noise.
        fast = threshold_brier_score(obs, forecasts, thresholds)
        reference = _threshold_brier_score_vectorized(
            obs, forecasts, thresholds)
        assert_allclose(fast, reference, atol=1e-10)

    check_agreement()

    # Sprinkle NaNs into both arrays — random spots plus whole strided
    # rows/columns — and verify the implementations still agree.
    obs[np.random.RandomState(231).rand(100) < 0.2] = np.nan
    forecasts[np.random.RandomState(231).rand(100, 100) < 0.2] = np.nan
    forecasts[:, ::8] = np.nan
    forecasts[::8, :] = np.nan
    check_agreement()
def test_crps_consistency(self):
    """Integrating Brier scores over a dense threshold grid should
    approximate the CRPS (CRPS is the integral of the threshold-wise
    Brier score over all thresholds)."""
    obs = np.random.RandomState(123).rand(100)
    forecasts = np.random.RandomState(456).rand(100, 100)
    thresholds = np.linspace(0, 1, num=10000)
    scores = threshold_brier_score(obs, forecasts, thresholds)
    # Riemann sum over the uniform threshold grid.
    spacing = thresholds[1] - thresholds[0]
    integrated = scores.sum(1) * spacing
    desired = crps_ensemble(obs, forecasts)
    assert_allclose(integrated, desired, atol=1e-4)
def test_xr_threshold_brier_score_dask(a_dask, b_dask):
    """xarray wrapper matches the plain function on dask inputs and
    keeps the result lazy (chunked)."""
    threshold = 0.5
    result = xr_threshold_brier_score(a_dask, b_dask, threshold)
    reference = xr.DataArray(
        threshold_brier_score(a_dask, b_dask, threshold),
        coords=a_dask.coords,
    )
    # Numerical identity between xr_threshold_brier_score and the
    # raw threshold_brier_score.
    assert_identical(result, reference)
    # The xarray wrapper preserves dask chunking ...
    assert result.chunks is not None
    # ... while the raw call materializes the data (no chunks).
    assert reference.chunks is None
def test_errors(self):
    """Invalid inputs raise ValueError with informative messages.

    Uses ``assertRaisesRegex``: the ``assertRaisesRegexp`` spelling was
    a deprecated alias, removed in Python 3.12.
    """
    # thresholds with more than one dimension are rejected
    with self.assertRaisesRegex(ValueError, 'must be scalar or 1-dim'):
        threshold_brier_score(1, [0, 1, 2], [[1]])
    # thresholds must be given in ascending order
    with self.assertRaisesRegex(ValueError, 'must be sorted'):
        threshold_brier_score(1, [0, 1, 2], [1, 0.5])
    # observations and forecasts must have compatible shapes
    with self.assertRaisesRegex(ValueError, 'must have matching shapes'):
        threshold_brier_score([1, 2], [0, 1, 2], [0.5])
def test_examples(self):
    """Hand-computed cases: (observations, forecasts, thresholds)
    tuples paired with the expected Brier scores, including NaN
    propagation cases."""
    cases = [
        (0, 0, 0, 0),
        (0, 0, [0], [0]),
        (0, 0, [-1, 0, 1], [0, 0, 0]),
        (0, [-1, 1], [-2, 0, 2], [0, 0.25, 0]),
        ([0, np.nan], [0, 0], [0], [[0], [np.nan]]),
        (np.nan, [-1, 1], [0, 1], [np.nan, np.nan]),
        (0, [-1, 1, np.nan], [-2, 0, 2], [0, 0.25, 0]),
        (0, [0, 0, 0, 1], [0], [0.0625]),
    ]
    for observations, forecasts, thresholds, expected in cases:
        actual = threshold_brier_score(observations, forecasts, thresholds)
        assert_allclose(actual, expected)
def test_threshold_brier_score_dask(o_dask, f_prob_dask, keep_attrs):
    """On dask inputs, xskillscore's threshold_brier_score matches
    properscoring numerically, stays lazy, and honors keep_attrs."""
    threshold = 0.5
    result = threshold_brier_score(
        o_dask, f_prob_dask, threshold, keep_attrs=keep_attrs)
    raw = properscoring.threshold_brier_score(
        o_dask, f_prob_dask, threshold, axis=0)
    reference = xr.DataArray(raw, coords=o_dask.coords).mean()
    # Numerical identity of the xskillscore and properscoring results
    # (attrs copied over so assert_identical only compares values).
    assert_identical(result, reference.assign_attrs(**result.attrs))
    # xskillscore keeps the computation chunked/lazy ...
    assert result.chunks is not None
    # ... whereas the properscoring path computes eagerly.
    assert reference.chunks is None
    # Attribute propagation is controlled by keep_attrs.
    if keep_attrs:
        assert result.attrs == o_dask.attrs
    else:
        assert result.attrs == {}
def brier(self,
          threshold: float,
          mod_col: str = 'modeled',
          obs_col: str = 'observed',
          time_col: str = 'time',
          weights=None):
    """
    Calculate Brier score using the properscoring package.

    See :py:fun:`threshold_brier_score() <threshold_brier_score>`
    in :py:mod:`properscoring`.

    Grouping is not necessary because BRIER returns a value
    per forecast. Grouping would happen when computing BRIERS.
    The Eval object generally wants one observation per modeled
    data point; that is overkill for this function, but we handle
    it in a manner consistent with the rest of Evaluation.

    Args:
        threshold: Threshold at which the Brier score is evaluated,
            passed through to properscoring.
        mod_col: Column name of modelled data.
        obs_col: Column name of observed data.
        time_col: Name of the index level identifying forecast time.
        weights: Currently unused; accepted for signature consistency
            with the other Evaluation metrics.
    Returns:
        BRIER for each ensemble forecast against the observations.
    Raises:
        ValueError: If ``self.data`` is not a ``pandas.DataFrame``
            (xarray input is not implemented).
    """
    # Grouping is not necessary because BRIER
    if isinstance(self.data, pd.DataFrame):
        # A bit hackish: everything that is not the modeled/observed
        # columns becomes the index.
        indices = list(
            set(self.data.columns.tolist()) - set([mod_col, obs_col]))
        data = self.data.set_index(indices)
        modeled = data[mod_col]
        observed = data[obs_col]
        # Pivot members into rows and times into columns, then transpose
        # to the (time, member) layout properscoring expects.
        # Fix: honor the time_col argument instead of hardcoding 'time'.
        modeled = modeled.unstack(level=time_col).to_numpy().transpose()
        # Collapse the duplicated per-member observations to one value
        # per time. Series.mean(axis=0, level=...) was removed in
        # pandas 2.0; groupby(level=...).mean() is the equivalent.
        observed = observed.groupby(level=time_col).mean().to_numpy()
        result = ps.threshold_brier_score(
            observed, modeled, threshold=threshold)
        return result
    else:
        raise ValueError(
            'Xarray not currently implemented for Brier score.')
def test_threshold_brier_score_api_and_inputs(
    o, f_prob, keep_attrs, input_type, chunk_bool
):
    """Test that threshold_brier_score keeps attributes, chunking,
    input types and equals properscoring.threshold_brier_score."""
    o, f_prob = modify_inputs(o, f_prob, input_type, chunk_bool)
    threshold = 0.5
    result = threshold_brier_score(o, f_prob, threshold, keep_attrs=keep_attrs)
    if input_type == "DataArray":
        # properscoring allows only DataArrays, so the numerical
        # comparison is restricted to that input type.
        raw = properscoring.threshold_brier_score(o, f_prob, threshold, axis=0)
        reference = xr.DataArray(raw, coords=o.coords).mean()
        reference["threshold"] = threshold
        if keep_attrs:
            reference = reference.assign_attrs(**result.attrs)
        # numerical identity of xskillscore and properscoring results
        assert_identical(result, reference)
    # chunking is preserved (or absent) according to chunk_bool
    assert_chunk(result, chunk_bool)
    # attribute propagation follows keep_attrs
    assert_keep_attrs(result, o, keep_attrs)
    # output type matches input type
    assign_type_input_output(result, o)