def test_crps_empirical(num_samples, event_shape):
    truth = torch.randn(event_shape)
    pred = truth + 0.1 * torch.randn((num_samples,) + event_shape)

    actual = crps_empirical(pred, truth)
    assert actual.shape == truth.shape

    # Compare against the direct empirical estimator E|X - y| - 0.5 * E|X - X'|.
    expected = (pred - truth).abs().mean(0) - 0.5 * (
        pred - pred.unsqueeze(1)
    ).abs().mean([0, 1])
    assert_close(actual, expected)
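# Hedged sketch (not part of the original test module): a standalone smoke
# check of crps_empirical on small illustrative shapes. The function name and
# the pyro.ops.stats import path below are assumptions.
def example_crps_empirical_smoke_check():
    import torch

    from pyro.ops.stats import crps_empirical

    torch.manual_seed(0)
    truth = torch.randn(3, 2)
    pred = truth + 0.1 * torch.randn(100, 3, 2)  # 100 forecast samples

    crps = crps_empirical(pred, truth)
    assert crps.shape == truth.shape  # one CRPS value per data element
    assert (crps >= 0).all()  # CRPS is nonnegative for any sample set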
def eval_crps(pred, truth):
    """
    Evaluate continuous ranked probability score, averaged over all data
    elements.

    **References**

    [1] Tilmann Gneiting, Adrian E. Raftery (2007)
        `Strictly Proper Scoring Rules, Prediction, and Estimation`
        https://www.stat.washington.edu/raftery/Research/PDF/Gneiting2007jasa.pdf

    :param torch.Tensor pred: Forecasted samples.
    :param torch.Tensor truth: Ground truth.
    :rtype: float
    """
    return crps_empirical(pred, truth).mean().cpu().item()
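# Hedged usage sketch for eval_crps above; shapes and values are illustrative.
def example_eval_crps_usage():
    import torch

    pred = torch.randn(100, 5, 7)  # (num_samples,) + event_shape
    truth = torch.randn(5, 7)
    score = eval_crps(pred, truth)  # single float; lower is better
    assert isinstance(score, float)
    return score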
def eval_one(args, result):
    logging.debug('evaluating')
    pred = result['forecast']
    truth = result['truth']
    t, n, n = truth.shape  # truth has shape (T, N, N)
    assert pred.shape == (args.num_samples, t, n, n)

    # Evaluate point estimate using Mean Absolute Error.
    mae = float((pred.median(dim=0).values - truth).abs().mean())

    # Evaluate uncertainty using the Continuous Ranked Probability Score.
    crps = float(crps_empirical(pred, truth).mean())

    result = {'MAE': mae, 'CRPS': crps, 'ELBO': result['log_prob']}
    logging.info(result)
    return result
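# Hedged usage sketch for eval_one above: args and result are stand-ins,
# assuming truth has shape (T, N, N) and the forecast adds a leading sample
# dimension. The log_prob value is a placeholder.
def example_eval_one_usage():
    import argparse

    import torch

    args = argparse.Namespace(num_samples=100)
    truth = torch.randn(10, 4, 4)
    result = {
        'forecast': truth + 0.1 * torch.randn(100, 10, 4, 4),
        'truth': truth,
        'log_prob': -123.4,  # placeholder
    }
    return eval_one(args, result)  # {'MAE': ..., 'CRPS': ..., 'ELBO': ...}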
def eval_crps(pred, truth):
    """
    Like pyro.contrib.forecast.eval_crps but does not average over batch
    dimensions.
    """
    logger.info("Evaluating CRPS...")
    return crps_empirical(pred, truth).reshape(truth.shape[:-2] + (-1,)).mean(-1)
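# Hedged usage sketch for the batched eval_crps variant above: with truth of
# shape (batch, T, N), it returns one CRPS value per batch element instead of
# a single scalar. Shapes are illustrative.
def example_batched_eval_crps_usage():
    import torch

    pred = torch.randn(50, 8, 12, 6)  # (num_samples, batch, T, N)
    truth = torch.randn(8, 12, 6)
    per_series = eval_crps(pred, truth)
    assert per_series.shape == (8,)  # one CRPS per batch element
    return per_series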