def crps_for_quantiles(probabilistic_forecasts,
                       measurements,
                       quantiles=np.linspace(0.1, 0.9, 9)):
    """ Computes the CRPS score with quantile representation.

        This variant is the variant proposed in Hersbach H. Decomposition of the Continuous Ranked Probability Score for
        Ensemble Prediction Systems. Weather Forecast. 2000;15(5):559-570.

        Parameters
        ----------
            probabilistic_forecasts: array_like
               Either list of "M" scipy.stats.rv_continuous distributions
               https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.html
               OR
               2D-numpy array with quantile forecasts with dimensionality M x Q,
               where "Q" is number of quantiles.
            measurements: array_like
               List or numpy array with "M" measurements / observations.
            quantiles: array_like
               List of "Q" values of the quantiles to be evaluated.

        Returns
        -------
            mean_crps: float
                The mean CRPS over all probabilistic_forecast - measurement pairs.
            single_crps: array, shape (M,)
                CRPS value for each probabilistic_forecast - measurement pair.
    """

    # convert to quantile representation if necessary
    if isinstance(probabilistic_forecasts[0],
                  scipy.stats._distn_infrastructure.rv_frozen):
        quantile_forecasts = probdists_2_quantiles.probdists_2_quantiles(
            probabilistic_forecasts, quantiles)
    else:
        quantile_forecasts = np.array(probabilistic_forecasts)

    # column vector of observations, shape (M, 1)
    measurements = np.atleast_2d(measurements).T

    # alpha_mat[m, i]: portion of quantile interval i lying below observation m
    # (clipped to be non-negative and to not exceed the distance to the observation)
    alpha_mat = np.diff(np.hstack([quantile_forecasts, measurements]))
    alpha_mat = np.maximum(0, alpha_mat)
    alpha_mat = np.minimum(
        alpha_mat,
        np.maximum(
            0,
            np.repeat(measurements, quantile_forecasts.shape[1], axis=1) -
            quantile_forecasts))

    # beta_mat[m, i]: portion of quantile interval i lying above observation m
    beta_mat = np.diff(np.hstack([measurements, quantile_forecasts]))
    beta_mat = np.maximum(0, beta_mat)
    beta_mat = np.minimum(
        beta_mat,
        np.maximum(
            0, quantile_forecasts -
            np.repeat(measurements, quantile_forecasts.shape[1], axis=1)))

    # CRPS_m = sum_i alpha_i * p_i^2 + beta_i * (1 - p_i)^2  (Hersbach 2000)
    single_crps = np.matmul(alpha_mat, np.power(quantiles, 2)) + np.matmul(
        beta_mat, np.power(quantiles - 1, 2))

    return np.mean(single_crps), single_crps
    def test_asymptotical_correctness(self):
        """Compares the CRIGN variants using quantiles and integration.

        Fits a homoscedastic probabilistic model on a small data slice and
        checks that the quantile-based CRIGN agrees with the integration-based
        CRIGN within 5% relative tolerance.
        """

        # small slice keeps the test fast; file path is relative to repo root
        dfbig = pd.read_csv(
            'probabilistic_regression_tools/scores/tests/wf3.csv')
        df = dfbig.iloc[0:100, :]
        X = df.drop(['Time', 'ForecastingTime', 'PowerGeneration'], axis=1)
        y = df['PowerGeneration']

        mdl = linear_model.LinearRegression()
        mdl.fit(X, y)

        # wrap the point forecaster into a probabilistic model
        prob_mdl = Homoscedastic_Mdl.Homoscedastic_Mdl(mdl)
        prob_mdl.fit(X, y)

        ypred = prob_mdl.predict(X)
        quantile_vals = np.linspace(0.01, 0.99, 99)

        quantile_forecasts = probdists_2_quantiles.probdists_2_quantiles(
            ypred, quantiles=quantile_vals)

        crignv1, _ = crign.crign(ypred, y)
        # BUGFIX: Series.as_matrix() was removed in pandas 0.25; use to_numpy()
        crignv2, _ = crign_for_quantiles.crign_for_quantiles(
            quantile_forecasts, y.to_numpy(), quantiles=quantile_vals)

        isgood = np.isclose(crignv1, crignv2, rtol=0.05)
        assert_true(isgood,
                    msg="CRIGN variants are asymptotically not the same.")
def quantilescore(probabilistic_forecasts,
                  measurements,
                  quantiles=np.linspace(0.1, 0.9, 9)):
    """ Computes the quantile score (qs).

        Definition of the score is taken from
        Bentzien S, Friederichs P. Decomposition and graphical portrayal of the quantile score.
        Q J R Meteorol Soc. 2014;140(683):1924-1934.

        Parameters
        ----------
            probabilistic_forecasts: list
               List of "M" scipy.stats.rv_continuous distributions
               https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.html
               OR
               2D-numpy array with quantile forecasts with dimensionality M x Q,
               where "Q" is number of quantiles.
            measurements: array_like
               List or numpy array with "M" measurements / observations.
            quantiles: array_like
               List of "Q" values of the quantiles to be evaluated.

        Returns
        -------
            mean_qs: array, shape (Q,)
                The mean qs over all probabilistic_forecast - measurement pairs for each quantile.
            single_qs: array, shape (M,Q)
                qs value for each probabilistic_forecast - measurement pair for each quantile.
    """

    # convert frozen scipy distributions to a quantile matrix when needed
    if isinstance(probabilistic_forecasts[0],
                  scipy.stats._distn_infrastructure.rv_frozen):
        quantile_forecasts = probdists_2_quantiles.probdists_2_quantiles(
            probabilistic_forecasts, quantiles=quantiles)
    else:
        quantile_forecasts = np.array(probabilistic_forecasts)

    quantiles = np.atleast_1d(quantiles)

    # replicate the observations column-wise so they align with the (M, Q)
    # quantile forecast matrix
    meas_column = np.array(measurements).reshape(len(measurements), 1)
    meas_rep = np.tile(meas_column, [1, quantiles.size])

    # pinball loss of the forecast-observation distances gives the per-pair,
    # per-quantile score
    single_qs = _pinball_loss(meas_rep - quantile_forecasts, quantiles)

    mean_qs = np.mean(single_qs, axis=0)
    return mean_qs, single_qs
def crps_with_decomposition(probabilistic_forecasts,
                            measurements,
                            quantiles=np.linspace(0.1, 0.9, 9)):
    """ Computes the CRPS score and its decompositions.

    The decomposition is described in
    Tödter J, Ahrens B. Generalization of the Ignorance Score: Continuous Ranked Version and Its Decomposition.
    Mon Weather Rev. 2012;140(6):2005-2017.

    Parameters
    ----------
        probabilistic_forecasts: array_like
           Either list of "M" scipy.stats.rv_continuous distributions
           https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.html
           OR
           2D-numpy array with quantile forecasts with dimensionality M x Q,
           where "Q" is number of quantiles.
        measurements: array_like
           List or numpy array with "M" measurements / observations.
        quantiles: array_like
           List of "Q" values of the quantiles to be evaluated.
    Returns
    -------
        crps: float
            The mean crps over all probabilistic_forecast - measurement pairs reconstructed from the decomposition.
        crps_rel: float
            Reliability error of the crps.
        crps_res: float
            Resolution of the crps.
        crps_unc: float
            Uncertainty of the crps.
    """
    # convert to quantile representation if necessary
    # (module-qualified call for consistency with the other score functions)
    if isinstance(probabilistic_forecasts[0],
                  scipy.stats._distn_infrastructure.rv_frozen):
        quantile_forecasts = probdists_2_quantiles.probdists_2_quantiles(
            probabilistic_forecasts, quantiles)
    else:
        quantile_forecasts = np.array(probabilistic_forecasts)

    # accept plain lists like the sibling score functions do: the threshold
    # comparison and the fancy indexing below need ndarray semantics
    measurements = np.asarray(measurements)
    quantiles = np.atleast_1d(quantiles)

    # sort in case of quantile crossing
    quantile_forecasts = np.sort(quantile_forecasts, axis=1)

    # all distinct forecast/observation values define the integration grid;
    # the interval widths between them are the weights
    all_data_pre = np.hstack(
        [quantile_forecasts, np.atleast_2d(measurements).T])
    all_data = np.sort(np.unique(all_data_pre))
    intervals = np.diff(all_data)

    bs_all = np.zeros(all_data.size - 1)
    bs_rel = np.zeros(all_data.size - 1)
    bs_res = np.zeros(all_data.size - 1)
    bs_unc = np.zeros(all_data.size - 1)

    for idx in range(intervals.size):
        # define binary problem: did the observation fall at/below the threshold?
        threshold = all_data[idx]
        binary_measurement = measurements <= threshold
        quant2 = threshold <= quantile_forecasts
        quant2[:, -1] = 1  # last quantile in any case if no other is hit

        # first quantile level at/above the threshold is the forecast
        # probability for the binary event
        cs = np.cumsum(quant2, axis=1)
        hit_dim1, hit_dim2 = np.where(cs == 1)
        probs = quantiles[hit_dim2].T

        # compute score for binary problem
        # (brierscore_with_decomposition is defined elsewhere in this package)
        [bs_all[idx], bs_rel[idx], bs_res[idx],
         bs_unc[idx]] = brierscore_with_decomposition(probs,
                                                      binary_measurement)

    # weight each binary Brier score component by its interval width
    crps = np.sum(intervals * bs_all)
    crps_rel = np.sum(intervals * bs_rel)
    crps_res = np.sum(intervals * bs_res)
    crps_unc = np.sum(intervals * bs_unc)

    return crps, crps_rel, crps_res, crps_unc
# Ejemplo n.º 5
def intervalscore(probabilistic_forecasts,
                  measurements,
                  quantiles=np.linspace(0.1, 0.9, 9)):
    """ Computes the intervalscore (is).

        Definition of the score is taken from
        Gneiting T, Raftery AE. Strictly Proper Scoring Rules, Prediction, and Estimation.
        J Am Stat Assoc. 2007;102(477):359-378.

        Parameters
        ----------
            probabilistic_forecasts: list
               List of "M" scipy.stats.rv_continuous distributions
               https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.html
               OR
               2D-numpy array with quantile forecasts with dimensionality M x Q,
               where "Q" is number of quantiles.
            measurements: array_like
               List or numpy array with "M" measurements / observations.
            quantiles: array_like
               List of "Q" values of the quantiles to be evaluated.
               Quantile levels are assumed to be symmetric around the median
               (levels i and Q-1-i form the i-th central prediction interval).

        Returns
        -------
            mean_is: float
                The mean is over all probabilistic_forecast - measurement pairs.
            single_is: array, shape (M,)
                is value for each probabilistic_forecast - measurement pair.
    """

    # convert to quantile representation if necessary
    if isinstance(probabilistic_forecasts[0],
                  scipy.stats._distn_infrastructure.rv_frozen):
        quantile_forecasts = probdists_2_quantiles.probdists_2_quantiles(
            probabilistic_forecasts, quantiles)
    else:
        quantile_forecasts = np.array(probabilistic_forecasts)

    # accept plain lists as well as ndarrays
    measurements = np.asarray(measurements)
    quantiles = np.atleast_1d(quantiles)

    nr_meas = quantile_forecasts.shape[0]

    nr_quantiles = quantiles.size
    nr_intervals = nr_quantiles // 2
    # nominal coverage error of interval i is alpha_i = 2 * tau_i
    alphas = 2 * quantiles[0:nr_intervals]

    if np.mod(nr_quantiles, 2) > 0:
        print([
            'Number of quantileValues is odd. Dont worry, the median quantile just will not be evaluated.'
        ])

    interval_score_each_obs = np.zeros([nr_meas, nr_intervals])
    for i in range(nr_intervals):
        # pair the i-th lowest quantile with the i-th highest one
        u_idx = nr_quantiles - i - 1
        l_idx = i
        upper = quantile_forecasts[:, u_idx]
        lower = quantile_forecasts[:, l_idx]

        # interval score = width + (2/alpha) * penalty for observations
        # falling outside the interval (Gneiting & Raftery 2007, Eq. 43)
        width = upper - lower
        over_penalty = (2 / alphas[i]) * (measurements - upper) * np.heaviside(
            measurements - upper, 1)
        under_penalty = (2 / alphas[i]) * (lower - measurements) * np.heaviside(
            lower - measurements, 1)
        interval_score_each_obs[:, i] = width + over_penalty + under_penalty

    # BUGFIX: average over intervals per observation (axis=1) so single_is has
    # shape (M,) as documented; the previous axis=0 averaged over observations
    # and returned one value per interval instead.
    single_is = np.mean(interval_score_each_obs, axis=1)

    return np.mean(single_is), single_is
# Ejemplo n.º 6
def quantilescore_with_decomposition(probabilistic_forecasts,
                                     measurements,
                                     quantiles=np.linspace(0.1, 0.9, 9),
                                     K=10,
                                     bin_averaging=False):
    """ Computes the quantile score (qs) and its decompositions.

        Definition of the score is taken from
        Bentzien S, Friederichs P. Decomposition and graphical portrayal of the quantile score.
        Q J R Meteorol Soc. 2014;140(683):1924-1934.

        Parameters
        ----------
            probabilistic_forecasts: list
                List of "M" scipy.stats.rv_continuous distributions
                https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.html
                OR
                2D-numpy array with quantile forecasts with dimensionality M x Q,
                where "Q" is number of quantiles.
            measurements: array_like
                List or numpy array with "M" measurements / observations.
                NOTE(review): the body uses measurements.size and fancy
                indexing, so a numpy array (not a plain list) appears to be
                required here — confirm against callers.
            quantiles: array_like
                List of "Q" values of the quantiles to be evaluated.
            K: int
                Subsampling parameter of qs decomposition. Defines the amount of discretization steps.
            bin_averaging: bool
                Defines whether the discretized quantile forecasts are defined in the center
                or the edge of the subsampling bin. According to the authors, no averaging is more correct
                while averaging may lead to more stable results.
        Returns
        -------
            qs: array, shape (Q,)
                The mean qs over all probabilistic_forecast - measurement pairs for each quantile recomposed
                from the decomposition components. Not (!) completely identical to the non-decomposed quantilescore.
            qs_rel: array, shape (Q,)
                Reliability error of the qs for each quantile.
            qs_res: array, shape (Q,)
                Resolution of the qs for each quantile.
            qs_unc: array, shape (Q,)
                Uncertainty of the qs for each quantile.
    """

    # convert frozen scipy distributions to a quantile matrix when needed
    if isinstance(probabilistic_forecasts[0],
                  scipy.stats._distn_infrastructure.rv_frozen):
        quantile_forecasts = probdists_2_quantiles.probdists_2_quantiles(
            probabilistic_forecasts, quantiles=quantiles)
    else:
        quantile_forecasts = np.array(probabilistic_forecasts)

    quantiles = np.atleast_1d(quantiles)

    # DECOMPOSITION COMPUTATION OF SCORE
    nr_q = quantiles.size
    # sorted observations are used to read off sample quantiles by index
    smeasurements = np.sort(measurements)

    # one result slot per quantile level
    qs = [[] for i in range(nr_q)]
    qs_rel = [[] for i in range(nr_q)]
    qs_res = [[] for i in range(nr_q)]
    qs_unc = [[] for i in range(nr_q)]

    for i in range(0, nr_q):

        # compute the unconditional sample quantile mean_meas_tau ==> \bar{o}_tau
        # eventually average value instead of quantile upper bound ?
        # NOTE(review): this outer index uses "- 1" but the per-bin index
        # q_idxk below does not — confirm which off-by-one convention the
        # Bentzien & Friederichs decomposition intends.
        qIdx = int(quantiles[i] * measurements.size) - 1
        if bin_averaging:
            # average value
            mean_meas_tau = np.mean(smeasurements[0:qIdx])
        else:
            # upper bound
            mean_meas_tau = smeasurements[qIdx]
        # uncQ = np.mean(_pinball_loss(measurements - mean_meas_tau, quantiles[i]))

        # prepare indices(for binning with k) and ranges(for weighting at the end)
        # forecasts are sorted so each of the K bins groups similar forecasts
        cur_qe = quantile_forecasts[:, i]
        s_idx = np.argsort(cur_qe)
        idx_borders = np.linspace(0, s_idx.size, K + 1)
        ranges = np.diff(idx_borders)

        # iterate on subsets with k (the conditional quantiles)
        entropy_k = [[] for someIdx in range(K)]
        unc_k = np.zeros([K, 1])
        res_k = np.zeros([K, 1])
        rel_k = np.zeros([K, 1])

        for k in range(K):
            # get borders of current subset (float borders truncated to int)
            k_start = int(idx_borders[k])
            k_end = int(idx_borders[k + 1]) - 1

            # specify subsets from borders
            idx_range = s_idx[k_start:(k_end + 1)]
            cur_qe_k = cur_qe[idx_range]
            measurements_k = measurements[idx_range]

            # create conditional measurements o_tau^(k) and p_tau^(k)
            q_idxk = int(quantiles[i] * measurements_k.size)
            smeasurements_k = np.sort(measurements_k)

            if bin_averaging:
                # average value
                mean_measurement_k = np.mean(smeasurements_k[0:q_idxk])
            else:
                # the conditional sample quantile observation o_tau^(k) upper bound
                mean_measurement_k = smeasurements_k[q_idxk]

            # the conditional discretized quantile forecast p_tau^(k)
            fcst_k = np.mean(cur_qe_k)

            # compute decomposition components
            # entropy: pinball loss against the bin's own sample quantile
            entropy_k[k] = _pinball_loss(measurements_k - mean_measurement_k,
                                         quantiles[i])
            # reliability: excess loss of the forecast over the bin entropy
            rel_k[k] = np.mean(
                _pinball_loss(measurements_k - fcst_k, quantiles[i]) -
                entropy_k[k])
            # resolution: gain of the bin quantile over the unconditional one
            res_k[k] = np.mean(
                _pinball_loss(measurements_k - mean_meas_tau, quantiles[i]) -
                entropy_k[k])
            # uncertainty: loss against the unconditional sample quantile
            unc_k[k] = np.mean(
                _pinball_loss(measurements_k - mean_meas_tau, quantiles[i]))

        # compute the weights for overall score
        # this is N_k / N
        relative_weights = ranges / np.sum(ranges)

        # create summary values: weighted sums over the K bins,
        # recomposed as QS = REL - RES + UNC
        qs_rel[i] = np.sum(relative_weights * rel_k.ravel())
        qs_res[i] = np.sum(relative_weights * res_k.ravel())
        qs_unc[i] = np.sum(relative_weights * unc_k.ravel())
        qs[i] = qs_rel[i] - qs_res[i] + qs_unc[i]

    return np.array(qs), np.array(qs_rel), np.array(qs_res), np.array(qs_unc)