Esempio n. 1
0
def rsq(obs, sim=None, res=None, missing="drop", weighted=False, max_gap=30,
        nparam=None):
    """Compute R-squared, possibly adjusted for the number of free parameters.

    Parameters
    ----------
    obs: pandas.Series
        Series with the observed values. Always required, also when the
        residuals are provided, because the total sum of squares is computed
        from the observations.
    sim: pandas.Series, optional
        Series with the simulated values. Only used when res is None, in
        which case the residuals are computed as sim - obs.
    res: pandas.Series, optional
        Series with the residual values. If time series for the residuals
        are provided, the sim argument is ignored.
    missing: str, optional
        string with the rule to deal with missing values. Only "drop" is
        supported now.
    weighted: bool, optional
        If weighted is True, the variances are computed using the time
        step between observations as weights. Default is False.
    max_gap: int, optional
        maximum allowed gap period in days to use for the computation of the
        weights. All time steps larger than max_gap are replaced with the
        max_gap value. Default value is 30 days.
    nparam: int, optional
        number of calibrated parameters. When None or 0 the unadjusted
        R-squared is returned.

    Returns
    -------
    float
        The (adjusted) R-squared, or nan when no residuals remain after
        handling the missing values.

    Raises
    ------
    KeyError
        If the number of residuals differs from the number of observations
        and the residuals contain time steps that are absent from obs.

    Notes
    -----
    .. math:: R^2_{adj} = 1 - \\frac{n-1}{n-n_{param}} \\frac{rss}{tss}

    Where n is the number of observations used in the computation,
    :math:`n_{param}` the number of free parameters, rss the (weighted) sum
    of the squared residuals, and tss the (weighted) total sum of squares of
    the observations around their mean.

    When nparam is provided, the :math:`R^2` is adjusted for the number of
    calibration parameters.

    """
    if res is None:
        res = sim - obs

    if missing == "drop":
        res = res.dropna()

    # Return nan if the time indices of the sim and obs don't match
    if res.index.size == 0:
        logger.warning("Time indices of the sim and obs don't match.")
        return nan

    # Dropping missing residuals can leave fewer residuals than observations
    # (partially overlapping indices, NaNs in obs or sim). Restrict the
    # observations to the remaining time steps so they line up element-wise
    # with the residuals and the weights derived from them.
    if obs.index.size != res.index.size:
        obs = obs.loc[res.index]

    w = _get_weights(res, weighted=weighted, max_gap=max_gap)
    obs_values = obs.to_numpy()
    mu = average(obs_values, weights=w)
    rss = (w * res.to_numpy() ** 2.0).sum()
    tss = (w * (obs_values - mu) ** 2.0).sum()

    if nparam:
        # n is the number of observations that actually entered rss and tss
        n = obs_values.size
        return 1.0 - (n - 1.0) / (n - nparam) * rss / tss
    return 1.0 - rss / tss
Esempio n. 2
0
def pearsonr(obs, sim, missing="drop", weighted=False, max_gap=30):
    """Compute the (weighted) Pearson correlation (r).

    Parameters
    ----------
    obs: pandas.Series
        Series with the observed values.
    sim: pandas.Series
        Series with the simulated values. Only the values at the time steps
        of the observations are used.
    missing: str, optional
        string with the rule to deal with missing values in the
        observed series. Only "drop" is supported now.
    weighted: bool, optional
        Weight the values by the normalized time step to account for
        irregular time series. Default is False.
    max_gap: int, optional
        maximum allowed gap period in days to use for the computation of the
        weights. All time steps larger than max_gap are replaced with the
        max_gap value. Default value is 30 days.

    Returns
    -------
    float
        The (weighted) Pearson correlation, or nan when the simulation has
        no values at any of the observation time steps.

    Notes
    -----
    The Pearson correlation (r) is computed as follows:

    .. math:: r = \\frac{\\sum_{i=1}^{N}w_i (x_i - \\bar{x})(y_i - \\bar{y})}
        {\\sqrt{\\sum_{i=1}^{N} w_i(x_i-\\bar{x})^2 \\sum_{i=1}^{N}
        w_i(y_i-\\bar{y})^2}}

    Where :math:`x` is the observed time series, :math:`y` the simulated
    time series, and :math:`N` the number of time steps for which both an
    observed and a simulated value are available.

    """
    if missing == "drop":
        obs = obs.dropna()

    # Simulated values at the observation time steps; time steps without a
    # simulated value are dropped.
    sim = sim.reindex(obs.index).dropna()

    # Return nan if the time indices of the sim and obs don't match
    if sim.index.size == 0:
        logger.warning("Time indices of the sim and obs don't match.")
        return nan

    # When the simulation does not cover every observation, keep only the
    # observations that have a simulated counterpart so that obs, sim and
    # the weights all have the same length.
    if obs.index.size != sim.index.size:
        obs = obs.loc[sim.index]

    # Weights are derived from the aligned observations
    w = _get_weights(obs, weighted=weighted, max_gap=max_gap)

    sim_values = sim.to_numpy()
    obs_values = obs.to_numpy()

    # Deviations from the (weighted) means
    sim_dev = sim_values - average(sim_values, weights=w)
    obs_dev = obs_values - average(obs_values, weights=w)

    r = (w * sim_dev * obs_dev).sum() / \
        sqrt((w * sim_dev ** 2).sum() * (w * obs_dev ** 2).sum())

    return r
Esempio n. 3
0
def nse(obs, sim=None, res=None, missing="drop", weighted=False, max_gap=30):
    """Compute the (weighted) Nash-Sutcliffe Efficiency (NSE).

    Parameters
    ----------
    obs: pandas.Series
        Series with the observed values. Always required, also when the
        residuals are provided, because the variance of the observations is
        part of the NSE.
    sim: pandas.Series, optional
        Series with the simulated values. Only used when res is None, in
        which case the residuals are computed as sim - obs.
    res: pandas.Series, optional
        Series with the residual values. If time series for the residuals
        are provided, the sim argument is ignored.
    missing: str, optional
        string with the rule to deal with missing values. Only "drop" is
        supported now.
    weighted: bool, optional
        If weighted is True, the variances are computed using the time
        step between observations as weights. Default is False.
    max_gap: int, optional
        maximum allowed gap period in days to use for the computation of the
        weights. All time steps larger than max_gap are replaced with the
        max_gap value. Default value is 30 days.

    Returns
    -------
    float
        The (weighted) NSE, or nan when no residuals remain after handling
        the missing values.

    Raises
    ------
    KeyError
        If the number of residuals differs from the number of observations
        and the residuals contain time steps that are absent from obs.

    Notes
    -----
    .. math:: \\text{NSE} = 1 - \\frac{\\sum_{i=1}^{N} w_i (h_{s,i}-h_{o,i})^2}
        {\\sum_{i=1}^{N} w_i (h_{o,i}-\\mu_{h,o})^2}

    Where :math:`h_s` are the simulated values, :math:`h_o` the observed
    values, :math:`\\mu_{h,o}` the (weighted) mean of the observations and
    :math:`w_i` the weights.

    References
    ----------
    .. [nash_1970] Nash, J. E., & Sutcliffe, J. V. (1970). River flow
       forecasting through conceptual models part I-A discussion of
       principles. Journal of hydrology, 10(3), 282-290.

    """
    if res is None:
        res = sim - obs

    if missing == "drop":
        res = res.dropna()

    # Return nan if the time indices of the sim and obs don't match
    if res.index.size == 0:
        logger.warning("Time indices of the sim and obs don't match.")
        return nan

    # Dropping missing residuals can leave fewer residuals than observations
    # (partially overlapping indices, NaNs in obs or sim). Restrict the
    # observations to the remaining time steps so they line up element-wise
    # with the residuals and the weights derived from them.
    if obs.index.size != res.index.size:
        obs = obs.loc[res.index]

    w = _get_weights(res, weighted=weighted, max_gap=max_gap)
    obs_values = obs.to_numpy()
    mu = average(obs_values, weights=w)

    return 1 - (w * res.to_numpy() ** 2).sum() / \
        (w * (obs_values - mu) ** 2).sum()
Esempio n. 4
0
def mae(obs=None, sim=None, res=None, missing="drop", weighted=False,
        max_gap=30):
    """Compute the (weighted) Mean Absolute Error (MAE).

    Parameters
    ----------
    obs: pandas.Series, optional
        Series with the observed values. Only used when res is None.
    sim: pandas.Series, optional
        Series with the simulated values. Only used when res is None.
    res: pandas.Series, optional
        Series with the residual values. When provided, the sim and obs
        arguments are ignored; otherwise the residuals are sim - obs.
    missing: str, optional
        Rule to deal with missing values. Only "drop" is supported now,
        which removes the missing residuals.
    weighted: bool, optional
        Weight the values by the normalized time step to account for
        irregular time series. Default is False.
    max_gap: int, optional
        Maximum allowed gap period in days to use for the computation of
        the weights. All time steps larger than max_gap are replaced with
        the max_gap value. Default value is 30 days.

    Returns
    -------
    float
        The (weighted) mean absolute error, or nan when no residuals are
        left.

    Notes
    -----
    The Mean Absolute Error (MAE) between two time series x and y is
    computed as follows:

    .. math:: \\text{MAE} = \\sum_{i=1}^{N} w_i |x_i - y_i|

    where :math:`N` is the number of residuals and :math:`w_i` the weights.

    """
    residuals = sim - obs if res is None else res

    if missing == "drop":
        residuals = residuals.dropna()

    # Nothing left to evaluate: the sim and obs share no time steps
    if not residuals.index.size:
        logger.warning("Time indices of the sim and obs don't match.")
        return nan

    weights = _get_weights(residuals, weighted=weighted, max_gap=max_gap)
    abs_err = abs(residuals.to_numpy())
    return (weights * abs_err).sum()
Esempio n. 5
0
def rmse(obs=None, sim=None, res=None, missing="drop", weighted=False,
         max_gap=30):
    """Compute the (weighted) Root Mean Squared Error (RMSE).

    Parameters
    ----------
    obs: pandas.Series, optional
        Series with the observed values. Only used when res is None.
    sim: pandas.Series, optional
        Series with the simulated values. Only used when res is None.
    res: pandas.Series, optional
        Series with the residual values. When provided, the sim and obs
        arguments are ignored; otherwise the residuals are sim - obs.
    missing: str, optional
        Rule to deal with missing values. Only "drop" is supported now,
        which removes the missing residuals.
    weighted: bool, optional
        Weight the values by the normalized time step to account for
        irregular time series. Default is False.
    max_gap: int, optional
        Maximum allowed gap period in days to use for the computation of
        the weights. All time steps larger than max_gap are replaced with
        the max_gap value. Default value is 30 days.

    Returns
    -------
    float
        The (weighted) root mean squared error, or nan when no residuals
        are left.

    Notes
    -----
    Computes the Root Mean Squared Error (RMSE) as follows:

    .. math:: \\text{RMSE} = \\sqrt{\\sum_{i=1}^{N} w_i n_i^2}

    where :math:`N` is the number of residuals :math:`n` and :math:`w_i`
    the weights.

    """
    residuals = sim - obs if res is None else res

    if missing == "drop":
        residuals = residuals.dropna()

    # Nothing left to evaluate: the sim and obs share no time steps
    if not residuals.index.size:
        logger.warning("Time indices of the sim and obs don't match.")
        return nan

    weights = _get_weights(residuals, weighted=weighted, max_gap=max_gap)
    weighted_sq_sum = (weights * residuals.to_numpy() ** 2).sum()
    return sqrt(weighted_sq_sum)