Esempio n. 1
0
def mase_loss(y_test, y_pred, y_train, sp=1):
    """Mean absolute scaled error.

    This scale-free error metric can be used to compare forecast methods on
    a single series and also to compare forecast accuracy between series.
    The forecast's mean absolute error is scaled by the in-sample mean
    absolute error of the (seasonal) naive forecast on the training data.

    Parameters
    ----------
    y_test : pandas Series of shape = (fh,) where fh is the forecasting horizon
        Ground truth (correct) target values.
    y_pred : pandas Series of shape = (fh,)
        Estimated target values. Must have the same time index as `y_test`.
    y_train : pandas Series of shape = (n_obs,)
        Observed training values. Its time index must lie strictly before
        the time index of `y_test`.
    sp : int, optional (default=1)
        Seasonal periodicity of training data, used as the lag of the
        naive forecast.

    Returns
    -------
    loss : float
        MASE loss. `np.nan` is returned when the in-sample error of the
        naive forecast is exactly zero (e.g. flat training data).

    Raises
    ------
    ValueError
        If the latest time point of `y_train` is not before the earliest
        time point of `y_test`.

    References
    ----------
    .. [1] Hyndman, R. J. (2006). "Another look at measures of forecast
           accuracy", Foresight, Issue 4.
    """
    # input checks
    y_test = check_y(y_test)
    y_pred = check_y(y_pred)
    y_train = check_y(y_train)
    check_equal_time_index(y_test, y_pred)

    # check if training set is prior to test set
    if y_train is not None:
        check_time_index(y_train.index)
        # compare time points with time points: use the index of `y_test`,
        # not its values
        if y_train.index.max() >= y_test.index.min():
            raise ValueError("Found `y_train` with time index which is not "
                             "before time index of `y_pred`")

    # naive seasonal prediction: the value observed `sp` steps earlier
    y_train = np.asarray(y_train)
    y_pred_naive = y_train[:-sp]

    # mean absolute error of naive seasonal prediction
    mae_naive = np.mean(np.abs(y_train[sp:] - y_pred_naive))

    # if training data is flat, mae may be zero,
    # return np.nan to avoid divide by zero error
    # and np.inf values
    if mae_naive == 0:
        return np.nan

    return np.mean(np.abs(y_test - y_pred)) / mae_naive
Esempio n. 2
0
def remove_trend(x, coefs, time_index=None):
    """Subtract a fitted polynomial trend from each sample (row) of `x`.

    Parameters
    ----------
    x : array_like, shape=[n_samples, n_obs]
        Time series data, each sample is de-trended separately
    coefs : ndarray, shape=[n_samples, order + 1]
        Fitted coefficients for each sample. The polynomial order is
        inferred from the number of columns: one column means order zero
        (a constant), two columns order 1 (linear), three columns order 2
        (quadratic), etc. Columns are matched against the columns of
        ``np.vander(time_index, N=order + 1)``.
    time_index : array-like, shape=[n_obs], optional (default=None)
        Time points at which the trend is evaluated before it is removed.
        If None, ``0, 1, ..., n_obs - 1`` is used.

    Returns
    -------
    xt : ndarray
        The de-trended series, i.e. `x` minus the trend evaluated at
        `time_index`.

    Raises
    ------
    ValueError
        If a `time_index` is passed whose length differs from the number
        of observations in `x`.

    See Also
    --------
    fit_trend
    add_trend

    References
    ----------
    Adapted from statsmodels (0.9.0), see
    https://www.statsmodels.org/dev/_modules/statsmodels/tsa/tsatools.html
    #detrend
    """
    x = check_array(x)

    # one coefficient column per polynomial term (order + 1 in total)
    n_terms = coefs.shape[1]

    # a single column is a constant trend: subtract it directly
    if n_terms == 1:
        return x - coefs

    if time_index is not None:
        # a user-supplied index must be valid and cover every observation
        time_index = check_time_index(time_index)
        if len(time_index) != x.shape[1]:
            raise ValueError(
                'Length of passed index does not match length of passed x')
    else:
        # fall back to a plain range index over the observations
        time_index = np.arange(x.shape[1])

    # evaluate the polynomial of every sample at every time point
    design = np.vander(time_index, N=n_terms)
    trend = np.dot(design, coefs.T).T
    return x - trend
Esempio n. 3
0
def add_trend(x, coefs, time_index=None):
    """Add a fitted polynomial trend to each sample (row) of `x`.

    Inverse function to `remove_trend()`.

    Parameters
    ----------
    x : array_like, shape=[n_samples, n_obs]
        Time series data, each sample is treated separately
    coefs : array-like, shape=[n_samples, order + 1]
        Fitted polynomial coefficients for each sample. The order is
        inferred from the number of columns: one column means order zero
        (a constant), two columns order 1 (linear), three columns order 2
        (quadratic), etc. Columns are matched against the columns of
        ``np.vander(time_index, N=order + 1)``.
    time_index : array-like, shape=[n_obs], optional (default=None)
        Time points at which the trend is evaluated before it is added.
        If None, ``0, 1, ..., n_obs - 1`` is used.

    Returns
    -------
    xt : ndarray
        The series with added trend.

    Raises
    ------
    ValueError
        If a `time_index` is passed whose length differs from the number
        of observations in `x`.

    See Also
    --------
    fit_trend
    remove_trend
    """
    x = check_array(x)

    # one coefficient column per polynomial term (order + 1 in total)
    n_terms = coefs.shape[1]

    # a single column is a constant trend: add it directly
    if n_terms == 1:
        return x + coefs

    if time_index is not None:
        # a user-supplied index must be valid and cover every observation
        time_index = check_time_index(time_index)
        if len(time_index) != x.shape[1]:
            raise ValueError(
                'Length of passed index does not match length of passed x')
    else:
        # fall back to a plain range index over the observations
        time_index = np.arange(x.shape[1])

    # evaluate the polynomial of every sample at every time point
    design = np.vander(time_index, N=n_terms)
    trend = np.dot(design, coefs.T).T
    return x + trend
Esempio n. 4
0
def _get_duration(x, y=None, coerce_to_int=False, unit=None):
    """Compute duration of time index `x` or durations between time
    points `x` and `y` if `y` is given

    Parameters
    ----------
    x : pd.Index, pd.Timestamp, pd.Period, int
        Time index if `y` is None, otherwise a single time point.
    y : pd.Timestamp, pd.Period, int, optional (default=None)
        Second time point. Must be of the same type as `x`, except that
        Python and numpy integers may be mixed freely.
    coerce_to_int : bool
        If True, duration is returned as integer value for given unit.
        Only applied when `x` is a period or datetime index or point.
    unit : str
        Time unit. If None and coercion is requested, the unit is looked
        up from `x`.

    Returns
    -------
    ret : duration type
        ``x[-1] - x[0]`` if `y` is None, otherwise ``x - y``.

    Raises
    ------
    AssertionError
        If `y` is given and `x` is not a supported time point type, or
        `x` and `y` are of incompatible types.
    """
    if y is None:
        # duration spanned by the index: last minus first entry
        x = check_time_index(x)
        duration = x[-1] - x[0]
    else:
        assert isinstance(x, (int, np.integer, pd.Period, pd.Timestamp))
        # check types allowing (np.integer, int) combinations to pass;
        # both `x` and `y` have to be integers for the mixed case
        both_integer = (isinstance(x, (np.integer, int))
                        and isinstance(y, (np.integer, int)))
        assert type(x) is type(y) or both_integer
        duration = x - y

    # coerce to integer result for given time unit
    if coerce_to_int and isinstance(
            x, (pd.PeriodIndex, pd.DatetimeIndex, pd.Period, pd.Timestamp)):
        if unit is None:
            # try to get the unit from the data if not given
            unit = _get_unit(x)
        duration = _coerce_duration_to_int(duration, unit=unit)
    return duration
Esempio n. 5
0
 def _check_y(y):
     """Run `check_time_index` on `y`, or on its index if `y` is a
     pd.Series, and return the result."""
     index = y.index if isinstance(y, pd.Series) else y
     return check_time_index(index)
Esempio n. 6
0
 def _set_oh_index(self, y):
     """Store the result of `check_time_index` applied to the index of
     `y` in `self._oh_index`."""
     oh_index = check_time_index(y.index)
     self._oh_index = oh_index