def fit(self, y, X=None, fh=None):
    """Fit the forecaster to training data.

    Parameters
    ----------
    y : pd.Series
        Target time series to which to fit the forecaster.
    X : pd.DataFrame, optional (default=None)
        Exogenous variables. Passed through input validation together with
        ``y`` but otherwise ignored.
    fh : int, list or np.array, optional (default=None)
        The forecasting horizon with the steps ahead to predict.

    Returns
    -------
    self : returns an instance of self.
    """
    y, _ = check_y_X(y, X)
    sp = check_sp(self.sp)
    if sp > 1 and not self.deseasonalize:
        # Name the argument exactly as it is spelled on the estimator so the
        # user can find it (the attribute is ``deseasonalize``).
        warn("`sp` is ignored when `deseasonalize`=False")

    if self.deseasonalize:
        # Seasonally adjust first; SES is then fitted on the adjusted series.
        self.deseasonalizer_ = Deseasonalizer(sp=self.sp, model="multiplicative")
        y = self.deseasonalizer_.fit_transform(y)

    # fit exponential smoothing forecaster
    # find theta lines: Theta lines are just SES + drift
    super(ThetaForecaster, self).fit(y, fh=fh)
    self.initial_level_ = self._fitted_forecaster.params["smoothing_level"]

    # compute trend on the same (possibly deseasonalised) series SES saw
    self.trend_ = self._compute_trend(y)
    self._is_fitted = True
    return self
def test_deseasonalised_values(sp):
    """Check default Deseasonalizer output against a direct decomposition.

    The transformed training series must equal the training series minus the
    seasonal component returned by ``seasonal_decompose`` for the same period.
    """
    deseasonalizer = Deseasonalizer(sp=sp)
    deseasonalizer.fit(y_train)
    observed = deseasonalizer.transform(y_train)

    decomposition = seasonal_decompose(y_train, period=sp)
    reference = y_train - decomposition.seasonal

    np.testing.assert_array_equal(observed, reference)
def compute_expected_y_pred(y_train, fh):
    """Build the reference forecast by chaining the steps by hand.

    Applies a multiplicative deseasonaliser (``sp=12``) and a linear
    detrender to a copy of ``y_train``, fits a ``NaiveForecaster`` on the
    result for horizon ``fh``, and maps the prediction back through the
    transformers in reverse order.
    """
    deseasonalizer = Deseasonalizer(sp=12, model="multiplicative")
    detrender = Detrender(PolynomialTrendForecaster(degree=1))

    # fitting: forward pass through the transformers, in order
    series = y_train.copy()
    for step in (deseasonalizer, detrender):
        series = step.fit_transform(series)

    naive = NaiveForecaster()
    naive.fit(series, fh=fh)

    # predicting: undo the transformations, last applied first
    prediction = naive.predict()
    for step in (detrender, deseasonalizer):
        prediction = step.inverse_transform(prediction)
    return prediction
def test_pipeline():
    """Check TransformedTargetForecaster against a hand-chained pipeline.

    The pipeline forecast on the airline data must equal the forecast
    obtained by applying the same transformers and forecaster step by step.
    """

    def compute_expected_y_pred(y_train, fh):
        # Reference implementation: same steps as the pipeline, done manually.
        deseasonalizer = Deseasonalizer(sp=12, model="multiplicative")
        detrender = Detrender(PolynomialTrendForecaster(degree=1))

        # fitting
        series = y_train.copy()
        for step in (deseasonalizer, detrender):
            series = step.fit_transform(series)
        naive = NaiveForecaster()
        naive.fit(series, fh=fh)

        # predicting
        prediction = naive.predict()
        for step in (detrender, deseasonalizer):
            prediction = step.inverse_transform(prediction)
        return prediction

    y = load_airline()
    y_train, y_test = temporal_train_test_split(y)

    steps = [
        ("t1", Deseasonalizer(sp=12, model="multiplicative")),
        ("t2", Detrender(PolynomialTrendForecaster(degree=1))),
        ("forecaster", NaiveForecaster()),
    ]
    forecaster = TransformedTargetForecaster(steps)

    # one forecasting step per held-out observation: 1, 2, ..., len(y_test)
    fh = np.arange(len(y_test)) + 1
    forecaster.fit(y_train, fh=fh)
    actual = forecaster.predict()

    expected = compute_expected_y_pred(y_train, fh)
    np.testing.assert_array_equal(actual, expected)
class ThetaForecaster(ExponentialSmoothing):
    """Theta method for forecasting.

    The theta method as defined in [1]_ is equivalent to simple exponential
    smoothing (SES) with drift (as demonstrated in [2]_).

    When ``deseasonalize`` is True, the series is seasonally adjusted with a
    multiplicative ``Deseasonalizer`` of period ``sp`` before SES is fitted,
    and the resulting forecasts are then reseasonalised.

    In cases where SES results in a constant forecast (fitted smoothing level
    close to zero), the theta forecaster reverts to predicting the SES
    constant plus a linear trend derived from the training data.

    Prediction quantiles assume normal additive noise around the point
    forecast.

    Parameters
    ----------
    initial_level : float, optional
        Forwarded to ``ExponentialSmoothing``. If the value is set then it is
        used as a known value, otherwise it is estimated from the data.
    deseasonalize : bool, optional (default=True)
        If True, data is seasonally adjusted.
    sp : int, optional (default=1)
        The number of observations that constitute a seasonal period for the
        multiplicative deseasonaliser. Ignored, with a warning if greater
        than 1, when ``deseasonalize`` is False. Default is 1 (no
        seasonality).

    Attributes
    ----------
    initial_level_ : float
        The ``smoothing_level`` parameter of the fitted SES model.
    trend_ : float
        Half of the leading coefficient of an order-1 trend fit to the
        (possibly deseasonalised) series.
    sigma_ : float
        Residual standard error of the SES fit, ``sqrt(sse / (n - 1))``.
        Used to calculate prediction quantiles.
    deseasonalizer_ : Deseasonalizer or None
        The fitted deseasonaliser, set only when ``deseasonalize`` is True.
    drift_ : None
        Initialised in the constructor; not assigned anywhere in this class.
    se_ : None
        Initialised in the constructor; not assigned anywhere in this class.

    Notes
    -----
    NOTE(review): earlier documentation stated that the series is tested for
    seasonality (A&N test) before adjustment. No such test is visible in this
    class; confirm whether ``Deseasonalizer`` performs it.

    References
    ----------
    .. [1] Assimakopoulos, V. and Nikolopoulos, K. The theta model: a
       decomposition approach to forecasting. International Journal of
       Forecasting 16, 521-530, 2000.
       https://www.sciencedirect.com/science/article/pii/S0169207000000662
    .. [2] Hyndman, Rob J., and Billah, Baki. Unmasking the Theta method.
       International J. Forecasting, 19, 287-290, 2003.
       https://www.sciencedirect.com/science/article/pii/S0169207001001431

    Examples
    --------
    >>> from sktime.datasets import load_airline
    >>> from sktime.forecasting.theta import ThetaForecaster
    >>> y = load_airline()
    >>> forecaster = ThetaForecaster(sp=12)
    >>> forecaster.fit(y)
    ThetaForecaster(...)
    >>> y_pred = forecaster.predict(fh=[1,2,3])
    """

    _fitted_param_names = ("initial_level", "smoothing_level")
    _tags = {
        "scitype:y": "univariate",
        "ignores-exogeneous-X": True,
        "capability:pred_int": True,
        "requires-fh-in-fit": False,
        "handles-missing-data": False,
    }

    def __init__(self, initial_level=None, deseasonalize=True, sp=1):
        self.sp = sp
        self.deseasonalize = deseasonalize
        # Fitted state; populated by _fit / _update.
        self.deseasonalizer_ = None
        self.trend_ = None
        self.initial_level_ = None
        self.drift_ = None
        self.se_ = None
        super(ThetaForecaster, self).__init__(initial_level=initial_level, sp=sp)

    def _fit(self, y, X=None, fh=None):
        """Fit to training data.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        fh : int, list or np.array, optional (default=None)
            The forecasting horizon with the steps ahead to predict.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables are ignored.

        Returns
        -------
        self : returns an instance of self.
        """
        sp = check_sp(self.sp)
        if sp > 1 and not self.deseasonalize:
            # Name the constructor argument exactly as it is spelled.
            warn("`sp` is ignored when `deseasonalize`=False")

        if self.deseasonalize:
            self.deseasonalizer_ = Deseasonalizer(sp=self.sp, model="multiplicative")
            y = self.deseasonalizer_.fit_transform(y)

        # Compare against None rather than relying on truthiness: an explicit
        # initial_level of 0.0 is a supplied value and must count as "known".
        self.initialization_method = (
            "known" if self.initial_level is not None else "estimated"
        )

        # fit exponential smoothing forecaster
        # find theta lines: Theta lines are just SES + drift
        super(ThetaForecaster, self)._fit(y, fh=fh)
        self.initial_level_ = self._fitted_forecaster.params["smoothing_level"]

        # compute and store historical residual standard error
        self.sigma_ = np.sqrt(self._fitted_forecaster.sse / (len(y) - 1))

        # compute trend
        self.trend_ = self._compute_trend(y)

        return self

    def _predict(self, fh, X=None):
        """Make forecasts.

        Parameters
        ----------
        fh : array-like
            The forecasting horizon with the steps ahead to predict.
        X : pd.DataFrame, optional (default=None)
            Exogenous time series.

        Returns
        -------
        y_pred : pandas.Series
            Returns series of predicted values.
        """
        y_pred = super(ThetaForecaster, self)._predict(fh, X)

        # Add drift.
        drift = self._compute_drift()
        y_pred += drift

        if self.deseasonalize:
            # Map the forecast back to the original (seasonal) scale.
            y_pred = self.deseasonalizer_.inverse_transform(y_pred)

        return y_pred

    @staticmethod
    def _compute_trend(y):
        """Return half of the leading coefficient of an order-1 trend fit."""
        # Trend calculated through least squares regression.
        # presumably coefs[0, 0] is the slope; confirm against _fit_trend
        coefs = _fit_trend(y.values.reshape(1, -1), order=1)
        return coefs[0, 0] / 2

    def _compute_drift(self):
        """Return the drift term for each step of ``self.fh`` past the cutoff."""
        fh = self.fh.to_relative(self.cutoff)
        if np.isclose(self.initial_level_, 0.0):
            # SES was constant, so revert to simple trend
            drift = self.trend_ * fh
        else:
            # Calculate drift from SES parameters
            n_timepoints = len(self._y)
            drift = self.trend_ * (
                fh
                + (1 - (1 - self.initial_level_) ** n_timepoints)
                / self.initial_level_
            )

        return drift

    def _predict_quantiles(self, fh, X=None, alpha=None):
        """Compute/return prediction quantiles for a forecast.

        private _predict_quantiles containing the core logic,
            called from predict_quantiles and predict_interval

        Parameters
        ----------
        fh : int, list, np.array or ForecastingHorizon
            Forecasting horizon
        X : pd.DataFrame, optional (default=None)
            Exogenous time series
        alpha : list of float, optional (default=[0.5])
            A list of probabilities at which quantile forecasts are computed.
            ``None`` is treated as ``[0.5]``.

        Returns
        -------
        quantiles : pd.DataFrame
            Column has multi-index: first level is the label "Quantiles",
            second level being the values of alpha passed to the function.
            Entries are the point forecast shifted by the normal quantile at
            that probability times the standard error for the step.
        """
        if alpha is None:
            # Honour the documented default instead of failing on None below.
            alpha = [0.5]

        # prepare return data frame
        index = pd.MultiIndex.from_product([["Quantiles"], alpha])
        pred_quantiles = pd.DataFrame(columns=index)

        sem = self.sigma_ * np.sqrt(
            self.fh.to_relative(self.cutoff) * self.initial_level_**2 + 1
        )

        y_pred = self._predict(fh, X)

        # we assume normal additive noise with sem variance
        for a in alpha:
            pred_quantiles[("Quantiles", a)] = y_pred + norm.ppf(a) * sem
        # todo: should this not increase with the horizon?
        # i.e., sth like norm.ppf(a) * sem * fh.to_absolute(cutoff) ?
        # I've just refactored this so will leave it for now

        return pred_quantiles

    def _update(self, y, X=None, update_params=True):
        """Update the stored series and, optionally, the trend estimate.

        Parameters
        ----------
        y : pd.Series
            New observations.
        X : pd.DataFrame, optional (default=None)
            Exogenous time series, forwarded to the parent update.
        update_params : bool, optional (default=True)
            If True, ``initial_level_`` and ``trend_`` are recomputed.

        Returns
        -------
        self : returns an instance of self.
        """
        super(ThetaForecaster, self)._update(
            y, X, update_params=False
        )  # use custom update_params routine
        if update_params:
            # Always recompute the trend from the full updated series. Using
            # only the newly passed ``y`` when not deseasonalising would fit
            # the trend to a fragment, unlike _fit and the branch below.
            y_full = self._y
            if self.deseasonalize:
                y_full = self.deseasonalizer_.transform(y_full)
            self.initial_level_ = self._fitted_forecaster.params["smoothing_level"]
            self.trend_ = self._compute_trend(y_full)
        return self
class ThetaForecaster(ExponentialSmoothing):
    """Theta method of forecasting.

    The theta method as defined in [1]_ is equivalent to simple exponential
    smoothing (SES) with drift. This is demonstrated in [2]_.

    When ``deseasonalize`` is True, the series is seasonally adjusted with a
    multiplicative ``Deseasonalizer`` of period ``sp`` before SES is fitted,
    and the resulting forecasts are then reseasonalised.

    In cases where SES results in a constant forecast (fitted smoothing level
    close to zero), the theta forecaster reverts to predicting the SES
    constant plus a linear trend derived from the training data.

    Prediction intervals are symmetric around the point forecast, with a
    half-width of a z-score times a horizon-dependent standard error.

    Parameters
    ----------
    initial_level : float, optional
        Forwarded to ``ExponentialSmoothing``. If the value is set then it
        will be used, otherwise it will be estimated from the data.
    deseasonalize : bool, optional (default=True)
        If True, data is seasonally adjusted.
    sp : int, optional (default=1)
        The number of observations that constitute a seasonal period for the
        multiplicative deseasonaliser. Ignored, with a warning if greater
        than 1, when ``deseasonalize`` is False. Default is 1 (no
        seasonality).

    Attributes
    ----------
    initial_level_ : float
        The ``smoothing_level`` parameter of the fitted SES model.
    trend_ : float
        Half of the leading coefficient of an order-1 trend fit to the
        (possibly deseasonalised) series.
    sigma_ : float
        Residual standard error of the SES fit; set when prediction errors
        are computed.
    deseasonalizer_ : Deseasonalizer or None
        The fitted deseasonaliser, set only when ``deseasonalize`` is True.
    drift_ : None
        Initialised in the constructor; not assigned anywhere in this class.
    se_ : None
        Initialised in the constructor; not assigned anywhere in this class.

    Notes
    -----
    NOTE(review): earlier documentation stated that the series is tested for
    seasonality (A&N test) before adjustment. No such test is visible in this
    class; confirm whether ``Deseasonalizer`` performs it.

    References
    ----------
    .. [1] `Assimakopoulos, V. and Nikolopoulos, K. The theta model: a
       decomposition approach to forecasting. International Journal of
       Forecasting 16, 521-530, 2000.
       <https://www.sciencedirect.com/science/article/pii/S0169207000000662>`_
    .. [2] `Hyndman, Rob J., and Billah, Baki. Unmasking the Theta method.
       International J. Forecasting, 19, 287-290, 2003.
       <https://www.sciencedirect.com/science/article/pii/S0169207001001431>`_
    """

    _fitted_param_names = ("initial_level", "smoothing_level")

    def __init__(self, initial_level=None, deseasonalize=True, sp=1):
        self.sp = sp
        self.deseasonalize = deseasonalize
        # Fitted state; populated by fit / update.
        self.deseasonalizer_ = None
        self.trend_ = None
        self.initial_level_ = None
        self.drift_ = None
        self.se_ = None
        super(ThetaForecaster, self).__init__(initial_level=initial_level, sp=sp)

    def fit(self, y, X=None, fh=None):
        """Fit to training data.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        fh : int, list or np.array, optional (default=None)
            The forecasting horizon with the steps ahead to predict.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables. Validated together with ``y`` but otherwise
            ignored.

        Returns
        -------
        self : returns an instance of self.
        """
        y, _ = check_y_X(y, X)
        sp = check_sp(self.sp)
        if sp > 1 and not self.deseasonalize:
            # Name the constructor argument exactly as it is spelled.
            warn("`sp` is ignored when `deseasonalize`=False")

        if self.deseasonalize:
            self.deseasonalizer_ = Deseasonalizer(sp=self.sp, model="multiplicative")
            y = self.deseasonalizer_.fit_transform(y)

        # fit exponential smoothing forecaster
        # find theta lines: Theta lines are just SES + drift
        super(ThetaForecaster, self).fit(y, fh=fh)
        self.initial_level_ = self._fitted_forecaster.params["smoothing_level"]

        # compute trend
        self.trend_ = self._compute_trend(y)
        self._is_fitted = True
        return self

    def _predict(self, fh, X=None, return_pred_int=False, alpha=DEFAULT_ALPHA):
        """Make forecasts.

        Parameters
        ----------
        fh : array-like
            The forecasting horizon with the steps ahead to predict.
        X : pd.DataFrame, optional (default=None)
            Exogenous time series, forwarded to the parent forecaster.
        return_pred_int : bool, optional (default=False)
            If True, prediction intervals are returned as well.
        alpha : float or list, optional (default=DEFAULT_ALPHA)
            Level(s) passed to ``compute_pred_int``.

        Returns
        -------
        y_pred : pandas.Series
            Returns series of predicted values.
        pred_int : pd.DataFrame or list of pd.DataFrame
            Only returned if ``return_pred_int`` is True.
        """
        # Intervals are computed here, so never request them from the parent.
        y_pred = super(ThetaForecaster, self)._predict(
            fh, X, return_pred_int=False, alpha=alpha
        )

        # Add drift.
        drift = self._compute_drift()
        y_pred += drift

        if self.deseasonalize:
            # Map the forecast back to the original (seasonal) scale.
            y_pred = self.deseasonalizer_.inverse_transform(y_pred)

        if return_pred_int:
            pred_int = self.compute_pred_int(y_pred=y_pred, alpha=alpha)
            return y_pred, pred_int

        return y_pred

    @staticmethod
    def _compute_trend(y):
        """Return half of the leading coefficient of an order-1 trend fit."""
        # Trend calculated through least squares regression.
        # presumably coefs[0, 0] is the slope; confirm against _fit_trend
        coefs = _fit_trend(y.values.reshape(1, -1), order=1)
        return coefs[0, 0] / 2

    def _compute_drift(self):
        """Return the drift term for each step of ``self.fh`` past the cutoff."""
        fh = self.fh.to_relative(self.cutoff)
        if np.isclose(self.initial_level_, 0.0):
            # SES was constant, so revert to simple trend
            drift = self.trend_ * fh
        else:
            # Calculate drift from SES parameters
            n_timepoints = len(self._y)
            drift = self.trend_ * (
                fh
                + (1 - (1 - self.initial_level_) ** n_timepoints)
                / self.initial_level_
            )

        return drift

    def _compute_pred_err(self, alphas):
        """Get the prediction errors for the forecast.

        Parameters
        ----------
        alphas : iterable of float
            Levels for which an error band is computed.

        Returns
        -------
        errors : list of pd.Series
            One series per level, indexed by the absolute forecasting horizon.
        """
        self.check_is_fitted()

        n_timepoints = len(self._y)

        # Residual standard error of the SES fit.
        self.sigma_ = np.sqrt(self._fitted_forecaster.sse / (n_timepoints - 1))
        sem = self.sigma_ * np.sqrt(
            self.fh.to_relative(self.cutoff) * self.initial_level_**2 + 1
        )

        errors = []
        for alpha in alphas:
            z = _zscore(1 - alpha)
            error = z * sem
            errors.append(pd.Series(error, index=self.fh.to_absolute(self.cutoff)))

        return errors

    def update(self, y, X=None, update_params=True):
        """Update the stored series and, optionally, the trend estimate.

        Parameters
        ----------
        y : pd.Series
            New observations.
        X : pd.DataFrame, optional (default=None)
            Exogenous time series, forwarded to the parent update.
        update_params : bool, optional (default=True)
            If True, ``initial_level_`` and ``trend_`` are recomputed.

        Returns
        -------
        self : returns an instance of self.
        """
        super(ThetaForecaster, self).update(
            y, X, update_params=False
        )  # use custom update_params routine
        if update_params:
            # Always recompute the trend from the full updated series. Using
            # only the newly passed ``y`` when not deseasonalising would fit
            # the trend to a fragment, unlike fit and the branch below.
            y_full = self._y
            if self.deseasonalize:
                y_full = self.deseasonalizer_.transform(y_full)
            self.initial_level_ = self._fitted_forecaster.params["smoothing_level"]
            self.trend_ = self._compute_trend(y_full)
        return self
def test_transform_inverse_transform_equivalence(sp, model):
    """Check that inverse_transform undoes transform on the training series.

    The round-tripped series must keep the original index exactly and match
    the original values up to floating-point tolerance.
    """
    deseasonalizer = Deseasonalizer(sp=sp, model=model)
    deseasonalizer.fit(y_train)

    adjusted = deseasonalizer.transform(y_train)
    round_trip = deseasonalizer.inverse_transform(adjusted)

    np.testing.assert_array_equal(y_train.index, round_trip.index)
    np.testing.assert_array_almost_equal(y_train, round_trip)
def test_inverse_transform_time_index(sp, model):
    """Check that inverse_transform keeps the index of unseen data.

    The transformer is fitted on the training series and then applied to the
    test series; the output index must equal the test index.
    """
    deseasonalizer = Deseasonalizer(sp=sp, model=model)
    deseasonalizer.fit(y_train)

    restored = deseasonalizer.inverse_transform(y_test)

    np.testing.assert_array_equal(restored.index, y_test.index)
class ThetaForecaster(ExponentialSmoothing):
    """Theta method for forecasting.

    The theta method as defined in [1]_ is equivalent to simple exponential
    smoothing (SES) with drift (as demonstrated in [2]_).

    When ``deseasonalize`` is True, the series is seasonally adjusted with a
    multiplicative ``Deseasonalizer`` of period ``sp`` before SES is fitted,
    and the resulting forecasts are then reseasonalised.

    In cases where SES results in a constant forecast (fitted smoothing level
    close to zero), the theta forecaster reverts to predicting the SES
    constant plus a linear trend derived from the training data.

    Prediction intervals are symmetric around the point forecast, with a
    half-width of a z-score times a horizon-dependent standard error.

    Parameters
    ----------
    initial_level : float, optional
        Forwarded to ``ExponentialSmoothing``. If the value is set then it is
        used as a known value, otherwise it is estimated from the data.
    deseasonalize : bool, optional (default=True)
        If True, data is seasonally adjusted.
    sp : int, optional (default=1)
        The number of observations that constitute a seasonal period for the
        multiplicative deseasonaliser. Ignored, with a warning if greater
        than 1, when ``deseasonalize`` is False. Default is 1 (no
        seasonality).

    Attributes
    ----------
    initial_level_ : float
        The ``smoothing_level`` parameter of the fitted SES model.
    trend_ : float
        Half of the leading coefficient of an order-1 trend fit to the
        (possibly deseasonalised) series.
    sigma_ : float
        Residual standard error of the SES fit; set when prediction errors
        are computed.
    deseasonalizer_ : Deseasonalizer or None
        The fitted deseasonaliser, set only when ``deseasonalize`` is True.
    drift_ : None
        Initialised in the constructor; not assigned anywhere in this class.
    se_ : None
        Initialised in the constructor; not assigned anywhere in this class.

    Notes
    -----
    NOTE(review): earlier documentation stated that the series is tested for
    seasonality (A&N test) before adjustment. No such test is visible in this
    class; confirm whether ``Deseasonalizer`` performs it.

    References
    ----------
    .. [1] Assimakopoulos, V. and Nikolopoulos, K. The theta model: a
       decomposition approach to forecasting. International Journal of
       Forecasting 16, 521-530, 2000.
       https://www.sciencedirect.com/science/article/pii/S0169207000000662
    .. [2] Hyndman, Rob J., and Billah, Baki. Unmasking the Theta method.
       International J. Forecasting, 19, 287-290, 2003.
       https://www.sciencedirect.com/science/article/pii/S0169207001001431

    Examples
    --------
    >>> from sktime.datasets import load_airline
    >>> from sktime.forecasting.theta import ThetaForecaster
    >>> y = load_airline()
    >>> forecaster = ThetaForecaster(sp=12)
    >>> forecaster.fit(y)
    ThetaForecaster(...)
    >>> y_pred = forecaster.predict(fh=[1,2,3])
    """

    _fitted_param_names = ("initial_level", "smoothing_level")
    _tags = {
        "ignores-exogeneous-X": True,
        "capability:pred_int": True,
        "requires-fh-in-fit": False,
        "handles-missing-data": False,
    }

    def __init__(self, initial_level=None, deseasonalize=True, sp=1):
        self.sp = sp
        self.deseasonalize = deseasonalize
        # Fitted state; populated by _fit / _update.
        self.deseasonalizer_ = None
        self.trend_ = None
        self.initial_level_ = None
        self.drift_ = None
        self.se_ = None
        super(ThetaForecaster, self).__init__(initial_level=initial_level, sp=sp)

    def _fit(self, y, X=None, fh=None):
        """Fit to training data.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        fh : int, list or np.array, optional (default=None)
            The forecasting horizon with the steps ahead to predict.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables are ignored.

        Returns
        -------
        self : returns an instance of self.
        """
        sp = check_sp(self.sp)
        if sp > 1 and not self.deseasonalize:
            # Name the constructor argument exactly as it is spelled.
            warn("`sp` is ignored when `deseasonalize`=False")

        if self.deseasonalize:
            self.deseasonalizer_ = Deseasonalizer(sp=self.sp, model="multiplicative")
            y = self.deseasonalizer_.fit_transform(y)

        # Compare against None rather than relying on truthiness: an explicit
        # initial_level of 0.0 is a supplied value and must count as "known".
        self.initialization_method = (
            "known" if self.initial_level is not None else "estimated"
        )

        # fit exponential smoothing forecaster
        # find theta lines: Theta lines are just SES + drift
        super(ThetaForecaster, self)._fit(y, fh=fh)
        self.initial_level_ = self._fitted_forecaster.params["smoothing_level"]

        # compute trend
        self.trend_ = self._compute_trend(y)

        return self

    def _predict(self, fh, X=None, return_pred_int=False, alpha=DEFAULT_ALPHA):
        """Make forecasts.

        Parameters
        ----------
        fh : array-like
            The forecasting horizon with the steps ahead to predict.
        X : pd.DataFrame, optional (default=None)
            Exogenous time series, forwarded to the parent forecaster.
        return_pred_int : bool, optional (default=False)
            If True, prediction intervals are returned as well.
        alpha : float or list, optional (default=DEFAULT_ALPHA)
            Level(s) passed to ``compute_pred_int``.

        Returns
        -------
        y_pred : pandas.Series
            Returns series of predicted values.
        pred_int : pd.DataFrame or list of pd.DataFrame
            Only returned if ``return_pred_int`` is True.
        """
        # Intervals are computed here, so never request them from the parent.
        y_pred = super(ThetaForecaster, self)._predict(
            fh, X, return_pred_int=False, alpha=alpha
        )

        # Add drift.
        drift = self._compute_drift()
        y_pred += drift

        if self.deseasonalize:
            # Map the forecast back to the original (seasonal) scale.
            y_pred = self.deseasonalizer_.inverse_transform(y_pred)

        if return_pred_int:
            pred_int = self.compute_pred_int(y_pred=y_pred, alpha=alpha)
            return y_pred, pred_int

        return y_pred

    @staticmethod
    def _compute_trend(y):
        """Return half of the leading coefficient of an order-1 trend fit."""
        # Trend calculated through least squares regression.
        # presumably coefs[0, 0] is the slope; confirm against _fit_trend
        coefs = _fit_trend(y.values.reshape(1, -1), order=1)
        return coefs[0, 0] / 2

    def _compute_drift(self):
        """Return the drift term for each step of ``self.fh`` past the cutoff."""
        fh = self.fh.to_relative(self.cutoff)
        if np.isclose(self.initial_level_, 0.0):
            # SES was constant, so revert to simple trend
            drift = self.trend_ * fh
        else:
            # Calculate drift from SES parameters
            n_timepoints = len(self._y)
            drift = self.trend_ * (
                fh
                + (1 - (1 - self.initial_level_) ** n_timepoints)
                / self.initial_level_
            )

        return drift

    def compute_pred_int(self, y_pred, alpha=DEFAULT_ALPHA):
        """Compute/return prediction intervals for a forecast.

        Must be run *after* the forecaster has been fitted.

        If alpha is iterable, multiple intervals will be calculated.

        public method including checks & utility
        dispatches to core logic in _compute_pred_err

        Parameters
        ----------
        y_pred : pd.Series
            Point predictions.
        alpha : float or list, optional (default=DEFAULT_ALPHA)
            A significance level or list of significance levels.

        Returns
        -------
        intervals : pd.DataFrame
            A table of upper and lower bounds for each point prediction in
            ``y_pred``. If ``alpha`` was not a single float, ``intervals``
            is a list of such tables.
        """
        self.check_is_fitted()
        alphas = check_alpha(alpha)
        errors = self._compute_pred_err(alphas)

        # compute prediction intervals
        pred_int = [
            pd.DataFrame({"lower": y_pred - error, "upper": y_pred + error})
            for error in errors
        ]

        # for a single alpha, return single pd.DataFrame
        if isinstance(alpha, float):
            return pred_int[0]

        # otherwise return list of pd.DataFrames
        return pred_int

    def _compute_pred_err(self, alphas):
        """Get the prediction errors for the forecast.

        Parameters
        ----------
        alphas : iterable of float
            Levels for which an error band is computed.

        Returns
        -------
        errors : list of pd.Series
            One series per level, indexed by the absolute forecasting horizon.
        """
        self.check_is_fitted()

        n_timepoints = len(self._y)

        # Residual standard error of the SES fit.
        self.sigma_ = np.sqrt(self._fitted_forecaster.sse / (n_timepoints - 1))
        sem = self.sigma_ * np.sqrt(
            self.fh.to_relative(self.cutoff) * self.initial_level_**2 + 1
        )

        errors = []
        for alpha in alphas:
            z = _zscore(1 - alpha)
            error = z * sem
            errors.append(pd.Series(error, index=self.fh.to_absolute(self.cutoff)))

        return errors

    def _update(self, y, X=None, update_params=True):
        """Update the stored series and, optionally, the trend estimate.

        Parameters
        ----------
        y : pd.Series
            New observations.
        X : pd.DataFrame, optional (default=None)
            Exogenous time series, forwarded to the parent update.
        update_params : bool, optional (default=True)
            If True, ``initial_level_`` and ``trend_`` are recomputed.

        Returns
        -------
        self : returns an instance of self.
        """
        super(ThetaForecaster, self)._update(
            y, X, update_params=False
        )  # use custom update_params routine
        if update_params:
            # Always recompute the trend from the full updated series. Using
            # only the newly passed ``y`` when not deseasonalising would fit
            # the trend to a fragment, unlike _fit and the branch below.
            y_full = self._y
            if self.deseasonalize:
                y_full = self.deseasonalizer_.transform(y_full)
            self.initial_level_ = self._fitted_forecaster.params["smoothing_level"]
            self.trend_ = self._compute_trend(y_full)
        return self