Esempio n. 1
0
    def fit(self, folds=3, thetas=(-2, -1, 0, 0.25, 0.5, 0.75, 1.25, 1.5, 1.75, 2)):
        """Select and fit the best theta model for every series in ``self.data``.

        Based on Kevin Sheppard's theta-model code. For each column, every
        candidate theta is scored with ``mse`` on the held-out part of each
        split produced by ``TimeSeriesSplit``. The theta with the lowest mean
        score is stored in ``self.best_theta[series]`` and the model estimated
        with that theta on the whole training window is stored in
        ``self.fitted[series]``.

        Parameters
        ----------
        folds : int, default 3
            Number of splits passed to ``TimeSeriesSplit``.
        thetas : tuple of float
            Candidate theta values to evaluate.

        Returns
        -------
        None
            Results are stored on the instance (``best_theta``, ``fitted``
            and ``fit_success``).
        """
        splitter = TimeSeriesSplit(n_splits=folds)
        # Options forwarded unchanged to ExponentialSmoothing.fit.
        fit_args = {"disp": False, "iprint": -1, "low_memory": True}

        for series in self.data.columns:
            x = self.data.loc[:self.train_ix[series] - 1, series]

            mspes = {theta: np.empty((folds, 1)) for theta in thetas}
            # One parameter table per fold. Sharing a single DataFrame object
            # between folds would let every fold overwrite the others.
            params = {
                fold: pd.DataFrame(None, index=["a0", "b0"], dtype=np.double)
                for fold in range(folds)
            }

            for fold_ix, (tr_ix, te_ix) in enumerate(splitter.split(x)):
                x_tr, x_te = x.iloc[tr_ix], x.iloc[te_ix]
                t = x_tr.shape[0]
                k = x_te.shape[0]

                # The smoothing fit, its forecast and the unscaled trend do
                # not depend on theta, so compute them once per fold.
                ses = ExponentialSmoothing(x_tr).fit(**fit_args)
                alpha = ses.params.smoothing_level
                ses_forecast = ses.forecast(k)
                base_trend = (
                    np.arange(k) + 1 / alpha - ((1 - alpha) ** t) / alpha
                )

                for theta in thetas:
                    # Only the slope b0 of the estimated model varies with
                    # theta in the forecast below.
                    params[fold_ix][theta] = self.estimate(x_tr, theta)
                    b0 = params[fold_ix][theta]["b0"]
                    trend = base_trend * (0.5 * b0)
                    forecast = np.array(ses_forecast + trend)
                    mspes[theta][fold_ix] = mse(x_te, forecast)

            # Average the per-fold errors into a separate mapping instead of
            # rebinding the dict that is being iterated.
            mean_mspes = {
                theta: np.mean(errors) for theta, errors in mspes.items()
            }

            self.best_theta[series] = min(mean_mspes, key=mean_mspes.get)
            self.fitted[series] = self.estimate(x, self.best_theta[series])

        # Flag success only once every series has been fitted.
        self.fit_success = True
Esempio n. 2
0
    def forecast(self, true_vals):
        """Forecast every fitted series over the horizon covered by ``true_vals``.

        Parameters
        ----------
        true_vals : pd.DataFrame
            Observed values for the forecast horizon. It is required (there is
            no default) and must have as many columns as ``self.data``, with
            the same column labels. Its number of rows sets the number of
            forecast steps. The rows are assumed to directly follow the
            training data; values are taken in row order, so the index labels
            of ``true_vals`` do not matter.

        Returns
        -------
        None
            For each series, a DataFrame holding the true values (column
            named after the series) and the forecasts (column ``"Theta"``) is
            stored in ``self.forecasts[series]``, and
            ``self.forecasts_generated`` is set to True.

        Raises
        ------
        AssertionError
            If the model has not been fitted or the column counts differ.
        """
        assert self.fit_success, "Please fit model before forecasting"
        assert self.data.shape[1] == true_vals.shape[1], "Dimension mismatch"

        steps = true_vals.shape[0]
        # Options forwarded unchanged to ExponentialSmoothing.fit.
        fit_args = {"disp": False, "iprint": -1, "low_memory": True}

        for series in self.data.columns:
            # Training window of this series and its length
            x = self.data.loc[:self.train_ix[series] - 1, series]
            t = x.shape[0]

            # Frame in which to save the true values next to the forecasts
            res = pd.DataFrame(index=np.arange(steps), columns=[series, "Theta"])
            # Assign positionally: ``res`` is indexed 0..steps-1, so a
            # label-aligned assignment would yield NaN whenever ``true_vals``
            # carries an index that continues the training data.
            res.loc[:, series] = true_vals.loc[:, series].values

            # Smoothing parameter and smoothing forecast
            ses = ExponentialSmoothing(x).fit(**fit_args)
            alpha = ses.params.smoothing_level
            ses_forecast = ses.forecast(steps)

            # Trend term scaled by the slope of the fitted model
            b0 = self.fitted[series]["b0"]
            trend = np.arange(steps) + 1 / alpha - ((1 - alpha) ** t) / alpha
            trend *= 0.5 * b0
            res.loc[:, "Theta"] = (ses_forecast + trend).values
            self.forecasts[series] = res

        self.forecasts_generated = True
Esempio n. 3
0
    def fit(self,
            use_mle: bool = False,
            disp: bool = False) -> "ThetaModelResults":
        r"""
        Estimate the parameters of the Theta model.

        Parameters
        ----------
        use_mle : bool, default False
            If True, estimate the parameters by MLE, fitting an ARIMA(0,1,1)
            with a drift.  If False (the default), use two steps: an OLS
            regression on a constant and a time trend, and a SES fitted to
            the model data.
        disp : bool, default False
            Display iterative output from fitting the model.

        Returns
        -------
        ThetaModelResults
            Model results and forecasting

        Notes
        -----
        With MLE, the parameters come from the ARIMA(0,1,1)

        .. math::

           X_t = X_{t-1} + b_0 + (\alpha-1)\epsilon_{t-1} + \epsilon_t

        With 2-step estimation, the parameters come from the OLS regression

        .. math::

           X_t = a_0 + b_0 (t-1) + \eta_t

        and the SES

        .. math::

           \tilde{X}_{t+1} = \alpha X_{t} + (1-\alpha)\tilde{X}_{t}
        """
        run_seasonality_test = self._deseasonalize and self._use_test
        if run_seasonality_test:
            self._test_seasonality()
        y, seasonal = self._deseasonalize_data()

        if not use_mle:
            # Step 1: OLS on the first two trend columns; shifting the second
            # column by one matches the (t-1) regressor in the Notes.
            design = add_trend(y, "ct", prepend=True)[:, :2]
            design[:, 1] -= 1
            ols_solution = np.linalg.lstsq(design, y, rcond=None)[0]
            _, b0 = ols_solution
            # Step 2: SES with the initial level fixed at the first value.
            ses_model = ExponentialSmoothing(
                y, initial_level=y[0], initialization_method="known")
            ses_res = ses_model.fit(disp=disp)
            alpha = ses_res.params[0]
            sigma2 = None
            one_step = ses_res.forecast(1)
        else:
            arima = SARIMAX(y, order=(0, 1, 1), trend="c")
            arima_res = arima.fit(disp=disp)
            estimates = np.asarray(arima_res.params)
            # The MA coefficient equals (alpha - 1) in the Notes, so add 1.
            alpha = estimates[1] + 1
            if alpha > 1:
                # Refit with alpha pinned just below one.
                alpha = 0.9998
                arima_res = arima.fit_constrained({"ma.L1": alpha - 1})
                estimates = np.asarray(arima_res.params)
            b0 = estimates[0]
            sigma2 = estimates[-1]
            one_step = arima_res.forecast(1) - b0

        return ThetaModelResults(b0, alpha, sigma2, one_step, seasonal,
                                 use_mle, self)
Esempio n. 4
0
# Get Residuals
#~~~~~~~~~~~~~~
# Difference between log_close and the full model's predictions requested
# with arguments 0 and len(close) - 1 (presumably start/end indices; confirm
# against the model's predict signature).
residuals = log_close - full_model.predict(0, len(close) - 1)

# Residual Analysis
#~~~~~~~~~~~~~~~~~~
# ResidualAnalysis is defined outside this fragment; it is called here with
# nlags=252.
ResidualAnalysis(datetime, residuals, nlags=252)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
### Model Validation
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
print('Running Model Validation\n------------------------')

# Get Model Predictions
#~~~~~~~~~~~~~~~~~~~~~~
# Forecast from test_model with validation_size as the argument (presumably
# the number of steps; confirm).
pred_close = test_model.forecast(validation_size)

# Get Errors
#~~~~~~~~~~~
# Forecast error on the validation data, plus its mean and standard deviation.
error = validation_close - pred_close
err_mu, err_sigma = error.mean(), error.std()

# Plot Predictions
#~~~~~~~~~~~~~~~~~
# New figure; first curve is the last validation_size points of the training
# data, labelled 'Training'.
plt.figure()
plt.plot(training_datetime[-validation_size:],
         training_close[-validation_size:],
         'b',
         linewidth=1,
         label='Training')
plt.plot(validation_datetime,