Beispiel #1
0
def run_arima(dept_id, store_id):
    """Forecast 28 steps for one department/store with a two-model ARIMA blend.

    The ``y`` column of the series returned by ``CreateTimeSeries`` is shifted
    by +1 before fitting; the shift is removed again from the blended
    forecast.  Two SARIMAX models (orders (3, 1, 5) and (2, 1, 2), both with a
    constant trend) are fitted and their forecasts averaged with equal weight.

    Returns:
        A 1-D array: ``[dept_id, store_id]`` followed by the 28 forecasts.
    """
    horizon = 28
    ts = CreateTimeSeries(dept_id, store_id)
    shifted = ts['y'].values + 1

    # Fit each candidate order on the shifted series and forecast the horizon.
    forecasts = [
        SARIMAX(shifted, order=arima_order, trend='c').fit().forecast(horizon)
        for arima_order in ((3, 1, 5), (2, 1, 2))
    ]
    y_pred = 0.5 * forecasts[0] + 0.5 * forecasts[1]

    ids = np.array([dept_id, store_id])
    return np.append(ids, y_pred - 1)
Beispiel #2
0
 def initialize(self, order, seasonal_order):
     """Store the orders, build a SARIMAX model on ``self.ts`` and fit it.

     The model is built with stationarity/invertibility enforcement and
     simple differencing all switched off, and fitted with
     ``low_memory=True``.  The model is kept on ``self.model`` and the fit
     result on ``self.results``.
     """
     self.order, self.seasonal_order = order, seasonal_order
     relaxed_options = dict(
         enforce_stationarity=False,
         enforce_invertibility=False,
         simple_differencing=False,
     )
     self.model = SARIMAX(
         self.ts,
         order=self.order,
         seasonal_order=self.seasonal_order,
         **relaxed_options,
     )
     self.results = self.model.fit(low_memory=True)
Beispiel #3
0
    def order_select(self, order_sum_max=15):
        """Greedy forward search over (p, d, q, P, D, Q) minimising BIC.

        Starting from all zeros, each round tries incrementing every one of
        the six components by one, fits a SARIMAX model on ``self.ts`` for
        each candidate (seasonal period ``self.period``) and keeps the
        candidate with the lowest BIC.  The search stops as soon as a round
        fails to improve the best BIC so far, or once the components sum to
        ``order_sum_max``.

        Args:
            order_sum_max: Upper bound on the sum of the six order components.

        Side effects:
            ``self.order_sum_max`` stores the bound, ``self.bic_value`` holds a
            DataFrame with one row per fitted candidate (columns
            ``seasonal_order`` and ``bic``), and ``self.order`` holds the
            selected 6-tuple.
        """
        self.order_sum_max = order_sum_max
        columns = ['seasonal_order', 'bic']
        # Defined up front so the attribute exists even if a fit raises.
        self.bic_value = pd.DataFrame(columns=columns)

        current_order = (0, 0, 0, 0, 0, 0)
        best_bic = np.inf
        history = []

        # Re-evaluate the sum every round so the cap is actually enforced.
        while sum(current_order) < order_sum_max:
            round_bic, round_order = np.inf, None
            for position in range(len(current_order)):
                candidate = list(current_order)
                candidate[position] += 1
                model = SARIMAX(self.ts,
                                order=candidate[:3],
                                seasonal_order=candidate[3:] + [self.period],
                                enforce_stationarity=False)
                results = model.fit(low_memory=True)
                history.append([candidate, results.bic])
                # Compare the BIC values only; comparing (bic, order) tuples
                # can fall through to comparing a list against None on ties.
                if results.bic < round_bic:
                    round_bic, round_order = results.bic, candidate

            if round_order is None or not round_bic < best_bic:
                break
            best_bic = round_bic
            current_order = tuple(round_order)
            print(f"{round_order} is chosed with bic={best_bic}")

        # One row per candidate; built once instead of concatenating per fit.
        self.bic_value = pd.DataFrame(history, columns=columns)
        self.order = current_order
        print(f"best order is {self.order}")
Beispiel #4
0
 def test_seasonal_arima4(self):
     # Fit a (1, 0, 1)x(1, 0, 1, 12) SARIMAX model, export it with
     # StatsmodelsToPmml and check that the output file was written.
     pmml_path = 'seasonal_arima4.pmml'
     series = self.getData5()
     fitted = SARIMAX(
         endog=series,
         order=(1, 0, 1),
         seasonal_order=(1, 0, 1, 12),
     ).fit(disp=False)
     StatsmodelsToPmml(fitted, pmml_path)
     file_written = os.path.isfile(pmml_path)
     self.assertEqual(file_written, True)
Beispiel #5
0
 def test_seasonal_arima2(self):
     # Fit a (3, 1, 1)x(3, 1, 1, 12) SARIMAX model on the helper's seasonal
     # data, export it with ArimaToPMML (conf_int=[80]) and validate the
     # resulting file against self.schema.
     pmml_path = 'seasonal_arima2.pmml'
     seasonal_series = self.statsmodels_data_helper.get_seasonal_data()
     model_spec = dict(
         endog=seasonal_series,
         exog=None,
         order=(3, 1, 1),
         seasonal_order=(3, 1, 1, 12),
     )
     fitted = SARIMAX(**model_spec).fit()
     ArimaToPMML(fitted, pmml_path, conf_int=[80])
     schema_ok = self.schema.is_valid(pmml_path)
     self.assertEqual(schema_ok, True)
 def test_seasonal_arima2(self):
     # Fit a (1, 0, 1)x(1, 1, 1, 12) SARIMAX model, export it with
     # ArimaToPMML (conf_int=[95]) and check that the file was written.
     pmml_path = 'seasonal_arima2.pmml'
     series = self.getData5()
     fitted = SARIMAX(
         endog=series, order=(1, 0, 1), seasonal_order=(1, 1, 1, 12)
     ).fit(disp=False)
     ArimaToPMML(fitted, pmml_path, conf_int=[95])
     file_written = os.path.isfile(pmml_path)
     self.assertEqual(file_written, True)
Beispiel #7
0
 def test_seasonal_arima1(self):
     # Fit a (3, 1, 1)x(3, 1, 1, 12) SARIMAX model with a constant trend on
     # the helper's seasonal data, export it with StatsmodelsToPmml and
     # validate the resulting file against self.schema.
     pmml_path = 'seasonal_arima1.pmml'
     seasonal_series = self.statsmodels_data_helper.get_seasonal_data()
     model_spec = dict(
         endog=seasonal_series,
         exog=None,
         order=(3, 1, 1),
         seasonal_order=(3, 1, 1, 12),
         trend='c',
     )
     fitted = SARIMAX(**model_spec).fit()
     StatsmodelsToPmml(fitted, pmml_path)
     schema_ok = self.schema.is_valid(pmml_path)
     self.assertEqual(schema_ok, True)
Beispiel #8
0
    def SARIMA_f(self, df, pdq, s):
        """Fit a SARIMAX model on ``df['Actual']`` and record a one-step forecast.

        On success a dict with the cluster, warehouse, year, week and forecast
        is appended to ``self.df_forecast`` and a DEBUG line is printed.  On
        failure an ERROR line (now including the exception) is printed and
        nothing is appended.

        Args:
            df: Table with an ``'Actual'`` column; also passed to
                ``generate_attrib`` to obtain the record attributes.
            pdq: Non-seasonal order forwarded to ``SARIMAX(order=...)``.
            s: Seasonal order forwarded to ``SARIMAX(seasonal_order=...)``.

        Returns:
            None in every case.
        """
        try:
            sarima_mod = SARIMAX(np.array(df['Actual']), order=pdq, seasonal_order=s)
            fit_sarima = sarima_mod.fit(use_boxcox=True, disp=0)
            forecast = fit_sarima.forecast()[0]

            Cluster, Warehouse, WF, YF = generate_attrib(df)
            self.df_forecast.append({'Cluster':Cluster, 'Warehouse':Warehouse, 'Year':YF, "Week": WF, "Forecast":forecast})
            print(f'DEBUG:Forecast:{Cluster}:{Warehouse}:{YF}:{WF}:{forecast}')
        except Exception as exc:
            # Best-effort forecasting: keep going on model failures, but do
            # not swallow KeyboardInterrupt/SystemExit, and report the cause.
            print(f"ERROR:FORECAST-SARIMA:{exc!r}")
        return None
Beispiel #9
0
    def sarima_(self):
        """Fit SARIMAX on the second column of ``self.df``, forecast and plot.

        Uses ``self.pdq_`` / ``self.PDQ_`` as the non-seasonal / seasonal
        orders.  The fit result is stored on ``self.results`` and the
        ``self.forecast_num``-step forecast on ``self.predict_``; the forecast
        and the observed column are then drawn on one figure.
        """
        observed = self.df.iloc[:, 1]
        model = SARIMAX(observed, order=self.pdq_, seasonal_order=self.PDQ_)
        print(f'the parameters: SARIMA{self.pdq_}x{self.PDQ_}', '\n')

        self.results = model.fit()
        self.predict_ = self.results.forecast(self.forecast_num)

        # Forecast first, then the observed series, so the legend order matches.
        _, axis = plt.subplots(figsize=(20, 6))
        axis = self.predict_.plot(ax=axis)
        observed.plot(ax=axis)
        plt.legend(['y_pred', 'y_true'])
        plt.show()
 def test_seasonal_arima5(self):
     # Fit a (0, 0, 1)x(3, 1, 1, 12) SARIMAX model with a constant trend,
     # export it with ArimaToPMML and check that the file was written.
     pmml_path = 'seasonal_arima5.pmml'
     series = self.getData5()
     model_spec = dict(
         endog=series,
         order=(0, 0, 1),
         seasonal_order=(3, 1, 1, 12),
         trend='c',
     )
     fitted = SARIMAX(**model_spec).fit(disp=False)
     ArimaToPMML(fitted, pmml_path)
     file_written = os.path.isfile(pmml_path)
     self.assertEqual(file_written, True)
def train_SARIMA_model(data, country=None):
    """Fit a SARIMAX model on ``data``, pickle the result and log the run.

    The orders come from the module-level ``ARIMA_ORDER`` and
    ``SARIMA_SEASONAL_ORDER``.  The fitted result is pickled into
    ``MODELS_DIRECTORY`` as ``sarima_<country>.pickle`` (spaces in the country
    replaced by underscores) or ``sarima.pickle`` when no country is given,
    and the run is recorded through ``log_train``.
    """
    fitted = SARIMAX(
        data, order=ARIMA_ORDER, seasonal_order=SARIMA_SEASONAL_ORDER
    ).fit()

    file_name = 'sarima.pickle'
    if country:
        file_name = 'sarima_' + country.replace(' ', '_') + '.pickle'
    pickle_file = os.path.join(MODELS_DIRECTORY, file_name)

    with open(pickle_file, 'wb') as handle:
        pickle.dump(fitted, handle)
    log_train(TRAIN_LOG, 'sarima', pickle_file, data.size)
def train_SARIMA_model(data,
                       order,
                       seasonal_order,
                       directory_models,
                       country=None):
    ''' Fit a SARIMAX model, save the fitted result and return it.

    The result is saved under ``directory_models`` (plain string
    concatenation, so the directory must already end with a separator) as
    ``sarima_<country>.pickle``, or ``sarima.pickle`` when ``country`` is
    falsy.  The run is recorded through ``log_train`` with ``data.shape``.
    '''
    sarima_model = SARIMAX(data, order=order,
                           seasonal_order=seasonal_order).fit()

    file_name = 'sarima.pickle'
    if country:
        file_name = 'sarima_' + country + '.pickle'
    sarima_model.save(directory_models + file_name)

    log_train('sarima', data.shape, {})
    return sarima_model
Beispiel #13
0
    def sarima(self):
        """Fit SARIMAX on the first column of ``self.df``, forecast and plot.

        Uses ``self.param`` / ``self.param_seasonal`` as the non-seasonal /
        seasonal orders and passes ``low_memory=True`` to the model
        constructor.  The fit result is stored on ``self.results`` and the
        ``self.forecast_num``-step forecast on ``self.predict_``; the forecast
        and the observed column are then drawn on one figure.
        """
        observed = self.df.iloc[:, 0]
        model = SARIMAX(
            observed,
            order=self.param,
            seasonal_order=self.param_seasonal,
            low_memory=True,
        )
        print(f'the best parameters: SARIMA{self.param}x{self.param_seasonal}')

        self.results = model.fit()
        self.predict_ = self.results.forecast(self.forecast_num)

        # Forecast first, then the observed series, so the legend order matches.
        _, axis = plt.subplots(figsize=(30, 6))
        axis = self.predict_.plot(ax=axis)
        observed.plot(ax=axis)
        plt.legend(['y_pred', 'y_true'])
        plt.show()
Beispiel #14
0
def sarima(data, steps):
    """Fit a SARIMA model for reporting and score a rolling ARIMA baseline.

    A SARIMAX(2, 0, 1)x(0, 1, 1, 7) model is fitted on the full series and
    its summary printed.  The last 20% of ``data`` is then used as a hold-out:
    for each hold-out position an ARIMA(2, 0, 1) model is refitted on all
    observations strictly before that position and its one-step forecast is
    recorded.

    Args:
        data: Series of observations (needs ``.values``, ``.index`` and
            slicing by position).
        steps: Unused; kept so existing callers keep working.

    Returns:
        dict with keys ``model``, ``index``, ``actual``, ``predicted`` and
        ``rmse`` describing the rolling baseline forecast.
    """
    model = SARIMAX(endog=data.values,
                    order=(2, 0, 1),
                    seasonal_order=(0, 1, 1, 7),
                    enforce_invertibility=False)
    sarima_fit = model.fit()
    print(sarima_fit.summary())

    # Rolling forecast over the last 20% of the series.
    end = int(0.2 * len(data))
    split = len(data) - end
    actual_values = data[split:]
    indexes = actual_values.index

    pred_values = []
    for i in range(end):
        # Train on everything before hold-out position ``i``.  The window is
        # chosen at the top of the iteration; updating it at the bottom with
        # ``data[:-end + i]`` reused the initial window for i == 1 and left
        # every later forecast one observation behind its target.
        history = data[:split + i]
        arima_fit = ARIMA(history, (2, 0, 1)).fit()

        # NOTE(review): ``forecast()[0][0]`` assumes forecast() returns a
        # tuple whose first item is the forecast array - confirm against the
        # ARIMA class imported by this file.
        pred_values.append(arima_fit.forecast()[0][0])

    pred_values = pd.Series(pred_values)
    pred_values.index = indexes

    rmse = rms(actual_values, pred_values)
    print("RMSE VALUE", rmse)
    print(len(pred_values))
    return {
        "model": "Baseline",
        "index": list(indexes),
        "actual": list(actual_values.values),
        "predicted": list(pred_values),
        "rmse": rmse
    }
Beispiel #15
0
    def params_select(self):
        """Grid-search SARIMAX orders on the first column of ``self.df`` by AIC.

        Every (p, d, q) from ``range(p_max) x range(d_max) x range(q_max)`` is
        combined with every seasonal (P, D, Q, ``self.period``) from the same
        ranges.  Each combination is fitted, its AIC printed, and the
        combination with the lowest AIC is stored on ``self.param`` and
        ``self.param_seasonal``.  ``self.p`` and ``self.q`` are set to the
        searched ranges.

        Raises:
            ValueError: If the grid is empty (any of ``p_max``, ``d_max``,
                ``q_max`` is 0), so there is nothing to select from.
        """
        self.p = range(0, self.p_max)
        self.q = range(0, self.q_max)
        d = range(0, self.d_max)
        # All (p, d, q) triplets, and the same triplets with the period
        # appended for the seasonal part.
        pdq = list(itertools.product(self.p, d, self.q))
        seasonal_pdq = [(sp, sd, sq, self.period) for sp, sd, sq in pdq]

        endog = self.df.iloc[:, 0]
        rows = []
        for param in pdq:
            for param_seasonal in seasonal_pdq:
                model = SARIMAX(endog,
                                order=param,
                                seasonal_order=param_seasonal)
                results = model.fit(low_memory=True)
                print('SARIMA{}x{} - AIC:{}'.format(param, param_seasonal,
                                                    results.aic))
                rows.append([param, param_seasonal, results.aic])

        if not rows:
            raise ValueError(
                'empty parameter grid: p_max, d_max and q_max must all be > 0')

        # Build the table once (default 0..n-1 index) instead of concatenating
        # per fit and re-deriving the row count by hand.
        aic_value = pd.DataFrame(rows,
                                 columns=['param', 'param_seasonal', 'aic'])
        # idxmin returns the first row with the lowest AIC and skips NaN.
        best = aic_value.loc[aic_value['aic'].idxmin()]
        self.param = best['param']
        self.param_seasonal = best['param_seasonal']
# \end{align*}
# $$
#
# The parameters on deterministic terms are not directly comparable to
# `AutoReg` which evolves according to the equation
#
# $$
# (1-\phi(L)) y_t = x_t \beta + \epsilon_t.
# $$
#
# When $x_t$ contains only deterministic terms, these two representations
# are equivalent (assuming $\theta(L)=0$ so that there is no MA).
#

from statsmodels.tsa.api import SARIMAX

# Build the deterministic terms from ``idx`` with ``period=52`` and
# ``fourier=2``; the in-sample terms are passed to the model as ``exog``.
det_proc = DeterministicProcess(idx, period=52, fourier=2)
det_terms = det_proc.in_sample()

# Order (1, 0, 0) with a constant trend plus the deterministic regressors.
mod = SARIMAX(y, order=(1, 0, 0), trend="c", exog=det_terms)
res = mod.fit(disp=False)
print(res.summary())

# The forecasts are similar but differ since the parameters of the
# `SARIMAX` are estimated using MLE while `AutoReg` uses OLS.

# Forecasting 12 steps needs the matching 12 out-of-sample deterministic
# terms as ``exog``.  The result is placed next to ``auto_reg_forecast``
# (defined outside this snippet) for a side-by-side comparison.
sarimax_forecast = res.forecast(12, exog=det_proc.out_of_sample(12))
df = pd.concat([auto_reg_forecast, sarimax_forecast], axis=1)
df.columns = columns = ["AutoReg", "SARIMAX"]
df
Beispiel #17
0
def get_feature_SARIMA_residuals(time_series):
    """Return ``time_series`` minus the in-sample SARIMAX predicted mean.

    The model is built with ``trend='n'`` and otherwise default settings,
    fitted on the series itself, and its ``get_prediction()`` mean is
    subtracted from the input.
    """
    fitted = SARIMAX(time_series, trend='n').fit()
    in_sample = fitted.get_prediction()
    residuals = time_series - in_sample.predicted_mean
    return residuals
# Build a series with AR(1) errors around a constant level:
# epsilon[i] = rho * epsilon[i - 1] + eta[i], and y = beta + epsilon.
# ``eta`` is defined outside this snippet.
rho = 0.8
beta = 10
epsilon = eta.copy()
# epsilon[0] keeps eta[0]; every later value follows the recursion above.
for i in range(1, eta.shape[0]):
    epsilon[i] = rho * epsilon[i - 1] + eta[i]
y = beta + epsilon
# Drop the first 200 observations (presumably a burn-in period - confirm).
y = y[200:]

from statsmodels.tsa.api import SARIMAX, AutoReg
from statsmodels.tsa.arima.model import ARIMA

# The three models are specified and estimated in the next cell.  An AR(0)
# is included as a reference. The AR(0) is identical using all three
# estimators.

# Fit the same series four ways: SARIMAX with order (0, 0, 0) as the
# reference, then order/lag 1 with SARIMAX, ARIMA and AutoReg, each with a
# constant trend.
ar0_res = SARIMAX(y, order=(0, 0, 0), trend="c").fit()
sarimax_res = SARIMAX(y, order=(1, 0, 0), trend="c").fit()
arima_res = ARIMA(y, order=(1, 0, 0), trend="c").fit()
autoreg_res = AutoReg(y, 1, trend="c").fit()

# The table below contains the estimated parameter in the model, the
# estimated AR(1) coefficient, and the long-run mean which is either equal
# to the estimated parameters (AR(0) or `ARIMA`), or depends on the ratio of
# the intercept to 1 minus the AR(1) parameter.

# First estimated parameter of each fitted model, in the order fitted above.
intercept = [
    ar0_res.params[0],
    sarimax_res.params[0],
    arima_res.params[0],
    autoreg_res.params[0],
]
Beispiel #19
0
#
# * Specification of seasonal and nonseasonal AR and MA components
# * Inclusion of Exogenous variables
# * Full maximum-likelihood estimation using the Kalman Filter
#
# This model is more feature rich than `AutoReg`. Unlike `SARIMAX`,
# `AutoReg` estimates parameters using OLS.  This is faster and the problem
# is globally convex, and so there are no issues with local minima. The
# closed-form estimator and its performance are the key advantages of
# `AutoReg` over `SARIMAX` when comparing AR(P) models.  `AutoReg` also
# supports seasonal dummies, which can be used with `SARIMAX` if the user
# includes them as exogenous regressors.

from statsmodels.tsa.api import SARIMAX

# The AR part of ``order`` is given as a tuple of specific lags
# (1, 5, 12, 13) rather than a single integer; d and q are 0.
sarimax_mod = SARIMAX(ind_prod, order=((1, 5, 12, 13), 0, 0), trend="c")
sarimax_res = sarimax_mod.fit()
print(sarimax_res.summary())

# Drop the last SARIMAX parameter (presumably the innovation variance, which
# has no AutoReg counterpart - confirm) and reuse the AutoReg parameter names
# from ``res_glob`` (defined outside this snippet) so both columns line up.
sarimax_params = sarimax_res.params.iloc[:-1].copy()
sarimax_params.index = res_glob.params.index
params = pd.concat([res_glob.params, sarimax_params], axis=1, sort=False)
params.columns = ["AutoReg", "SARIMAX"]
params

# ## Custom Deterministic Processes
#
# The `deterministic` parameter allows a custom `DeterministicProcess` to
# be used. This allows for more complex deterministic terms to be
# constructed, for example one that includes seasonal components with two
# periods, or, as the next example shows, one that uses a Fourier series
Beispiel #20
0
    def fit(self, X, y=None):
        """Fit the model selected by ``self.mode`` on ``X``.

        ``self.mode`` is a base mode (``'ll'``, ``'lla'``, ``'lls'``,
        ``'llt'``, ``'llc'``, ``'arima'`` or ``'rw1'``) optionally followed by
        suffixes: ``'*f'`` caps ``X`` at its 75th percentile, ``'*ln'``
        applies ``log(x + 1)`` and ``'*bc'`` applies a Box-Cox transform
        (``'*ln'`` wins if both are present).  Suffixes may be combined; the
        cap is applied first and the transform works on the capped data.

        Args:
            X: Observations to fit on.
            y: Unused; present for estimator-style call compatibility.

        Returns:
            ``self``.  ``self.X`` holds the (possibly transformed) data,
            ``self.res_`` the fit result (``None`` for ``'rw1'``) and
            ``self.converged`` whether the fit converged.
        """
        self.X = X
        mode = self.mode
        # Strip each suffix from the running ``mode`` (not from self.mode) so
        # that combined suffixes still reduce to the bare base mode.
        if '*f' in self.mode:
            # Top-percentile ceiling.
            self.X = np.minimum(self.X, np.percentile(self.X, 75))
            mode = mode.partition('*f')[0]
        # Transformations act on self.X so a preceding ceiling is kept.
        if '*ln' in self.mode:
            self.X = np.log(np.array(self.X) + 1)
            mode = mode.partition('*ln')[0]
        elif '*bc' in self.mode:
            transformer = pm.preprocessing.BoxCoxEndogTransformer()
            self.X = transformer.fit_transform(y=self.X)
            self.transformer = transformer
            mode = mode.partition('*bc')[0]

        try:
            if mode == 'll':
                # Local Level
                model = LocalLevel(self.X)
                self.res_ = model.fit(disp=False)
                self.k_exog = None
            elif mode == 'lla':
                # Local level with the two previous observations as regressors.
                # NOTE(review): this branch reads the raw ``X`` rather than
                # the capped/transformed ``self.X``; left unchanged because
                # the matching predict code is not visible here - confirm.
                endog = X[2:]
                exog = np.column_stack((X[1:-1], X[:-2]))
                self.k_exog = exog.shape[1]
                model = UnobservedComponents(endog=endog,
                                             exog=exog,
                                             level='local level')
                self.res_ = model.fit(disp=False)
            elif mode == 'lls':
                self.k_exog = None
                model = SARIMAX(endog=self.X,
                                order=(2, 0, 0),
                                trend='c',
                                measurement_error=True)
                self.res_ = model.fit(disp=False)
            elif mode == 'llt':
                # Local Linear Trend
                model = UnobservedComponents(endog=self.X,
                                             level='local linear trend')
                self.res_ = model.fit(disp=False)
            elif mode == 'llc':
                # Local Level Cycle
                model = UnobservedComponents(endog=self.X,
                                             level='local level',
                                             cycle=True,
                                             stochastic_cycle=True)
                self.res_ = model.fit(disp=False)
            elif mode == 'arima':
                self.res_ = pm.auto_arima(self.X,
                                          start_p=1,
                                          start_q=1,
                                          start_P=1,
                                          start_Q=1,
                                          max_p=5,
                                          max_q=5,
                                          max_P=5,
                                          max_Q=5,
                                          seasonal=True,
                                          stepwise=True,
                                          suppress_warnings=True,
                                          D=10,
                                          max_D=10,
                                          error_action='ignore')
            elif mode == 'rw1':
                # Random walk: nothing to estimate.
                self.res_ = None
                self.converged = False
        except np.linalg.LinAlgError:
            # Some kalman filter error ==> Use random walk
            print(f'Convergence failed for {mode}')
            self.converged = False
            return self
        try:
            self.converged = self.res_.mle_retvals['converged']
        except AttributeError:
            # No MLE diagnostics available: auto ARIMA from pmdarima is
            # treated as always converged; anything else (random walk,
            # unknown mode) is reported as not converged so the attribute is
            # always defined after fit().
            self.converged = mode == 'arima'
        return self
# Plot the ACF and PACF (36 lags) of the seasonally differenced series side
# by side.  The first 12 rows are skipped (presumably the values lost to the
# lag-12 differencing - confirm).
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
fig = sm.graphics.tsa.plot_acf(bike['seasonal_diff'].iloc[12:],
                               lags=36,
                               ax=axes[0])
fig = sm.graphics.tsa.plot_pacf(bike['seasonal_diff'].iloc[12:],
                                lags=36,
                                ax=axes[1])

# Create a dataframe to iterate over potential values of p and q for the ARIMA(p,d,q)(P,D,Q)m
# model in order to select the optimal model (lowest AICc value). Note that we are not iterating
# over different values of the seasonal parameters.
# Rows are indexed by p (0-2) and columns by q (0-1).
sarima_models = pd.DataFrame(np.zeros((3, 2), dtype=float))
for p in range(3):
    for q in range(2):
        fit_sarima = SARIMAX(bike['num_rides'],
                             order=(p, 0, q),
                             seasonal_order=(1, 1, 0, 12)).fit()

        # Record NaN when the AICc cannot be read from the fitted result.
        try:
            sarima_models.iloc[p, q] = fit_sarima.aicc
        except:
            sarima_models.iloc[p, q] = np.nan

sarima_models

# The model with the lowest AICc had p = q = 1, so we fit an ARIMA(1,0,1)(1,1,0)12 model
fit_sarima = SARIMAX(bike['num_rides'],
                     order=(1, 0, 1),
                     seasonal_order=(1, 1, 0, 12)).fit()

fit_sarima.summary()