Example #1
0
class Sarima:
    """Stateful convenience wrapper around a statsmodels ``SARIMAX`` fit.

    The instance keeps the training series, the fitted results object and an
    optional caller-defined marker for the last timestamp covered by the
    data.  :meth:`initialize` must be called before :meth:`forecast`,
    :meth:`append` or :meth:`apply`, because those methods read
    ``self.results``.
    """

    def __init__(self, ts, last_timestamp=None):
        """Store the training series; no model is built or fitted here.

        Args:
            ts: Series later passed to ``SARIMAX`` in :meth:`initialize`.
            last_timestamp: Optional marker for the last observation in
                ``ts``; stored as-is.
        """
        self.ts = ts
        self.last_timestamp = last_timestamp

    def initialize(self, order, seasonal_order):
        """Build the ``SARIMAX`` model on ``self.ts`` and fit it.

        Args:
            order: Non-seasonal order forwarded to ``SARIMAX``.
            seasonal_order: Seasonal order forwarded to ``SARIMAX``.

        Stores the model in ``self.model`` and the fit in ``self.results``.
        """
        self.order = order
        self.seasonal_order = seasonal_order
        self.model = SARIMAX(
            self.ts,
            order=self.order,
            seasonal_order=self.seasonal_order,
            enforce_stationarity=False,
            enforce_invertibility=False,
            simple_differencing=False,
        )
        self.results = self.model.fit(low_memory=True)

    def forecast(self, n=1):
        """Return whatever ``self.results.forecast(n)`` produces.

        Args:
            n: Forwarded unchanged to the results object's ``forecast``.
        """
        return self.results.forecast(n)

    def append(self, new_ts, last_timestamp=None, refit=False):
        """Extend the fitted results with new observations.

        Args:
            new_ts: New observations forwarded to ``results.append``.
            last_timestamp: New marker for the last observation.  ``None``
                keeps the current marker.
            refit: Forwarded to ``results.append``.
        """
        # Compare against None explicitly so that falsy but valid markers
        # (for example the integer 0) are not silently discarded.
        if last_timestamp is not None:
            self.last_timestamp = last_timestamp
        self.results = self.results.append(new_ts, refit=refit)

    def apply(self, new_init_ts, last_timestamp=None):
        """Replace the results with ``results.apply(new_init_ts)``.

        Args:
            new_init_ts: Series stored in ``self.init_ts`` and forwarded to
                ``results.apply``.
            last_timestamp: Marker for the new series.  Unlike
                :meth:`append`, the stored marker is always overwritten, so
                omitting it resets the marker to ``None``.
        """
        self.init_ts = new_init_ts
        self.results = self.results.apply(new_init_ts)
        self.last_timestamp = last_timestamp
Example #2
0
    def order_select(self, order_sum_max=15):
        """Greedy forward search over SARIMA orders, scored by BIC.

        Starting from the all-zero order ``(p, d, q, P, D, Q)``, each round
        fits the six candidates obtained by incrementing exactly one
        component and keeps the candidate with the lowest BIC.  The search
        stops when no candidate improves on the best BIC found so far, or
        when the sum of the six components reaches ``order_sum_max``.

        Reads ``self.ts`` (the series) and ``self.period`` (the seasonal
        period appended to the seasonal part of every candidate).

        Args:
            order_sum_max: Upper bound on the sum of the six order
                components; also stored in ``self.order_sum_max``.

        Side effects:
            ``self.bic_value`` becomes a DataFrame with one row per fitted
            candidate (columns ``seasonal_order`` and ``bic``), and
            ``self.order`` is set to the selected six-component tuple.
        """
        self.order_sum_max = order_sum_max
        current = (0, 0, 0, 0, 0, 0)
        best_bic = np.inf
        records = []

        # Re-evaluate the sum every round so that order_sum_max really
        # bounds the search.
        while sum(current) < order_sum_max:
            round_bic, round_order = np.inf, None
            for position in range(len(current)):
                candidate = list(current)
                candidate[position] += 1

                model = SARIMAX(self.ts,
                                order=candidate[:3],
                                seasonal_order=candidate[3:] + [self.period],
                                enforce_stationarity=False)
                results = model.fit(low_memory=True)
                records.append([candidate, results.bic])

                # Compare the BIC values only; comparing (bic, order) tuples
                # can end up ordering None against a list.  "<=" lets the
                # later candidate win an exact tie, which is what the tuple
                # comparison did for these candidates.
                if results.bic <= round_bic:
                    round_bic, round_order = results.bic, candidate

            if round_order is None or not round_bic < best_bic:
                break
            best_bic = round_bic
            current = tuple(round_order)
            print(f"{round_order} is chosed with bic={best_bic}")

        # One row per fitted candidate; building the frame once avoids
        # repeated concatenation and malformed single-row frames.
        self.bic_value = pd.DataFrame(records,
                                      columns=['seasonal_order', 'bic'])
        self.order = current
        print(f"best order is {self.order}")
Example #3
0
def sarima(data, steps):
    """Fit a SARIMA model, then score a rolling one-step ARIMA forecast.

    A ``SARIMAX`` model with order (2, 0, 1) and seasonal order (0, 1, 1, 7)
    is fitted on the full series and its summary is printed.  The last 20%
    of the observations are then predicted one step at a time with an
    ``ARIMA`` model of order (2, 0, 1) that is refitted at every step on all
    observations preceding the one being predicted.

    Args:
        data: Series-like object; ``.values``, ``.index``, ``len`` and
            positional slicing are used.
        steps: Not used by this function.

    Returns:
        dict with the keys ``model``, ``index``, ``actual``, ``predicted``
        and ``rmse`` (the value returned by the external ``rms`` helper).
    """
    model = SARIMAX(endog=data.values,
                    order=(2, 0, 1),
                    seasonal_order=(0, 1, 1, 7),
                    enforce_invertibility=False)
    sarima_fit = model.fit()
    print(sarima_fit.summary())

    # Rolling forecast over the last 20% of the observations.
    end = int(0.2 * len(data))
    split = len(data) - end
    actual_values = data[split:]
    indexes = actual_values.index
    pred_values = []

    for i in range(end):
        # Train on everything strictly before observation split + i, so the
        # one-step forecast lines up with indexes[i].  Advancing the window
        # only after forecasting makes each prediction lag by one step.
        history = data[:split + i]
        # NOTE(review): the positional order argument and the [0][0]
        # indexing of forecast() look like the legacy ARIMA interface --
        # confirm which ARIMA class is imported.
        arima_fit = ARIMA(history, (2, 0, 1)).fit()
        pred_values.append(arima_fit.forecast()[0][0])

    pred_values = pd.Series(pred_values)
    pred_values.index = indexes

    rmse = rms(actual_values, pred_values)
    print("RMSE VALUE", rmse)
    print(len(pred_values))
    return {
        "model": "Baseline",
        "index": list(indexes),
        "actual": list(actual_values.values),
        "predicted": list(pred_values),
        "rmse": rmse
    }
Example #4
0
 def test_seasonal_arima4(self):
     # Fit SARIMA(1,0,1)x(1,0,1,12) on the getData5() fixture, export it
     # with StatsmodelsToPmml and check that the PMML file was written.
     series = self.getData5()
     pmml_path = 'seasonal_arima4.pmml'
     fitted = SARIMAX(
         endog=series,
         order=(1, 0, 1),
         seasonal_order=(1, 0, 1, 12),
     ).fit(disp=False)
     StatsmodelsToPmml(fitted, pmml_path)
     file_written = os.path.isfile(pmml_path)
     self.assertEqual(file_written, True)
Example #5
0
 def test_seasonal_arima2(self):
     # Fit SARIMA(3,1,1)x(3,1,1,12) on the seasonal fixture, export it with
     # an 80% confidence interval and validate the file against the schema.
     series = self.statsmodels_data_helper.get_seasonal_data()
     pmml_path = 'seasonal_arima2.pmml'
     fitted = SARIMAX(
         endog=series,
         exog=None,
         order=(3, 1, 1),
         seasonal_order=(3, 1, 1, 12),
     ).fit()
     ArimaToPMML(fitted, pmml_path, conf_int=[80])
     schema_ok = self.schema.is_valid(pmml_path)
     self.assertEqual(schema_ok, True)
Example #6
0
    def SARIMA_f(self, df, pdq, s):
        """Fit SARIMAX on ``df['Actual']`` and record a one-step forecast.

        On success a dict with the keys ``Cluster``, ``Warehouse``, ``Year``,
        ``Week`` and ``Forecast`` is passed to ``self.df_forecast.append``
        and a DEBUG line is printed.  On any ordinary error an ERROR line is
        printed instead (best effort; the error is not re-raised).

        Args:
            df: Object indexable by ``'Actual'``; also passed to the
                external ``generate_attrib`` helper.
            pdq: Non-seasonal order forwarded to ``SARIMAX``.
            s: Seasonal order forwarded to ``SARIMAX``.

        Returns:
            None in both the success and the failure case.
        """
        try:
            sarima_mod = SARIMAX(np.array(df['Actual']), order=pdq, seasonal_order=s)
            # NOTE(review): use_boxcox is not a documented SARIMAX.fit
            # option -- confirm it has any effect here.
            fit_sarima = sarima_mod.fit(use_boxcox=True, disp=0)
            forecast = fit_sarima.forecast()[0]

            Cluster, Warehouse, WF, YF = generate_attrib(df)
            # NOTE(review): assumes df_forecast is a list; if it is a
            # DataFrame the result of append() is discarded -- verify.
            self.df_forecast.append({'Cluster':Cluster, 'Warehouse':Warehouse, 'Year':YF, "Week": WF, "Forecast":forecast})
            print(f'DEBUG:Forecast:{Cluster}:{Warehouse}:{YF}:{WF}:{forecast}')
            return None
        except Exception:
            # Stay best-effort for modelling/data errors, but let
            # KeyboardInterrupt and SystemExit propagate.
            print("ERROR:FORECAST-SARIMA")
            return None
 def test_seasonal_arima2(self):
     # Fit SARIMA(1,0,1)x(1,1,1,12) on the getData5() fixture, export it
     # with a 95% confidence interval and check that the file was written.
     series = self.getData5()
     pmml_path = 'seasonal_arima2.pmml'
     sarimax_model = SARIMAX(endog=series,
                             order=(1, 0, 1),
                             seasonal_order=(1, 1, 1, 12))
     fitted = sarimax_model.fit(disp=False)
     ArimaToPMML(fitted, pmml_path, conf_int=[95])
     file_written = os.path.isfile(pmml_path)
     self.assertEqual(file_written, True)
Example #8
0
 def test_seasonal_arima1(self):
     # Fit SARIMA(3,1,1)x(3,1,1,12) with a constant trend on the seasonal
     # fixture, export it and validate the file against the schema.
     series = self.statsmodels_data_helper.get_seasonal_data()
     pmml_path = 'seasonal_arima1.pmml'
     fitted = SARIMAX(
         endog=series,
         exog=None,
         order=(3, 1, 1),
         seasonal_order=(3, 1, 1, 12),
         trend='c',
     ).fit()
     StatsmodelsToPmml(fitted, pmml_path)
     schema_ok = self.schema.is_valid(pmml_path)
     self.assertEqual(schema_ok, True)
Example #9
0
    def sarima_(self):
        """Fit SARIMA on the second column of ``self.df`` and plot a forecast.

        Uses ``self.pdq_`` and ``self.PDQ_`` as the non-seasonal and seasonal
        orders.  Stores the fit in ``self.results`` and a forecast of
        ``self.forecast_num`` steps in ``self.predict_``, then shows the
        forecast and the observed column on one figure.
        """
        observed = self.df.iloc[:, 1]
        sarimax_model = SARIMAX(observed,
                                order=self.pdq_,
                                seasonal_order=self.PDQ_)
        print('the parameters: SARIMA{}x{}'.format(self.pdq_, self.PDQ_), '\n')

        self.results = sarimax_model.fit()
        self.predict_ = self.results.forecast(self.forecast_num)

        # Forecast first, observations second: the legend labels below rely
        # on this plotting order.
        _, axes = plt.subplots(figsize=(20, 6))
        axes = self.predict_.plot(ax=axes)
        observed.plot(ax=axes)
        plt.legend(['y_pred', 'y_true'])
        plt.show()
 def test_seasonal_arima5(self):
     # Fit SARIMA(0,0,1)x(3,1,1,12) with a constant trend on the getData5()
     # fixture, export it and check that the PMML file was written.
     series = self.getData5()
     pmml_path = 'seasonal_arima5.pmml'
     sarimax_model = SARIMAX(endog=series,
                             order=(0, 0, 1),
                             seasonal_order=(3, 1, 1, 12),
                             trend='c')
     fitted = sarimax_model.fit(disp=False)
     ArimaToPMML(fitted, pmml_path)
     file_written = os.path.isfile(pmml_path)
     self.assertEqual(file_written, True)
def train_SARIMA_model(data,
                       order,
                       seasonal_order,
                       directory_models,
                       country=None):
    """Train a seasonal ARIMA (SARIMA) model, save it and return the fit.

    Args:
        data: Series passed to ``SARIMAX``; ``data.shape`` is logged.
        order: Non-seasonal order forwarded to ``SARIMAX``.
        seasonal_order: Seasonal order forwarded to ``SARIMAX``.
        directory_models: Directory the fitted model is saved into.
        country: Optional label; when truthy the file is named
            ``sarima_<country>.pickle``, otherwise ``sarima.pickle``.

    Returns:
        The fitted results object returned by ``SARIMAX(...).fit()``.
    """
    import os  # local import: only this function needs it

    sarima = SARIMAX(data, order=order, seasonal_order=seasonal_order)
    sarima_model = sarima.fit()

    if country:
        file_name = 'sarima_' + country + '.pickle'
    else:
        file_name = 'sarima.pickle'
    # os.path.join gives the same path as plain concatenation when
    # directory_models already ends with a separator, and a correct one
    # when it does not.
    sarima_model.save(os.path.join(directory_models, file_name))

    log_train('sarima', data.shape, {})
    return sarima_model
def train_SARIMA_model(data, country=None):
    """Fit SARIMAX with the module-level orders and pickle the result.

    The model uses ``ARIMA_ORDER`` and ``SARIMA_SEASONAL_ORDER``.  The fit
    is written to ``MODELS_DIRECTORY`` as ``sarima_<country>.pickle`` (with
    spaces in ``country`` replaced by underscores) when ``country`` is
    truthy, otherwise as ``sarima.pickle``.  The training run is then
    recorded through ``log_train``.  Nothing is returned.
    """
    fitted = SARIMAX(
        data,
        order=ARIMA_ORDER,
        seasonal_order=SARIMA_SEASONAL_ORDER,
    ).fit()

    if country:
        file_name = 'sarima_' + country.replace(' ', '_') + '.pickle'
    else:
        file_name = 'sarima.pickle'
    pickle_file = os.path.join(MODELS_DIRECTORY, file_name)

    with open(pickle_file, 'wb') as handle:
        pickle.dump(fitted, handle)
    log_train(TRAIN_LOG, 'sarima', pickle_file, data.size)
Example #13
0
    def sarima(self):
        """Fit SARIMA on the first column of ``self.df`` and plot a forecast.

        Uses ``self.param`` and ``self.param_seasonal`` as the non-seasonal
        and seasonal orders.  Stores the fit in ``self.results`` and a
        forecast of ``self.forecast_num`` steps in ``self.predict_``, then
        shows the forecast and the observed column on one figure.
        """
        observed = self.df.iloc[:, 0]
        # NOTE(review): low_memory is normally an option of fit(); confirm
        # that passing it to the constructor has the intended effect.
        sarimax_model = SARIMAX(
            observed,
            order=self.param,
            seasonal_order=self.param_seasonal,
            low_memory=True,
        )
        print('the best parameters: SARIMA{}x{}'.format(
            self.param, self.param_seasonal))

        self.results = sarimax_model.fit()
        self.predict_ = self.results.forecast(self.forecast_num)

        # Forecast first, observations second: the legend labels below rely
        # on this plotting order.
        _, axes = plt.subplots(figsize=(30, 6))
        axes = self.predict_.plot(ax=axes)
        observed.plot(ax=axes)
        plt.legend(['y_pred', 'y_true'])
        plt.show()
Example #14
0
    def params_select(self):
        """Grid-search SARIMA orders on ``self.df`` and keep the lowest AIC.

        Every combination of ``p in range(p_max)``, ``d in range(d_max)``
        and ``q in range(q_max)`` is tried for the non-seasonal order, and
        the same grid (with ``self.period`` appended) for the seasonal
        order.  Each model is fitted on the first column of ``self.df`` and
        its AIC is printed.

        Side effects:
            Sets ``self.p`` and ``self.q`` to the searched ranges, and
            ``self.param`` / ``self.param_seasonal`` to the order pair with
            the smallest AIC (the first one on ties).

        Raises:
            ValueError: If the grid is empty because one of ``p_max``,
                ``d_max`` or ``q_max`` is zero.
        """
        self.p = range(0, self.p_max)
        self.q = range(0, self.q_max)
        d = range(0, self.d_max)
        # All (p, d, q) triplets of the non-seasonal grid.
        pdq = list(itertools.product(self.p, d, self.q))
        # The same triplets with the seasonal period appended.
        seasonal_pdq = [(sp, sd, sq, self.period) for sp, sd, sq in pdq]

        series = self.df.iloc[:, 0]
        rows = []
        for param in pdq:
            for param_seasonal in seasonal_pdq:
                model = SARIMAX(series,
                                order=param,
                                seasonal_order=param_seasonal)
                results = model.fit(low_memory=True)
                print('SARIMA{}x{} - AIC:{}'.format(param, param_seasonal,
                                                    results.aic))
                rows.append([param, param_seasonal, results.aic])

        if not rows:
            raise ValueError(
                'empty search grid: p_max, d_max and q_max must all be > 0')

        # Build the table once; the default 0..n-1 index replaces the
        # hand-built index list and per-row concatenation.
        aic_value = pd.DataFrame(
            rows, columns=['param', 'param_seasonal', 'aic'])

        # idxmin returns the first row holding the smallest AIC and skips
        # NaN values, so one failed information criterion cannot derail
        # the selection.
        best = aic_value.loc[aic_value['aic'].idxmin()]
        self.param = best['param']
        self.param_seasonal = best['param_seasonal']
# Simulate a regression with a linear time trend, one regressor and AR(1)
# errors, then estimate it with both ARIMA and SARIMAX.
# `eta`, `full_x` and `x` are defined earlier in the script.
rho = 0.8
beta = 2
delta0 = 10
delta1 = 0.5
epsilon = eta.copy()
# Turn the shocks in `eta` into AR(1) errors:
# epsilon[i] = rho * epsilon[i - 1] + eta[i].
for i in range(1, eta.shape[0]):
    epsilon[i] = rho * epsilon[i - 1] + eta[i]
t = np.arange(epsilon.shape[0])
y = delta0 + delta1 * t + beta * full_x + epsilon
# Drop the first 200 observations (presumably a burn-in period -- confirm
# against how `x` is derived from `full_x`).
y = y[200:]

# Starting values for the SARIMAX fit below.
start = np.array([110, delta1, beta, rho, 1])
arx_res = ARIMA(y, exog=x, order=(1, 0, 0), trend="ct").fit()
mod = SARIMAX(y, exog=x, order=(1, 0, 0), trend="ct")
# Scale the first two starting values (the two trend terms) by 1 - rho;
# presumably this matches how SARIMAX parameterizes the trend -- confirm.
start[:2] *= 1 - rho
sarimax_res = mod.fit(start_params=start, method="bfgs")

# The two estimators fit similarly, although there is a small difference
# in the log-likelihood.  This is a numerical issue and should not
# materially affect the predictions. Importantly the two trend parameters,
# `const` and `x1` (unfortunately named for the time trend), differ between
# the two.  The other parameters are effectively identical.

print(arx_res.summary())

print(sarimax_res.summary())

# ## Initial residuals `SARIMAX` and `ARIMA`
#
# Residuals for observations before the maximal model order, which depends
# on the AR, MA, Seasonal AR, Seasonal MA and differencing parameters, are
Example #16
0
# * Specification of seasonal and nonseasonal AR and MA components
# * Inclusion of Exogenous variables
# * Full maximum-likelihood estimation using the Kalman Filter
#
# This model is more feature rich than `AutoReg`. Unlike `SARIMAX`,
# `AutoReg` estimates parameters using OLS.  This is faster and the problem
# is globally convex, and so there are no issues with local minima. The
# closed-form estimator and its performance are the key advantages of
# `AutoReg` over `SARIMAX` when comparing AR(P) models.  `AutoReg` also
# supports seasonal dummies, which can be used with `SARIMAX` if the user
# includes them as exogenous regressors.

from statsmodels.tsa.api import SARIMAX

# Fit SARIMAX with a constant and an AR order given as the tuple
# (1, 5, 12, 13), then compare its estimates with the AutoReg results held
# in `res_glob`.  `ind_prod` and `res_glob` are defined earlier in the
# script.
sarimax_mod = SARIMAX(ind_prod, order=((1, 5, 12, 13), 0, 0), trend="c")
sarimax_res = sarimax_mod.fit()
print(sarimax_res.summary())

# Drop the last SARIMAX parameter (presumably the innovation variance,
# which AutoReg does not report -- confirm) so both parameter vectors have
# the same length.
sarimax_params = sarimax_res.params.iloc[:-1].copy()
# Re-use the AutoReg parameter labels so the two columns align row by row.
sarimax_params.index = res_glob.params.index
params = pd.concat([res_glob.params, sarimax_params], axis=1, sort=False)
params.columns = ["AutoReg", "SARIMAX"]
params

# ## Custom Deterministic Processes
#
# The `deterministic` parameter allows a custom `DeterministicProcess` to
# be used. This allows for more complex deterministic terms to be
# constructed, for example one that includes seasonal components with two
# periods, or, as the next example shows, one that uses a Fourier series
# rather than seasonal dummies.