class Sarima:
    """Stateful convenience wrapper around a statsmodels ``SARIMAX`` fit.

    The instance keeps the training series (``ts``), an optional
    ``last_timestamp`` marker supplied by the caller, and, once
    :meth:`initialize` has run, the model (``model``) and its fitted
    results (``results``).
    """

    def __init__(self, ts, last_timestamp=None):
        """Remember the series to model and an optional timestamp marker."""
        self.last_timestamp = last_timestamp
        self.ts = ts

    def initialize(self, order, seasonal_order):
        """Build a ``SARIMAX`` model on ``self.ts`` and fit it.

        Both orders are stored on the instance. Stationarity and
        invertibility enforcement and simple differencing are all
        switched off, and the fit is requested with ``low_memory=True``.
        """
        self.order, self.seasonal_order = order, seasonal_order
        model_options = dict(
            order=self.order,
            seasonal_order=self.seasonal_order,
            enforce_stationarity=False,
            enforce_invertibility=False,
            simple_differencing=False,
        )
        self.model = SARIMAX(self.ts, **model_options)
        self.results = self.model.fit(low_memory=True)

    def forecast(self, n=1) -> tuple:
        """Return ``self.results.forecast(n)`` unchanged.

        NOTE(review): the ``tuple`` annotation is kept for interface
        stability; the actual type is whatever the results object
        returns -- confirm against callers.
        """
        return self.results.forecast(n)

    def append(self, new_ts, last_timestamp=None, refit = False):
        """Extend the fitted results with ``new_ts``.

        ``last_timestamp`` replaces the stored marker only when it is
        truthy; ``refit`` is forwarded to ``results.append``.
        """
        if last_timestamp:
            self.last_timestamp = last_timestamp
        extended_results = self.results.append(new_ts, refit=refit)
        self.results = extended_results

    def apply(self, new_init_ts, last_timestamp=None):
        """Re-apply the fitted results to ``new_init_ts``.

        The new series is stored as ``init_ts`` and the timestamp marker
        is overwritten unconditionally (including with ``None``).
        """
        self.init_ts = new_init_ts
        reapplied_results = self.results.apply(new_init_ts)
        self.results = reapplied_results
        self.last_timestamp = last_timestamp
def order_select(self, order_sum_max=15):
    """Greedy, BIC-driven search over the six SARIMA order components.

    Starting from ``(0, 0, 0, 0, 0, 0)`` -- read as ``(p, d, q, P, D, Q)``
    because the first three entries are passed as ``order`` and the last
    three (plus ``self.period``) as ``seasonal_order`` -- each round fits
    the six models obtained by incrementing exactly one component. The
    candidate with the lowest BIC is adopted if it improves on the best
    BIC so far; otherwise the search stops. The search also stops once
    the components sum to ``order_sum_max``.

    Parameters
    ----------
    order_sum_max : int
        Upper bound on the sum of the six order components.

    Side effects
    ------------
    Sets ``self.order_sum_max``, ``self.bic_value`` (one row per fitted
    candidate, columns ``seasonal_order`` and ``bic``) and ``self.order``
    (the selected six-component tuple). Progress is printed.

    Notes
    -----
    Reads ``self.ts`` and ``self.period``; ``self.period`` must be set by
    the caller before this method runs.
    When two candidates in a round have exactly the same BIC, the one
    generated first is kept.
    """
    self.order_sum_max = order_sum_max
    current_order = (0, 0, 0, 0, 0, 0)
    best_bic = np.inf
    fitted_candidates = []

    # Re-evaluate the sum every round so the cap is actually enforced.
    while sum(current_order) < order_sum_max:
        round_bic, round_order = np.inf, None
        for position in range(len(current_order)):
            candidate = list(current_order)
            candidate[position] += 1
            model = SARIMAX(self.ts,
                            order=candidate[:3],
                            seasonal_order=candidate[3:] + [self.period],
                            enforce_stationarity=False)
            results = model.fit(low_memory=True)
            # One [order, bic] row per fit; the table is built once below.
            fitted_candidates.append([candidate, results.bic])
            # Scalar comparison: a NaN BIC never wins.
            if results.bic < round_bic:
                round_bic, round_order = results.bic, candidate

        if round_order is None or not round_bic < best_bic:
            break
        best_bic = round_bic
        current_order = tuple(round_order)
        print(f"{round_order} is chosed with bic={best_bic}")

    self.bic_value = pd.DataFrame(fitted_candidates,
                                  columns=['seasonal_order', 'bic'])
    self.order = current_order
    print(f"best order is {self.order}")
def sarima(data, steps):
    """Fit a SARIMA model, then score a rolling one-step ARIMA baseline.

    A ``SARIMAX`` model with order (2, 0, 1) and seasonal order
    (0, 1, 1, 7) is fitted on the whole series and its summary printed.
    The last 20% of ``data`` is then used as a hold-out: for each hold-out
    point an ``ARIMA`` model of order (2, 0, 1) is refitted on all
    observations before that point and its one-step forecast recorded.

    Parameters
    ----------
    data : pandas.Series
        Series to model (``.values``, ``.index`` and positional slicing
        are used).
    steps :
        Unused; kept for interface compatibility.

    Returns
    -------
    dict
        Keys ``model`` (always ``"Baseline"``), ``index``, ``actual``,
        ``predicted`` and ``rmse``.
    """
    model = SARIMAX(endog=data.values,
                    order=(2, 0, 1),
                    seasonal_order=(0, 1, 1, 7),
                    enforce_invertibility=False)
    sarima_fit = model.fit()
    print(sarima_fit.summary())

    # Rolling forecast over the final 20% of the sample.
    end = int(0.2 * len(data))
    split = len(data) - end  # position of the first hold-out observation
    actual_values = data[split:]
    indexes = actual_values.index

    pred_values = []
    for i in range(end):
        # Train on everything strictly before the point being predicted.
        # The window is computed up front so that each forecast uses the
        # most recent history instead of lagging one step behind.
        history = data[:split + i]
        arima_fit = ARIMA(history, (2, 0, 1)).fit()
        pred_values.append(arima_fit.forecast()[0][0])

    pred_values = pd.Series(pred_values)
    pred_values.index = indexes

    rmse = rms(actual_values, pred_values)
    print("RMSE VALUE", rmse)
    print(len(pred_values))
    return {
        "model": "Baseline",
        "index": list(indexes),
        "actual": list(actual_values.values),
        "predicted": list(pred_values),
        "rmse": rmse
    }
def test_seasonal_arima4(self):
    """Export a fitted SARIMAX (1,0,1)x(1,0,1,12) model and check the file exists."""
    pmml_path = 'seasonal_arima4.pmml'
    series = self.getData5()
    fitted = SARIMAX(
        endog=series,
        order=(1, 0, 1),
        seasonal_order=(1, 0, 1, 12),
    ).fit(disp=False)
    StatsmodelsToPmml(fitted, pmml_path)
    was_written = os.path.isfile(pmml_path)
    self.assertEqual(was_written, True)
def test_seasonal_arima2(self):
    """Export a SARIMAX (3,1,1)x(3,1,1,12) fit with an 80 conf_int and validate it against the schema."""
    pmml_path = 'seasonal_arima2.pmml'
    series = self.statsmodels_data_helper.get_seasonal_data()
    sarimax_spec = SARIMAX(
        endog=series,
        exog=None,
        order=(3, 1, 1),
        seasonal_order=(3, 1, 1, 12),
    )
    fitted = sarimax_spec.fit()
    ArimaToPMML(fitted, pmml_path, conf_int=[80])
    schema_ok = self.schema.is_valid(pmml_path)
    self.assertEqual(schema_ok, True)
def SARIMA_f(self, df, pdq, s): try: sarima_mod = SARIMAX(np.array(df['Actual']), order=pdq, seasonal_order=s) fit_sarima = sarima_mod.fit(use_boxcox=True, disp=0) forecast = fit_sarima.forecast()[0] Cluster, Warehouse, WF, YF = generate_attrib(df) self.df_forecast.append({'Cluster':Cluster, 'Warehouse':Warehouse, 'Year':YF, "Week": WF, "Forecast":forecast}) return print(f'DEBUG:Forecast:{Cluster}:{Warehouse}:{YF}:{WF}:{forecast}') except: return print("ERROR:FORECAST-SARIMA")
def test_seasonal_arima2(self):
    """Export a SARIMAX (1,0,1)x(1,1,1,12) fit with a 95 conf_int and check the file exists."""
    pmml_path = 'seasonal_arima2.pmml'
    series = self.getData5()
    sarimax_spec = SARIMAX(endog=series, order=(1, 0, 1), seasonal_order=(1, 1, 1, 12))
    fitted = sarimax_spec.fit(disp=False)
    ArimaToPMML(fitted, pmml_path, conf_int=[95])
    was_written = os.path.isfile(pmml_path)
    self.assertEqual(was_written, True)
def test_seasonal_arima1(self):
    """Export a SARIMAX (3,1,1)x(3,1,1,12) fit with trend 'c' and validate it against the schema."""
    pmml_path = 'seasonal_arima1.pmml'
    series = self.statsmodels_data_helper.get_seasonal_data()
    fitted = SARIMAX(
        endog=series,
        exog=None,
        order=(3, 1, 1),
        seasonal_order=(3, 1, 1, 12),
        trend='c',
    ).fit()
    StatsmodelsToPmml(fitted, pmml_path)
    schema_ok = self.schema.is_valid(pmml_path)
    self.assertEqual(schema_ok, True)
def sarima_(self):
    """Fit SARIMAX on the second column of ``self.df``, forecast and plot.

    Uses ``self.pdq_`` / ``self.PDQ_`` as the non-seasonal / seasonal
    orders. Stores the fitted results in ``self.results`` and the
    ``self.forecast_num``-step forecast in ``self.predict_``, then draws
    the forecast and the observed column on one axis and shows the figure.
    """
    observed = self.df.iloc[:, 1]
    sarimax_spec = SARIMAX(observed, order=self.pdq_, seasonal_order=self.PDQ_)
    print('the parameters: SARIMA{}x{}'.format(self.pdq_, self.PDQ_), '\n')

    self.results = sarimax_spec.fit()
    # The fitted results could be persisted here with joblib.dump if needed.
    self.predict_ = self.results.forecast(self.forecast_num)

    _, axis = plt.subplots(figsize=(20, 6))
    axis = self.predict_.plot(ax=axis)
    observed.plot(ax=axis)
    plt.legend(['y_pred', 'y_true'])
    plt.show()
def test_seasonal_arima5(self):
    """Export a SARIMAX (0,0,1)x(3,1,1,12) fit with trend 'c' and check the file exists."""
    pmml_path = 'seasonal_arima5.pmml'
    series = self.getData5()
    sarimax_spec = SARIMAX(endog=series, order=(0, 0, 1), seasonal_order=(3, 1, 1, 12), trend='c')
    fitted = sarimax_spec.fit(disp=False)
    ArimaToPMML(fitted, pmml_path)
    was_written = os.path.isfile(pmml_path)
    self.assertEqual(was_written, True)
def train_SARIMA_model(data, order, seasonal_order, directory_models, country=None):
    """Fit a SARIMAX model on ``data``, save it, log the run and return it.

    The fitted results are saved to ``directory_models`` concatenated
    with ``'sarima_<country>.pickle'`` when ``country`` is truthy, and
    with ``'sarima.pickle'`` otherwise. ``directory_models`` is joined by
    plain string concatenation, so it must already end with whatever
    separator or prefix the caller wants.
    """
    fitted = SARIMAX(data, order=order, seasonal_order=seasonal_order).fit()

    file_name = ('sarima_' + country + '.pickle') if country else 'sarima.pickle'
    fitted.save(directory_models + file_name)

    log_train('sarima', data.shape, {})
    return fitted
def train_SARIMA_model(data, country=None):
    """Fit a SARIMAX model on ``data``, pickle it and log the run.

    Orders come from the module constants ``ARIMA_ORDER`` and
    ``SARIMA_SEASONAL_ORDER``. The pickle is written inside
    ``MODELS_DIRECTORY`` as ``sarima_<country>.pickle`` (spaces in the
    country replaced by underscores) when ``country`` is truthy, else as
    ``sarima.pickle``. Nothing is returned.
    """
    fitted = SARIMAX(data,
                     order=ARIMA_ORDER,
                     seasonal_order=SARIMA_SEASONAL_ORDER).fit()

    if not country:
        file_name = 'sarima.pickle'
    else:
        file_name = 'sarima_' + country.replace(' ', '_') + '.pickle'
    pickle_file = os.path.join(MODELS_DIRECTORY, file_name)

    with open(pickle_file, 'wb') as file:
        pickle.dump(fitted, file)
    log_train(TRAIN_LOG, 'sarima', pickle_file, data.size)
def sarima(self):
    """Fit SARIMAX on the first column of ``self.df``, forecast and plot.

    Uses ``self.param`` / ``self.param_seasonal`` as the non-seasonal /
    seasonal orders. Stores the fitted results in ``self.results`` and
    the ``self.forecast_num``-step forecast in ``self.predict_``, then
    draws the forecast and the observed column on one axis and shows the
    figure.
    """
    observed = self.df.iloc[:, 0]
    # NOTE(review): low_memory is handed to the constructor here rather
    # than to fit() -- confirm this has the intended effect.
    sarimax_spec = SARIMAX(observed,
                           order=self.param,
                           seasonal_order=self.param_seasonal,
                           low_memory=True)
    print('the best parameters: SARIMA{}x{}'.format(
        self.param, self.param_seasonal))

    self.results = sarimax_spec.fit()
    # The fitted results could be persisted here with joblib.dump if needed.
    self.predict_ = self.results.forecast(self.forecast_num)

    _, axis = plt.subplots(figsize=(30, 6))
    axis = self.predict_.plot(ax=axis)
    observed.plot(ax=axis)
    plt.legend(['y_pred', 'y_true'])
    plt.show()
def params_select(self):
    """Grid-search SARIMA orders on the first column of ``self.df`` by AIC.

    Every ``(p, d, q)`` triplet with ``p < self.p_max``,
    ``d < self.d_max`` and ``q < self.q_max`` is combined with every
    seasonal ``(P, D, Q, self.period)`` built from the same ranges. Each
    combination is fitted (``low_memory=True``) and its AIC printed.

    Side effects
    ------------
    Sets ``self.p`` and ``self.q`` (the search ranges) and stores the
    lowest-AIC combination in ``self.param`` / ``self.param_seasonal``.
    Ties go to the combination fitted first.

    Raises
    ------
    ValueError
        If the search grid is empty (one of the ``*_max`` values is 0)
        or no candidate produced a non-NaN AIC.
    """
    self.p = range(0, self.p_max)
    self.q = range(0, self.q_max)
    d = range(0, self.d_max)

    # All (p, d, q) triplets, and the same triplets tagged with the period.
    pdq = list(itertools.product(self.p, d, self.q))
    seasonal_pdq = [(x[0], x[1], x[2], self.period) for x in pdq]
    if not pdq:
        raise ValueError(
            "empty SARIMA search grid: p_max, d_max and q_max must all be >= 1")

    endog = self.df.iloc[:, 0]
    # Collect plain rows and build the table once, instead of growing a
    # DataFrame with pd.concat on every fit.
    rows = []
    for param in pdq:
        for param_seasonal in seasonal_pdq:
            model = SARIMAX(endog, order=param, seasonal_order=param_seasonal)
            results = model.fit(low_memory=True)
            print('SARIMA{}x{} - AIC:{}'.format(param, param_seasonal,
                                                results.aic))
            rows.append([param, param_seasonal, results.aic])

    aic_value = pd.DataFrame(rows, columns=['param', 'param_seasonal', 'aic'])

    # Find the row with the smallest AIC, ignoring fits whose AIC is NaN.
    usable = aic_value.dropna(subset=['aic'])
    if usable.empty:
        raise ValueError("no SARIMA candidate produced a valid AIC")
    best_row = usable.loc[usable['aic'].idxmin()]
    self.param = best_row['param']
    self.param_seasonal = best_row['param_seasonal']
# Simulate y = delta0 + delta1 * t + beta * full_x + epsilon, where epsilon
# follows the recursion epsilon[i] = rho * epsilon[i - 1] + eta[i], then fit
# the same specification (order=(1, 0, 0), trend="ct", exog=x) with both
# ARIMA and SARIMAX. `eta`, `full_x` and `x` come from earlier in the script.
rho = 0.8
beta = 2
delta0 = 10
delta1 = 0.5
epsilon = eta.copy()
# Build the autocorrelated errors in place; epsilon[0] stays equal to eta[0].
for i in range(1, eta.shape[0]):
    epsilon[i] = rho * epsilon[i - 1] + eta[i]

t = np.arange(epsilon.shape[0])
y = delta0 + delta1 * t + beta * full_x + epsilon
# Drop the first 200 observations (presumably a burn-in period -- confirm).
y = y[200:]

# Starting values handed to the SARIMAX optimizer below.
# NOTE(review): presumably ordered as [const, trend, exog coef, AR(1),
# variance]; verify against mod.param_names.
start = np.array([110, delta1, beta, rho, 1])
arx_res = ARIMA(y, exog=x, order=(1, 0, 0), trend="ct").fit()
mod = SARIMAX(y, exog=x, order=(1, 0, 0), trend="ct")
# Scale the first two starting values by (1 - rho) before fitting
# (presumably to match how SARIMAX parameterizes the trend -- TODO confirm).
start[:2] *= 1 - rho
sarimax_res = mod.fit(start_params=start, method="bfgs")

# The two estimators fit similarly, although there is a small difference
# in the log-likelihood. This is a numerical issue and should not
# materially affect the predictions. Importantly the two trend parameters,
# `const` and `x1` (unfortunately named for the time trend), differ between
# the two. The other parameters are effectively identical.

print(arx_res.summary())

print(sarimax_res.summary())

# ## Initial residuals `SARIMAX` and `ARIMA`
#
# Residuals for observations before the maximal model order, which depends
# on the AR, MA, Seasonal AR, Seasonal MA and differencing parameters, are
# * Specification of seasonal and nonseasonal AR and MA components # * Inclusion of Exogenous variables # * Full maximum-likelihood estimation using the Kalman Filter # # This model is more feature rich than `AutoReg`. Unlike `SARIMAX`, # `AutoReg` estimates parameters using OLS. This is faster and the problem # is globally convex, and so there are no issues with local minima. The # closed-form estimator and its performance are the key advantages of # `AutoReg` over `SARIMAX` when comparing AR(P) models. `AutoReg` also # support seasonal dummies, which can be used with `SARIMAX` if the user # includes them as exogenous regressors. from statsmodels.tsa.api import SARIMAX sarimax_mod = SARIMAX(ind_prod, order=((1, 5, 12, 13), 0, 0), trend="c") sarimax_res = sarimax_mod.fit() print(sarimax_res.summary()) sarimax_params = sarimax_res.params.iloc[:-1].copy() sarimax_params.index = res_glob.params.index params = pd.concat([res_glob.params, sarimax_params], axis=1, sort=False) params.columns = ["AutoReg", "SARIMAX"] params # ## Custom Deterministic Processes # # The `deterministic` parameter allows a custom `DeterministicProcess` to # be used. This allows for more complex deterministic terms to be # constructed, for example one that includes seasonal components with two # periods, or, as the next example shows, one that uses a Fourier series # rather than seasonal dummies.