def get_arma_order(endog, exog):
    """Select the (p, q) ARMA order that minimises the AIC.

    Parameters
    ----------
    endog : array_like
        Endogenous (dependent) series.
    exog : array_like or None
        Optional exogenous regressors; when given they are forwarded both
        to model construction and to fitting.

    Returns
    -------
    tuple of int
        The AIC-minimising (p, q) order.
    """
    # Fixes `not exog is None` (use `is not`) and removes the duplicated
    # return statement from both branches.
    if exog is not None:
        auto_arma_order = arma_order_select_ic(
            endog, ic=['aic'],
            model_kw={'exog': exog},
            fit_kw={'exog': exog, 'maxiter': 100})
    else:
        auto_arma_order = arma_order_select_ic(
            endog, ic=['aic'], fit_kw={'maxiter': 100})
    return auto_arma_order['aic_min_order'][0], auto_arma_order['aic_min_order'][1]
def test_arma_order_select_ic():
    # smoke test, assumes info-criteria are right
    from statsmodels.tsa.arima_process import arma_generate_sample
    arparams = np.array([.75, -.25])
    maparams = np.array([.65, .35])
    arparams = np.r_[1, -arparams]
    # NOTE(review): typo — this builds `maparam` (no trailing "s") and never
    # uses it, so the sample below is generated from the raw `maparams`
    # rather than the full MA lag polynomial.  The hard-coded regression
    # values below are baked to this behaviour, so it is left untouched.
    maparam = np.r_[1, maparams]
    nobs = 250
    np.random.seed(2014)  # fixed seed so the hard-coded IC tables match
    y = arma_generate_sample(arparams, maparams, nobs)

    res = arma_order_select_ic(y, ic=['aic', 'bic'], trend='nc')
    # regression tests in case we change algorithm to minic in sas
    aic_x = np.array([[np.nan, 552.7342255, 484.29687843],
                      [562.10924262, 485.5197969, 480.32858497],
                      [507.04581344, 482.91065829, 481.91926034],
                      [484.03995962, 482.14868032, 483.86378955],
                      [481.8849479, 483.8377379, 485.83756612]])
    bic_x = np.array([[np.nan, 559.77714733, 494.86126118],
                      [569.15216446, 496.08417966, 494.41442864],
                      [517.61019619, 496.99650196, 499.52656493],
                      [498.12580329, 499.75598491, 504.99255506],
                      [499.49225249, 504.96650341, 510.48779255]])
    aic = DataFrame(aic_x, index=lrange(5), columns=lrange(3))
    bic = DataFrame(bic_x, index=lrange(5), columns=lrange(3))
    assert_almost_equal(res.aic.values, aic.values, 5)
    assert_almost_equal(res.bic.values, bic.values, 5)
    assert_equal(res.aic_min_order, (1, 2))
    assert_equal(res.bic_min_order, (1, 2))
    assert_(res.aic.index.equals(aic.index))
    assert_(res.aic.columns.equals(aic.columns))
    assert_(res.bic.index.equals(bic.index))
    assert_(res.bic.columns.equals(bic.columns))

    # Same search on a pandas Series with a date index and a smaller grid;
    # results must match the top-left corner of the full tables.
    index = pd.date_range('2000-1-1', freq='M', periods=len(y))
    y_series = pd.Series(y, index=index)
    res_pd = arma_order_select_ic(y_series, max_ar=2, max_ma=1,
                                  ic=['aic', 'bic'], trend='nc')
    assert_almost_equal(res_pd.aic.values, aic.values[:3, :2], 5)
    assert_almost_equal(res_pd.bic.values, bic.values[:3, :2], 5)
    assert_equal(res_pd.aic_min_order, (2, 1))
    assert_equal(res_pd.bic_min_order, (1, 1))

    # A single criterion passed as a bare string should also work.
    res = arma_order_select_ic(y, ic='aic', trend='nc')
    assert_almost_equal(res.aic.values, aic.values, 5)
    assert_(res.aic.index.equals(aic.index))
    assert_(res.aic.columns.equals(aic.columns))
    assert_equal(res.aic_min_order, (1, 2))
def test_arma_order_select_ic_failure():
    # Smoke test: this data set is known to provoke an SVD convergence
    # failure inside the order search; we only check that the call returns.
    # Likely platform dependent — AR roots may be cancelling out for the
    # (4, 1) candidate.
    values = [
        0.86074377817203640006, 0.85316549067906921611,
        0.87104653774363305363, 0.60692382068987393851,
        0.69225941967301307667, 0.73336177248909339976,
        0.03661329261479619179, 0.15693067239962379955,
        0.12777403512447857437, -0.27531446294481976,
        -0.24198139631653581283, -0.23903317951236391359,
        -0.26000241325906497947, -0.21282920015519238288,
        -0.15943768324388354896, 0.25169301564268781179,
        0.1762305709151877342, 0.12678133368791388857,
        0.89755829086753169399, 0.82667068795350151511,
    ]
    y = np.array(values)
    import warnings
    with warnings.catch_warnings():
        # Silence the expected hessian-inversion / convergence warnings.
        warnings.simplefilter("ignore")
        res = arma_order_select_ic(y)
def test_arma_order_select_ic_failure():
    # this should trigger an SVD convergence failure, smoke test that it
    # returns, likely platform dependent failure...
    y = np.array([
        0.86074377817203640006, 0.85316549067906921611,
        0.87104653774363305363, 0.60692382068987393851,
        0.69225941967301307667, 0.73336177248909339976,
        0.03661329261479619179, 0.15693067239962379955,
        0.12777403512447857437, -0.27531446294481976,
        -0.24198139631653581283, -0.23903317951236391359,
        -0.26000241325906497947, -0.21282920015519238288,
        -0.15943768324388354896, 0.25169301564268781179,
        0.1762305709151877342, 0.12678133368791388857,
        0.89755829086753169399, 0.82667068795350151511])
    # Consistency fix: the other versions of this smoke test in the file
    # suppress the hessian-inversion and convergence-failure warnings this
    # data is expected to raise; do the same here so the test stays quiet.
    import warnings
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        res = arma_order_select_ic(y)
def test_arma_order_select_ic_failure():
    # Smoke test only: verify arma_order_select_ic returns on this data.
    # this should trigger an SVD convergence failure, smoke test that it
    # returns, likely platform dependent failure...
    # looks like AR roots may be cancelling out for 4, 1?
    y = np.array([
        0.86074377817203640006, 0.85316549067906921611,
        0.87104653774363305363, 0.60692382068987393851,
        0.69225941967301307667, 0.73336177248909339976,
        0.03661329261479619179, 0.15693067239962379955,
        0.12777403512447857437, -0.27531446294481976,
        -0.24198139631653581283, -0.23903317951236391359,
        -0.26000241325906497947, -0.21282920015519238288,
        -0.15943768324388354896, 0.25169301564268781179,
        0.1762305709151877342, 0.12678133368791388857,
        0.89755829086753169399, 0.82667068795350151511
    ])
    import warnings
    with warnings.catch_warnings():
        # catch a hessian inversion and convergence failure warning
        warnings.simplefilter("ignore")
        res = arma_order_select_ic(y)
def main():
    """Load the temperature data set, visualise the Canadian average
    temperature series, grid-search an ARMA order and fit/plot an
    ARMA(5, 6) forecast."""
    df = pd.read_csv(FILE_NAME, sep=',', skipinitialspace=True,
                     encoding='utf-8')
    df = df.drop('AverageTemperatureUncertainty', axis=1)
    df = df[df.Country == 'Canada']
    df = df.drop('Country', axis=1)
    df.index = pd.to_datetime(df.dt)
    df = df.drop('dt', axis=1)
    # BUG FIX: DataFrame.ix was deprecated and removed from pandas; this
    # label-based date slice is exactly what .loc does.
    df = df.loc['1900-01-01':]
    df = df.sort_index()

    # Display AT
    df.AverageTemperature.fillna(method='pad', inplace=True)
    mp.plot(df.AverageTemperature)
    mp.show()

    # Rolling Mean
    df.AverageTemperature.plot.line(style='b', legend=True, grid=True,
                                    label='Avg. Temperature (AT)')
    ax = df.AverageTemperature.rolling(window=12).mean().plot.line(
        style='r', legend=True, label='Mean AT')
    ax.set_xlabel('Date')
    mp.legend(loc='best')
    mp.title('Weather timeseries visualization')
    mp.show()

    test_stationarity(df.AverageTemperature)
    # Grid-search ARMA orders up to (10, 10) scored by AIC and BIC.
    res = arma_order_select_ic(df.AverageTemperature, ic=['aic', 'bic'],
                               trend='nc', max_ar=10, max_ma=10,
                               fit_kw={'method': 'css-mle'})
    print(res)

    # Fit the model
    ts = pd.Series(df.AverageTemperature, index=df.index)
    model = ARMA(ts, order=(5, 6))
    results = model.fit(trend='nc', method='css-mle')
    print(results.summary2())

    # Plot the model
    fig, ax = mp.subplots(figsize=(10, 8))
    fig = results.plot_predict('01/01/2003', '12/01/2023', ax=ax)
    ax.legend(loc='lower left')
    mp.title('Weather Time Series prediction')
    mp.show()
    predictions = results.predict('01/01/2003', '12/01/2023')
def main():
    """Load the temperature data set, visualise the Canadian average
    temperature series, grid-search an ARMA order and fit/plot an
    ARMA(3, 3) forecast."""
    df = pd.read_csv(FILE_NAME, sep=',', skipinitialspace=True,
                     encoding='utf-8')
    df = df.drop('AverageTemperatureUncertainty', axis=1)
    df = df[df.Country == 'Canada']
    df = df.drop('Country', axis=1)
    df.index = pd.to_datetime(df.dt)
    df = df.drop('dt', axis=1)
    # BUG FIX: DataFrame.ix was deprecated and removed from pandas; this
    # label-based date slice is exactly what .loc does.
    df = df.loc['1900-01-01':]
    df = df.sort_index()

    # Display AT
    df.AverageTemperature.fillna(method='pad', inplace=True)
    mp.plot(df.AverageTemperature)
    mp.show()

    # Rolling Mean
    df.AverageTemperature.plot.line(style='b', legend=True, grid=True,
                                    label='Avg. Temperature (AT)')
    ax = df.AverageTemperature.rolling(window=12).mean().plot.line(
        style='r', legend=True, label='Mean AT')
    ax.set_xlabel('Date')
    mp.legend(loc='best')
    mp.title('Weather timeseries visualization')
    mp.show()

    test_stationarity(df.AverageTemperature)
    # Grid-search ARMA orders up to (4, 4) scored by AIC and BIC.
    res = arma_order_select_ic(df.AverageTemperature, ic=['aic', 'bic'],
                               trend='nc', max_ar=4, max_ma=4,
                               fit_kw={'method': 'css-mle'})
    # BUG FIX: `print res` is Python-2 syntax and a SyntaxError on Python 3.
    print(res)

    # Fit the model
    ts = pd.Series(df.AverageTemperature, index=df.index)
    model = ARMA(ts, order=(3, 3))
    results = model.fit(trend='nc', method='css-mle')
    print(results.summary2())

    # Plot the model
    fig, ax = mp.subplots(figsize=(10, 8))
    fig = results.plot_predict('01/01/2003', '12/01/2023', ax=ax)
    ax.legend(loc='lower left')
    mp.title('Weather Time Series prediction')
    mp.show()
    predictions = results.predict('01/01/2003', '12/01/2023')
def arma_model_selection(series, max_ar=4, max_ma=4):
    """Grid-search ARMA orders by AIC and BIC, plot the criterion heatmaps,
    print the best (AR, MA) order under each criterion and return the full
    selection result."""
    assert not series.isnull().any()
    selection = sto.arma_order_select_ic(
        series.values,
        ic=['aic', 'bic'],
        max_ar=max_ar,
        max_ma=max_ma,
    )

    def _panel(position, grid, title):
        # One heatmap panel with shared axis labelling.
        plt.subplot(1, 2, position)
        sns.heatmap(grid)
        plt.xlabel("Ordre MA")
        plt.ylabel("Ordre AR")
        plt.title(title)

    plt.figure(figsize=(10, 4))
    _panel(1, selection["aic"], "Résultats AIC")
    _panel(2, selection["bic"], "Résultats BIC")
    plt.suptitle(f"max_ar={max_ar}, max_ma={max_ma}")
    plt.show()

    aic_best = selection["aic_min_order"]
    bic_best = selection["bic_min_order"]
    print(
        "AIC meilleur modèle : AR={}, MA={}, AIC={} ".format(
            aic_best[0], aic_best[1], selection['aic'].loc[aic_best]
        )
    )
    print(
        "BIC meilleur modèle : AR={}, MA={}, BIC={} ".format(
            bic_best[0], bic_best[1], selection['bic'].loc[bic_best]
        )
    )
    return selection
def derive_models(y, sentiment, symbol):
    """Fit AR, MA, ARMA and ARIMAX models on y['close'] and store each
    model's in-sample predictions in the returned container.

    Parameters
    ----------
    y : DataFrame-like with a 'close' column.  NOTE(review): it is aliased
        as the output container, so the caller's object is mutated.
    sentiment : exogenous regressor used by the ARIMAX fit.
    symbol : unused in this function; kept for interface compatibility.
    """
    models = y  # alias — predictions are written into the caller's frame
    data = y['close']
    max_iter = min(5, len(data) - 1)

    best_ar = tsa.ar_model.ar_select_order(data, maxlag=max_iter, ic="aic")
    # BUG FIX: the original tested `best_ar.ar_lags is []`, which compares
    # identity against a brand-new list and is therefore always False, so
    # `lags` was unconditionally 1.  Test emptiness (None or []) instead.
    lags = 0 if not best_ar.ar_lags else 1
    max_lags = "AR(" + str(lags) + ")"
    ar_model = tsa.ar_model.AutoReg(data, lags=lags)
    models[max_lags] = ar_model.predict(ar_model.fit().params)

    # Pure-MA search (max_ar=0) for the ARMA(0, q) model.
    best_ma_order = tsa.stattools.arma_order_select_ic(data, max_ar=0,
                                                       max_ma=max_iter,
                                                       ic="aic")
    min_order = "ARMA(0," + str(max(best_ma_order.aic_min_order)) + ")"
    best_ma = tsa.arima.model.ARIMA(
        data, order=(0, 0, best_ma_order.aic_min_order[1]))
    models[min_order] = best_ma.fit().predict()

    # Full ARMA search up to (max_iter, max_iter).
    best_arma_model = st.arma_order_select_ic(data, max_ar=max_iter,
                                              max_ma=max_iter, ic="aic")
    arma_order = "ARMA(" + str(best_arma_model.aic_min_order) + ")"
    best_arma = tsa.arima.model.ARIMA(
        data,
        order=(best_arma_model.aic_min_order[0], 0,
               best_arma_model.aic_min_order[1]))
    models[arma_order] = best_arma.fit().predict()

    # ARIMAX: same order, with sentiment as an exogenous regressor.
    best_arima = tsa.arima.model.ARIMA(
        endog=data, exog=sentiment,
        order=(best_arma_model.aic_min_order[0], 0,
               best_arma_model.aic_min_order[1]))
    # Hoisted: the original fitted this model twice (once for the label,
    # once for the predictions); one deterministic fit suffices.
    arima_fit = best_arima.fit()
    arima_order = "ARIMA(" + str(best_arma_model.aic_min_order) + "," + str(
        arima_fit.params[1]) + "*sentiment)"
    models[arima_order] = arima_fit.predict()
    return models
def run_ARMAX(self, exogenous_data):
    """Fit an ARMAX model (BIC-selected order) on the training split and
    return the out-of-sample predictions for the test period as an array."""
    # Order search up to (5, 5); the BIC minimiser is used below.
    self.armax_order = arma_order_select_ic(self.train, 5, 5)

    # Split the exogenous series to mirror the train/test split.
    split = len(exogenous_data) - self.test_size
    self.exogenous_train = exogenous_data[:split]
    self.exogenous_test = exogenous_data[split:]

    fitted = ARMA(
        self.train,
        order=self.armax_order['bic_min_order'],
        exog=self.exogenous_train,
    ).fit()
    print('Lag: %s' % fitted.k_ar)
    print('Coefficients: %s' % fitted.params)

    first = len(self.train)
    last = len(self.train) + len(self.test) - 1
    forecast = fitted.predict(start=first, end=last, dynamic=False,
                              exog=self.exogenous_test)
    return np.array(forecast)
def test_find_ARIMA_params_automated():
    """Smoke-test automated ARMA order selection on a simulated series.

    Returns the order-selection result so callers can inspect the IC
    tables and minimising orders.
    """
    from statsmodels.tsa.arima_process import arma_generate_sample
    arparams = np.array([.75, -.25])
    maparams = np.array([0.65, 0.35])
    # arma_generate_sample expects full lag polynomials; the AR side is
    # converted here.  The original also built `maparam = np.r_[1, maparams]`
    # (note the missing "s") and never used it — that dead assignment is
    # removed; the MA coefficients are passed through unchanged as before.
    arparams = np.r_[1, -arparams]
    # number of observations
    nobs = 250
    np.random.seed(2014)  # deterministic sample
    y = arma_generate_sample(arparams, maparams, nobs)
    res = arma_order_select_ic(y, max_ar=4, max_ma=4, ic=['aic', 'bic'],
                               trend='nc', model_kw={},
                               fit_kw={'method': 'css'})
    return res
def test_model(run_id): """Function to test model for one run Args: run_id: run for which to test model Returns: plots showing model results """ # Retrieve data for one run to model start = datetime.datetime(2014, 5, 18) end = datetime.datetime(2018, 5, 17) test_measures = REPO.get_measurements(run_id=run_id, start_date=start, end_date=end) # Average data and create train/test split measures_daily = daily_avg(test_measures) train_measures_daily = measures_daily[:-6] test_measures_daily = measures_daily[-7:] train_measures_daily = train_measures_daily.dropna() # Check if data is stationary test_stationarity(train_measures_daily['flow']) # Determine p and q parameters for ARIMA model params = arma_order_select_ic(train_measures_daily['flow'], ic='aic') # Build and fit model mod = ARIMA(train_measures_daily['flow'], order=(params.aic_min_order[0], 0, params.aic_min_order[1]), exog=train_measures_daily[['temp', 'precip']]).fit() test_measures_daily.loc[:, 'prediction'] = \ mod.forecast(steps=7, exog=test_measures_daily[['temp', 'precip']])[0] train_measures_daily.loc[:, 'model'] = mod.predict() # Plot results plt.plot(test_measures_daily[['flow', 'prediction']]) plt.plot(train_measures_daily[['flow', 'model']]['2015-07':]) plt.legend(['Test values', 'Prediction', 'Train values', 'Model'])
def arima():
    """Explore differencing orders for the agriculture series and return an
    ARIMA (p, d, q) order, with d fixed to 1 and (p, q) chosen by an AIC
    grid search on the first difference."""
    series_ch = pd.read_csv(
        "http://labfile.oss.aliyuncs.com/courses/1176/agriculture.csv",
        index_col=0)
    series_ch.plot(figsize=(9, 6))

    # First difference: plot the series, its autocorrelation and the
    # Ljung-Box p-values.
    fig, axes = plt.subplots(ncols=3, nrows=1, figsize=(15, 3))
    diff_ch = series_ch.diff().dropna()
    axes[0].plot(diff_ch)
    autocorrelation_plot(diff_ch, ax=axes[1])
    axes[2].plot(acorr_ljungbox(diff_ch)[1])

    # Lag-2 difference, same three diagnostics.
    fig, axes = plt.subplots(ncols=3, nrows=1, figsize=(15, 3))
    diff_ch1 = series_ch.diff(periods=2).dropna()
    axes[0].plot(diff_ch1)
    autocorrelation_plot(diff_ch1, ax=axes[1])
    axes[2].plot(acorr_ljungbox(diff_ch1)[1])

    # Second difference.
    fig, axes = plt.subplots(ncols=3, nrows=1, figsize=(15, 3))
    diff_ch2 = series_ch.diff().diff().dropna()
    axes[0].plot(diff_ch2)
    autocorrelation_plot(diff_ch2, ax=axes[1])
    axes[2].plot(acorr_ljungbox(diff_ch2)[1])

    # Third difference.
    fig, axes = plt.subplots(ncols=3, nrows=1, figsize=(15, 3))
    diff_ch3 = series_ch.diff().diff().diff().dropna()
    axes[0].plot(diff_ch3)
    autocorrelation_plot(diff_ch3, ax=axes[1])
    axes[2].plot(acorr_ljungbox(diff_ch3)[1])

    # d fixed at 1; (p, q) from the AIC-minimising ARMA order of the first
    # difference.  NOTE(review): `acorr_ljungbox(...)[1]` above relies on a
    # tuple return — newer statsmodels returns a DataFrame; confirm version.
    d = 1
    p, q = arma_order_select_ic(diff_ch, ic='aic')['aic_min_order']
    print('p,d,q', p, d, q)
    return p, d, q
def arma_select_order(gold):
    """Grid-search ARMA orders for `gold` (up to (3, 3), no trend, scored
    by AIC and BIC) and return the selection result.

    BUG FIX: the original computed the result and discarded it, so the
    function always returned None.  Returning the result is backward
    compatible — callers that ignored the previous None are unaffected.
    """
    return sttool.arma_order_select_ic(gold, max_ar=3, max_ma=3,
                                       ic=['aic', 'bic'], trend='nc')
# Notebook-style script: pull XRP and ETH close prices from Quandl, run
# ARMA order selection on both, then fit and summarise ARMA models.
%run setup.ipy
import quandl
import my_secrets
quandl.ApiConfig.api_key = my_secrets.QUANDL_API_KEY

xrp = quandl.get("BITFINEX/XRPUSD")["Last"]
eth = quandl.get("BITFINEX/ETHUSD")["Last"]

# Should we replace the index? Some dates are missing so we can't set a
# frequency to 'D' without actually replacing the entire index? This
# generates a lot of warnings...
# When we've played with values a bit, we can then use the below.
from statsmodels.tsa import stattools

# Default-grid ARMA order search on each price series.
xrp_stats = stattools.arma_order_select_ic(xrp)
eth_stats = stattools.arma_order_select_ic(eth)
print("XRP: ", xrp_stats)
print("ETH:", eth_stats)

from statsmodels import api as sms

# NOTE(review): the (4, 2) / (3, 2) orders are hard-coded — presumably
# read off the printouts above; confirm they match *_stats.bic_min_order.
xrp_model = sms.tsa.ARMA(xrp, order=(4, 2))
eth_model = sms.tsa.ARMA(eth, order=(3, 2))
xrp_results = xrp_model.fit()
eth_results = eth_model.fit()
xrp_results.summary()
eth_results.summary()
# First difference of the production series.
df["production"] = df["production"].diff()
df["production"].dropna(inplace=True)  # drop the NaN values
# NOTE(review): Series.dropna(inplace=True) on a selected column may not
# write back into the DataFrame — verify df itself loses the NaN rows.
# Re-test; still not stationary, so difference once more.
df["production"] = df["production"].diff()
df.dropna(inplace=True)
# result = test_stationary(df)
# print(result)
# plt.show()
# The test now suggests the series can be treated as stationary.

# ------------------------ Step 3: order selection and fitting
# Orders are usually chosen either from ACF/PACF inspection or by
# brute-force search.  Orders are rarely large and the data set is small,
# so brute force is a reasonable choice here.
# The brute-force search walks the candidate orders and picks the one
# minimising AIC, BIC and HQIC.
order = stattools.arma_order_select_ic(df["production"].values, max_ar=3,
                                       max_ma=3, ic=["aic", "bic", "hqic"])
bic_min = order.bic_min_order
print(order.bic_min_order)

# Fit the model.
# NOTE(review): this fits on the whole DataFrame `df`, not the
# "production" column used for order selection — confirm intended.
model = ARMA(df, bic_min).fit()

# ---------------------- Step 4: white-noise checks on the residuals
# QQ-plot check and Durbin-Watson check.
resid = model.resid
# fig = plt.figure(figsize=(6, 6))
# ax = fig.add_subplot(111)
# fig = qqplot(resid, line="q", ax=ax, fit=True)
# plt.show()
# Durbin-Watson: a statistic close to 2 indicates no first-order
# autocorrelation in the series.
#3.下面可以开始建模过程, #(1)观察自相关图和偏自相关图,定阶 #求自相关,偏自相关系数 lag_acf = acf(data, nlags=20, fft=False) lag_pacf = pacf(data, nlags=20, method='ols') #用自相关、偏自相关 fig, axes = plt.subplots(1, 2, figsize=(20, 5)) plot_acf(data, lags=20, ax=axes[0]) plot_pacf(data, lags=20, ax=axes[1], method='ols') plt.show(block=True) #这里python已经写了一个可以帮助选阶的函数 order_trend = arma_order_select_ic(data) print(order_trend['bic_min_order']) #结果为(2, 0) 也就是使用AR(2)模型 result_trend = ARMA(data, (2, 0)).fit() print(result_trend.params) exit() #result_trend.arparams 关于AR的参数 #result_trend.bic BIC信息准则值 #result_trend.bse 参数的标准误 #result_trend.hqic HQ信息准则 #result_trend.k_ar AR系数的数量 #result_trend.k_ma MA系数数量 #result_trend.k_trend 有常数时是1,没有常数时是0 #result_trend.llf 对数似然函数值 #result_trend.maparams MA参数值
# Standardised residuals of model6.
# NOTE(review): this divides by sqrt(model6.sigma) while the ARIMA block
# below uses model.sigma2 — confirm whether `sigma` here should be `sigma2`.
stdresid = model6.resid / math.sqrt(model6.sigma)
plt.plot(stdresid)
plot_acf(stdresid, lags=20)
# Ljung-Box portmanteau tests on the residual autocorrelations.
LjungBox = stattools.q_stat(stattools.acf(stdresid)[1:13], len(stdresid))
print(LjungBox[1][-1])
LjungBox = stattools.q_stat(stattools.acf(stdresid)[1:20], len(stdresid))
print(LjungBox[1][-1])
plot_acf(stdresid, lags=40)
print(model6.forecast(3)[0])
print(CPI.head(3))

# Datang series: load, restrict to 2014-2016 and inspect.
Datang = pd.read_csv('Datang.csv', index_col='time')
Datang.index = pd.to_datetime(Datang.index)
returns = Datang['2014-01-01':'2016-01-01']
print(returns.head(n=3))
print(returns.tail(n=3))
# Stationarity (ADF) and white-noise (Ljung-Box) diagnostics.
print(ADF(returns).summary())
print(stattools.q_stat(stattools.acf(returns)[1:12], len(returns))[1])
# ARMA order search (MA order capped at 4).
print(stattools.arma_order_select_ic(returns, max_ma=4))
model = arima_model.ARIMA(returns, order=(1, 0, 1)).fit()
print(model.summary())
print(model.conf_int())
# Standardised residuals and their Ljung-Box diagnostics.
stdresid = model.resid / math.sqrt(model.sigma2)
plt.plot(stdresid)
plot_acf(stdresid, lags=12)
LjungBox = stattools.q_stat(stattools.acf(stdresid)[1:12], len(stdresid))
print(LjungBox[1])
def find_order(data):
    """Return an ARIMA (p, d, q) order with d fixed to 1, where (p, q) is
    the BIC-minimising ARMA order for the 'close' column of `data`."""
    p, q = arma_order_select_ic(data["close"]).bic_min_order
    return (p, 1, q)
# PACF plot with 95% confidence bounds at ±1.96/sqrt(N).
plt.plot(lag_pacf, marker="o")
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(df["df_log_shift"].dropna())),
            linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(df["df_log_shift"].dropna())),
            linestyle='--', color='gray')
plt.title('Partial Autocorrelation Function')
plt.xlabel('number of lags')
plt.ylabel('correlation')
plt.tight_layout()

# Automatic ARMA order search on the differenced log series.
# NOTE(review): the result is neither stored nor printed outside a
# notebook context, and the (2, 2, 0) order below is hard-coded.
from statsmodels.tsa.stattools import arma_order_select_ic
arma_order_select_ic(df["df_log_shift"].dropna())

### AR
from statsmodels.tsa.arima_model import ARIMA
model = ARIMA(df["df_log_shift"].dropna(), order=(2, 2, 0))
results_AR = model.fit(disp=-1)
plt.plot(df["df_log_shift"].dropna())
plt.plot(results_AR.fittedvalues, color="Red")
plt.show()
print(df["df_log_shift"].dropna())
print(results_AR.fittedvalues)
#MA
# Suppress statsmodels convergence warnings raised during the many fits
# in the grid below.
warnings.filterwarnings("ignore", category=Warning)
data = datapreprocess()
output_lst = []
# generate submit version
for ampm in ampms.keys():
    for route in routes:
        for weekday in weekdays:
            # log-smooth
            sub = np.log(tolist(data[route][weekday][ampm]))
            # sub = np.log(tolist(data['C-3'][6]['am']))
            # fit best model
            order = st.arma_order_select_ic(sub, max_ar=5, max_ma=5,
                                            ic=['aic', 'bic', 'hqic'])
            model = ARMA(sub, order=order.bic_min_order)
            result_arma = model.fit(disp=-1, method='css')
            predict = result_arma.predict()
            # Extend the prediction window six steps past the sample end.
            start = len(sub) - len(predict)
            end = start + len(predict) + 6
            # fig = result_arma.plot_predict(start, end)
            # fig.suptitle("%s %s %s" %(route,weekday,ampm))
            # Keep only the last six (out-of-sample) forecast steps.
            forecast = result_arma.predict(start, end)[-6:]
            # print (np.exp(sub))
            # Undo the log transform before collecting the outputs.
            for x in np.exp(forecast):
                output_lst.append(x)
plt.show() #6. import statsmodels.tsa.arima_process as sm from statsmodels.graphics.tsaplots import * import numpy as np import pandas as pd numbers=np.random.normal(size=100) numbers=pd.Series(numbers) numbers.plot() plt.show() plot_acf(numbers,lags=20) from statsmodels.tsa import stattools stattools.arma_order_select_ic(numbers.values,max_ma=4) #7. zgsy=pd.read_csv('Data/Part4/003/zgsy.csv') clprice=zgsy.iloc[:,4] clprice.plot() plot_acf(clprice,lags=20) from arch.unitroot import ADF adf=ADF(clprice,lags=6) print(adf.summary().as_text()) logReturn=pd.Series((np.log(clprice))).diff().dropna() logReturn.plot() adf=ADF(logReturn,lags=6) print(adf.summary().as_text())
show_acf_pacf(sleep_cost_all)
import matplotlib.pyplot as plt
ts_log = np.log(sleep_cost_all)


def show_figure(ts):
    # Quick line plot of a sequence against its positional index.
    plt.plot(range(len(ts)), ts)
    plt.show()


import statsmodels.tsa.stattools as st
# Brute-force order search up to (5, 5) scored by AIC, BIC and HQIC.
order = st.arma_order_select_ic(sleep_cost_all, max_ar=5, max_ma=5,
                                ic=['aic', 'bic', 'hqic'])
order.bic_min_order  # the result is (0, 0)
from statsmodels.tsa.arima_model import ARMA
# NOTE(review): order (2, 1) is hard-coded here instead of the selected
# bic_min_order above — confirm intended.
model = ARMA(sleep_cost_all, order=(2, 1))  # moving-average model
result_arma = model.fit(disp=-1, method='css')  # fit
train_predict = result_arma.predict(7)  # predict
# Root-mean-square error against the observations from index 7 on.
np_arr = np.array(sleep_cost_all)
RMSE = np.sqrt(((train_predict - np_arr[7:])**2).sum() / np_arr[7:].size)
print(RMSE)
df_read = sm.datasets.nile.load_pandas().data #print (df_read['volume']) # Data Split (70: 30) df_test = df_nile['volume'].iloc[4000:6001] df_train = df_nile['volume'].iloc[:4000] # from statsmodels.tsa import stattools as st # ARMAモデルの次数を決める print(st.arma_order_select_ic(df_train, ic='bic', trend = 'nc')) arma_11 = sm.tsa.ARMA(df_train, (3, 0)).fit() #arma_11 = sm.tsa.SARIMAX(df_train, order=(3,1,2), seasonal_order=(0,0,0,213), enforce_stationarity = False, enforce_invertibility = False).fit() arma_11_inpred = arma_11.predict(start=2, end=4000,typ='levels') # out-of-sample predict arma_11_outpred = arma_11.predict(start=3999, end=6000,typ='levels') # plot data and predicted values def plot_ARMA_results(origdata, pred11in, pred11out):
stationarity_test(df.AverageTemperature) # The model is usually referred to as the ARMA(p,q) model where # p is the order of the autoregressive part and # q is the order of the moving average part. # Determining this p and q value can be a challenge. # So, pandas has a function for finding this. # To get the p and q value - print( arma_order_select_ic(df.AverageTemperature, ic=['aic', 'bic'], trend='nc', max_ar=4, max_ma=4, fit_kw={'method': 'css-mle'})) #Lets fit the model and make prediction using ARMA. # Fit the model ts = pd.Series(df.AverageTemperature, index=df.index) model = ARMA(ts, order=(3, 3)) results = model.fit(trend='nc', method='css-mle', disp=-1) print(results.summary2()) # Now, plot the prediction - # Plot the model fig, ax = plt.subplots(figsize=(10, 8)) fig = results.plot_predict('01/01/2010', '12/01/2023', ax=ax) ax.legend(loc='lower left')
# Visualise the raw temperature series against the AR-model prediction.
fig = go.Figure()
fig.add_trace(go.Scatter(name="Raw Data", x=df.index, y=df.temp))
fig.add_trace(
    go.Scatter(name="AR model Prediction", x=ar_predict.index, y=ar_predict))
fig.update_xaxes(rangeslider_visible=True)
fig.update_layout(title_text="AR MODEL",
                  xaxis_title="Date",
                  yaxis_title="Temperature, C")
plotly.offline.plot(fig, filename=r'../Images/4_AR.png')

# 2. ARMA Model
# with statsmodel, aic check of params
from statsmodels.tsa import stattools as st
from statsmodels.tsa.arima_model import ARMA, ARIMA, ARMAResults
# NOTE(review): the selection result is not captured, and the [3, 2] order
# below is hard-coded — confirm it matches the AIC-minimising order.
st.arma_order_select_ic(train_df, ic='aic')
arma = ARMA(train_df, order=[3, 2]).fit(maxlag=4, ic='aic', dates=date)
arma_predict = arma.predict('2019-10-22', '2020-10-21')

# Visualization
fig = go.Figure()
fig.add_trace(go.Scatter(name="Raw Data", x=df.index, y=df.temp))
fig.add_trace(
    go.Scatter(name="ARMA model Prediction",
               x=arma_predict.index,
               y=arma_predict))
fig.update_xaxes(rangeslider_visible=True)
fig.update_layout(title_text="ARMA MODEL",
                  xaxis_title="Date",
                  yaxis_title="Temperature, C")
plotly.offline.plot(fig, filename=r'../Images/5_ARMA.png')
# Walk-forward evaluation: for each item, fit an ARIMA on a growing
# training window, forecast one test point at a time, then score the
# predictions (and an existing "gw" forecast) with MAPE.
# NOTE(review): the matching `except` for the outer `try` is outside this
# fragment's view.
for ind, item in enumerate(items):
    try:
        print("================================================")
        print("item[{0:d}]========================".format(item))
        temp = [float(x) for x in result[item]]
        # Hold out the last 12 observations to feed back in one at a time.
        training = temp[:-12]
        to_be_add = temp[-12:]
        try:
            testing = [float(x) for x in sales[ind]]
        except ValueError:
            continue
        gw_fcsting = gw_fcst[ind][:]
        predictions = []
        print(testing)
        # Pick (p, q) once on the initial training window.
        res = arma_order_select_ic(training, ic=['aic', 'bic'], trend='nc')
        for t in range(len(testing)):
            # res.aic_min_order
            # res.bic_min_order
            model = ARIMA(training,
                          order=(res.aic_min_order[0], 1,
                                 res.aic_min_order[1]))
            model_fit = model.fit(disp=0)
            output = model_fit.forecast(steps=13)
            # print(len(output))
            # First point forecast of the 13-step horizon.
            yhat = output[0][0]
            obs = testing[t]
            predictions.append(yhat)
            # Grow the training window by one held-out observation.
            training.append(to_be_add[t])
        yhmape = mape(testing, predictions)
        gwmape = mape(testing, gw_fcsting)
plt.suptitle(u'分站点预测/实际值对比')
MSE = []
MAE = []
MAPE = []
layout_num = 0  # used for laying out the subplots
for i in range(0, SITE_SIZE):
    # Start a fresh 3x2 figure every six sites.
    if (layout_num == 6):
        layout_num = 0
        plt.figure(figsize=(16, 9))
        plt.suptitle(u'分站点预测/实际值对比')
    subplot = plt.subplot(3, 2, layout_num + 1)
    site = site_names[i]
    # NOTE(review): `site` comes from `site_names` yet is used below as the
    # series to fit — confirm it holds data rather than a name.
    order = stattools.arma_order_select_ic(site, max_ar=3, max_ma=3,
                                           ic=['aic', 'bic', 'hqic'])
    print("(p,q):")
    pq = order.bic_min_order
    print(order.bic_min_order)  # (p,q)
    # Fit (train the model) and predict in-sample.
    model = ARMA(site, pq).fit()
    predict_data = model.predict(start=0, end=DATA_SIZE - 1)
    # Invert the normalisation here.
    predict_data = scaler.inverse_transform(predict_data.reshape(-1, 1))
    site = scaler.inverse_transform(site.reshape(-1, 1))
    # Invert the log transform.
    site = np.exp(site)
    predict_data = np.exp(predict_data)
    plt.plot(predict_data)
# TODO: Ljung-Box test # Check if acorr_ljungbox(ts, lags=1) source code # If AR model is needed and df_data is changed TS_new = TS_Analysis(df_data=df_data_new) # Autoregressive Model AR(p) AR_1_model = TS.AR_p(x='i1701', p=1) df_sp = TS.add_sp_lag(x='i1701', sp_lag=4) print(AR_1_model.summary()) TS.acf(AR_1_model) TS.acf_table(AR_1_model, maxlag=12) # ARMA model best_order = st.arma_order_select_ic(df_data, max_ar=5, max_ma=5, ic=['aic', 'bic', 'hqic']) arma_model = ARMA(df_data, order=best_order.bic_min_order).fit(disp=-1, method='css') print(arma_model.summary()) #################################################################### # PART III Model Selection and Prediction # #################################################################### # # this is the nsteps ahead predictor function # from statsmodels.tsa.arima_model import _arma_predict_out_of_sample # # res = sm.tsa.ARMA(y, (3, 2)).fit(trend="nc") # res = arma_model # # get what you need for predicting one-step ahead # params = res.params # residuals = res.resid # p = res.k_ar
def choose_order(ts, maxar, maxma):
    """Return the BIC-minimising (p, q) ARMA order for `ts`, searching up
    to (maxar, maxma) and scoring candidates with AIC, BIC and HQIC."""
    selection = st.arma_order_select_ic(ts, max_ar=maxar, max_ma=maxma,
                                        ic=['aic', 'bic', 'hqic'])
    return selection.bic_min_order
def arima_model(self, run_id): """Creates flow rate predictions using ARIMA model. Calls Arima.daily_avg to retrieve data for given run, then creates flow rate predictions by using statsmodels functions arma_order_select_ic and ARIMA. Three weeks of past flow rate data are also returned for plotting purposes. Args: run_id (int): id of run for which model will be created Returns: DataFrame: containing time-series flow rate predictions for next 7 days and historical flow rate for past 21 days """ # Retrieve data for modelling measures = self.daily_avg(run_id) # don't try to compute if there aren't any measures if measures is None: return pd.DataFrame() # Take past 7-day average of exogenous predictors to use for # future prediction exog_future_predictors = pd.concat( [measures.iloc[-7:, :].mean(axis=0).to_frame().T] * 7, ignore_index=True) try: # Find optimal order for model params = arma_order_select_ic(measures['flow'], ic='aic') try: # Build and fit model mod = ARIMA(measures['flow'], order=(params.aic_min_order[0], 0, params.aic_min_order[1]), exog=measures[['temp', 'precip']]).fit() prediction = pd.DataFrame([ mod.forecast( steps=7, exog=exog_future_predictors[['temp', 'precip']], alpha=0.05)[0] ]).T except Exception: # If model doesn't converge, return "prediction" # of most recent day prediction = pd.concat([measures.iloc[-1, :].to_frame().T] * 7, ignore_index=True)['flow'] except ValueError: # If order fitting doesn't converge, return "prediction" # of most recent day prediction = pd.concat([measures.iloc[-1, :].to_frame().T] * 7, ignore_index=True)['flow'] # Add dates and return past 21 days for plotting prediction_dates = [ measures.index[-2] + datetime.timedelta(days=x) for x in range(0, 7) ] prediction.index = prediction_dates past = measures['flow'][-22:-1] prediction = pd.concat([past[:-1], prediction], axis=0) return prediction
def _arma_order_selector(ts, ic='bic'):
    """Return the `ic`-minimising (p, q) ARMA order for `ts`, fitting the
    candidate models by conditional sum of squares."""
    selection = arma_order_select_ic(ts, ic=ic, fit_kw={'method': 'css'})
    return getattr(selection, ic + '_min_order')
# Autocorrelation at two zoom levels.
autocorrelation_plot(ts);
plt.xlim(xmax=100);
plt.figure(figsize=(12,6))
autocorrelation_plot(ts);
plt.xlim(xmax=10);

# The highest auto-correlation peaks are every ~365 days and oscillate
# with a frequency of about 7 days. This corresponds with our intuitive
# ideas about shopping - that it is a weekly chore. Even though
# Ecuadorians are most likely paid monthly, there is no significant
# periodicity visible on this scale.
#
# From the last diagram it seems that the autocorrelation is significant
# (above the dashed line) for a period of maximum 2 days. All the
# references then suggest to use 2 for the p parameter. But the
# stats.model library contains a nifty tool for coefficient selection :
# arma_order_select_ic(). It performs a grid search with the p and q
# parameters. Let's see what it says for max parameters 10 and the
# [Bayesian Information Criterion](https://en.wikipedia.org/wiki/Bayesian_information_criterion)
# for estimator:

# In[ ]:

result = arma_order_select_ic(ts, max_ar=10, max_ma=10, ic=['aic','bic'],
                              trend='c',
                              fit_kw=dict(method='css', maxiter=500))
print('The bic prescribes these (p,q) parameters : {}'.format(result.bic_min_order))
print('The aic prescribes these (p,q) parameters : {}'.format(result.aic_min_order))

# Heatmaps of the full AIC/BIC grids for comparison.
plt.figure(figsize=(12,6))
plt.subplot(1,2,1)
plt.title('bic results')
seaborn.heatmap(result.bic);
plt.subplot(1,2,2)
plt.title('aic results')
seaborn.heatmap(result.aic);

# With the initial parameters, we are getting many many convergence
# warnings, which may show that the model does not fit well the time
# series. It may be important to factor out seasonality first, which we
# shall do in a later part of the analysis. To reduce the amount of
# warnings, we raised the number of iterations to a maximum of 500, in
# case of a very slow converging rate. We also plotted heatmaps of the
# results for both the aic and bic, for comparison.
#
# It is clear from the heatmap that we can afford to run the model with
# parameters (5,0,5)
red_35.plot(figsize=(12, 8))
plt.show()

# In[132]:

# Ljung-Box test; the null hypothesis is that the series is white noise.
from statsmodels.stats.diagnostic import acorr_ljungbox
acorr_ljungbox(red_35, lags=1)
acorr_ljungbox(sub_pur, lags=1)

# In[138]:

# Automatically select the AR order p and MA order q.
from statsmodels.tsa.stattools import arma_order_select_ic
arma_order_select_ic(diff, max_ar=7, max_ma=7, ic='aic')['aic_min_order']

# In[139]:

arma_order_select_ic(diff, max_ar=7, max_ma=7, ic='bic')['bic_min_order']

# In[140]:

arma_order_select_ic(diff, max_ar=7, max_ma=7, ic='hqic')['hqic_min_order']

# In[142]:

# Reject the null hypothesis: the residuals are not white noise.
model_23 = ARIMA(diff, order=(2, 0, 3)).fit()
red_23 = model_23.resid
acorr_ljungbox(red_23, lags=1)
#!/usr/bin/env python3 import pandas as pd import numpy as np import statsmodels.tsa.stattools as st from statsmodels.tsa.arima_model import ARMA from statsmodels.sandbox.stats.diagnostic import acorr_ljungbox import matplotlib.pyplot as plt if __name__ == "__main__": allData = pd.read_csv('MonthlyWeather.txt', header=None, sep=',') data = allData.iloc[:, 0] original_new = data[234:] data = data[0:234] order = st.arma_order_select_ic(data, ic=['aic', 'bic']) model = ARMA(data, order=(4, 3)) result_arma = model.fit(disp=-1, method='css') print(result_arma.summary()) predict_ts = result_arma.predict() err = (data - predict_ts).dropna() p_value = acorr_ljungbox(err, [6, 12, 18, 24]) print(p_value) predict_new = result_arma.predict( 234, 271, ) ax = predict_new.plot(label='forecast') original_new.plot(label='observed') ax.set_xlabel('Month') ax.set_ylabel('Precipitation') plt.legend() plt.show()