def main():
    """Fit an ARMA model to the blocked-connections dataset and report diagnostics.

    FIX: the original used Python 2 `print` statements and `raw_input`,
    which are syntax errors under Python 3.
    """
    # Raw string avoids fragile backslash escaping (the original mixed
    # '\\' with a bare '\d' inside the Windows path).
    file = r'E:\IntegrationTesting\Predictive-Analytics\ARMA\datasets\dataset3.xlsx'
    head = 0
    sheet = 1
    df = readFile(file, head, sheet)
    print("Want to plot for blocked connections? : ")
    plot(df)
    df = acf_pacf(df)
    # Model orders are supplied interactively by the user.
    p = input("Enter order of AR model : ")
    q = input("Enter order of MA model : ")
    arma_pq = sm.tsa.ARMA(df, (int(p), int(q))).fit()
    print(arma_pq.params, " :Parameters")
    print("AIC : {0}, BIC : {1}, HQIC : {2} ".format(arma_pq.aic, arma_pq.bic, arma_pq.hqic))
    dw = durbin_watson(arma_pq)
    resid = arma_pq.resid
    normalTest = normaltest(resid)
    print("Printing Q-Q plot")
    qqplot(resid)
    print("Plotting acf & pacf for residual values")
    acf_pacf(resid, 2)
    r, q, p = get_acf(resid)
    start, end = getStartEnd(df)
    predictions = predict(arma_pq, start, end)
    mean = meanForecastErr(df.blocked, predictions)
    print("Mean error in forecasting : {0}. \nNote that this error is on 1st difference and not actual blocked connections".format(mean))
    res = solver(predictions)
    print(res)
def main():
    """Run a handful of scipy.stats hypothesis tests and build demo pandas objects."""
    import scipy.stats as ss

    # Normality test on a small normal sample.
    print('正态检验', ss.normaltest(ss.norm.rvs(size=10)))
    # Chi-square test on a 2x2 contingency table (no Yates correction).
    print('卡四方表格', ss.chi2_contingency([[15, 95], [85, 5]], False))
    # Two-sample independent t-test.
    print('独立分布检验', ss.ttest_ind(ss.norm.rvs(size=10), ss.norm.rvs(size=20)))
    # One-way ANOVA (F test) across three groups.
    print('F分布检验', ss.f_oneway([49, 50, 39, 40, 43],
                               [28, 32, 30, 26, 34],
                               [38, 40, 45, 42, 48]))

    from statsmodels.graphics.api import qqplot
    from matplotlib import pyplot as plt

    # Q-Q plot of a standard-normal sample.
    qqplot(ss.norm.rvs(size=100))
    plt.show()

    # Demo Series and two-row DataFrame (s1, s2).
    row1 = [0.1, 0.2, 1.1, 2.4, 1.3, 0.3, 0.5]
    row2 = [0.5, 0.4, 1.2, 2.5, 1.1, 0.7, 0.1]
    s = pd.Series(row1)
    df = pd.DataFrame([row1, row2])
def main():
    """scipy.stats hypothesis tests, correlation, linear regression and PCA demos."""
    import scipy.stats as ss
    print(ss.normaltest(ss.norm.rvs(size=10)))  # normality test
    print(ss.chi2_contingency([[15, 95], [85, 5]], False))  # 2x2 chi-square table
    print(ss.ttest_ind(ss.norm.rvs(size=10), ss.norm.rvs(size=20)))  # independent t-test
    print(ss.f_oneway([49, 50, 39, 40, 43], [28, 32, 30, 26, 34],
                      [38, 40, 45, 42, 48]))  # one-way ANOVA (F test)
    from statsmodels.graphics.api import qqplot
    from matplotlib import pyplot as plt
    qqplot(ss.norm.rvs(size=100))  # Q-Q plot
    plt.show()
    s = pd.Series([0.1, 0.2, 1.1, 2.4, 1.3, 0.3, 0.5])
    df = pd.DataFrame([[0.1, 0.2, 1.1, 2.4, 1.3, 0.3, 0.5],
                       [0.5, 0.4, 1.2, 2.5, 1.1, 0.7, 0.1]])
    # Correlation analysis.
    print(s.corr(pd.Series([0.5, 0.4, 1.2, 2.5, 1.1, 0.7, 0.1])))
    print(df.corr())
    import numpy as np
    # Regression demo.
    # FIX: np.float was removed in NumPy 1.24; the builtin float is equivalent.
    x = np.arange(10).astype(float).reshape((10, 1))
    y = x * 3 + 4 + np.random.random((10, 1))
    print(x)
    print(y)
    from sklearn.linear_model import LinearRegression
    linear_reg = LinearRegression()
    reg = linear_reg.fit(x, y)
    y_pred = reg.predict(x)
    print(reg.coef_)
    print(reg.intercept_)
    print(y.reshape(1, 10))
    print(y_pred.reshape(1, 10))
    plt.figure()
    plt.plot(x.reshape(1, 10)[0], y.reshape(1, 10)[0], "r*")
    plt.plot(x.reshape(1, 10)[0], y_pred.reshape(1, 10)[0])
    plt.show()
    # PCA reduction to a single component.
    df = pd.DataFrame(
        np.array([
            np.array([2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2, 1, 1.5, 1.1]),
            np.array([2.4, 0.7, 2.9, 2.2, 3, 2.7, 1.6, 1.1, 1.6, 0.9])
        ]).T)
    from sklearn.decomposition import PCA
    lower_dim = PCA(n_components=1)
    lower_dim.fit(df.values)
    print("PCA")
    print(lower_dim.explained_variance_ratio_)
    print(lower_dim.explained_variance_)
def drawPIT(self, data, cdf, xlabel, ylabel, title, isSave, savePath, isShow):
    """Draw a PIT Q-Q plot of *data* against *cdf* with Kolmogorov 5% bands."""
    line_width = 4
    font_size = 40
    fig = plt.figure(figsize=(12, 12))
    axs = fig.add_subplot(111)
    fig = qqplot(data, dist=cdf, line='45', ax=axs)
    # Half-width of the Kolmogorov-Smirnov 5% significance band.
    deta = 1.358 / (len(data)) ** 0.5 * (2 ** 0.5)
    axs.plot([deta, 1], [0, 1 - deta], '--', color='blueviolet',
             lw=line_width, label='Kolmogorov 5% significance band')
    axs.plot([0, 1 - deta], [deta, 1], '--', color='blueviolet', lw=line_width)
    axs.set_title(title, loc="center", fontsize=font_size)
    axs.set_xlabel(xlabel, fontsize=font_size)
    axs.set_ylabel(ylabel, fontsize=font_size)
    axs.set_xlim([0, 1])
    axs.set_ylim([0, 1])
    plt.xticks(fontsize=font_size)
    plt.yticks(fontsize=font_size)
    plt.grid()
    plt.legend(fontsize=25)
    if isShow:
        plt.show()
    if isSave:
        fig.savefig(savePath, bbox_inches="tight", dpi=300)
    plt.close()
def residue_test(residue):
    """Check whether ARIMA residuals look like zero-mean, constant-variance
    normal white noise: PACF plot, Q-Q plot and Ljung-Box statistics."""
    fig = plt.figure(figsize=(12, 8))
    ax2 = fig.add_subplot(212)
    fig = plot_pacf(residue.values.squeeze(), lags=35, ax=ax2)
    plt.show()

    # Q-Q plot: visually check the residuals against a normal distribution.
    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(111)
    fig = qqplot(residue, line='q', ax=ax, fit=True)
    plt.show()

    # Ljung-Box style table over the first lags: overall correlation /
    # randomness of the series.
    r1, q1, p1 = ACF(residue.values.squeeze(), qstat=True)
    tmp = np.c_[list(range(1, 36)), r1[1:], q1, p1]
    table = pd.DataFrame(tmp, columns=['lag', 'AC', 'Q', 'Prob(>Q)'])
    print(table.set_index('lag')[:15])

    # White-noise test on the residuals.
    print('残差的白噪声检验结果为:', acorr_ljungbox(residue, lags=1))
def valid_model(data):
    """Diagnostics for an ARMA(8, 0) fit on *data*: ACF/PACF plots,
    Durbin-Watson statistic, residual Q-Q plot and Ljung-Box table."""
    arma_mod80 = sm.tsa.ARMA(data, (8, 0)).fit()
    resid = arma_mod80.resid

    # ACF / PACF of the input series.
    fig = plt.figure(figsize=(12, 8))
    upper = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(data, lags=40, ax=upper)
    lower = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(data, lags=40, ax=lower)
    plt.show()

    # Durbin-Watson: a value near 2 means no first-order autocorrelation.
    print(sm.stats.durbin_watson(arma_mod80.resid.values))

    # Residual normality check via Q-Q plot.
    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    fig = qqplot(resid, line='q', ax=ax, fit=True)
    plt.show()

    # Ljung-Box statistics for the residuals.
    r, q, p = sm.tsa.acf(resid.values.squeeze(), qstat=True)
    data = np.c_[range(1, 41), r[1:], q, p]
    table = pd.DataFrame(data, columns=['lag', 'AC', 'Q', 'Prob(>Q)'])
    print(table.set_index('lag'))
def check_norm_qq(self, ):
    """Normality test and Q-Q plot for the residuals of the (module-level)
    fitted model `arma_mod`.

    FIX: `print norm` was Python 2 syntax — a SyntaxError under Python 3.
    """
    norm = stats.normaltest(arma_mod.resid)
    print(norm)
    figure003 = plt.figure(figsize=(12, 6))
    ax5 = figure003.add_subplot(111)
    figqq = qqplot(arma_mod.resid, ax=ax5, fit=True, line='q')
    plt.show()
def analise_model(arma_mod):
    """Plot the fitted model, test residual normality, and draw a Q-Q plot."""
    plot_model(arma_mod)
    resid = arma_mod.resid
    # D'Agostino-Pearson normality test on the residuals.
    print(scipy.stats.normaltest(resid))
    figure = plt.figure(figsize=(12, 8))
    axis = figure.add_subplot(111)
    figure = qqplot(resid, line='q', ax=axis, fit=True)
def ARMA_model(train, order):
    """Fit an ARMA model on *train*, report the in-sample fit and residual
    diagnostics, and return the fitted result object."""
    arma_model = ARMA(train, order)   # build the ARMA model
    result = arma_model.fit()         # estimate parameters
    print(result.summary())           # full model report

    # ---- in-sample prediction ----
    pred = result.predict()
    pred.plot()
    train.plot()
    # BUG FIX: the printed value is the mean squared error, but the label
    # claimed "standard deviation" (标准差); label it as MSE (均方误差).
    print('均方误差为{}'.format(mean_squared_error(train, pred)))

    # ---- residual diagnostics ----
    resid = result.resid
    # Q-Q plot: are the residuals normally distributed?
    plt.figure(figsize=(12, 8))
    qqplot(resid, line='q', fit=True)
    plt.show()
    # Durbin-Watson: residual autocorrelation (~2 means none).
    print('D-W检验值为{}'.format(durbin_watson(resid.values)))
    return result
def seasonal_detect(ts, trend, seasonal, residual):
    """Analyse a seasonal decomposition: residual stationarity and Q-Q plot,
    plus ACF/PACF plots for the series, trend and seasonal components."""
    ts_decompose = residual
    ts_decompose.dropna(inplace=True)
    test_stationarity(ts_decompose)
    print('---------------------------------------------')
    # BUG FIX: `fig = plt.figure` stored the function object without calling
    # it, and the Figure returned by qqplot has no `.title` method;
    # use plt.figure() and plt.title() instead.
    fig = plt.figure()
    fig = qqplot(residual, line='q', fit=True)
    plt.title('qqplot of residual')
    plt.show()

    # ---- ACF / PACF of the raw series ----
    fig = plt.figure(figsize=(12, 8))
    ax1 = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(ts, lags=40, ax=ax1)
    ax1.xaxis.set_ticks_position('bottom')
    fig.tight_layout()
    ax2 = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(ts, lags=40, ax=ax2)
    ax2.xaxis.set_ticks_position('bottom')
    plt.savefig('ts_aacf_pacf.jpg', dpi=300)
    plt.show()
    fig.tight_layout()
    print('-----------------------------------------------')

    # ---- ACF / PACF of the trend component ----
    fig = plt.figure(figsize=(12, 8))
    ax1 = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(trend, lags=40, ax=ax1)
    ax1.xaxis.set_ticks_position('bottom')
    fig.tight_layout()
    ax2 = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(trend, lags=40, ax=ax2)
    ax2.xaxis.set_ticks_position('bottom')
    plt.savefig('trend_acf_pacf.jpg', dpi=300)
    plt.show()
    fig.tight_layout()
    print('-----------------------------------------------')

    # ---- ACF / PACF of the seasonal component ----
    fig = plt.figure(figsize=(12, 8))
    ax1 = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(seasonal, lags=40, ax=ax1)
    ax1.xaxis.set_ticks_position('bottom')
    fig.tight_layout()
    ax2 = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(seasonal, lags=40, ax=ax2)
    ax2.xaxis.set_ticks_position('bottom')
    plt.savefig('season_acf_pacf.jpg', dpi=300)
    plt.show()
    fig.tight_layout()
def arima_handler(dta, start, end):
    """Fit ARMA(2,0) and ARMA(3,0) on *dta*; when ARMA(2,0) has the lower
    AIC, run residual diagnostics; return a dynamic prediction.

    FIXES: pd.TimeSeries was removed from pandas (Series is the type), and
    string.atoi was removed in Python 3 (int() does the conversion).
    """
    dta = pd.Series(dta)
    dta.index = pd.Index(sm.tsa.datetools.dates_from_range(start, end))
    dta.plot(figsize=(12, 8))

    fig = plt.figure(figsize=(12, 8))
    ax1 = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(dta.values.squeeze(), lags=40, ax=ax1)
    ax2 = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(dta, lags=40, ax=ax2)

    arma_mod20 = sm.tsa.ARMA(dta, (2, 0)).fit()
    arma_mod30 = sm.tsa.ARMA(dta, (3, 0)).fit()
    print(arma_mod20.aic, arma_mod20.bic, arma_mod20.hqic)
    print(arma_mod30.aic, arma_mod30.bic, arma_mod30.hqic)

    # NOTE(review): the flattened original makes the extent of this `if`
    # ambiguous; diagnostics are assumed inside, the prediction outside so
    # the function always returns — confirm against the upstream source.
    if arma_mod20.aic < arma_mod30.aic:
        sm.stats.durbin_watson(arma_mod20.resid.values)
        fig = plt.figure(figsize=(12, 8))
        ax = fig.add_subplot(111)
        ax = arma_mod20.resid.plot(ax=ax)
        resid = arma_mod20.resid
        stats.normaltest(resid)
        fig = plt.figure(figsize=(12, 8))
        ax = fig.add_subplot(111)
        fig = qqplot(resid, line='q', ax=ax, fit=True)
        fig = plt.figure(figsize=(12, 8))
        ax1 = fig.add_subplot(211)
        fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax1)
        ax2 = fig.add_subplot(212)
        fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)
        r, q, p = sm.tsa.acf(resid.values.squeeze(), qstat=True)
        data = np.c_[range(1, 41), r[1:], q, p]
        table = pd.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])

    predict_sunspots = arma_mod20.predict(str(int(start) + 360),
                                          str(int(end) + 5),
                                          dynamic=True)
    return predict_sunspots
def arima_handler(dta, start, end):
    """Fit ARMA(2,0) and ARMA(3,0) on *dta*; when ARMA(2,0) has the lower
    AIC, run residual diagnostics; return a dynamic prediction.

    FIXES: pd.TimeSeries was removed from pandas (Series is the type), and
    string.atoi was removed in Python 3 (int() does the conversion).
    """
    dta = pd.Series(dta)
    dta.index = pd.Index(sm.tsa.datetools.dates_from_range(start, end))
    dta.plot(figsize=(12, 8))

    fig = plt.figure(figsize=(12, 8))
    ax1 = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(dta.values.squeeze(), lags=40, ax=ax1)
    ax2 = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(dta, lags=40, ax=ax2)

    arma_mod20 = sm.tsa.ARMA(dta, (2, 0)).fit()
    arma_mod30 = sm.tsa.ARMA(dta, (3, 0)).fit()
    print(arma_mod20.aic, arma_mod20.bic, arma_mod20.hqic)
    print(arma_mod30.aic, arma_mod30.bic, arma_mod30.hqic)

    # NOTE(review): flattened original leaves the `if` extent ambiguous;
    # diagnostics assumed inside, prediction outside — confirm upstream.
    if arma_mod20.aic < arma_mod30.aic:
        sm.stats.durbin_watson(arma_mod20.resid.values)
        fig = plt.figure(figsize=(12, 8))
        ax = fig.add_subplot(111)
        ax = arma_mod20.resid.plot(ax=ax)
        resid = arma_mod20.resid
        stats.normaltest(resid)
        fig = plt.figure(figsize=(12, 8))
        ax = fig.add_subplot(111)
        fig = qqplot(resid, line='q', ax=ax, fit=True)
        fig = plt.figure(figsize=(12, 8))
        ax1 = fig.add_subplot(211)
        fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax1)
        ax2 = fig.add_subplot(212)
        fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)
        r, q, p = sm.tsa.acf(resid.values.squeeze(), qstat=True)
        data = np.c_[range(1, 41), r[1:], q, p]
        table = pd.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])

    predict_sunspots = arma_mod20.predict(str(int(start) + 360),
                                          str(int(end) + 5),
                                          dynamic=True)
    return predict_sunspots
def processData(self, p, q):
    """Fit ARMA(p, q) on self.data, print information criteria and the
    Durbin-Watson statistic, then plot residual ACF, PACF and Q-Q."""
    self.arma_mod = sm.tsa.ARMA(self.data, (p, q)).fit()
    print("AIC:", str(self.arma_mod.aic))
    print("BIC:", str(self.arma_mod.bic))
    print("HQIC:", str(self.arma_mod.hqic))
    resid = self.arma_mod.resid
    print("DW value:", sm.stats.durbin_watson(resid.values))
    diag_fig = figure(figsize=(12, 8))
    acf_ax = diag_fig.add_subplot(311)
    diag_fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=acf_ax)
    pacf_ax = diag_fig.add_subplot(312)
    diag_fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=pacf_ax)
    qq_ax = diag_fig.add_subplot(313)
    diag_fig = qqplot(resid, line='q', ax=qq_ax, fit=True)
    show()
def testModelFit(arma_mod30, dta):
    """Residual diagnostics for a fitted ARMA model: Durbin-Watson,
    normality test, Q-Q plot, histogram, ACF/PACF and Ljung-Box table."""
    residuals = arma_mod30.resid
    # Durbin-Watson ~= 2*(1-r) for residual lag-1 autocorrelation r:
    # 2 means none, toward 0 positive, toward 4 negative correlation.
    sm.stats.durbin_watson(residuals.values)

    # Residuals over time expose poorly explained periods.
    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    ax = arma_mod30.resid.plot(ax=ax)
    plt.savefig(config.plot_dir + 'ARIMAX_test_residualsVsTime.png',
                bbox_inches='tight')

    # Are the residuals compatible with a normal distribution?
    k2, p = stats.normaltest(residuals)
    print("residuals skew (k2):" + str(k2) +
          " fit w/ normal dist (p-value): " + str(p))

    # Q-Q plot plus histogram of the residuals.
    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(211)
    fig = qqplot(residuals, line='q', ax=ax, fit=True)
    ax2 = fig.add_subplot(212)
    plt.hist(residuals, bins=25)
    plt.title('Histogram of standardized deviance residuals')
    plt.savefig(config.plot_dir + 'ARIMAX_test_residualsNormality.png',
                bbox_inches='tight')
    plt.clf()

    # ACF/PACF of the residuals, then the Ljung-Box table.
    plotACFAndPACF(residuals, 'residualsACFAndPACF.png')
    r, q, p = sm.tsa.acf(residuals.values.squeeze(), qstat=True)
    data = np.c_[range(1, 41), r[1:], q, p]
    table = pandas.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])
    print(table.set_index('lag'))
def ARIMA_modeling(data, order, test):
    """Fit ARIMA(*order*) on *data*, check the residuals, then forecast
    len(test) steps and report error metrics against *test*."""
    tempModel = ARIMA(data.values, order).fit()

    # Q-Q plot of the residuals (a straight line suggests normality).
    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    print("QQ plot of residuals (should be like a straight line)")
    fig = qqplot(tempModel.resid, line='q', ax=ax, fit=True)
    print("######################")

    # Ljung-Box white-noise test on the residuals.
    noiseRes = acorr_ljungbox(tempModel.resid, lags=1)
    print("result of residual's white noice testing (should be very large)")
    print('stat | p-value')
    for x in noiseRes:
        print(
            x,
            '|',
        )
    print("######################")

    # FIX: forecast() was called twice with identical arguments — one call
    # already returns point forecasts, stderr, and confidence intervals.
    forecast_res = tempModel.forecast(steps=len(test))
    predicts = forecast_res[0]
    pred_CI = forecast_res[2]
    low, high = [], []
    for i in range(len(pred_CI)):
        low.append(pred_CI[i][0])
        high.append(pred_CI[i][1])
    comp = pd.DataFrame()
    comp['original'] = test
    comp['predict'] = predicts
    comp['low'] = low
    comp['high'] = high
    comp.plot()
    print("######################")

    rms = sqrt(mean_squared_error(test, predicts))
    print('mean squared error: ', rms)
    print("######################")

    # MAPE (undefined where the test data contains zeros).
    q = (abs(comp['original'] - comp['predict']) / comp['original']) * 100
    print(q)
    print('average MAPE: ', np.mean(abs(q)), '%')
    print('If MAPE is "Inf", it because the test data contains 0')
    print("######################")

    # Symmetric MAPE.
    q1 = (abs(comp['original'] - comp['predict']) /
          ((comp['original'] + comp['predict']) / 2)) * 100
    print(q1)
    print('average Symmetric MAPE: ', np.mean(abs(q1)), '%')
def testModelFit(arma_mod30, dta):
    """Residual diagnostics: Durbin-Watson, normality, Q-Q plot, histogram,
    ACF/PACF plots and the Ljung-Box statistics table.

    FIX: the final `print table.set_index('lag')` was Python 2 syntax.
    """
    residuals = arma_mod30.resid
    # Durbin-Watson ~= 2*(1-r): 2 means no serial correlation, values
    # toward 0 indicate positive, toward 4 negative correlation.
    sm.stats.durbin_watson(residuals.values)

    # Residuals vs. time show periods the model explains poorly.
    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    ax = arma_mod30.resid.plot(ax=ax)
    plt.savefig(FIG_DIR + 'residualsVsTime.png', bbox_inches='tight')

    # Normality test on the residuals.
    k2, p = stats.normaltest(residuals)
    print("residuals skew (k2):" + str(k2) +
          " fit w/ normal dist (p-value): " + str(p))

    # Q-Q plot and histogram.
    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(211)
    fig = qqplot(residuals, line='q', ax=ax, fit=True)
    ax2 = fig.add_subplot(212)
    plt.hist(residuals, bins=25)
    plt.title('Histogram of standardized deviance residuals')
    plt.savefig(FIG_DIR + 'residualsNormality.png', bbox_inches='tight')

    # ACF/PACF of residuals and the Ljung-Box table.
    plotACFAndPACF(residuals, 'residualsACFAndPACF.png')
    r, q, p = sm.tsa.acf(residuals.values.squeeze(), qstat=True)
    data = np.c_[range(1, 41), r[1:], q, p]
    table = pandas.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])
    print(table.set_index('lag'))
def residual_test(residual, lags=31):
    """ACF/PACF plots, Q-Q plot and Ljung-Box test for a residual series."""
    # ACF and PACF of the residuals.
    diag = plt.figure(facecolor='white')
    top = diag.add_subplot(211)
    diag = plot_acf(residual.values.squeeze(), lags=lags, ax=top)
    bottom = diag.add_subplot(212)
    diag = plot_pacf(residual, lags=lags, ax=bottom)
    plt.show()

    # (Durbin-Watson reference: 2 = none, 0 = positive, 4 = negative
    # autocorrelation — kept as a note from the original author.)

    # Q-Q plot of the residuals.
    qq_fig = plt.figure(figsize=(12, 8))
    qq_ax = qq_fig.add_subplot(111)
    qq_fig = qqplot(residual, line='q', ax=qq_ax, fit=True)
    plt.show()

    ljung_box_test(residual)
def tsdiag(resid):
    """Show model-checking diagnostics for *resid*: ACF, PACF and Q-Q plot
    stacked in a single figure."""
    fig = plt.figure(figsize=(12, 8))
    acf_ax = fig.add_subplot(311)
    fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=acf_ax)
    pacf_ax = fig.add_subplot(312)
    fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=pacf_ax)
    qq_ax = fig.add_subplot(313)
    fig = qqplot(resid, line='q', ax=qq_ax, fit=True)
    plt.show()
def rosen(x):
    """Rosenbrock function — the classic non-convex optimisation benchmark."""
    return sum(100.0 * (x[1:] - x[:-1] ** 2.0) ** 2.0 + (1 - x[:-1]) ** 2.0)


# Minimise the Rosenbrock function with Nelder-Mead.
x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])
res = minimize(rosen, x0, method='nelder-mead',
               options={'xtol': 1e-8, 'disp': True})
print(res.x)

import statsmodels.api as sm
import statsmodels.formula.api as smf

# GLM (binomial) on the star98 dataset.
star98 = sm.datasets.star98.load_pandas().data
formula = 'SUCCESS ~ LOWINC + PERASIAN + PERBLACK + PERHISP + PCTCHRT + \
PCTYRRND + PERMINTE*AVYRSEXP*AVSALK + PERSPENK*PTRATIO*PCTAF'
dta = star98[['NABOVE', 'NBELOW', 'LOWINC', 'PERASIAN', 'PERBLACK',
              'PERHISP', 'PCTCHRT', 'PCTYRRND', 'PERMINTE', 'AVYRSEXP',
              'AVSALK', 'PERSPENK', 'PTRATIO', 'PCTAF']].copy()
endog = dta['NABOVE'] / (dta['NABOVE'] + dta.pop('NBELOW'))
del dta['NABOVE']
dta['SUCCESS'] = endog
mod1 = smf.glm(formula=formula, data=dta, family=sm.families.Binomial()).fit()
print(mod1.summary())

import numpy as np
from scipy import stats
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.graphics.api import qqplot

# ARMA demo (note: `dta` here is still the star98 frame from above,
# as in the original).
print(sm.datasets.sunspots.NOTE)
arma_mod20 = sm.tsa.ARMA(dta, (2, 0)).fit(disp=False)
print(arma_mod20.params)
arma_mod30 = sm.tsa.ARMA(dta, (3, 0)).fit(disp=False)
resid = arma_mod30.resid
stats.normaltest(resid)
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
fig = qqplot(resid, line='q', ax=ax, fit=True)
predict_sunspots = arma_mod30.predict('1990', '2012', dynamic=True)
print(predict_sunspots)
fig, ax = plt.subplots(figsize=(12, 8))
# FIX: DataFrame.ix was removed from pandas; label-based slicing is .loc.
ax = dta.loc['1950':].plot(ax=ax)
fig = arma_mod30.plot_predict('1990', '2012', dynamic=True, ax=ax,
                              plot_insample=False)
def model_detect(result):
    """Model diagnostics: residual ACF/PACF, Durbin-Watson and Q-Q plot."""
    import statsmodels.api as sm
    diag = plt.figure(figsize=(12, 8))
    acf_ax = diag.add_subplot(211)
    diag = sm.graphics.tsa.plot_acf(result.resid.values.squeeze(), lags=40, ax=acf_ax)
    pacf_ax = diag.add_subplot(212)
    diag = sm.graphics.tsa.plot_pacf(result.resid, lags=40, ax=pacf_ax)
    plt.show()
    # A Durbin-Watson statistic near 2 (e.g. ~1.93) indicates no
    # first-order autocorrelation in the residuals.
    print(sm.stats.durbin_watson(result.resid.values))
    resid = result.resid  # residual series
    qq_fig = plt.figure(figsize=(12, 8))
    qq_ax = qq_fig.add_subplot(111)
    qq_fig = qqplot(resid, line='q', ax=qq_ax, fit=True)
    plt.show()
def check_resid_wd_acf_pacf_qq(model):
    """Residual white-noise checks: normality, Durbin-Watson (closer to 2
    is better), Q-Q plot, Ljung-Box table and ACF/PACF plots."""
    resid = model.resid
    print(stats.normaltest(resid))
    print(sm.stats.durbin_watson(resid))

    qq_fig = plt.figure(figsize=(12, 4))
    qq_ax = qq_fig.add_subplot(111)
    qq_fig = qqplot(resid, line='q', ax=qq_ax, fit=True)

    # Ljung-Box statistics per lag.
    r, q, p = sm.tsa.acf(resid.values.squeeze(), qstat=True)
    lag_stats = np.c_[range(1, 41), r[1:], q, p]
    table = pd.DataFrame(lag_stats, columns=['lag', "AC", "Q", "Prob(>Q)"])
    print(table.set_index('lag'))

    corr_fig = plt.figure(figsize=(12, 8))
    acf_ax = corr_fig.add_subplot(211)
    corr_fig = plot_acf(resid, lags=40, ax=acf_ax)
    pacf_ax = corr_fig.add_subplot(212)
    corr_fig = plot_pacf(resid, lags=40, ax=pacf_ax)
    plt.show()
def drawPIT(data,
            cdf=stats.uniform,
            xlabel="uniform distribution",
            ylabel="PIT",
            title="",
            isShow=False,
            isSave=False,
            savePath=None):
    """PIT Q-Q plot of *data* against *cdf* with Kolmogorov 5% bands;
    optionally shows the figure and/or saves it to *savePath*."""
    lw = 4
    fontsize = 40
    fig = plt.figure(figsize=(16, 16))
    axs = fig.add_subplot(111)
    fig = qqplot(data, dist=cdf, line='45', ax=axs)
    # Half-width of the Kolmogorov-Smirnov 5% significance band.
    deta = 1.358 / (len(data)) ** 0.5 * (2 ** 0.5)
    axs.plot([deta, 1], [0, 1 - deta], '--', color='blueviolet', lw=lw,
             label='Kolmogorov 5% significance band')
    axs.plot([0, 1 - deta], [deta, 1], '--', color='blueviolet', lw=lw)
    axs.set_title(title, loc="center", fontsize=fontsize)
    axs.set_xlabel(xlabel, fontsize=fontsize)
    axs.set_ylabel(ylabel, fontsize=fontsize)
    axs.set_xlim([0, 1])
    axs.set_ylim([0, 1])
    plt.xticks(fontsize=fontsize)
    plt.yticks(fontsize=fontsize)
    plt.grid()
    plt.legend(fontsize=25)
    if isShow:
        plt.show()
    # BUG FIX: os.path.dirname(savePath) ran unconditionally, raising a
    # TypeError with the default savePath=None; only touch the filesystem
    # when a save is actually requested.
    if isSave:
        dirPath = os.path.dirname(savePath)
        if not os.path.exists(dirPath):
            os.makedirs(dirPath)
        fig.savefig(savePath, bbox_inches="tight", dpi=300)
    plt.close()
print(u'模型ARIMA(%s,1,%s)符合白噪声检验' % (p, q))

# ACF and PACF of the prediction errors.
fig = plt.figure(figsize=(12, 8))
acf_ax = fig.add_subplot(211)
fig = plot_acf(pred_error, ax=acf_ax)
pacf_ax = fig.add_subplot(212)
fig = plot_pacf(pred_error, ax=pacf_ax)
fig.show()

# Durbin-Watson test on the prediction errors.
print(sm.stats.durbin_watson(pred_error))

# Q-Q plot of the prediction errors.
fig = plt.figure(figsize=(12, 8))
fig = qqplot(pred_error, line='q', fit=True)
fig.show()

# NOTE: the original kept commented-out experiments here comparing the
# AIC/BIC of ARIMA fits at 0/1/2 differencing orders (the d=2 MA fit was
# non-invertible) and plotting the first difference of `dta`.
def armia(id, product, predicted):
    """Fit an ARMA model to one product's sales, store 28 predicted values
    in *predicted*, and return False when the first grid-searched order
    failed and a fallback order was used.

    FIXES: np.float was removed in NumPy 1.24 (builtin float is the
    equivalent dtype spelling); the bare `except:` is narrowed so
    KeyboardInterrupt/SystemExit still propagate.
    """
    flag = True  # whether the model failed to find the params

    # 1. data preprocessing
    sale_per_product = product[str(id)]
    data = np.array(sale_per_product, dtype=float)
    src = data
    data = pd.Series(data)
    data.index = pd.Index(np.arange(118))
    data.plot(figsize=(12, 8))
    plt.title("product_" + str(id))
    if visible:
        plt.show()

    # 2. first-order differencing of the series (choice of d)
    fig = plt.figure(figsize=(12, 8))
    ax1 = fig.add_subplot(111)
    diff = data.diff(1)
    diff.plot(ax=ax1)
    plt.title("diff" + str(id))
    if visible:
        plt.show()

    # 3. choose (p, d, q) via grid search; log the product id
    p_, d_, q_ = grid_search(data, search_mode)
    print((p_, d_, q_))
    info = []
    info.append(str(id))
    with open("log.txt", 'w+') as f:
        f.writelines(info)
    try:
        arma_mod = sm.tsa.ARMA(data, order=(p_, d_, q_)).fit()
    except Exception:
        # Fall back to the error-mode order search when fitting fails.
        p_, d_, q_ = grid_search(data, "error")
        arma_mod = sm.tsa.ARMA(data, order=(p_, d_, q_)).fit()
        flag = False

    # 3.2/3.3 residual diagnostics: Q-Q plot against a normal distribution
    resid = arma_mod.resid
    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    fig = qqplot(resid, line='q', ax=ax, fit=True)

    # 3.5 Ljung-Box statistics on the residuals
    r, q, p = sm.tsa.acf(resid.values.squeeze(), qstat=True)
    rdata = np.c_[range(1, 41), r[1:], q, p]
    table = pd.DataFrame(rdata, columns=['lag', "AC", "Q", "Prob(>Q)"])

    # 4. dynamic out-of-sample prediction and plot of the full series
    predict_dta = arma_mod.predict(117, 144, dynamic=True)
    predicted[str(id)] = np.array(predict_dta)
    print(predict_dta)
    print((p_, d_, q_))
    plt.subplot(111)
    all = np.concatenate((src, np.array(predict_dta).astype(int)))
    plt.plot(np.arange(all.size), all)
    plt.title("whole sale of the product")
    if visible:
        plt.show()
    return flag


# The original kept an alternative prediction/plotting snippet here inside
# stray module-level string literals; preserved as a comment:
# fig, ax = plt.subplots(figsize=(12, 8))
# ax = data.ix[0:].plot(ax=ax)
# fig = arma_mod.plot_predict(117, 144, dynamic=True, ax=ax, plot_insample=False)
# plt.legend([ax, fig], ["previous sale", "predicted sale"], loc='upper right')
# plt.title('whole sale of the product')
# ACF / PACF of the daily log returns.
plt.ylabel('Correlation')
plt.title('Log(Return) Autocorrelation')
sm.graphics.tsa.plot_acf(stock_data['log_ret'].values.squeeze(), lags=60, ax=ax1)
ax2 = fig.add_subplot(212)
plt.title('Log(Return) Pacf')
plt.xlabel('Lag (Business Days)')
plt.ylabel('Correlation')
sm.graphics.tsa.plot_pacf(stock_data['log_ret'], lags=60, ax=ax2)
plt.figtext(0.5, 0.95, 'Daily Return Correllations by Date')
plt.savefig('stock_logregcorr.png')
sys.exit()

# NOTE: everything below is unreachable because of sys.exit() above.
fig = plt.figure(figsize=(12, 8))
plt.title('qq plot of the log(return)')
qqplot(stock_data['log_ret'], line='q', ax=plt.gca(), fit=True)
plt.savefig('logrec_qq.png')

# Build scaled feature arrays for train/test.
train_arr = get_pred_arr(stock_data)
test_arr = get_pred_arr(test_data)
scaler = preprocessing.StandardScaler().fit(train_arr)
sctrain_arr = scaler.transform(train_arr)
sctest_arr = scaler.transform(test_arr)

from sklearn.cross_validation import train_test_split
stock_train, stock_cv, true_train, true_cv = train_test_split(
    sctrain_arr,
    stock_data['log_ret'].fillna(method='backfill').values,
    test_size=0.33,
    random_state=42)

# Reweight outliers: larger absolute (scaled) returns get more weight.
weighter_scale = preprocessing.StandardScaler().fit(true_train)
train_weight_outliers = 5.0 * np.abs(weighter_scale.transform(true_train)) + 1
plt.show()

# In[6]:
# Residual ACF/PACF looked like white noise; confirm with the Durbin-Watson
# test (valid only for first-order autocorrelation):
#   DW == 4  <=>  rho = -1 (negative autocorrelation)
#   DW == 2  <=>  rho =  0 (no first-order autocorrelation)
# Values near 0 or 4 indicate autocorrelation; near 2 means none.
print(sm.stats.durbin_watson(ar10.resid.values))

# Q-Q plot: a visual check that the residuals come from a normal
# distribution (or that two samples share the same distribution family).
print(stats.normaltest(resid))
fig = plt.figure(figsize=(12, 8))
fig = qqplot(resid, line='q', fit=True)
plt.show()
# Result: approximately normal.

# In[7]:
predict_dta = ar10.forecast(steps=5)
import datetime
fig = ar10.plot_predict(
    pd.to_datetime('2017-01-01') + datetime.timedelta(days=190),
    pd.to_datetime('2017-01-01') + datetime.timedelta(days=220),
    dynamic=False,
    plot_insample=True)
plt.show()

# In[8]:
# Plot the ARMA(3,0) residuals over time.
ax = fig.add_subplot(111)
ax = arma_mod30.resid.plot(ax=ax)

# <codecell>
resid = arma_mod30.resid

# <codecell>
# Normality test on the residuals.
stats.normaltest(resid)

# <codecell>
# Q-Q plot of the residuals.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
fig = qqplot(resid, line='q', ax=ax, fit=True)

# <codecell>
# ACF / PACF of the residuals.
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)

# <codecell>
r, q, p = sm.tsa.acf(resid.values.squeeze(), qstat=True)
data = np.c_[range(1, 41), r[1:], q, p]
table = pandas.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])
# FIX: `print table...` was Python 2 syntax — a SyntaxError under Python 3.
print(table.set_index('lag'))
# In[74]:
# Normality test on the fitted model's residuals.
z, p = stats.normaltest(result.resid.values)

# In[75]:
p

# In[76]:
result.params

# In[77]:
# Q-Q plot of the residuals for model 1.
fig, ax = plt.subplots(figsize=(8, 4))
smg.qqplot(result.resid, ax=ax)
fig.tight_layout()
fig.savefig("ch14-qqplot-model-1.pdf")

# In[78]:
# Refit with an interaction term included.
model = smf.ols("y ~ x1 + x2 + x1*x2", data)

# In[79]:
result = model.fit()

# In[80]:
print(result.summary())
# ACF / PACF of the daily log returns.
sm.graphics.tsa.plot_acf(stock_data['log_ret'].values.squeeze(), lags=60, ax=ax1)
ax2 = fig.add_subplot(212)
plt.title('Log(Return) Pacf')
plt.xlabel('Lag (Business Days)')
plt.ylabel('Correlation')
sm.graphics.tsa.plot_pacf(stock_data['log_ret'], lags=60, ax=ax2)
plt.figtext(0.5, 0.95, 'Daily Return Correllations by Date')
plt.savefig('stock_logregcorr.png')
sys.exit()

# NOTE: everything below is unreachable because of sys.exit() above.
fig = plt.figure(figsize=(12, 8))
plt.title('qq plot of the log(return)')
qqplot(stock_data['log_ret'], line='q', ax=plt.gca(), fit=True)
plt.savefig('logrec_qq.png')

# Build scaled feature arrays for train/test.
train_arr = get_pred_arr(stock_data)
test_arr = get_pred_arr(test_data)
scaler = preprocessing.StandardScaler().fit(train_arr)
sctrain_arr = scaler.transform(train_arr)
sctest_arr = scaler.transform(test_arr)

from sklearn.cross_validation import train_test_split
stock_train, stock_cv, true_train, true_cv = train_test_split(
    sctrain_arr,
    stock_data['log_ret'].fillna(method='backfill').values,
    test_size=0.33,
    random_state=42)
# * Does our model obey the theory?
# Durbin-Watson statistic for the ARMA(3,0) residuals
# (a value near 2 means no first-order autocorrelation).
sm.stats.durbin_watson(arma_mod30.resid.values)

# Residuals over time.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
ax = arma_mod30.resid.plot(ax=ax)

resid = arma_mod30.resid
stats.normaltest(resid)

# Q-Q plot of the residuals.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
fig = qqplot(resid, line="q", ax=ax, fit=True)

# ACF / PACF of the residuals.
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)

# Ljung-Box table for the first 24 lags.
r, q, p = sm.tsa.acf(resid.values.squeeze(), fft=True, qstat=True)
data = np.c_[np.arange(1, 25), r[1:], q, p]
table = pd.DataFrame(data, columns=["lag", "AC", "Q", "Prob(>Q)"])
print(table.set_index("lag"))

# * This indicates a lack of fit.
# Load Subject 1's BOLD image and drop the first six volumes.
img = nib.load(pathtodata + "BOLD/task001_run001/bold.nii.gz")
data = img.get_data()
data = data[..., 6:]

# Pull out a single voxel's time course and inspect it.
voxel = data[41, 47, 2]
plt.plot(voxel)
plt.close()
# Sort of a curve => nonconstant mean; variance looks odd at the ends.
plt.hist(voxel)
plt.close()
# Long right tail.
qqplot(voxel, line='q')
plt.close()
# More-or-less normal, with deviations at the tails.

# Box-Cox method to find the best power transformation.
bc = stats.boxcox(voxel)
bc[1]
# Lambda is close to 0, so a log transform is reasonable.
print("Log transforming data.")

# Log transform the data and repeat the visual checks.
lvoxel = np.log(voxel)
plt.plot(lvoxel)
plt.close()
plt.hist(lvoxel)
plt.close()
qqplot(lvoxel, line='q')
def plot_qqplot(arma_mod):
    """Display a Q-Q plot of the fitted ARMA model's residuals."""
    qq_fig = plt.figure(figsize=(12, 8))
    qq_ax = qq_fig.add_subplot(111)
    qq_fig = qqplot(arma_mod.resid, line='q', ax=qq_ax, fit=True)
    plt.show()
# Read Subject 1's BOLD image; discard the first 6 volumes.
img = nib.load(pathtodata + "BOLD/task001_run001/bold.nii.gz")
data = img.get_data()
data = data[..., 6:]

# Single voxel time course.
voxel = data[41, 47, 2]
plt.plot(voxel)
plt.close()
# Curved => nonconstant mean; variance also odd toward the ends.
plt.hist(voxel)
plt.close()
# Long right tail.
qqplot(voxel, line="q")
plt.close()
# Roughly normal, with deviations in the tails.

# Box-Cox to pick a power transform.
bc = stats.boxcox(voxel)
bc[1]
# Lambda near 0 => use a log transform.
print("Log transforming data.")

# Log transform, then repeat the plots.
lvoxel = np.log(voxel)
plt.plot(lvoxel)
plt.close()
plt.hist(lvoxel)
plt.close()
qqplot(lvoxel, line="q")
def qqplot(resid):
    """Show a Q-Q plot of *resid*.

    Deliberately shadows statsmodels' qqplot at module level; the real
    implementation is imported locally inside the function.
    """
    from statsmodels.graphics.api import qqplot
    qq_fig = plt.figure(figsize=(12, 8))
    qq_ax = qq_fig.add_subplot(111)
    qq_fig = qqplot(resid, line='q', ax=qq_ax, fit=True)
    pylab.show()
def predict_arma(number, index, data, original_index, original_data):
    """Difference, order-select, fit and forecast an ARMA model for one day's series.

    number          -- day label, used only in figure titles
    aux note: all timestamps are synthesized as 1999-03-01 plus 5-minute steps.
    index           -- integer time index of the training series (5-min steps)
    data            -- training observations; rebound to a pd.Series below
    original_index  -- integer time index of the full/original series
    original_data   -- full/original observations plotted against the forecast
    """
    # axes list
    ax = []
    # difference list
    diff = []
    # order number
    order_num = 5
    # Set the index as date type
    df = pd.DataFrame({
        'year': 1999,
        'month': 3,
        'day': 1,
        'minute': index * 5
    })
    original_df = pd.DataFrame({
        'year': 1999,
        'month': 3,
        'day': 1,
        'minute': original_index * 5
    })
    # print pd.to_datetime(df)
    data = pd.Series(data, index=pd.to_datetime(df))
    original_data = pd.Series(original_data, index=pd.to_datetime(original_df))
    # data = pd.Series(data, index=pd.to_datetime(index, unit='m'))
    # print data
    fig = plt.figure("Differences of Diverse Orders in Day %s" % number,
                     figsize=(10, 4 * order_num))
    # Show differences with i-order
    # for i in range(1, order_num + 1):
    #     ax.append(fig.add_subplot(order_num, 1, i))
    #     # Get the difference of time series, which is
    #     # the d parameter of ARIMA(p, d, q)
    #     diff.append(data.diff(i))
    #     # Plot the i-order of difference
    #     diff[i - 1].plot(ax=ax[i - 1])
    #
    # After observation, choose first-order difference
    order = 1
    data = data.diff(order)
    original_data = original_data.diff(order)
    print data
    print original_data
    # "data[0]=NaN" causes the autocorrelation figure shows abnormally,
    # so zero out the first `order` NaN entries produced by diff().
    for i in range(order):
        data[i] = 0.0
        original_data[i] = 0.0
    # autocorrelation_plot(data)
    # ACF/PACF of the differenced series, to eyeball candidate (p, q) orders.
    ax1 = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(data, lags=40, ax=ax1)
    ax2 = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(data, lags=40, ax=ax2)
    #==================================
    # arma_mod70 = sm.tsa.ARMA(data, (7, 0)).fit()
    # Fit a 3x3 grid of ARMA(p, q) candidates and print their criteria.
    arma_mod = []
    row = 3
    col = 3
    # ARMA(0, 2) is the best
    for i in range(row):
        temp = []
        for j in range(col):
            temp.append(sm.tsa.ARMA(data, (i, j)).fit())
        arma_mod.append(temp)
    for i in range(row):
        for j in range(col):
            print(arma_mod[i][j].aic, arma_mod[i][j].bic, arma_mod[i][j].hqic)
    #==================================
    # get the mininal value of aic/bic
    res = sm.tsa.arma_order_select_ic(data, ic=['aic', 'bic'], trend='nc')
    # print res.aic_min_order
    # print res.bic_min_order
    # arma_mod00 = sm.tsa.ARMA(data, (0, 0)).fit()
    # arma_mod01 = sm.tsa.ARMA(data, (0, 1)).fit()
    # arma_mod02 = sm.tsa.ARMA(data, (0, 2)).fit()
    # arma_mod10 = sm.tsa.ARMA(data, (1, 0)).fit()
    # arma_mod11 = sm.tsa.ARMA(data, (1, 1)).fit()
    # arma_mod12 = sm.tsa.ARMA(data, (1, 2)).fit()
    # arma_mod20 = sm.tsa.ARMA(data, (2, 0)).fit()
    # arma_mod21 = sm.tsa.ARMA(data, (2, 1)).fit()
    # arma_mod22 = sm.tsa.ARMA(data, (2, 2)).fit()
    # print(arma_mod00.aic, arma_mod00.bic, arma_mod00.hqic)
    # print(arma_mod01.aic, arma_mod01.bic, arma_mod01.hqic)
    # print(arma_mod02.aic, arma_mod02.bic, arma_mod02.hqic)
    # print(arma_mod10.aic, arma_mod10.bic, arma_mod10.hqic)
    # print(arma_mod11.aic, arma_mod11.bic, arma_mod11.hqic)
    # print(arma_mod12.aic, arma_mod12.bic, arma_mod12.hqic)
    # print(arma_mod20.aic, arma_mod20.bic, arma_mod20.hqic)
    # print(arma_mod21.aic, arma_mod21.bic, arma_mod21.hqic)
    # print(arma_mod22.aic, arma_mod22.bic, arma_mod22.hqic)
    # Autocorrelation for ARMA(0, 2)
    # fit model
    # model = ARIMA(data_bak, order=(1, 1, 0))
    # model_fit = model.fit(disp=0)
    # print model_fit.summary()
    # # plot residual errors
    # residuals = DataFrame( model_fit.resid )
    # residuals.plot()
    # residuals.plot(kind='kde')
    # print residuals.describe()
    # row = 3
    # col = 3
    # model = []
    # model_fit = []
    # for i in range(row):
    #     temp = []
    #     for j in range(col):
    #         # print(i, j)
    #         temp.append(ARIMA(data_bak, order=(i, 1, j)))
    #     model.append(temp)
    # for i in range(row):
    #     temp = []
    #     for j in range(col):
    #         print(i, j)
    #         # print model[i][j].fit(disp=0).summary()
    #         temp.append(model[i][j].fit(disp=0))
    #     model_fit.append(temp)
    # for i in range(row):
    #     for j in range(col):
    #         print model_fit[i][j].summary()
    #         # plot residual errors
    #         residuals = DataFrame(model_fit[i][j].resid)
    #         residuals.plot()
    #         residuals.plot(kind='kde')
    #         print residuals.describe()
    # plot autocorrelation of residual errors
    # predict_model = arma_mod[0][2]
    # Pick the candidate whose (p, q) minimizes AIC.
    predict_model = arma_mod[res.aic_min_order[0]][res.aic_min_order[1]]
    resid = predict_model.resid
    fig = plt.figure("Autocorrelation of residuals", figsize=(12, 8))
    ax1 = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax1)
    ax2 = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)
    # Durbin-Watson Exam
    # DW is in [0, 4], where
    # DW = 4 <=> p(rou) = -1, DW = 2 <=> p(rou) = 0, DW = 0 <=> p(rou) = 1
    print(sm.stats.durbin_watson(resid.values))
    # Check the data are from the same distribution or not (Q-Q plot)
    fig = plt.figure("Check for the data validation", figsize=(12, 8))
    ax = fig.add_subplot(111)
    fig = qqplot(resid, line='q', ax=ax, fit=True)
    # Ljung-Box Exam: r = autocorrelations, q = Q statistics, p = p-values
    r, q, p = sm.tsa.acf(resid.values.squeeze(), qstat=True)
    lb_data = np.c_[range(1, 41), r[1:], q, p]
    table = pd.DataFrame(lb_data, columns=['lag', "AC", "Q", "Prob(>Q)"])
    print(table.set_index('lag'))
    # Prediction with arma model: forecast from the last training timestamp
    # to the last timestamp of the original series.
    begin_time = str(pd.to_datetime(df)[len(index) - 1])
    end_time = str(pd.to_datetime(original_df)[len(original_index) - 1])
    # print pd.to_datetime(df)[len(index)-1]
    # print pd.to_datetime(original_df)[len(original_index)-1]
    predict_sunspots = predict_model.predict(begin_time, end_time, dynamic=True)
    print predict_sunspots
    # Overlay forecast on the (differenced) original data for visual comparison.
    fig, ax = plt.subplots(figsize=(10, 3))
    ax = original_data.ix['1999-03-01 00:00:00':].plot(ax=ax)
    predict_sunspots.plot(ax=ax)
# 计算ARMA模型的评估准则 arma_mod = sm.tsa.ARMA(time_series, (1, 1)).fit() print('AIC:', arma_mod.aic, 'BIC:', arma_mod.bic, 'HQIC:', arma_mod.hqic) # ARMA模型回归的诊断 resid = list(arma_mod.resid) fig = plt.figure(figsize=(12, 8)) ax1 = fig.add_subplot(211) fig = sm.graphics.tsa.plot_acf(resid, lags=40, ax=ax1) ax2 = fig.add_subplot(212) fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2) plt.show() # Durbin-Watson检验值 print('Durbin-Watson:', sm.stats.durbin_watson(arma_mod.resid)) # 残差QQ图 fig = plt.figure(figsize=(12, 8)) ax = fig.add_subplot(111) fig = qqplot(np.array(resid), line='q', ax=ax, fit=True) plt.show() # 新的预测序列值,对应于一阶差分的计算 predict = arma_mod.predict(N_history - 1, N_history + 3, dynamic=True) print(predict) predict = predict.cumsum() # 不用差分时两行注释掉 predict = time_series_ori[-1] + predict print(predict) time_series_ori_predict = np.r_[time_series_ori, predict] plt.plot(range(len(time_series_ori)), time_series_ori, 'b') plt.plot(range(len(time_series_ori), len(time_series_ori) + 5), predict, 'r') plt.show()
# Residual diagnostics for the ARMA volume model.
# NOTE(review): `fig` is created before this chunk — confirm upstream.
plt.subplots_adjust(hspace=0.4)
ax1 = fig.add_subplot(211)
plt.xlabel('Lag (Business Days)')
plt.ylabel('Correlation')
plt.title('Residual Volume Autocorrelation')
sm.graphics.tsa.plot_acf(stock_data['ARMAResid'].values.squeeze(), lags=60, ax=ax1)
ax2 = fig.add_subplot(212)
plt.title('Residual Volume Pacf')
plt.xlabel('Lag (Business Days)')
plt.ylabel('Correlation')
sm.graphics.tsa.plot_pacf(stock_data['ARMAResid'], lags=60, ax=ax2)
plt.savefig('volume_ARMAresidcorr.png')
# Q-Q plot of the ARMA residuals against a fitted normal.
fig = plt.figure(figsize=(12, 8))
plt.title('qq plot of the ARMA Volume Residual')
qqplot(stock_data['ARMAResid'], line='q', ax=plt.gca(), fit=True)
plt.savefig('volARMAresid_qq.png')
# Residual-panel figures for the training and test sets.
error_figure(stock_data, 'ARMAResid', 'ARMAPredictVolume', 'ARMANormResids', 'ARMA Only')
plt.savefig('ARMA_stock_residual_panel.png')
error_figure(test_data, 'ARMAResid', 'ARMAPredictVolume', 'ARMANormResids', 'ARMA Only')
plt.savefig('ARMA_test_residual_panel.png')
plt.close('all')
# Build feature arrays; fit the scaler on the training set only.
train_arr = get_pred_arr(stock_data)
test_arr = get_pred_arr(test_data)
scaler = preprocessing.StandardScaler().fit(train_arr)
# Fit an ARIMA model # In[39]: arma_mod20 = sm.tsa.ARMA(dta, (2, 0)).fit(disp=False) print(arma_mod20.params) #arma自回归移动平均 # In[40]: arma_mod30 = sm.tsa.ARMA(dta, (3, 0)).fit(disp=False) resid = arma_mod30.resid stats.normaltest(resid) fig = plt.figure(figsize=(12, 8)) ax = fig.add_subplot(111) fig = qqplot(resid, line='q', ax=ax, fit=True) #resid残差项 #qqplot绘制qq图 # Let's then do some predictions # In[41]: predict_sunspots = arma_mod30.predict('1990', '2012', dynamic=True) print(predict_sunspots) fig, ax = plt.subplots(figsize=(12, 8)) ax = dta.ix['1950':].plot(ax=ax) fig = arma_mod30.plot_predict('1990', '2012', dynamic=True, ax=ax,
import tushare as ts
import pandas as pd
import statsmodels.api as sm

# Daily K-line data for stock 600858 over a fixed window (78 sessions).
data = ts.get_k_data('600858', start='2016-02-05', end='2016-06-05')
share_change = data['close'] - data['open']  # intraday change per session
share_change.index = pd.Index(sm.tsa.datetools.dates_from_range('1', '78'))
from statsmodels.tsa.stattools import adfuller
# Check series stationarity with the (augmented Dickey-Fuller) unit-root test.
dftest = adfuller(share_change, autolag='AIC')
# Provided the ADF test's p-value is < 0.01, the smaller the order the better.
print(dftest[1])
from statsmodels.stats.diagnostic import acorr_ljungbox
# Test for autocorrelation; P > 0.5 indicates correlation (original author's note).
p_value = acorr_ljungbox(share_change, lags=1)
share_acf = sm.graphics.tsa.plot_acf(share_change, lags=40)  # ACF plot
share_pacf = sm.graphics.tsa.plot_pacf(share_change, lags=40)  # PACF plot
print(share_acf, share_pacf)
share_change1 = list(share_change)
# Fit several candidate ARMA orders and compare their information criteria.
arma_mod1 = sm.tsa.ARMA(share_change1, (2, 1)).fit()
print(arma_mod1.aic, arma_mod1.bic, arma_mod1.hqic)
arma_mod2 = sm.tsa.ARMA(share_change1, (2, 2)).fit()
print(arma_mod2.aic, arma_mod2.bic, arma_mod2.hqic)
arma_mod3 = sm.tsa.ARMA(share_change1, (1, 0)).fit()
print(arma_mod3.aic, arma_mod3.bic, arma_mod3.hqic)
arma_mod4 = sm.tsa.ARMA(share_change1, (0, 1)).fit()
print(arma_mod4.aic, arma_mod4.bic, arma_mod4.hqic)
# Q-Q plot of the ARMA(0, 1) residuals.
resid = arma_mod4.resid
from statsmodels.graphics.api import qqplot
figqq = qqplot(resid)
print(figqq)
# NOTE(review): forecasting uses arma_mod2 while the residual check above
# used arma_mod4 — confirm which model is intended.
predict_ts = arma_mod2.predict(start=79, end=82)