def __analysis_index(self):
    """Summarise the log-return series of every index column and plot it.

    The index table is read from the environment; every column except
    ``COM_DATE`` is replaced by ``log(pct_change() / 100 + 1)``.  For each
    of those columns a line chart (``<name>.png``) and a 25-bin histogram
    (``<name>bar.png``) are saved under ``RESULTS``, and one row of
    statistics is collected.  The collected table is written to
    ``index_info.csv`` in the same directory.

    Output columns:
        mean, std, skew, kurt -- moments of the non-missing values.
        jarque-Bera -- element 0 of ``stats.jarque_bera``.
        adf -- the ``'5%'`` entry of element 4 of ``adfuller``.
        lm -- last p-value of ``q_stat`` over autocorrelation lags 1..12.
    """
    frame = self.get_env().query_data(Index_Data).get_data_serise()
    value_cols = list(frame.columns)
    value_cols.remove(COM_DATE)

    # Same two-step transform as before, written as a single expression.
    frame[value_cols] = np.log(frame[value_cols].pct_change() / 100 + 1)
    frame = frame.set_index(COM_DATE)
    frame.index = pd.to_datetime(frame.index)

    summary = pd.DataFrame(
        columns=['mean', 'std', 'skew', 'kurt', 'jarque-Bera', 'adf', 'lm'])
    for col in value_cols:
        # Non-missing values of this column, shared by plots and statistics.
        series = frame[col].dropna()

        # Line chart of the series.
        _, line_ax = plt.subplots()
        line_ax.plot(series, label=col)
        line_ax.set_xlabel('时间')
        line_ax.set_ylabel('收益率的对数')
        line_ax.set_title(col + '收益率图')
        line_ax.legend()
        plt.savefig(os.path.join(RESULTS, col + '.png'))
        plt.close()

        # Histogram of the same values.
        _, hist_ax = plt.subplots()
        hist_ax.hist(series, bins=25)
        hist_ax.set_xlabel('收益率范围')
        hist_ax.set_ylabel('收益率的对数')
        hist_ax.set_title(col + '收益率图')
        plt.savefig(os.path.join(RESULTS, col + 'bar.png'))
        plt.close()

        row = [
            np.nanmean(series),
            np.nanstd(series),
            series.skew(),
            series.kurt(),
            stats.jarque_bera(series)[0],
            adfuller(series)[4]['5%'],
            q_stat(acf(series)[1:13], len(series))[1][-1],
        ]
        summary.loc[col] = row

    summary.to_csv(os.path.join(RESULTS, 'index_info.csv'))
def plot_correlogram(self, lags=10, title=None):
    """Show a 2x2 diagnostic figure for ``self.data``.

    Panels: the raw values (annotated with the largest Ljung-Box p-value
    and the ADF p-value), a probability plot (annotated with the first
    four moments), the ACF and the PACF.

    Args:
        lags: Number of lags for the ACF/PACF panels and the Ljung-Box
            statistic.  ``None`` selects ``min(10, len(data) // 5)``.
        title: Figure title passed to ``fig.suptitle``.
    """
    # NOTE: without passing residuals this method cannot be used by the
    # optimal brute force finder

    def moving_average(self, a: pd.array, n: int = 3):
        # Currently unused; kept for the FIXME below.
        ret = np.cumsum(a)
        ret[n:] = ret[n:] - ret[:-n]
        return ret[n - 1:] / n

    # NOTE: necessary due to inheritance of TimeSeries which uses 'Agg'
    matplotlib.use('TkAgg')
    x = self.data
    lags = min(10, int(len(x) / 5)) if lags is None else lags
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 8))

    axes[0][0].plot(x.values)  # Residuals
    # FIXME: calculate and overlay the moving average of the residuals:
    # axes[0][0].plot(moving_average(x, n=21), c='k', lw=1)

    # q_stat expects the autocorrelations for lags 1..k.  Element 0 of acf()
    # is lag 0 (always 1); including it inflates Q and drives every p-value
    # towards 0, so it is dropped here.
    q_p = np.max(q_stat(acf(x, nlags=lags)[1:], len(x))[1])
    stats = f'Q-Stat: {q_p:>8.2f}\nADF: {adfuller(x)[1]:>11.2f}'
    axes[0][0].text(x=.02, y=.85, s=stats, transform=axes[0][0].transAxes)

    probplot(x, plot=axes[0][1])
    mean, var, skew, kurtosis = moment(x, moment=[1, 2, 3, 4])
    s = f'Mean: {mean:>12.2f}\nSD: {np.sqrt(var):>16.2f}\nSkew: {skew:12.2f}\nKurtosis:{kurtosis:9.2f}'
    axes[0][1].text(x=.02, y=.75, s=s, transform=axes[0][1].transAxes)

    plot_acf(x=x, lags=lags, zero=False, ax=axes[1][0])
    plot_pacf(x, lags=lags, zero=False, ax=axes[1][1])
    axes[1][0].set_xlabel('Lag')
    axes[1][1].set_xlabel('Lag')

    fig.suptitle(title, fontsize=14)
    sns.despine()
    fig.tight_layout()
    fig.subplots_adjust(top=.9)
    print('plotting')
    plt.show()
def WhiteNoiseTest(ret, nlags=20, isprintsummary=False):
    """
    White-noise test based on the Ljung-Box statistic.

    If the null hypothesis is accepted the series is white noise (purely
    random) and False is returned; if it is rejected the series is not
    random and True is returned.

    Args:
        ret: The series to test.
        nlags: Highest autocorrelation lag included in the test.
        isprintsummary: When true, print the Q statistics, the p-values
            and the verdict.

    Returns:
        True when at least one Ljung-Box p-value is below the module-level
        ``PVal`` threshold, otherwise False.
    """
    acf_values = stattools.acf(ret, nlags=nlags, qstat=False,
                               fft=True, alpha=None, missing="drop")
    n = len(ret)
    # q_stat expects the autocorrelations for lags 1..nlags.  Element 0 of
    # acf() is lag 0 (always 1); feeding it in inflates every Q statistic
    # so the test would always report correlation.  Drop it.
    results = stattools.q_stat(acf_values[1:], n)
    if isprintsummary:
        print("Show Ljung-Box Q-statistics: ", results[0])
        print("Show corresponding P-values: ", results[1])

    separator = "- - - - - - - - - - - - - - - - - - - - - - - - - -"
    # A single significant Q statistic is enough to reject white noise.
    if np.any(results[1] < PVal):
        if isprintsummary:
            print(separator)
            print("Some LB stats p < %.2f -> correlated time series!" % PVal)
            print(separator)
        return True

    # All p-values > PVal, accept H0 (white noise time series)
    if isprintsummary:
        print(separator)
        print("All LB stats p > %.2f -> white noise time series!" % PVal)
        print(separator)
    return False
def plot_correlogram(x, fig, axes, lags=40, title=None):
    """Draw a 2x2 correlogram for ``x`` onto an existing figure.

    Panels: the series (annotated with the largest Ljung-Box p-value, the
    ADF p-value and the Hurst value), a probability plot (annotated with
    the first four moments), the ACF and the PACF.

    Args:
        x: Series to analyse; must provide ``.plot`` and ``.values``.
        fig: Figure that owns ``axes``.
        axes: 2x2 grid of axes, indexed as ``axes[row][col]``.
        lags: Number of lags for ACF/PACF and the Ljung-Box statistic.
            ``None`` selects ``min(10, len(x) // 5)``.
        title: Figure title passed to ``fig.suptitle``.
    """
    lags = min(10, int(len(x) / 5)) if lags is None else lags
    x.plot(ax=axes[0][0])

    # q_stat expects the autocorrelations for lags 1..k.  Element 0 of acf()
    # is lag 0 (always 1); including it inflates Q and drives every p-value
    # towards 0, so it is dropped here.
    q_p = np.max(q_stat(acf(x, nlags=lags)[1:], len(x))[1])
    stats = f'Q-Stat: {q_p:>8.2f}\nADF: {adfuller(x)[1]:>11.2f} \nHurst: {round(hurst(x.values),2)}'
    axes[0][0].text(x=.02, y=.85, s=stats, transform=axes[0][0].transAxes)

    probplot(x, plot=axes[0][1])
    mean, var, skew, kurtosis = moment(x, moment=[1, 2, 3, 4])
    s = f'Mean: {mean:>12.2f}\nSD: {np.sqrt(var):>16.2f}\nSkew: {skew:12.2f}\nKurtosis:{kurtosis:9.2f}'
    axes[0][1].text(x=.02, y=.75, s=s, transform=axes[0][1].transAxes)

    plot_acf(x=x, lags=lags, zero=False, ax=axes[1][0])
    plot_pacf(x=x, lags=lags, zero=False, ax=axes[1][1])
    axes[1][0].set_xlabel('Lag')
    axes[1][1].set_xlabel('Lag')

    fig.suptitle(title, fontsize=20)
    fig.tight_layout()
    fig.subplots_adjust(top=.9)
def plot_correlogram(x, lags=None, title=None):
    """Create a 2x2 correlogram figure for the series ``x``.

    Panels: the series with its 21-period rolling mean (annotated with the
    largest Ljung-Box p-value and the ADF p-value), a probability plot
    (annotated with the first four moments), the ACF and the PACF.

    Args:
        x: Series to analyse; must provide ``.plot`` and ``.rolling``.
        lags: Number of lags for ACF/PACF and the Ljung-Box statistic.
            ``None`` selects ``min(10, len(x) // 5)``.
        title: Figure title passed to ``fig.suptitle``.
    """
    lags = min(10, int(len(x) / 5)) if lags is None else lags
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 8))

    x.plot(ax=axes[0][0], title='Time Series')
    x.rolling(21).mean().plot(ax=axes[0][0], c='k', lw=1)

    # q_stat expects the autocorrelations for lags 1..k.  Element 0 of acf()
    # is lag 0 (always 1); including it inflates Q and drives every p-value
    # towards 0, so it is dropped here.
    q_p = np.max(q_stat(acf(x, nlags=lags)[1:], len(x))[1])
    stats = f'Q-Stat: {q_p:>8.2f}\nADF: {adfuller(x)[1]:>11.2f}'
    axes[0][0].text(x=.02, y=.85, s=stats, transform=axes[0][0].transAxes)

    probplot(x, plot=axes[0][1])
    mean, var, skew, kurtosis = moment(x, moment=[1, 2, 3, 4])
    s = f'Mean: {mean:>12.2f}\nSD: {np.sqrt(var):>16.2f}\nSkew: {skew:12.2f}\nKurtosis:{kurtosis:9.2f}'
    axes[0][1].text(x=.02, y=.75, s=s, transform=axes[0][1].transAxes)

    plot_acf(x=x, lags=lags, zero=False, ax=axes[1][0])
    plot_pacf(x, lags=lags, zero=False, ax=axes[1][1])
    axes[1][0].set_xlabel('Lag')
    axes[1][1].set_xlabel('Lag')

    fig.suptitle(title, fontsize=14)
    sns.despine()
    fig.tight_layout()
    fig.subplots_adjust(top=.9)
def many_paras():
    """
    Scratch-pad walkthrough of time-series diagnostics and trading helpers.

    The body reads many names that are not defined inside this function
    (for example SHRet, CPItrain, Close, priceA, BOCMclp); presumably they
    are module-level globals -- TODO confirm before calling.

    Background notes:

    A p-value essentially controls the false positive rate (FPR).
    A q-value controls the false discovery rate (FDR).
    Q-statistic: Qlb=T*(T+2)*sigma(j=1,p)(rj^2/(T-j))
        rj: order-j autocorrelation coefficient of the residual series,
        T: number of observations, p: number of lags.
    FDR = E(V/R), V: number of false discoveries, R: total number of rejections.
    acf: autocorrelation coefficient -- y(t)= a0 + a1*y(t-1) + epsilon
        p(x(i)|x(i-h)) :sigma(i=1,n-h) ((x(i)-mu)*(x(i+h)-mu)/sigma(i=1,n) ((x(i)-mu)^2))
    pacf: partial autocorrelation coefficient; the first k-1 lags are taken
        as known and only lag k is solved for
        -- y(t)= a0 + a1*y(t-1) + ... a1*y(t-k) + epsilon
        p(x(i)..x(i-k)|x(i-1)x(i-k+1)) :
    ARMA(p,q): AR is an order-p autoregressive process, MA is an order-q
        moving-average process.
    ARIMA adds a differencing step on top of the ARMA model.
    ADF: first-order autoregressive process with a white-noise disturbance
        term. Tested via a unit root; if one exists the series is
        non-stationary. y(t)= mu + fi*y(t-1) + epsilon. For order p the sum
        of the p roots is required to be less than 1.
    Sma: simple moving average
    wma: weighted moving average
    ema: exponential moving average
    ewma: exponentially weighted moving average
    OBV: On Balance Volume, net long/short ratio
        = [(close - low) - (high - close)] / (high - low) * V
    :return:
    """
    # 1. Compute the autocorrelation coefficients
    acfs = stattools.acf(SHRet)
    # Plot the autocorrelation coefficients
    plot_acf(SHRet, use_vlines=True, lags=30)
    # 2. Compute the partial autocorrelation coefficients
    pacfs = stattools.pacf(SHRet)
    plot_pacf(SHRet, use_vlines=True, lags=30)
    # 3. Run the ADF unit-root test and inspect the result
    adfSHRet = ADF(SHRet)
    print(adfSHRet.summary().as_text())
    # 4. Q statistic (autocorrelation lags 1..12)
    LjungBox1 = stattools.q_stat(stattools.acf(SHRet)[1:13], len(SHRet))
    print(LjungBox1)
    # 5. lag is the m in the test expression above; here we choose to test
    # the autocorrelation coefficients up to order 12.
    # NOTE(review): the slice [1:12] keeps lags 1..11 only, which does not
    # match the "order 12" stated above -- confirm whether [1:13] was meant.
    LjungBox = stattools.q_stat(stattools.acf(CPItrain)[1:12], len(CPItrain))
    # order is the order of the model; (1,0,1) builds an ARMA(1,1) model.
    # The middle 0 means the original, undifferenced data is used
    # (zero rounds of differencing).
    model1 = arima_model.ARIMA(CPItrain, order=(1, 0, 1)).fit()
    model1.summary()
    model1.conf_int()
    # 6. Plot diagnostics for the fitted time-series model
    stdresid = model1.resid / math.sqrt(model1.sigma2)
    plt.plot(stdresid)
    plot_acf(stdresid, lags=20)
    LjungBox = stattools.q_stat(stattools.acf(stdresid)[1:13], len(stdresid))
    print(LjungBox[1][-1])
    print(model1.forecast(3)[0])
    # 7. Autoregressive conditional heteroskedasticity model
    # y(t)=b*x(t)+epsilon(t)
    # epsilon(t)^2=a0+a1*epsilon(t-1)^2+a2*epsilon(t-2)^2+n(t)
    # \sigma_t^{2}=\omega+\sum_{i=1}^{p}\alpha_{i}\epsilon_{t-i}^{2}
    # n(t) is i.i.d. with zero mean, var(n^2)=r^2
    # NOTE(review): this reads `SHret` while the steps above read `SHRet`;
    # confirm both names exist or whether one is a typo.
    am = arch_model(SHret)
    model = am.fit(update_freq=0)
    print(model.summary())
    # 8. Pair processing
    pt = TradeTool()
    SSD = pt.SSD(priceAf, priceBf)
    SSDspread = pt.SSDSpread(priceAf, priceBf)
    SSDspread.describe()
    coefficients = pt.cointegration(priceAf, priceBf)
    CoSpreadF = pt.CointegrationSpread(priceA, priceB, formPeriod, formPeriod)
    CoSpreadTr = pt.CointegrationSpread(priceA, priceB, formPeriod, tradePeriod)
    CoSpreadTr.describe()
    bound = pt.calBound(priceA, priceB, 'Cointegration', formPeriod, width=1.2)
    # 9. Pair trading: choosing entry points
    trtl = TradeTool()
    account = trtl.TradeSimPair(PAt, PBt, position)
    # 10. momentum function
    et = ElementTool()
    et.momentum(Close, 5).tail(n=5)
    momen35 = et.momentum(Close, 35)
    # +1 where the 35-period momentum is positive, -1 otherwise.
    signal = []
    for i in momen35:
        if i > 0:
            signal.append(1)
        else:
            signal.append(-1)
    signal = pd.Series(signal, index=momen35.index)
    signal.head()
    tradeSig = signal.shift(1)
    ret = Close / Close.shift(1) - 1
    # ret=ret['2014-02-20':]
    # ret.head(n=3)
    Mom35Ret = ret * (signal.shift(1))
    Mom35Ret[0:5]
    real_Mom35Ret = Mom35Ret[Mom35Ret != 0]
    real_ret = ret[ret != 0]
    Rsi12 = et.rsi(BOCMclp, 12)
    # Strategy
    rsi6 = et.rsi(BOCMclp, 6)
    rsi24 = et.rsi(BOCMclp, 24)
    # Use rsi6 to capture buy/sell points: -1 above 80, +1 below 20, else 0.
    Sig1 = []
    for i in rsi6:
        if i > 80:
            Sig1.append(-1)
        elif i < 20:
            Sig1.append(1)
        else:
            Sig1.append(0)
    date1 = rsi6.index
    Signal1 = pd.Series(Sig1, index=date1)
    Signal1[Signal1 == 1].head(n=3)
    Signal1[Signal1 == -1].head(n=3)
    Signal2 = pd.Series(0, index=rsi24.index)
    lagrsi6 = rsi6.shift(1)
    lagrsi24 = rsi24.shift(1)
    # +1 when rsi6 crosses above rsi24, -1 when it crosses below.
    for i in rsi24.index:
        if (rsi6[i] > rsi24[i]) & (lagrsi6[i] < lagrsi24[i]):
            Signal2[i] = 1
        elif (rsi6[i] < rsi24[i]) & (lagrsi6[i] > lagrsi24[i]):
            Signal2[i] = -1
    # Combine both signals and clip the sum to -1 / +1.
    signal = Signal1 + Signal2
    signal[signal >= 1] = 1
    signal[signal <= -1] = -1
    signal = signal.dropna()
    tradSig = signal.shift(1)
    tt = TradeTool()
    # NOTE(review): `buy`, `sell` and `tradeRet` are not assigned anywhere
    # in this function -- confirm they are defined at module level.
    BuyOnly = tt.strategy_analy(buy, ret)
    SellOnly = tt.strategy_analy(sell, ret)
    Trade = tt.strategy_analy(tradSig, ret)
    Test = pd.DataFrame({
        "BuyOnly": BuyOnly,
        "SellOnly": SellOnly,
        "Trade": Trade
    })
    # Cumulative returns
    cumStock = np.cumprod(1 + ret) - 1
    cumTrade = np.cumprod(1 + tradeRet) - 1
    # 12. Simple moving average
    sma5 = et.smaCal(Close, 5)
    # 12. Weighted moving average
    wma5 = et.wmaCal(Close, w)
    # 12. Exponential moving average
    Ema = et.emaCal(Close, period)
    print(Ema)
    # 12. Exponentially weighted moving average
    Ewma = et.ewmaCal(Close, 5, 0.2)
    # 13. Bollinger bands
    UnicomBBands = et.bbands(Close, 20, 2)
    print(UnicomBBands)
    multiplier = [1, 1.65, 1.96, 2, 2.58]
    price2010 = Close['2010-01-04':'2010-12-31']
    tt.CalBollRisk(price2010, multiplier)
    # 14. Performance
    btt = BackTestTool()
    Performance1 = btt.perform(Close, tradSignal1)
    print(Performance1)
    # 15. Trading and backtest
    KDtrade = btt.trade(KDSignal, close)
    btt.backtest(KDtrade.Ret, KDtrade.KDtradeRet)
    # 16. Upward / downward breakouts
    KDupbreak = et.upbreak(KValue, DValue) * 1
    KDupbreak[KDupbreak == 1].head()
    KDdownbreak = et.downbreak(KValue, DValue) * 1
    KDdownbreak[KDdownbreak == 1].head()
    # Performance of the "golden cross" / "death cross" trading strategy
    btt.backtest(KDbreak.Ret, KDbreak.KDbreakRet)
    # 17. Volume indicator
    cumUpVol = et.VOblock(UpVol)
    cumDownVol = et.VOblock(DownVol)
    ALLVol = np.array([cumUpVol, cumDownVol]).transpose()
    # 18. Decide holdings
    hold = tt.judge_hold(trade)
    # 19. Single trade simulation
    TradeAccount = tt.TradeSim(close, hold)
    print(TradeAccount)
# ACF / PACF of the return series.
plot_acf(SHRet, use_vlines=True, lags=30)
plot_pacf(SHRet, use_vlines=True, lags=30)

# Time-series plots of the closing index and of the returns.
SHclose = SHindex.Clsindex
SHclose.plot()
plt.title('2014-2015年上证综指收盘指数时序图')
SHRet.plot()
plt.title('2014-2015年上证综指收益率指数时序图')

plot_acf(SHRet, use_vlines=True, lags=30)
plot_pacf(SHRet, use_vlines=True, lags=30)
plot_acf(SHclose, use_vlines=True, lags=30)

# ADF unit-root tests on returns and on the closing index.
adfSHRet = ADF(SHRet)
print(adfSHRet.summary().as_text())
adfSHclose = ADF(SHclose)
print(adfSHclose.summary().as_text())

# Reference white-noise series.
whiteNoise = np.random.standard_normal(size=500)
plt.plot(whiteNoise, c='b')
plt.title('White Noise')

# Ljung-Box test over autocorrelation lags 1..12.
LjungBox1 = stattools.q_stat(stattools.acf(SHRet)[1:13], len(SHRet))
print(LjungBox1)
print(LjungBox1[1][-1])
# nobs must be the length of the series the autocorrelations come from
# (SHclose here), not the length of the return series.
LjungBox2 = stattools.q_stat(stattools.acf(SHclose)[1:13], len(SHclose))
print(LjungBox2[1][-1])
## Autocorrelation coefficients
acfs=stattools.acf(SH_ret)
## Partial autocorrelation coefficients
pacfs=stattools.pacf(SH_ret)
plot_acf(SH_ret,use_vlines=True,lags=30)
SH_ret.plot()
plt.title('return')
SH_close.plot()
plt.title('close price')
adfSH_ret=ADF(SH_ret)
print(adfSH_ret)
adfSH_close=ADF(SH_close)
print(adfSH_close)
# q_stat expects the autocorrelations for lags 1..k; acfs[0] is lag 0
# (always 1) and would inflate every Q statistic, so it is skipped.
LjungBox_ret=stattools.q_stat(acfs[1:],len(SH_ret))
LjungBox_ret[1][-1]
HS300_data['close']['2010-01-01':'2016-12-31']
HS300_data['2010-01-01':'2016-12-31']
model1=arima_model.ARIMA(SH_ret,order=(2,0,1)).fit()
model1.summary()
stattools.arma_order_select_ic(SH_ret,max_ma=4)
end) returns = RJ['close_price'].pct_change().dropna() returns.name = 'return' # returns.plot() # returns.column = ['return'] # print(returns) # 计算自相关系数 # acfs = stattools.acf(returns) # # print(acfs) # # 偏自相关系数 # pacfs = stattools.pacf(returns) # # print(pacfs) # # 自相关性图 # plot_acf(returns,use_vlines=True,lags=30) # # 偏自相关性图 # plot_pacf(returns,use_vlines=True,lags=30) # plot_acf(RJ['close_price'],use_vlines=True,lags=30) # # plt.show() # 单位根检验 # adfRJ= ADF(returns) # print(adfRJ.summary().as_text()) # adfClose = ADF(RJ['close_price']) # print(adfClose.summary().as_text()) # 白噪声检验 LB = stattools.q_stat(stattools.acf(returns), len(returns)) print(LB)
@author: Liu
"""
import numpy as np
import pandas as pd
import pandas_datareader as web
import matplotlib.pyplot as plt
import datetime
# Download window for the index data.
start=datetime.datetime(2018,1,1)
end=datetime.datetime(2019,6,30)
nasdaq=web.DataReader('^IXIC','yahoo',start,end)
nasdaq=nasdaq.dropna()
# Keep only the adjusted close as a one-column frame named 'Price'.
nasdaq=pd.DataFrame(nasdaq['Adj Close'].values,index=pd.to_datetime(nasdaq.index),columns=['Price'])
# Simple returns: (P_t - P_{t-1}) / P_{t-1}.
nasdaq=(nasdaq-nasdaq.shift(1))/nasdaq.shift(1)
nasdaq=nasdaq.dropna()
nasdaq.columns=['Return']
nasdaq.tail(3)
# Everything except the last three rows.
traindata=nasdaq[:-3]
from arch.unitroot import ADF
result=ADF(traindata.Return,max_lags=10)
print(result.summary().as_text())
from statsmodels.tsa import stattools
# The slice [1:12] keeps autocorrelation lags 1..11.
# NOTE(review): traindata is a one-column DataFrame here, whereas the ADF
# call above uses traindata.Return -- confirm acf accepts the frame.
LjungBox=stattools.q_stat(stattools.acf(traindata)[1:12],len(traindata))
LjungBox[1][-1]
import statsmodels.graphics.tsaplots as ts
ts.plot_acf(traindata,use_vlines=True, lags=30)
print(adfReutn.summary().as_text()) # plot_acf(logReturn, lags=20) # plot_pacf(logReturn, lags=20) model1 = arima_model.ARIMA(logReturn.values, order=(2, 0, 0)).fit() model2 = arima_model.ARIMA(logReturn.values, order=(0, 0, 2)).fit() print("model1.aic=%f \t model2.aic=%f" % (model1.aic, model2.aic)) print(model2.sigma2) strresid = model2.resid / math.sqrt(model2.sigma2) # strresid.plot() plt.subplot(211) plt.plot(strresid) # plot_acf(strresid , lags=20) LinjiuBox = stattools.q_stat(stattools.acf(strresid)[1:13], len(strresid)) print("LinjiuBox:", LinjiuBox[1][-1], '\n', LinjiuBox) # plt.subplot(212) print(model2.forecast(10)[0]) print(type(model2.forecast(10)[0])) test = pd.DataFrame(model2.forecast(10)[0]) test.index = logReturn1.index test.columns = ['forecast'] test['return1'] = logReturn1.values logReturn1.columns = ['return1'] print("test:\n", test) # logReturn1['forecast'] = pd.Series( model2.forecast(10)[0])
#取对数以增加平稳性 inData = np.log(inData) #–––––––––––––––––– #在进行运算之前可以对数据进行归一化,进而降低loss #scaler = MinMaxScaler() scaler = MinMaxScaler(feature_range=(0, 1)) inData = scaler.fit_transform(inData.reshape(-1, 1)) ''' #数据平稳化,使Adfuller指数小于-3.435298,并且p-value小于0.05 ADF = adfuller(inData.ravel(),1) print("ADF:") print(ADF) ''' stattools.q_stat(stattools.acf(inData)[1:13], len(inData))[1][-1] plot_acf(inData, lags=30) plot_pacf(inData, lags=30) #定阶ARMA(p,q) '''order = stattools.arma_order_select_ic(inData,max_ar=3,max_ma=3,ic=['aic','bic','hqic']) print("(p,q):") pq = order.bic_min_order print(order.bic_min_order)#(p,q)''' #将数据按站点分为SITE_SIZE组 site_names = [] #站点数据列表 site_cnames = [] #站点名字列表 for num in range(0, SITE_SIZE): site_cnames.append(df.at[num * DATA_SIZE, u'事发街道']) if num == 0:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from statsmodels.tsa import stattools
from arch import arch_model
# Load the tab-separated table, indexed by 'Trddt', and sort it by date.
SHret = pd.read_table('TRD_IndexSum.txt', index_col='Trddt', sep='\t')
SHret.index = pd.to_datetime(SHret.index)
SHret = SHret.sort_index()
# Upper panel: squared values; lower panel: absolute values.
plt.subplot(211)
plt.plot(SHret**2)
plt.xticks([])
plt.title('Squared Daily Return of SH Index')
plt.subplot(212)
plt.plot(np.abs(SHret))
plt.title('Absolute Daily Return of SH Index')
# Ljung-Box test on the squared series over autocorrelation lags 1..12.
LjungBox = stattools.q_stat(stattools.acf(SHret**2)[1:13], len(SHret))
print(LjungBox[1][-1])
# Fit arch_model with its default specification and print the summary.
am = arch_model(SHret)
model = am.fit(update_freq=0)
print(model.summary())
# Time-series plots of the closing index and of the returns.
close.plot()
plt.title('2014-2015年加權股價指數收盤指數時序圖 ')
taiexRet.plot()
plt.title('2014-2015年加權股價指數收益率指數時序圖')
plot_acf(taiexRet,use_vlines=True,lags=30)
plot_pacf(taiexRet,use_vlines=True,lags=30)
plot_acf(close,use_vlines=True,lags=30)
# ADF unit-root tests on returns and on the closing index.
adf_taiexRet=ADF(taiexRet)
print(adf_taiexRet.summary().as_text())
adfclose=ADF(close)
print(adfclose.summary().as_text())
# Generate a purely random series
whiteNoise=np.random.standard_normal(size=500)
# Plot that series
plt.plot(whiteNoise,c='b')
plt.title('White Noise')
# Ljung-Box test over autocorrelation lags 1..12.
LjungBox1=stattools.q_stat(stattools.acf(taiexRet)[1:13],len(taiexRet))
LjungBox1
LjungBox1[1][-1]
# nobs must be the length of the series the autocorrelations come from
# (close here), not the length of the return series.
LjungBox2=stattools.q_stat(stattools.acf(close)[1:13],len(close))
LjungBox2[1][-1]
from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(df["log_diff_12"].dropna(), method="ywmle", zero=False)

# Method : tsa.seasonal_decompose()
""" Seasonal decomposition using moving averages. """
help(tsa.seasonal_decompose)

# Method : statsmodels.tsa.stattools.q_stat()
""" Compute Ljung-Box Q Statistic. Returns q-statistics, p-value """
from statsmodels.tsa.stattools import acf, q_stat
# q_stat takes autocorrelation coefficients (lags 1..k), not the raw
# series, so the series goes through acf() first and lag 0 is dropped.
q_stat(acf(df["log_diff_12"].dropna())[1:], nobs=len(df["log_diff_12"].dropna()))

# Class : statsmodels.api.tsa.arima.ARIMA()
""" ARIMA model """
m = statsmodels.api.tsa.arima.ARIMA(endog=df["value"], order=(1, 1, 0), exog=df["time"])
res = m.fit()
print(res.summary())

# Method : statsmodels.api.qqplot()
""" Q-Q plot of the quantiles of x versus the quantiles/ppf of a distribution. """
from statsmodels.tsa import stattools
import matplotlib.pyplot as plt
import numpy as np
from arch import arch_model
# Load the tab-separated index table and index it by its Date column.
indexRet = pd.read_csv('index.csv', sep='\t')
indexRet.index = pd.to_datetime(indexRet.Date)
indexRet.head()
np.unique(indexRet.CoName)
taiexRet = indexRet.loc[indexRet.CoName == 'TSE Taiex '].ROI
taiexRet.head()
taiexRet.tail()
# The `np.float` alias was removed in NumPy 1.24; the builtin `float` is
# the exact equivalent and works on every NumPy version.
taiexRet = taiexRet.astype(float).dropna()
# Plot the squared and the absolute return series
plt.subplot(211)
plt.plot(taiexRet**2)
plt.xticks([])
plt.title('Squared Daily Return of taiex')
plt.subplot(212)
plt.plot(np.abs(taiexRet))
plt.title('Absolute Daily Return of taiex')
# Ljung-Box test on the squared returns over autocorrelation lags 1..12.
LjungBox = stattools.q_stat(stattools.acf(taiexRet**2)[1:13], len(taiexRet))
LjungBox[1][-1]
# Fit arch_model with its default specification and print the summary.
am = arch_model(taiexRet)
model = am.fit(update_freq=0)
print(model.summary())
for code in codes: if ar>lines*columns: pdf.savefig(fig) fig = plt.figure() ar=1 fig.add_subplot(lines, columns, ar) closevalues = pd.Series(np.array(feed.getDataSeries(instrument=code).getCloseDataSeries())) tsaplots.plot_acf(np.log(closevalues).diff().dropna(), alpha=0.05, lags=40, ax=fig.gca(), title=code) ar += 1 pdf.savefig(fig) plt.close('all') ################################################################################################# #Trying to fit an ARMA model ################################################################################################# stattools.q_stat() stattools.acf() closevalues = pd.Series(np.array(feed.getDataSeries(instrument=codes[1]).getCloseDataSeries())) # arma = smapi.tsa.ARMA(np.array(np.log(closevalues).diff().dropna()), (0, 5)).fit(maxiter=10000) arma = smapi.tsa.ARMA(np.array(closevalues), (0, 5)).fit(maxiter=10000) pvalue = diagnostic.acorr_ljungbox(arma.resid, lags=[20])[1] print(pvalue) results = {} for code in codes: final_arma = None final_aic = 1e200 final_order = None for ar in range(1, 5): for ma in range(1, 5):
# Download window for the index data.
start = datetime.datetime(2018, 1, 1)
end = datetime.datetime(2019, 6, 30)
nasdaq = web.DataReader('^IXIC', 'yahoo', start, end)
nasdaq = nasdaq.dropna()
# Keep only the adjusted close as a one-column frame named 'Price'.
nasdaq = pd.DataFrame(nasdaq['Adj Close'].values, index=pd.to_datetime(nasdaq.index), columns=['Price'])
# Simple returns: (P_t - P_{t-1}) / P_{t-1}.
nasdaq = (nasdaq - nasdaq.shift(1)) / nasdaq.shift(1)
nasdaq = nasdaq.dropna()
nasdaq.columns = ['Return']
traindata = nasdaq
# Upper panel: squared returns; lower panel: absolute returns.
plt.subplot(211)
plt.plot(traindata**2)
plt.xticks([])
plt.subplot(212)
plt.plot(np.abs(traindata))
from statsmodels.tsa import stattools
# Ljung-Box test on the squared returns over autocorrelation lags 1..12.
LjungBox = stattools.q_stat(stattools.acf(traindata**2)[1:13], len(traindata))
LjungBox[1][-1]
from arch import arch_model
# Fit arch_model with its default specification and print the summary.
am = arch_model(traindata)
model = am.fit(update_freq=0)
print(model.summary())
import pandas as pd
import matplotlib.pyplot as plt
#5. ibm column: plot, ACF, and Ljung-Box over autocorrelation lags 1..12.
CRSPday = pd.read_csv('Data/Part4/002/CRSPday.csv')
ibm = CRSPday.ibm
ibm.plot()
from statsmodels.graphics.tsaplots import *
plot_acf(ibm, lags=20)
from statsmodels.tsa import stattools
LjungBox = stattools.q_stat(stattools.acf(ibm)[1:13], len(ibm))
LjungBox[1][-1]
#6. Column at position 3: Ljung-Box with lag 1 only, then lags 1..8.
ge = CRSPday.iloc[:, 3]
ge.plot()
plot_acf(ge, lags=20)
LjungBox = stattools.q_stat(stattools.acf(ge)[1:2], len(ge))
LjungBox[1][-1]
LjungBox = stattools.q_stat(stattools.acf(ge)[1:9], len(ge))
LjungBox[1][-1]
#7. r500 column of the SP500 file.
SP500 = pd.read_csv('Data/Part4/002/SP500.csv')
r500 = SP500.r500
r500.plot()
# NOTE(review): the ACF plotted here is of SHClose but the file is named
# "SHRet_acf.png" -- confirm the intended series / file name.
fig = plot_acf(SHClose, use_vlines=True, lags=30)
fig.savefig("SHRet_acf.png")
# Unit-root tests
adfSHRet = ADF(SHRet)
print(adfSHRet.summary().as_text())
adfSHClose = ADF(SHClose)
print(adfSHClose.summary().as_text())
# White noise
whiteNoise = np.random.standard_normal(size=500)
fig = plt.figure()
plt.plot(whiteNoise, c="b")
fig.savefig("whiteNoise.png")
# White-noise (Ljung-Box) test on the Shanghai Composite series, lags 1..12
LB1 = stattools.q_stat(stattools.acf(SHRet)[1:13], len(SHRet))
print(LB1)
print(LB1[1][-1])
LB2 = stattools.q_stat(stattools.acf(SHClose)[1:13], len(SHClose))
print(LB2[1][-1])
# ARMA modelling
cpi = pd.read_csv("CPI.csv", index_col="time")
cpi.index = pd.to_datetime(cpi.index)
print(cpi.head())
print(cpi.shape)
# Training set: everything from row 3 onwards
CPITrain = cpi[3:]
# Time-series plot
fig = plt.figure()
plt.plot(cpi)
adfshindex = ADF(shindex)
print(adfshindex.summary().as_text())
whitenoise = np.random.standard_normal(500)
plt.plot(whitenoise,c = 'b')
cpi = ts.get_cpi()
cpi.index = pd.to_datetime(cpi['month'])
cpi = cpi['cpi']
# NOTE(review): the slice runs from the later date to the earlier one,
# which presumably relies on the index being in descending order -- confirm.
cpitrain = cpi['2016-01-01':'2000-01-01']
cpitrain.plot()
# Is the series stationary?
print(ADF(cpitrain,max_lags=10).summary().as_text())
# Is the series white noise?  (The slice [1:12] keeps lags 1..11.)
ljb0 = stattools.q_stat(stattools.acf(cpitrain)[1:12],len(cpitrain))
ljb0[1][-1]
# Identify the ARMA orders p and q.
# The keyword was misspelled `use_vlineEs`; the sibling plot_pacf call
# shows the intended name.
plot_acf(cpitrain,use_vlines=True,lags=30)
plot_pacf(cpitrain,use_vlines=True,lags=30)
model1 = arima_model.ARIMA(cpitrain.values,order=(1, 0, 1)).fit()
model1.summary()
# Grid of candidate orders p, q in 1..3.
p = np.arange(1,4)
q = np.arange(1,4)
result = dict()
for i in p:
    for j in q:
        model1 = arima_model.ARIMA(cpitrain.values, order=(i, 0, j)).fit()
import pandas as pd #3. CRSPday = pd.read_csv('Data/Part4/004/CRSPday.csv') ibm = CRSPday.ibm ibm.plot() from statsmodels.graphics.tsaplots import * plot_acf(ibm**2, lags=20) from statsmodels.tsa import stattools LjungBox = stattools.q_stat(stattools.acf(ibm**2)[1:13], len(ibm)) LjungBox[1][-1] #4. import pandas_datareader.data as web import datetime as dt google = web.DataReader('GOOGL', 'yahoo', dt.datetime(2004, 1, 1), dt.datetime(2015, 12, 31)) google = google.asfreq('M', 'ffill', 'end') googleRet = (google.Close - google.Close.shift(1)) / google.Close.shift(1) googleRet = googleRet.dropna() googleRet.plot() plot_acf(googleRet, lags=20) plot_pacf(googleRet, lags=20) LjungBox = stattools.q_stat(stattools.acf(googleRet)[1:13], len(googleRet)) LjungBox[1][-1] (googleRet**2).plot()
trend='c', method='aeg', maxlag=None, autolag='aic', return_results=None) #return #coint_t : float #t-statistic of unit-root test on residuals #pvalue : float #MacKinnon’s approximate, asymptotic p-value based on MacKinnon (1994) #crit_value : dict #Critical values for the test statistic at the 1 %, 5 %, and 10 % levels based on regression curve. This depends on the number of observations '''Ljung-Box白噪声检验''' x = stattools.acf(array, nlags=40, unbiased=False, qstat=False, alpha=None) nobs = array stattools.q_stat(x, nobs, type='ljungbox') # x所检验的自相关系数序列 nobs计算自相关系数序列x所用的样本数n # return #检验的统计量array #p值的array 若p小于0.05 则拒绝原假设 则不是白噪声 存在自相关性 # %% # ================================================================ # 3. 回归 #================================================================= # OLS ---------------------------- '''OLS''' array1 = st.add_constant(array) model = st.OLS(array_y, array1).fit() print(model.summary()) print(model.fittedvalues) # fit values print(model.bic) # bic
# Log returns: first difference of the log prices.
logReturn=pd.Series((np.log(clprice))).diff().dropna()
logReturn.plot()
adf=ADF(logReturn,lags=6)
print(adf.summary().as_text())
plot_acf(logReturn,lags=20)
plot_pacf(logReturn,lags=20)
# Two candidate models with orders (0, 0, 2) and (2, 0, 0), compared by AIC.
model1=arima_model.ARIMA(logReturn.values,order=(0,0,2)).fit()
model2=arima_model.ARIMA(logReturn.values,order=(2,0,0)).fit()
model1.aic
model2.aic
import math
# Residuals of model2 scaled by sqrt(sigma2).
stdresid=model2.resid/math.sqrt(model2.sigma2)
# The models are fitted on `.values` (a NumPy array), so the residuals are
# a plain array without a `.plot` method; wrap them in a Series to plot,
# exactly as is done for the forecast below.
pd.Series(stdresid).plot()
plot_acf(stdresid,lags=20)
# Ljung-Box test on the scaled residuals over autocorrelation lags 1..12.
LjungBox=stattools.q_stat(stattools.acf(stdresid)[1:13],len(stdresid))
LjungBox[1][-1]
pd.Series(model2.forecast(10)[0]).plot()
def q_stat(self, timeseries):
    """Run the Ljung-Box Q test on ``timeseries``.

    Args:
        timeseries: The observations to test for autocorrelation.

    Returns:
        A ``QstatResult(statistic, pvalue)`` named tuple holding the two
        arrays returned by ``stattools.q_stat`` (one entry per lag).
    """
    autocorrelation_coefs = stattools.acf(timeseries)
    # stattools.q_stat requires the sample size as its second argument, and
    # the coefficients must start at lag 1: element 0 of acf() is lag 0
    # (always 1) and would inflate every Q statistic.
    result = stattools.q_stat(autocorrelation_coefs[1:], len(timeseries))
    QstatResult = namedtuple('QstatResult', 'statistic pvalue')
    return QstatResult(result[0], result[1])
CPI=CPI.sort_index()
# Hold out the last three rows as the test set.
CPItrain=CPI[:-3]
CPItrain.tail(n=3)
CPItest = CPI[-3:]
CPItest
CPI.plot(title='CPI 2001-2014')
from arch.unitroot import ADF
CPItrain=CPItrain.dropna()
print(ADF(CPItrain,max_lags=10).summary().as_text())
from statsmodels.tsa import stattools
# Ljung-Box test; the slice [1:12] keeps autocorrelation lags 1..11.
LjungBox=stattools.q_stat(stattools.acf(CPItrain)[1:12],len(CPItrain))
LjungBox[1][-1]
from statsmodels.graphics.tsaplots import *
import matplotlib.pyplot as plt
# Split the figure into two panels
axe1=plt.subplot(121)
axe2=plt.subplot(122)
# Draw the autocorrelation plot of the series in the first panel
plot1=plot_acf(CPItrain,lags=30,ax=axe1)
# Draw the partial autocorrelation plot of the series in the second panel
plot2=plot_pacf(CPItrain,lags=30,ax=axe2)
from statsmodels.tsa import arima_model
# order is the order of the model; (1,0,1) builds an ARMA(1,1) model;
#model = sm.tsa.ARMA(y, (2, 1)).fit(trend='nc', disp=0)
#Descriptive TSA Statistics
stools.adfuller(y)
stools.kpss(y)
#Plot ACF and PACF
tplot.plot_acf(y)
tplot.plot_pacf(y)
#Fit ARMA Model
tsmodel = sm.tsa.ARMA(y, (2, 1)).fit(trend='nc', disp=0)
residuals = tsmodel.resid
# q_stat takes autocorrelation coefficients (lags 1..k), not the raw
# residuals, so they go through acf() first and lag 0 is dropped.
# NOTE(review): assumes `stools` is statsmodels.tsa.stattools (it already
# supplies adfuller, kpss and q_stat above) -- confirm the alias.
stools.q_stat(stools.acf(residuals)[1:], nobs=len(residuals))
fig = plt.figure()
qq_ax = fig.add_subplot()
sm.qqplot(y, line='s', ax=qq_ax)
plt.show()
###############################################################################
#5. Load package datasets. Use data(faithful) to import the waiting time (in min)
# between eruptions and the duration (in min) of the eruption for the Old Faithful
# geyser in Yellowstone National Park, Wyoming, USA. We would like to forecast
# when the next ejection would be.
# • The length of the samples? How many variables?
# • Perform unit root tests on the data. Is there unit-root in the data?
# • Plot ACF and PACF to determine the appropriate order
# • Fitting the time series model