def unitroot_test(series):
    """Unit-root diagnostics: plots, AR lag-order comparison, ADF and PP tests.

    Prints a LaTeX-style table of AIC/BIC for AR(p) with p = 12..1, the lag
    order chosen by ``ar_select_order``, then summaries of the ADF and
    Phillips-Perron (tau and rho) tests.
    """
    # Basic statistic
    plt.figure()
    plt.plot(series)
    plot_pacf(series)
    # ADF test
    # AIC & BIC from lags 12 to 1
    print('$p$ & AIC & BIC \\\\')
    max_lags = 12
    for lags in range(max_lags, 0, -1):
        res = AutoReg(series, lags, 'n').fit()
        print(f'{lags} & {round(res.aic, 3)} & {round(res.bic, 3)} \\\\')
    # Best lags by `ar_select_order`
    sel = ar_select_order(series, max_lags, trend='n')
    lags = sel.ar_lags[-1]
    print(f'Lags selection: {sel.ar_lags}')
    # Start ADF test
    adf = ADF(series, lags)
    print(adf.summary())
    # PP test, q = 3 for both variants
    pp_tau = PhillipsPerron(series, 3, test_type='tau')
    pp_rho = PhillipsPerron(series, 3, test_type='rho')
    print(pp_tau.summary())
    print(pp_rho.summary())
def AugmentedDickeyFullerTest(data, printResults=True, trend=None, lags=None):
    """Run ADF unit-root tests on every column of *data* over a grid of
    trend specifications and lag lengths.

    Parameters
    ----------
    data : pandas.DataFrame
        Each column is tested after dropping NaNs.
    printResults : bool
        When True, print every test summary.
    trend : iterable or None
        Trend specifications to try; defaults to {'nc', 'c', 'ct', 'ctt'}.
    lags : iterable or None
        Lag lengths to try; defaults to 0..24.

    Returns
    -------
    dict
        results[column][trend][lag] -> arch ADF result object.
    """
    options_Trend = trend if trend is not None else {'nc', 'c', 'ct', 'ctt'}
    options_Lags = lags if lags is not None else set(range(25))
    #options_LagMethod = lagMethod if lagMethod != None else {'AIC', 'BIC', 't-stat', None}
    results = {}
    for column in data.columns:
        print("Augmented Dickey Fuller test for column: " + column)
        results_Trend = {}
        for option_Trend in options_Trend:
            results_Lag = {}
            for option_Lag in options_Lags:
                result = ADF(data[column].dropna(), trend=option_Trend, lags=option_Lag)
                if printResults:
                    # BUG FIX: summary() was called but its value discarded,
                    # so printResults previously had no visible effect.
                    print(result.summary())
                results_Lag[option_Lag] = result
            results_Trend[option_Trend] = results_Lag
        results[column] = results_Trend
    return results
def UnitRootTest(ret, isdefaultmethod=True, isprintsummary=False):
    """
    Run the Augmented Dickey-Fuller (ADF) unit-root test on *ret*.

    If the null hypothesis is accepted, the series has a unit root and is
    non-stationary -> returns False.
    If the null hypothesis is rejected, the series has no unit root and is
    weakly stationary -> returns True.

    NOTE(review): ``PVal`` is a significance threshold defined elsewhere in
    the module; the original comments suggest 5% — confirm.
    """
    # Unless debugging, the default parameter values are recommended.
    if isdefaultmethod:
        # default & recommended in the current version (arch.unitroot.ADF)
        results = ADF(ret, lags=None, max_lags=None,
                      trend='c', method="AIC")
        if isprintsummary:
            print(results.summary().as_text())
        if results.pvalue < PVal:  # ADF p-value below the threshold
            if isprintsummary:
                print("- - - - - - - - - - - - - - - - - - - - - - - - - -")
                print("ADF stats p-value < %.2f -> weakly stationary!" % PVal)
                print("- - - - - - - - - - - - - - - - - - - - - - - - - -")
            return True
        else:
            if isprintsummary:
                print("- - - - - - - - - - - - - - - - - - - - - - - - - -")
                print("ADF stats p-value > %.2f -> non-stationary!" % PVal)
                print("- - - - - - - - - - - - - - - - - - - - - - - - - -")
            return False
    else:
        # future preference: statsmodels implementation
        results = stattools.adfuller(ret, maxlag=None,
                                     regression='c', autolag="AIC",
                                     store=False, regresults=False)
        # returns: adfstat, pvalue, usedlag, nobs, critvalues(, icbest)
        if isprintsummary:
            print("ADF test statistics: ", results[0])
            print("MacKinnon approximated p-value: ", results[1])
            print("# of lags used: ", results[2])
            print("# of obs. used for ADF test: ", results[3])
            print("Critical values for p-value = 0.01, 0.05, and 0.10: ")
            print(results[4]["1%"])
            print(results[4]["5%"])
            print(results[4]["10%"])
            print("The best information criterion (min. AIC): ", results[5])
        if results[1] < PVal:  # ADF p-value below the threshold
            if isprintsummary:
                print("- - - - - - - - - - - - - - - - - - - - - - - - - -")
                print("ADF stats p-value < %.2f -> weakly stationary!" % PVal)
                print("- - - - - - - - - - - - - - - - - - - - - - - - - -")
            return True
        else:
            if isprintsummary:
                print("- - - - - - - - - - - - - - - - - - - - - - - - - -")
                print("ADF stats p-value > %.2f -> non-stationary!"
                      % PVal)
                print("- - - - - - - - - - - - - - - - - - - - - - - - - -")
            return False
def ADF_test(self, df_ts, lags=None):
    """Run an arch ADF unit-root test on *df_ts*, print and return it.

    Parameters
    ----------
    df_ts : pandas.Series
        Series to test; NaNs are dropped if the raw input is rejected.
    lags : int or None
        Fixed lag length; None keeps arch's automatic lag selection.
    """
    try:
        adf = ADF(df_ts)
    except Exception:
        # ADF can fail on NaNs; retry on the cleaned series.
        adf = ADF(df_ts.dropna())
    # BUG FIX: the original compared `lags == 'None'` (the string literal),
    # so with the default lags=None the else-branch always ran and assigned
    # `adf.lags = None`, discarding automatic lag selection.
    if lags is not None:
        adf.lags = lags
    print(adf.summary().as_text())
    return adf
def test():
    """Cointegration sanity check between 300033 and 300059 closes.

    Regresses the second close on the first (with intercept) over the
    pre-2016 training window and prints the ADF summary of the residual.
    """
    df = merge_prices(get_price('300033'), get_price('300059'))
    train_df, test_df = split_by_date(df, '2015-12-31')
    x, y = train_df.close_x, train_df.close_y
    ols_fit = sm.OLS(y, sm.add_constant(x)).fit()
    print(ADF(ols_fit.resid).summary())
def time_series(self):
    """ACF/PACF diagnostics and an ADF unit-root test on self.rate."""
    series = self.rate
    # autocorrelation / partial autocorrelation coefficients
    acf_vals = stattools.acf(series)
    pacf_vals = stattools.pacf(series)
    # correlogram plots
    plot_acf(series, use_vlines=True, lags=30)
    plot_pacf(series, use_vlines=True, lags=30)
    # Stationarity checks: 1) time plot, 2) ACF/PACF, 3) unit-root tests
    # (DF / ADF / PP).  Here: the ADF test.
    print(ADF(series).summary().as_text())
def ADF_test(df_ts, lags=None):
    """
    ADF from arch
    formula: xt-xt-1 ~ b0 + (b1-1)*xt-1 + e
    test if b1-1 == 0 ~ DF statistics
    :param df_ts: series to test (NaNs dropped if the raw input is rejected)
    :param lags: fixed lag length, or None for automatic selection
    :return: the arch ADF result object
    """
    try:
        adf = ADF(df_ts)
    except Exception:
        # ADF can fail on NaNs; retry on the cleaned series.
        adf = ADF(df_ts.dropna())
    # BUG FIX: the original tested `lags == 'None'` (the string literal), so
    # with the default lags=None the else-branch always ran and overwrote
    # adf.lags with None, discarding automatic lag selection.
    if lags is not None:
        adf.lags = lags
    print(adf.summary().as_text())
    return adf
@author: Liu
"""
import numpy as np
import pandas as pd
import pandas_datareader as web
import matplotlib.pyplot as plt
import datetime

# Daily NASDAQ composite (^IXIC) adjusted close, 2018-01-01 .. 2019-06-30.
start = datetime.datetime(2018, 1, 1)
end = datetime.datetime(2019, 6, 30)
nasdaq = web.DataReader('^IXIC', 'yahoo', start, end)
nasdaq = nasdaq.dropna()
nasdaq = pd.DataFrame(nasdaq['Adj Close'].values, index=pd.to_datetime(nasdaq.index), columns=['Price'])
# Simple returns: (P_t - P_{t-1}) / P_{t-1}
nasdaq = (nasdaq - nasdaq.shift(1)) / nasdaq.shift(1)
nasdaq = nasdaq.dropna()
nasdaq.columns = ['Return']
nasdaq.tail(3)
# Hold the last 3 observations out of the training sample.
traindata = nasdaq[:-3]
from arch.unitroot import ADF
result = ADF(traindata.Return, max_lags=10)
print(result.summary().as_text())
from statsmodels.tsa import stattools
# Ljung-Box Q statistics on autocorrelation lags 1..11.
LjungBox = stattools.q_stat(stattools.acf(traindata)[1:12], len(traindata))
LjungBox[1][-1]
import statsmodels.graphics.tsaplots as ts
ts.plot_acf(traindata, use_vlines=True, lags=30)
# NOTE(review): this chunk begins inside the body of an `SSD(priceX, priceY)`
# function whose `def` line is outside this view (the code through
# `return SSD` is the function tail; everything after is top-level script).
# Left byte-identical because the missing header makes a safe
# reconstruction impossible from here.
if priceX is None or priceY is None: print('缺少价格序列.') returnX = (priceX - priceX.shift(1)) / priceX.shift(1)[1:] returnY = (priceY - priceY.shift(1)) / priceY.shift(1)[1:] standardX = (returnX + 1).cumprod() standardY = (returnY + 1).cumprod() SSD = np.sum((standardX - standardY)**2) return SSD dis = SSD(PAf, PBf) print(dis) PAflog = np.log(PAf) adfA = ADF(PAflog) print(adfA.summary().as_text()) retA = PAflog.diff()[1:] adfretA = ADF(retA) print(adfretA.summary().as_text()) PBflog = np.log(PBf) adfB = ADF(PBflog) print(adfB.summary().as_text()) retB = PBflog.diff()[1:] adfretB = ADF(retB) print(adfretB.summary().as_text()) PAflog.plot(label='601988', style='--') PBflog.plot(label='600000', style='-') plt.legend(loc='upper left') plt.title('中国银行与浦发银行的对数价格时序图')
# 上证综指的平稳性 SHClose = SHindex.Clsindex fig = plt.figure() SHClose.plot() fig.savefig("stabibly.png") fig = plt.figure() SHRet.plot() fig.savefig("SHret.png") fig = plt.figure() fig = plot_acf(SHClose, use_vlines=True, lags=30) fig.savefig("SHRet_acf.png") # 单位根检验 adfSHRet = ADF(SHRet) print(adfSHRet.summary().as_text()) adfSHClose = ADF(SHClose) print(adfSHClose.summary().as_text()) # 白噪声 whiteNoise = np.random.standard_normal(size=500) fig = plt.figure() plt.plot(whiteNoise, c="b") fig.savefig("whiteNoise.png") # 上证综指的白噪声检测 LB1 = stattools.q_stat(stattools.acf(SHRet)[1:13], len(SHRet)) print(LB1) print(LB1[1][-1]) LB2 = stattools.q_stat(stattools.acf(SHClose)[1:13], len(SHClose)) print(LB2[1][-1])
def do_pair_trading_org(self):
    """Pair-trading backtest on 601988 vs 600000: test cointegration over a
    formation period, then trade level-based signals over a trading period."""
    sh = pd.read_csv('./data/sh50p.csv', index_col='Trddt')
    sh.index = pd.to_datetime(sh.index)
    # Pair-trading live test
    # Extract formation-period data
    formStart = '2014-01-01'
    formEnd = '2015-01-01'
    PA = sh['601988']
    PB = sh['600000']
    PAf = PA[formStart:formEnd]
    PBf = PB[formStart:formEnd]
    # Formation-period cointegration tests
    # Order-one integration (I(1)) check for stock A
    log_PAf = np.log(PAf)
    adfA = ADF(log_PAf)
    print(adfA.summary().as_text())
    adfAd = ADF(log_PAf.diff()[1:])
    print(adfAd.summary().as_text())
    # Stationarity check for stock B
    log_PBf = np.log(PBf)
    adfB = ADF(log_PBf)
    print(adfB.summary().as_text())
    adfBd = ADF(log_PBf.diff()[1:])
    print(adfBd.summary().as_text())
    #
    # Cointegration test: OLS of log(B) on log(A) with intercept
    model = sm.OLS(log_PBf, sm.add_constant(log_PAf)).fit()
    print('model:\r\n{0}'.format(model.summary()))
    alpha = model.params[0]
    print('alpha={0};'.format(alpha))
    beta = model.params[1]
    print('beta={0}'.format(beta))
    # Unit-root test on the residual spread
    spreadf = log_PBf - beta * log_PAf - alpha
    adfSpread = ADF(spreadf)
    print(adfSpread.summary().as_text())
    #
    mu = np.mean(spreadf)
    sd = np.std(spreadf)
    #
    # Set the trading period
    tradeStart = '2015-01-01'
    tradeEnd = '2015-06-30'
    PAt = PA[tradeStart:tradeEnd]
    PBt = PB[tradeStart:tradeEnd]
    CoSpreadT = np.log(PBt) - beta * np.log(PAt) - alpha
    print('CoSpreadT: {0};'.format(CoSpreadT.describe()))
    plt.rcParams['font.family'] = 'sans-serif'
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    CoSpreadT.plot()
    plt.title('交易期价差序列(协整配对)')
    plt.axhline(y=mu, color='black')
    plt.axhline(y=mu + 0.2 * sd, color='blue', ls='-', lw=2)
    plt.axhline(y=mu - 0.2 * sd, color='blue', ls='-', lw=2)
    plt.axhline(y=mu + 1.5 * sd, color='green', ls='--', lw=2.5)
    plt.axhline(y=mu - 1.5 * sd, color='green', ls='--', lw=2.5)
    plt.axhline(y=mu + 2.5 * sd, color='red', ls='-.', lw=3)
    plt.axhline(y=mu - 2.5 * sd, color='red', ls='-.', lw=3)
    plt.show()
    #
    # Spread bands at +-0.2/1.5/2.5 standard deviations around the mean.
    level = (float('-inf'), mu - 2.5 * sd, mu - 1.5 * sd, mu - 0.2 * sd,
             mu + 0.2 * sd, mu + 1.5 * sd, mu + 2.5 * sd, float('inf'))
    print('level: {0}={1}'.format(type(level), level))
    #
    # Band index shifted so that 0 is the neutral (middle) band.
    prcLevel = pd.cut(CoSpreadT, level, labels=False) - 3
    print('prcLevel: {0}'.format(prcLevel.head()))
    signal = self.TradeSig(prcLevel)
    print('signal: {0}={1}'.format(type(signal), signal))
    # position
    position = [signal[0]]
    ns = len(signal)
    for i in range(1, ns):
        position.append(position[-1])
        if signal[i] == 1:
            position[i] = 1
        elif signal[i] == -2:
            position[i] = -1
        elif signal[i] == -1 and position[i - 1] == 1:
            position[i] = 0
        elif signal[i] == 2 and position[i - 1] == -1:
            position[i] = 0
        elif signal[i] == 3:
            position[i] = 0
        elif signal[i] == -3:
            position[i] = 0
    position = pd.Series(position, index=CoSpreadT.index)
    print('position: {0}'.format(position.tail()))
    #
    account = self.TradeSim(alpha, beta, PAt, PBt, position)
    print('account: {0}'.format(account.tail()))
    #
    account.iloc[:, [0, 1, 4]].plot(style=['--', '-', ':'])
    plt.title('配对交易账户')
    plt.show()
def test_adf_lags_10(self):
    """ADF with the lag length fixed at 10 reproduces the reference statistic."""
    result = ADF(self.inflation, lags=10)
    assert_almost_equal(result.stat, -2.28375, DECIMAL_4)
    result.summary()
# Lean Hogs futures: distribution diagnostics and a unit-root test battery
# on the log returns.
garch_plot1(data['Close'])
lh = np.log(data / data.shift(1)).dropna()  # d 1: log returns
garch_plot1(lh['Close'])
print('Lean Hogs Future skewness is {}'.format(lh.skew(axis=0)[0]))
print('Lean Hogs Future kurtosis is {}'.format(lh.kurtosis(axis=0)[0]))
sns.distplot(lh['Close'], color='blue')  # density plot
plt.title('1986–2018 Lean Hogs Future return frequency')
plt.xlabel('Possible range of data values')
# Pull up summary statistics
print(lh.describe())
# Unit-root / stationarity tests from arch.unitroot.
adf = ADF(lh['Close'])
print(adf.summary().as_text())
kpss = KPSS(lh['Close'])
print(kpss.summary().as_text())
dfgls = DFGLS(lh['Close'])
print(dfgls.summary().as_text())
pp = PhillipsPerron(lh['Close'])
print(pp.summary().as_text())
za = ZivotAndrews(lh['Close'])
print(za.summary().as_text())
vr = VarianceRatio(lh['Close'], 12)
print(vr.summary().as_text())
from arch import arch_model
# presumably rescaled to percent returns for arch_model fitting — confirm
X = 100 * lh
# TAIEX weighted index: time plots, ACF/PACF and ADF tests.
plot_pacf(taiexRet, use_vlines=True, lags=30)
close.plot()
plt.title('2014-2015年加權股價指數收盤指數時序圖 ')
taiexRet.plot()
plt.title('2014-2015年加權股價指數收益率指數時序圖')
plot_acf(taiexRet, use_vlines=True, lags=30)
plot_pacf(taiexRet, use_vlines=True, lags=30)
plot_acf(close, use_vlines=True, lags=30)
# Unit-root tests on the return series and the close levels.
adf_taiexRet = ADF(taiexRet)
print(adf_taiexRet.summary().as_text())
adfclose = ADF(close)
print(adfclose.summary().as_text())
# Generate a pure white-noise series
whiteNoise = np.random.standard_normal(size=500)
# Plot it
plt.plot(whiteNoise, c='b')
plt.title('White Noise')
# Ljung-Box Q statistics on autocorrelation lags 1..12.
LjungBox1 = stattools.q_stat(stattools.acf(taiexRet)[1:13], len(taiexRet))
LjungBox1
LjungBox1[1][-1]
#table2.to_csv('Table_2.csv') ### Table 3 #JC1.iloc[:, 2] = JC1.iloc[:, 2]*(-1) x = JC1[['growth', 'VIX', 'BDI']] x = sm.add_constant(x) y = JC1['ore_price'] model_t3 = sm.OLS(y, x).fit(cov_type='HC1') model_t3.summary() residual_t3 = model_t3.resid adf_resi_t3 = ADF(residual_t3) adf_resi_t3.summary() # Table for coefficient table3_1 = pd.DataFrame( index=['theta_0', 'theta_1', 'theta_2', 'theta_3'], columns=['Estimated Coefficient', 'Standard Error', 't-stats', 'p-Value']) t3_1_cols = table3_1.columns.tolist() table3_1[t3_1_cols[0]] = model_t3.params.tolist() table3_1[t3_1_cols[1]] = model_t3.bse.tolist() table3_1[t3_1_cols[2]] = model_t3.tvalues.tolist() table3_1[t3_1_cols[3]] = model_t3.pvalues.tolist() # Table for F-stat, R^2, ADF table3_2 = pd.DataFrame(index=[ 'F-statistic', 'F-stat-pvalue', 'R^2', 'ADF-statistics Residual Value',
# Baiyun close prices: ADF on log returns, then candidate ARMA fits compared
# by AIC, and standardised residual diagnostics.
baiyun = pd.read_csv('data/baiyun.csv')
baiyun.index = pd.to_datetime(baiyun.Date)
# baiyun = baiyun.sort_index();
bclose = baiyun.Close
# print(bclose)
logReturn_all = pd.Series(np.log(bclose)).diff().dropna()
# Hold the last 10 observations out as a validation set.
logReturn1 = logReturn_all[-10:]
logReturn = logReturn_all[:-10]
print(logReturn_all.shape, logReturn.shape, logReturn1.shape)
print(logReturn1)
# logReturn.plot()
adfReutn = ADF(logReturn, lags=6)
print(adfReutn.summary().as_text())
# plot_acf(logReturn, lags=20)
# plot_pacf(logReturn, lags=20)
# AR(2) vs MA(2), compared by AIC below.
model1 = arima_model.ARIMA(logReturn.values, order=(2, 0, 0)).fit()
model2 = arima_model.ARIMA(logReturn.values, order=(0, 0, 2)).fit()
print("model1.aic=%f \t model2.aic=%f" % (model1.aic, model2.aic))
print(model2.sigma2)
# Standardised residuals of the MA(2) fit.
strresid = model2.resid / math.sqrt(model2.sigma2)
# strresid.plot()
plt.subplot(211)
plt.plot(strresid)
# plot_acf(strresid , lags=20)
LinjiuBox = stattools.q_stat(stattools.acf(strresid)[1:13], len(strresid))
# # plot_acf(ibm, use_vlines=True, lags=30) # plot_acf(ibm, use_vlines=True, lags=20) # adfIBM = ADF(ibm) # print(adfIBM.summary().as_text()) # LinjiuBox = stattools.q_stat(stattools.acf(ibm)[1:13], len(ibm)) # print(LinjiuBox[1][-1]) # 第6题 # ge = CRSP.iloc[:,3] # # ge.plot() # plot_acf(ge, use_vlines=True, lags=20) # LinjiuBox = stattools.q_stat(stattools.acf(ge)[1:2], len(ge)) # print('lag=2',LinjiuBox[1][-1]) # LinjiuBox = stattools.q_stat(stattools.acf(ge)[1:9], len(ge)) # print('lag=9',LinjiuBox[1][-1]) # 第7题 r500 = SP500.r500 print(r500.head()) plt.subplot(221) r500.plot(subplots=True) plot_acf(r500, lags=20) plot_pacf(r500, lags=20) adfIBM = ADF(r500) print(adfIBM.summary().as_text()) plt.show()
# Shanghai index returns: ACF/PACF and ADF; then CPI stationarity checks.
shindex = index['close']
# NOTE(review): diff(-1)/shift(-1) computes the change against the NEXT row,
# which suggests the rows are stored newest-first — confirm.
shindex = shindex.diff(-1)/shindex.shift(-1)
shindex.index = pd.to_datetime(shindex.index)
shindex = shindex.dropna()
acf = stattools.acf(shindex)
pacf = stattools.pacf(shindex)
plot_acf(shindex,use_vlines=True,lags=30)
plot_pacf(shindex,use_vlines=True,lags=30)
shindex.plot()
adfshindex = ADF(shindex)
print(adfshindex.summary().as_text())
# White-noise sample for visual comparison.
whitenoise = np.random.standard_normal(500)
plt.plot(whitenoise,c = 'b')
cpi = ts.get_cpi()
cpi.index = pd.to_datetime(cpi['month'])
cpi = cpi['cpi']
cpitrain = cpi['2016-01-01':'2000-01-01']
cpitrain.plot()
# Stationary?
print(ADF(cpitrain,max_lags=10).summary().as_text())
# White noise?
ljb0 = stattools.q_stat(stattools.acf(cpitrain)[1:12],len(cpitrain))
ljb0[1][-1]
def many_paras():
    """
    Reference notes plus a grab-bag of time-series / trading computations.

    p-value controls the false positive rate (FPR);
    q-value controls the false discovery rate (FDR).
    Q-statistic: Qlb = T*(T+2)*sum_{j=1..p}(r_j^2/(T-j)), where r_j is the
        lag-j autocorrelation of the residual series, T the number of
        observations, p the number of lags.
    FDR = E(V/R), with V false discoveries and R total rejections.
    acf: autocorrelation coefficients -- y(t) = a0 + a1*y(t-1) + epsilon
        p(x(i)|x(i-h)): sigma(i=1,n-h)((x(i)-mu)*(x(i+h)-mu)) /
                        sigma(i=1,n)((x(i)-mu)^2)
    pacf: partial autocorrelation, with the k-1 intermediate lags taken as
        known and only lag k estimated --
        y(t) = a0 + a1*y(t-1) + ... + a1*y(t-k) + epsilon
        p(x(i)..x(i-k)|x(i-1)..x(i-k+1))
    ARMA(p,q): AR(p) autoregression combined with MA(q) moving average;
    ARIMA adds differencing on top of the ARMA model.
    ADF: first-order autoregression with white-noise disturbance; tested
        via the unit root — a unit root means non-stationary.
        y(t) = mu + fi*y(t-1) + epsilon; order p requires the p roots to
        sum to less than 1.
    sma: simple moving average
    wma: weighted moving average
    ema: exponential moving average
    ewma: exponentially weighted moving average
    OBV: On Balance Volume,
        volume ratio = [(close-low)-(high-close)] / (high-low) * V
    :return:
    """
    # 1. autocorrelation coefficients
    acfs = stattools.acf(SHRet)
    # plot the autocorrelation function
    plot_acf(SHRet, use_vlines=True, lags=30)
    # 2. partial autocorrelation coefficients
    pacfs = stattools.pacf(SHRet)
    plot_pacf(SHRet, use_vlines=True, lags=30)
    # 3. run the ADF unit-root test and inspect the result
    adfSHRet = ADF(SHRet)
    print(adfSHRet.summary().as_text())
    # 4. Q statistic
    LjungBox1 = stattools.q_stat(stattools.acf(SHRet)[1:13], len(SHRet))
    print(LjungBox1)
    # 5. lag is the m in the test expression above; 12 lags are tested here.
    LjungBox = stattools.q_stat(stattools.acf(CPItrain)[1:12], len(CPItrain))
    # order=(1, 0, 1) builds an ARMA(1,1) model; the middle 0 means the raw
    # (undifferenced) data is used.
    model1 = arima_model.ARIMA(CPItrain, order=(1, 0, 1)).fit()
    model1.summary()
    model1.conf_int()
    # 6. diagnostic plots for the fitted model
    stdresid = model1.resid / math.sqrt(model1.sigma2)
    plt.plot(stdresid)
    plot_acf(stdresid, lags=20)
    LjungBox = stattools.q_stat(stattools.acf(stdresid)[1:13], len(stdresid))
    print(LjungBox[1][-1])
    print(model1.forecast(3)[0])
    # 7. Autoregressive conditional heteroskedasticity model
    # y(t)=b*x(t)+epsilon(t)
    # epsilon(t)^2=a0+a1*epsilon(t-1)^2+a2*epsilon(t-2)^2+n(t)
    # \sigma_t^{2}=\omega+\sum_{i=1}^{p}\alpha_{i}\epsilon_{t-i}^{2}
    # n(t) i.i.d. with zero mean, var(n^2)=r^2
    am = arch_model(SHret)
    model = am.fit(update_freq=0)
    print(model.summary())
    # 8. pair handling
    pt = TradeTool()
    SSD = pt.SSD(priceAf, priceBf)
    SSDspread = pt.SSDSpread(priceAf, priceBf)
    SSDspread.describe()
    coefficients = pt.cointegration(priceAf, priceBf)
    CoSpreadF = pt.CointegrationSpread(priceA, priceB, formPeriod, formPeriod)
    CoSpreadTr = pt.CointegrationSpread(priceA, priceB, formPeriod, tradePeriod)
    CoSpreadTr.describe()
    bound = pt.calBound(priceA, priceB, 'Cointegration', formPeriod, width=1.2)
    # 9. pair trade point selection
    trtl = TradeTool()
    account = trtl.TradeSimPair(PAt, PBt, position)
    # 10. momentum function
    et = ElementTool()
    et.momentum(Close, 5).tail(n=5)
    momen35 = et.momentum(Close, 35)
    signal = []
    for i in momen35:
        if i > 0:
            signal.append(1)
        else:
            signal.append(-1)
    signal = pd.Series(signal, index=momen35.index)
    signal.head()
    tradeSig = signal.shift(1)
    ret = Close / Close.shift(1) - 1
    # ret=ret['2014-02-20':]
    # ret.head(n=3)
    Mom35Ret = ret * (signal.shift(1))
    Mom35Ret[0:5]
    real_Mom35Ret = Mom35Ret[Mom35Ret != 0]
    real_ret = ret[ret != 0]
    Rsi12 = et.rsi(BOCMclp, 12)
    # RSI strategy
    rsi6 = et.rsi(BOCMclp, 6)
    rsi24 = et.rsi(BOCMclp, 24)
    # use rsi6 to capture buy/sell points
    Sig1 = []
    for i in rsi6:
        if i > 80:
            Sig1.append(-1)
        elif i < 20:
            Sig1.append(1)
        else:
            Sig1.append(0)
    date1 = rsi6.index
    Signal1 = pd.Series(Sig1, index=date1)
    Signal1[Signal1 == 1].head(n=3)
    Signal1[Signal1 == -1].head(n=3)
    Signal2 = pd.Series(0, index=rsi24.index)
    lagrsi6 = rsi6.shift(1)
    lagrsi24 = rsi24.shift(1)
    for i in rsi24.index:
        if (rsi6[i] > rsi24[i]) & (lagrsi6[i] < lagrsi24[i]):
            Signal2[i] = 1
        elif (rsi6[i] < rsi24[i]) & (lagrsi6[i] > lagrsi24[i]):
            Signal2[i] = -1
    signal = Signal1 + Signal2
    signal[signal >= 1] = 1
    signal[signal <= -1] = -1
    signal = signal.dropna()
    tradSig = signal.shift(1)
    tt = TradeTool()
    BuyOnly = tt.strategy_analy(buy, ret)
    SellOnly = tt.strategy_analy(sell, ret)
    Trade = tt.strategy_analy(tradSig, ret)
    Test = pd.DataFrame({
        "BuyOnly": BuyOnly,
        "SellOnly": SellOnly,
        "Trade": Trade
    })
    # cumulative returns
    cumStock = np.cumprod(1 + ret) - 1
    cumTrade = np.cumprod(1 + tradeRet) - 1
    # 12. simple moving average
    sma5 = et.smaCal(Close, 5)
    # 12. weighted moving average
    wma5 = et.wmaCal(Close, w)
    # 12. exponential moving average
    Ema = et.emaCal(Close, period)
    print(Ema)
    # 12. exponentially weighted moving average
    Ewma = et.ewmaCal(Close, 5, 0.2)
    # 13. Bollinger bands
    UnicomBBands = et.bbands(Close, 20, 2)
    print(UnicomBBands)
    multiplier = [1, 1.65, 1.96, 2, 2.58]
    price2010 = Close['2010-01-04':'2010-12-31']
    tt.CalBollRisk(price2010, multiplier)
    # 14. performance
    btt = BackTestTool()
    Performance1 = btt.perform(Close, tradSignal1)
    print(Performance1)
    # 15. trade and backtest
    KDtrade = btt.trade(KDSignal, close)
    btt.backtest(KDtrade.Ret, KDtrade.KDtradeRet)
    # 16. upward / downward breakouts
    KDupbreak = et.upbreak(KValue, DValue) * 1
    KDupbreak[KDupbreak == 1].head()
    KDdownbreak = et.downbreak(KValue, DValue) * 1
    KDdownbreak[KDdownbreak == 1].head()
    # performance of the "golden cross" / "death cross" strategy
    btt.backtest(KDbreak.Ret, KDbreak.KDbreakRet)
    # 17. volume indicators
    cumUpVol = et.VOblock(UpVol)
    cumDownVol = et.VOblock(DownVol)
    ALLVol = np.array([cumUpVol, cumDownVol]).transpose()
    # 18. decide holding
    hold = tt.judge_hold(trade)
    # 19. single-trade simulation
    TradeAccount = tt.TradeSim(close, hold)
    print(TradeAccount)
Skew: -0.206 Prob(JB): 0.0558 Kurtosis: 2.486 Cond. No. 148. ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. """
# NOTE(review): the text above closes a pasted OLS-summary string whose
# opening triple quote is outside this view; left byte-identical.
beta = result.params[1]
# residual spread
spread = PAlog - beta * ZSlog
print(spread.head())
spread.plot()
# The residual mean is 0, so the trend is set to 'c'.
adfspread = ADF(spread, trend='c')
print(adfspread.summary().as_text())
# NOTE(review): the string below is a pasted ADF summary; it is not closed
# within this view and is left byte-identical.
""" Name: close_price, dtype: float64 Augmented Dickey-Fuller Results ===================================== Test Statistic -1.429 P-value 0.568 Lags 0 ------------------------------------- Trend: Constant Critical Values: -3.45 (1%), -2.87 (5%), -2.57 (10%) Null Hypothesis: The process contains a unit root. Alternative Hypothesis: The process is weakly stationary. 不能拒绝原假设,误差太大了
# Plot the "numbers" series and select an ARMA order for it.
numbers.plot()
plt.show()
plot_acf(numbers,lags=20)
from statsmodels.tsa import stattools
stattools.arma_order_select_ic(numbers.values,max_ma=4)
#7. zgsy closes: ADF on levels, then on log returns, then ARMA candidates.
zgsy=pd.read_csv('Data/Part4/003/zgsy.csv')
clprice=zgsy.iloc[:,4]
clprice.plot()
plot_acf(clprice,lags=20)
from arch.unitroot import ADF
adf=ADF(clprice,lags=6)
print(adf.summary().as_text())
logReturn=pd.Series((np.log(clprice))).diff().dropna()
logReturn.plot()
adf=ADF(logReturn,lags=6)
print(adf.summary().as_text())
plot_acf(logReturn,lags=20)
plot_pacf(logReturn,lags=20)
from statsmodels.tsa import arima_model
# MA(1) vs AR(1) candidates for the log-return series.
model1=arima_model.ARIMA(logReturn.values,order=(0,0,1)).fit()
model1.summary()
model2=arima_model.ARIMA(logReturn.values,order=(1,0,0)).fit()
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score as r2
from arch.unitroot import ADF


def parser(x):
    """Parse a dd/mm/YYYY date string from the CSV index."""
    return datetime.strptime(x, '%d/%m/%Y')


# Bitcoin train/test price series with a dd/mm/YYYY date index.
data = pd.read_csv('Bitcoin-Train.csv', index_col=0, parse_dates=[0], date_parser=parser)
test_data = pd.read_csv('Bitcoin-Test.csv', index_col=0, parse_dates=[0], date_parser=parser)

# Augmented Dickey Fuller Test for Stationarity
# NOTE(review): `regression='c'` looks like a statsmodels-adfuller keyword;
# arch's ADF takes `trend` — confirm against the installed arch version.
adf_test = ADF(data, regression='c')
adf_test.lags = 2
# BUG FIX: was `adf.trend = 'c'` — `adf` is undefined here; the test object
# is `adf_test`.
adf_test.trend = 'c'
print(adf_test.summary().as_text())
plt.plot(data)
# ACF and PACF of Raw data
plot_acf(data)
plot_pacf(data)
### Stationarity Conversion ###
data_diff = data.diff(periods=1)
data_diff = data_diff[1:]
### ACF and PACF for differenced Time Series ###
plot_acf(data_diff)
plot_pacf(data_diff)
def series(self):
    """Plot the log-return series, run ACF/PACF/ADF diagnostics, fit
    ARCH/GARCH volatility models, and mark high/low conditional-volatility
    days on the close-price chart."""
    rate = self.rate_log
    fig = plt.figure(figsize=(20, 10), dpi=80,
                     facecolor=[199 / 255, 238 / 255, 206 / 255],
                     edgecolor='g')
    fig.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.15)
    ax = fig.add_subplot(1, 1, 1)
    ax.set_facecolor([199 / 255, 238 / 255, 206 / 255])
    # roughly 30 x-axis ticks across the sample
    min_ticks = math.floor(len(rate) / 30)
    ax.set_xlabel('time')
    ax.set_ylabel('price')
    xtick = []
    for i in range(0, len(rate), min_ticks):
        xtick.append(i)
    ax.set_xticks(xtick)
    xlable = []
    for i in xtick:
        xlable.append(str(rate.index[i]))
    ax.set_xticklabels(xlable, rotation=90)
    # pad the x-limits by 1% of the sample length
    ext = math.floor(0.01 * len(rate))
    ax.set_xlim((0 - ext, len(rate) + ext))
    ax.plot(rate, color='blue', label='origin')
    ax.legend(loc='best')
    ax.set_title("M Rate Series")
    # autocorrelation plots: raw returns, |returns| and squared returns
    plot_acf(rate, use_vlines=True, lags=30)
    plot_acf(abs(rate), use_vlines=True, lags=30)
    # autocorrelation
    plot_acf(rate**2, use_vlines=True, lags=30)
    # partial autocorrelation
    plot_pacf(rate, use_vlines=True, lags=30)
    # autocorrelation coefficients
    acfs = stattools.acf(rate)
    # partial autocorrelation coefficients
    pacfs = stattools.pacf(rate)
    # (Translation of the note below: 1) build a mean equation / econometric
    # model to remove linear dependence; 2) test the residuals for ARCH
    # effects; 3) if ARCH effects exist, build a volatility model;
    # 4) check the fitted model and refine if necessary.)
    """ 1 通过建立均值方程或计量经济学模型,消除线性依赖 2 对方程残差进行ARCH效应检验 3 如果具有ARCH效应,建立波动率模型 4 检验拟合的模型,如有必要进行改进 """
    # Stationarity: 1) time plot 2) ACF/PACF 3) unit-root tests (DF/ADF/PP)
    # ADF test
    adfrate = ADF(rate)
    print(adfrate.summary().as_text())
    plot_pacf(rate**2, use_vlines=True, lags=30)
    # ARCH(3), ARCH(10) and GARCH fits on the returns
    model3 = arch.arch_model(rate, mean='Constant', vol='ARCH', p=3)
    res = model3.fit()
    res.plot()
    model3 = arch.arch_model(rate, mean='Constant', vol='ARCH', p=10)
    res = model3.fit()
    res.plot()
    model3 = arch.arch_model(rate, mean='Constant', vol='GARCH')
    res = model3.fit()
    res.plot()
    close = self.data['收盘价']
    fig = plt.figure(figsize=(20, 10), dpi=80,
                     facecolor=[199 / 255, 238 / 255, 206 / 255],
                     edgecolor='g')
    fig.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.15)
    ax = fig.add_subplot(1, 1, 1)
    ax.set_facecolor([199 / 255, 238 / 255, 206 / 255])
    min_ticks = math.floor(len(close) / 30)
    ax.set_xlabel('time')
    ax.set_ylabel('price')
    xtick = []
    for i in range(0, len(close), min_ticks):
        xtick.append(i)
    ax.set_xticks(xtick)
    xlable = []
    for i in xtick:
        xlable.append(str(close.index[i]))
    ax.set_xticklabels(xlable, rotation=90)
    ext = math.floor(0.01 * len(close))
    ax.set_xlim((0 - ext, len(close) + ext))
    ax.plot(close, color='blue', label='close')
    # second y-axis for the fitted conditional volatility
    ax2 = ax.twinx()
    pl2 = ax2.plot(res.conditional_volatility, color='m', linestyle='-', label='volatility')
    ax.legend(loc=1)
    ax2.legend(loc=2)
    # plot the close line, then mark where small-probability tail events
    # of the return series occur
    rate = res.conditional_volatility
    rate_right = rate[rate > rate.quantile(0.95)]
    for i in range(0, len(rate_right)):
        x = rate[rate.index == rate_right.index[i]]
        x[0] = ax.get_ylim()[0] * 1.05
        ax.plot(x, color='red', marker='*')
        x[0] = close[x.index[0]]
        ax.plot(x, color='red', marker='*')
    rate_left = rate[rate < rate.quantile(0.05)]
    for i in range(0, len(rate_left)):
        x = rate[rate.index == rate_left.index[i]]
        x[0] = ax.get_ylim()[0] * 1.03
        ax.plot(x, color='green', marker='^')
        x[0] = close[x.index[0]]
        ax.plot(x, color='green', marker='^')
    ax.set_xticks(xtick)
    ax.set_xticklabels(xlable, rotation=90)
    ax.set_title("GARCH model tail vs close price")