def test_is_I1(price, alpha=0.05):
    """Return True when ``price`` is classified as integrated of order one.

    Two ADF tests are run: one on the levels and one on the first
    differences (``np.diff(price)``). The series counts as I(1) only when the
    ADF p-value of the levels is NOT below ``alpha`` while the p-value of the
    differences IS below ``alpha``.

    :param price: one-dimensional series of levels accepted by ``ADF``.
    :param alpha: p-value threshold applied to both tests.
    :return: ``True`` if the series is classified as I(1), otherwise ``False``.
    """
    level_rejects = ADF(price).pvalue < alpha
    diff_rejects = ADF(np.diff(price)).pvalue < alpha
    # bool() keeps the result a plain Python bool instead of numpy.bool_.
    return bool(not level_rejects and diff_rejects)
def test_trends_low_memory(trend):
    """Check that the low-memory path selects the same lag length.

    Also checks that assigning ``max_lags`` after construction emits a
    ``FutureWarning`` and that ``lags`` and ``max_lags`` both read 1 afterwards.
    """
    rng = np.random.RandomState(12345)
    walk = np.cumsum(rng.standard_normal(250))

    standard = ADF(walk, trend=trend, max_lags=16)
    low_mem = ADF(walk, trend=trend, low_memory=True, max_lags=16)
    assert standard.lags == low_mem.lags
    assert standard.max_lags == 16

    with pytest.warns(FutureWarning, match="Mutating unit root"):
        standard.max_lags = 1
    assert_equal(standard.lags, 1)
    assert_equal(standard.max_lags, 1)
def ADF_test(self, df_ts, lags=None):
    """Run an ADF unit-root test on ``df_ts``, print its summary and return it.

    :param df_ts: series to test. If building the test on the raw input
        raises, construction is retried on ``df_ts.dropna()``.
    :param lags: number of lags for the ADF regression. ``None`` (or the
        legacy string ``'None'``) leaves the lag choice to ``ADF``.
    :return: the ``ADF`` test object.
    """
    # The original compared against the string 'None', so the default
    # ``lags=None`` was never recognised as "not supplied". Treat both alike.
    if isinstance(lags, str) and lags == 'None':
        lags = None
    try:
        # Pass lags to the constructor instead of mutating the test afterwards.
        adf = ADF(df_ts, lags=lags)
    except Exception:
        # Best-effort fallback kept from the original: retry without NaNs.
        adf = ADF(df_ts.dropna(), lags=lags)
    print(adf.summary().as_text())
    return adf
def test_adf_buggy_timeseries2():
    """Reading ``stat`` for a two-observation input must raise ``InfeasibleTestException``."""
    two_points = np.asarray([0, 0])
    unit_root = ADF(two_points)
    # Previously seen failure:
    # IndexError: index 0 is out of bounds for axis 0 with size 0
    expected_message = "A minimum of 4 observations are needed"
    with pytest.raises(InfeasibleTestException, match=expected_message):
        assert np.isfinite(unit_root.stat)
def check_cointegration(self):
    """ADF-test the residuals of every pair in ``self.pairs_list``.

    For each pair the two columns of ``self.log_price`` are passed to
    ``self._get_residuals`` and the residuals are tested with ``ADF(lags=1)``.
    Progress is printed on a single, carriage-return-terminated line.

    Results are stored on the instance:

    * ``self.cointegration_result`` - one row per pair id (the pair's names
      sorted and joined with ``|``), namely the first row for that id after
      sorting by ``p_value`` in descending order, re-indexed by ``pair``.
    * ``self.cointegrated_pairs_list`` - the ids of that table split on ``|``.
    * ``self.spreads`` - residual series per pair name, indexed like
      ``self.price``.
    """
    total_pairs_length = len(self.pairs_list)
    records = []
    residuals_by_pair = {}

    for current, pair in enumerate(self.pairs_list, start=1):
        x, y = self.log_price.loc[:, pair].values.T
        pair_name = "|".join(pair)
        residuals = self._get_residuals(x, y)
        adf_test = ADF(residuals, lags=1)
        records.append(
            {
                # Order-independent key so (a, b) and (b, a) share one id.
                "id": "|".join(sorted(pair)),
                "p_value": adf_test.pvalue,
                "stat": adf_test.stat,
                "pair": pair_name,
            }
        )
        residuals_by_pair[pair_name] = residuals
        print(
            f"{current}/{total_pairs_length} ({current / total_pairs_length:.2%})",
            end="\r",
            flush=True,
        )

    spreads = pd.DataFrame(residuals_by_pair, index=self.price.index)

    table = pd.DataFrame(records).set_index("id")
    table = table.sort_values("p_value", ascending=False)
    table = table.groupby(level=0).first()

    self.cointegration_result = table.set_index("pair")
    self.cointegrated_pairs_list = [pair_id.split("|") for pair_id in table.index]
    self.spreads = spreads
def test_adf_critical_value(self):
    """Compare reported ADF critical values with ones rebuilt from ``tau_2010``."""
    adf = ADF(self.inflation, trend="c", lags=3)
    reported = adf.critical_values

    # Evaluate the "c" polynomial coefficients at 1 / nobs.
    coefficients = tau_2010["c"][0, :, ::-1].T
    rebuilt = polyval(coefficients, 1.0 / adf.nobs)
    expected = {"1%": rebuilt[0], "5%": rebuilt[1], "10%": rebuilt[2]}

    for level in expected:
        assert_almost_equal(expected[level], reported[level])
def test_adf_critical_value(self):
    """Compare reported ADF critical values with ones rebuilt from ``tau_2010``."""
    adf = ADF(self.inflation, trend='c', lags=3)
    reported = adf.critical_values

    # Evaluate the 'c' polynomial coefficients at 1 / nobs.
    coefficients = tau_2010['c'][0, :, ::-1].T
    rebuilt = polyval(coefficients, 1. / adf.nobs)
    expected = dict(zip(('1%', '5%', '10%'), (rebuilt[0], rebuilt[1], rebuilt[2])))

    for level, value in iteritems(expected):
        assert_almost_equal(value, reported[level])
def test_low_memory_singular():
    """A mostly-zero series must raise ``InfeasibleTestException`` in low-memory mode."""
    data = np.zeros(1000)
    # Only the first and last three entries are non-zero; each group shares
    # one random draw.
    data[:3] = np.random.standard_normal()
    data[-3:] = np.random.standard_normal()

    expected_message = "The maximum lag you are"
    with pytest.raises(InfeasibleTestException, match=expected_message):
        ADF(data, max_lags=10, low_memory=True).stat
def test_adf_buggy_timeseries1():
    """Reading ``stat`` for a one-observation input must raise ``InfeasibleTestException``."""
    single_point = np.asarray([0])
    unit_root = ADF(single_point)
    # Previously seen failure:
    # ValueError: maxlag should be < nobs
    expected_message = "A minimum of 4 observations are needed"
    with pytest.raises(InfeasibleTestException, match=expected_message):
        assert np.isfinite(unit_root.stat)
def test_adf_buggy_timeseries3():
    """Reading ``stat`` for a constant series must raise ``InfeasibleTestException``."""
    constant_series = np.asarray([1] * 1000)
    unit_root = ADF(constant_series)
    # Previously seen failure:
    # AssertionError: Number of manager items must equal union of block items
    # # manager items: 1, # tot_items: 0
    expected_message = "The maximum lag you are"
    with pytest.raises(InfeasibleTestException, match=expected_message):
        assert np.isfinite(unit_root.stat)
def cointegration(self, priceX, priceY):
    """Cointegration check for two price series.

    ``log(priceY)`` is regressed by OLS on a constant plus ``log(priceX)``
    and an ADF test is run on the regression residuals. A message with the
    p-value and both regression coefficients is printed either way.

    :param priceX: prices of the explanatory asset.
    :param priceY: prices of the dependent asset.
    :return: ``(intercept, beta)`` when the ADF p-value is below 0.05;
        ``None`` when it is not, or when either input is ``None``.
    """
    if priceX is None or priceY is None:
        print("缺少价格序列")
        # The original fell through here and crashed inside np.log(None).
        return None

    logX = np.log(priceX)
    logY = np.log(priceY)
    results = sm.OLS(logY, sm.add_constant(logX)).fit()
    adfSpread = ADF(results.resid)

    # params is an ndarray or a pandas Series depending on the inputs;
    # converting once makes positional access valid for both.
    params = np.asarray(results.params)
    intercept, beta = params[0], params[1]

    if adfSpread.pvalue >= 0.05:
        print("""交易价格不具有协整关系。 p-value of ADF test:%f, Coefficients of regression: Intercept: %f beta: %f """
              % (adfSpread.pvalue, intercept, beta))
        return None

    print("""交易价格具有协整关系。 p-value of ADF test:%f, Coefficients of regression: Intercept: %f beta: %f """
          % (adfSpread.pvalue, intercept, beta))
    return (intercept, beta)
def Cointegration(self, priceX, priceY):
    """Cointegration check for two price series.

    ``log(priceY)`` is regressed by OLS on a constant plus ``log(priceX)``
    and an ADF test is run on the regression residuals. A message with the
    p-value and both regression coefficients is printed either way.

    :param priceX: prices of the explanatory asset.
    :param priceY: prices of the dependent asset.
    :return: ``(intercept, beta, pvalue)`` when the ADF p-value is below
        0.05; ``None`` when it is not, or when either input is ``None``.
    """
    if priceX is None or priceY is None:
        print('缺少价格序列')
        # The original fell through here and crashed inside np.log(None).
        return None

    priceX = np.log(priceX)
    priceY = np.log(priceY)
    results = sm.OLS(priceY, sm.add_constant(priceX)).fit()
    adfSpread = ADF(results.resid)

    # params is an ndarray or a pandas Series depending on the inputs;
    # converting once makes positional access valid for both.
    params = np.asarray(results.params)
    intercept, beta = params[0], params[1]

    if adfSpread.pvalue >= 0.05:
        print('''交易价格不具有协整关系. P-value of ADF text: %f Coefficients of regression: Intercept : %f Beta: %f '''
              % (adfSpread.pvalue, intercept, beta))
        return None

    print('''交易价格具有协整关系. P-value of ADF text: %f Coefficients of regression: Intercept : %f Beta: %f '''
          % (adfSpread.pvalue, intercept, beta))
    return (intercept, beta, adfSpread.pvalue)
def test_load_source_values():
    """Print ADF results from statsmodels and from arch for one data column.

    The frame comes from ``get_ytw_test()``. Both tests are run on the values
    of its ``'CS-Aaa-3MO'`` column, and the frame is then handed to
    ``cs.print_full`` together with the value returned by ``setlogfile``.
    Nothing is asserted; the results are only printed.
    """
    df = get_ytw_test()
    log = setlogfile('test.load.source.values', logging.INFO)
    from statsmodels.tsa.stattools import adfuller
    # adf_tstat, pvalue, tstat, results = adfuller(df['CS-Aaa-3MO'].values) # , maxlag=40, regression='ct', autolag='BIC', regresults=True)
    # print(adf_tstat)
    #print(pvalue)
    #print(tstat)
    #print(results)
    # statsmodels variant: adfuller returns its results as a single object, printed as-is.
    adf = adfuller( df['CS-Aaa-3MO'].values ) # , maxlag=40, regression='ct', autolag='BIC', regresults=True)
    print(adf)
    from arch.unitroot import ADF
    # arch variant: only the p-value is printed.
    adf = ADF(y=df['CS-Aaa-3MO'].values) #, lags=40, trend='ct', method='BIC')
    print(adf.pvalue)
    import cs_data_analysis as cs
    cs.print_full(df, log)
    pass
# NOTE(review): the delimiter below opens a string literal whose closing
# delimiter is not visible in this part of the file - confirm it is matched.
'''
def unitroot_test(series):
    """Plot ``series`` and print lag diagnostics, ADF and Phillips-Perron summaries.

    The AIC/BIC table is printed as LaTeX-style rows (``a & b & c \\\\``) for
    autoregressions with 12 down to 1 lags and no trend term. The ADF test
    then uses the largest lag chosen by ``ar_select_order``, and the two
    Phillips-Perron tests (``tau`` and ``rho``) use 3 as their second
    positional argument.
    """
    # Basic plots: the raw series and its partial autocorrelation.
    plt.figure()
    plt.plot(series)
    plot_pacf(series)

    # Information criteria for AR(p), p = 12 ... 1.
    print('$p$ & AIC & BIC \\\\')
    max_lags = 12
    for lags in range(max_lags, 0, -1):
        res = AutoReg(series, lags, 'n').fit()
        print(f'{lags} & {round(res.aic, 3)} & {round(res.bic, 3)} \\\\')

    # Lag order picked by `ar_select_order`; the last selected lag is used.
    sel = ar_select_order(series, max_lags, trend='n')
    lags = sel.ar_lags[-1]
    print(f'Lags selection: {sel.ar_lags}')

    # ADF test with the selected lag.
    print(ADF(series, lags).summary())

    # Phillips-Perron tests, q = 3; both are built before either is printed.
    pp_tests = [PhillipsPerron(series, 3, test_type=kind) for kind in ('tau', 'rho')]
    for pp_test in pp_tests:
        print(pp_test.summary())
def test_adf_short_timeseries():
    """Pin the ADF statistic and selected lag for a ten-point series (GH 262)."""
    # GH 262
    import numpy as np
    from arch.unitroot import ADF

    observations = [0., 0., 0., 0., 0., 0., 1., 1., 0., 0.]
    short_adf = ADF(np.asarray(observations))
    assert_almost_equal(short_adf.stat, -2.236, decimal=3)
    assert short_adf.lags == 1
def adf_test(timeseries, trend):
    """Run an ADF test with the requested deterministic trend.

    :param timeseries: series to test.
    :param trend: trend specification; converted with ``str`` and handed to
        ``ADF``.
    :return: ``(stat, pvalue)`` of the test.
    """
    from arch.unitroot import ADF

    # Configure the trend at construction rather than assigning it on the
    # finished test object; the unused regression lookup has been dropped.
    adf = ADF(timeseries, trend=str(trend))
    return (adf.stat, adf.pvalue)
def test_adf_auto_t_stat(self):
    """Exercise t-stat lag selection and the test's mutable/descriptive attributes."""
    known_trends = ("nc", "c", "ct", "ctt")

    adf = ADF(self.inflation, method="t-stat")
    assert_equal(adf.lags, 11)
    low_memory_adf = ADF(self.inflation, method="t-stat", low_memory=True)
    assert_equal(low_memory_adf.lags, 11)

    # Changing the lag length must change the statistic.
    previous_stat = adf.stat
    adf.lags += 1
    assert adf.stat != previous_stat

    # Changing the trend must change the statistic as well.
    previous_stat = adf.stat
    assert_equal(adf.y, self.inflation)
    adf.trend = "ctt"
    assert adf.stat != previous_stat
    assert adf.trend == "ctt"

    assert len(adf.valid_trends) == len(known_trends)
    for candidate in adf.valid_trends:
        assert candidate in known_trends

    assert adf.null_hypothesis == "The process contains a unit root."
    assert adf.alternative_hypothesis == "The process is weakly stationary."
def test_adf_auto_t_stat(self):
    """Exercise t-stat lag selection and the test's mutable/descriptive attributes."""
    known_trends = ('nc', 'c', 'ct', 'ctt')

    adf = ADF(self.inflation, method='t-stat')
    assert_equal(adf.lags, 11)
    low_memory_adf = ADF(self.inflation, method='t-stat', low_memory=True)
    assert_equal(low_memory_adf.lags, 11)

    # Changing the lag length must change the statistic.
    previous_stat = adf.stat
    adf.lags += 1
    assert adf.stat != previous_stat

    # Changing the trend must change the statistic as well.
    previous_stat = adf.stat
    assert_equal(adf.y, self.inflation)
    adf.trend = 'ctt'
    assert adf.stat != previous_stat
    assert adf.trend == 'ctt'

    assert len(adf.valid_trends) == len(known_trends)
    for candidate in adf.valid_trends:
        assert candidate in known_trends

    assert adf.null_hypothesis == 'The process contains a unit root.'
    assert adf.alternative_hypothesis == 'The process is weakly stationary.'
def test_representations(trend):
    """Check that the text and HTML representations name the trend.

    For trend ``'nc'`` the representations must contain ``'No Trend'``; for
    every other trend they must contain ``'Constant'``.
    """
    rnd = np.random.RandomState(12345)
    y = np.cumsum(rnd.randn(250))
    adf = ADF(y, trend=trend, max_lags=16)

    check = 'No Trend' if trend == 'nc' else 'Constant'

    # The original asserted on __repr__ twice; the first check now covers
    # str() so both text representations are exercised.
    assert check in str(adf)
    assert check in repr(adf)

    html = adf._repr_html_()
    assert check in html
    assert 'class="simpletable"' in html
def test_no_change_lags_trend(self):
    """Re-assigning ``lags``, ``trend`` and ``max_lags`` to their current values still warns."""
    adf = ADF(self.inflation)
    for attribute in ("lags", "trend", "max_lags"):
        unchanged = getattr(adf, attribute)
        with pytest.warns(FutureWarning, match="Mutating unit root"):
            setattr(adf, attribute, unchanged)
def test_representations(trend):
    """Check that the text and HTML representations name the trend.

    For trend ``"n"`` the representations must contain ``"No Trend"``; for
    every other trend they must contain ``"Constant"``.
    """
    rnd = np.random.RandomState(12345)
    y = np.cumsum(rnd.standard_normal(250))
    adf = ADF(y, trend=trend, max_lags=16)

    check = "No Trend" if trend == "n" else "Constant"

    # The original asserted on __repr__ twice; the first check now covers
    # str() so both text representations are exercised.
    assert check in str(adf)
    assert check in repr(adf)

    html = adf._repr_html_()
    assert check in html
    assert 'class="simpletable"' in html
def isCointegration(self, priceX, priceY):
    """Regress ``priceY`` on a constant plus ``priceX`` and ADF-test the residuals.

    The inputs are used as given (no transformation is applied here).

    :return: ``(flag, params[0], params[1])`` where ``flag`` is ``False`` when
        the ADF p-value of the residuals is at least 0.05 and ``True``
        otherwise, followed by the first two fitted OLS parameters.
    """
    fit = sm.OLS(priceY, sm.add_constant(priceX)).fit()
    spread_test = ADF(fit.resid)
    # p >= 0.05: residuals treated as non-stationary, so no cointegration;
    # otherwise residuals treated as stationary, so the pair is cointegrated.
    cointegrated = not (spread_test.pvalue >= 0.05)
    return (cointegrated, fit.params[0], fit.params[1])
def UnitRootTest(ret, isdefaultmethod=True, isprintsummary=False):
    """
    Run an Augmented Dickey-Fuller (ADF) unit-root test on ``ret``.

    If the null hypothesis is not rejected (p-value not below the
    module-level ``PVal``) the series is treated as having a unit root,
    i.e. non-stationary, and ``False`` is returned. If it is rejected the
    series is treated as weakly stationary and ``True`` is returned.

    ``isdefaultmethod`` selects the ``ADF`` class (True) or
    ``stattools.adfuller`` (False); ``isprintsummary`` turns on printing of
    the test details and the verdict.
    """
    separator = "- - - - - - - - - - - - - - - - - - - - - - - - - -"

    # Unless debugging, the default arguments are the recommended ones.
    if isdefaultmethod:
        # Default & recommended in the current version.
        outcome = ADF(ret, lags=None, max_lags=None, trend='c', method="AIC")
        if isprintsummary:
            print(outcome.summary().as_text())
        p_value = outcome.pvalue
    else:
        # Future preference.
        outcome = stattools.adfuller(ret, maxlag=None, regression='c',
                                     autolag="AIC", store=False,
                                     regresults=False)
        # Returns: adfstat, pvalue, usedlag, nobs, critvalues(, icbest)
        if isprintsummary:
            print("ADF test statistics: ", outcome[0])
            print("MacKinnon approximated p-value: ", outcome[1])
            print("# of lags used: ", outcome[2])
            print("# of obs. used for ADF test: ", outcome[3])
            print("Critical values for p-value = 0.01, 0.05, and 0.10: ")
            for level in ("1%", "5%", "10%"):
                print(outcome[4][level])
            print("The best information criterion (min. AIC): ", outcome[5])
        p_value = outcome[1]

    # Verdict: p-value below the threshold means weakly stationary.
    if p_value < PVal:
        if isprintsummary:
            print(separator)
            print("ADF stats p-value < %.2f -> weakly stationary!" % PVal)
            print(separator)
        return True

    if isprintsummary:
        print(separator)
        print("ADF stats p-value > %.2f -> non-stationary!" % PVal)
        print(separator)
    return False
def pares(dados, intervalo=(100, 120, 140, 160, 180, 200, 220, 250), char_excluido=-3, min_period=5):
    """Screen every ordered pair of columns for stationary regression residuals.

    For each ordered pair (independent column, dependent column) and each
    window length in ``intervalo``, the dependent column is regressed on the
    independent one over the last ``n`` rows and an ADF test is run on the
    residuals. A window "passes" when the ADF statistic is below the 5%
    critical value.

    :param dados: DataFrame with one column per asset.
    :param intervalo: window lengths (number of trailing rows). One
        ``'ADF-<n>'`` output column is created per entry, so the values
        should be distinct.
    :param char_excluido: slice end applied to each column label to build the
        asset name (``label[:char_excluido]``).
    :param min_period: minimum number of passing windows for a pair to be
        kept.
    :return: DataFrame with columns ``Ativo_Independente``,
        ``Ativo_Dependente``, one ``ADF-<n>`` column per window (the ADF
        statistic when the window passes, else 0) and ``Total`` (number of
        passing windows), restricted to rows with ``Total >= min_period``.
    """
    janelas = list(intervalo)
    # Column names follow the windows actually requested; the original
    # hard-coded the names of the default windows.
    colunas_adf = [f'ADF-{n}' for n in janelas]
    df = pd.DataFrame(
        columns=['Ativo_Independente', 'Ativo_Dependente', *colunas_adf, 'Total']
    )

    m = 0  # output row counter
    for i, rotulo_x in enumerate(dados.columns):
        ativo_x = rotulo_x[:char_excluido]
        for j, rotulo_y in enumerate(dados.columns):
            if j == i:
                continue
            par = dados.iloc[:, [i, j]]
            df.loc[m, 'Ativo_Independente'] = ativo_x
            df.loc[m, 'Ativo_Dependente'] = rotulo_y[:char_excluido]

            soma = 0
            for n, coluna in zip(janelas, colunas_adf):
                # Last n rows. Unlike ``iloc[len - n:]`` this cannot wrap to
                # a negative start when fewer than n rows are available.
                par_n = par.iloc[-n:, :]

                # Regression of the dependent on the independent asset.
                X = par_n.iloc[:, 0].values.reshape(-1, 1)
                y = par_n.iloc[:, 1].values
                modelo = LinearRegression()
                modelo.fit(X, y)
                residuos = y - modelo.predict(X)

                adf = ADF(residuos)
                if adf.stat < adf.critical_values['5%']:
                    df.loc[m, coluna] = float(adf.stat)
                    soma += 1
                else:
                    df.loc[m, coluna] = 0

            df.loc[m, 'Total'] = soma
            m += 1

    return df.loc[df['Total'] >= min_period, :]
def ADF_test(df_ts, lags=None):
    """
    Run the ADF test from arch, print its summary and return the test object.

    formula: xt-xt-1 ~ b0 + (b1-1)*xt-1 + e
    test if b1-1 == 0 ~ DF statistics

    :param df_ts: series to test. If building the test on the raw input
        raises, construction is retried on ``df_ts.dropna()``.
    :param lags: number of lags for the ADF regression. ``None`` (or the
        legacy string ``'None'``) leaves the lag choice to ``ADF``.
    :return: the ``ADF`` test object.
    """
    # The original compared against the string 'None', so the default
    # ``lags=None`` was never recognised as "not supplied". Treat both alike.
    if isinstance(lags, str) and lags == 'None':
        lags = None
    try:
        # Pass lags to the constructor instead of mutating the test afterwards.
        adf = ADF(df_ts, lags=lags)
    except Exception:
        # Best-effort fallback kept from the original: retry without NaNs.
        adf = ADF(df_ts.dropna(), lags=lags)
    print(adf.summary().as_text())
    return adf
def test():
    """Print the ADF summary of the residuals from regressing close_y on close_x.

    Prices for '300033' and '300059' are merged and split at '2015-12-31';
    only the first part of the split is used for the OLS fit.
    """
    prices = merge_prices(get_price('300033'), get_price('300059'))
    train_df, _ = split_by_date(prices, '2015-12-31')
    x, y = train_df.close_x, train_df.close_y

    # Optional diagnostics (disabled): ADF summaries of x, y and of their
    # first differences, and the OLS fit summary.
    ols_fit = sm.OLS(y, sm.add_constant(x)).fit()

    residual_test = ADF(ols_fit.resid)
    print(residual_test.summary())
def time_series(self):
    """Plot the ACF and PACF of ``self.rate`` and print its ADF test summary."""
    # The time series under study.
    series = self.rate

    # Autocorrelation and partial autocorrelation coefficients.
    # NOTE(review): neither result is used afterwards.
    acf_values = stattools.acf(series)
    pacf_values = stattools.pacf(series)

    # Autocorrelation plot, then partial autocorrelation plot.
    for plotter in (plot_acf, plot_pacf):
        plotter(series, use_vlines=True, lags=30)

    # Stationarity can be judged from (1) the time plot, (2) the ACF/PACF,
    # (3) unit-root tests (DF, ADF, PP). Here: the ADF test.
    print(ADF(series).summary().as_text())
def diff_selection(df, max_diff=12):
    """Return the smallest lag whose lag-differenced series passes the ADF test.

    For ``lag = 1 .. max_diff`` the first column of ``df`` is differenced with
    ``Series.diff(lag)``, rows containing missing values in any column are
    dropped, and an ADF test is run on the differenced column. The first lag
    with a p-value below 0.01 is returned.

    The original always returned ``max_diff`` whenever any lag passed (it
    returned the exhausted loop variable) and failed with an unbound local
    when none did.

    :param df: DataFrame whose first column is the series to difference.
    :param max_diff: largest lag to try.
    :return: the first lag (int) with ADF p-value < 0.01.
    :raises ValueError: if no lag in ``1 .. max_diff`` qualifies.
    """
    for lag in range(1, max_diff + 1):
        tmp = df.copy()
        tmp.loc[:, 'diff'] = tmp.loc[:, tmp.columns[0]].diff(lag)
        tmp.dropna(inplace=True)
        if ADF(tmp.loc[:, 'diff']).pvalue < 0.01:
            return lag
    raise ValueError(
        "no differencing lag in 1..%d gives an ADF p-value below 0.01" % max_diff
    )
def diff_process(self):
    """Test, difference and plot the first column of ``self.df``.

    Stores on the instance: the Ljung-Box result (``p_value``), the ADF test
    object (``ADF_value``), the series differenced at lag ``self.period``
    (``diff_period``) and that series differenced once more (``diff_``).
    The original, the seasonally differenced and the twice-differenced
    series are then shown in three stacked panels.
    """
    series = self.df.iloc[:, 0]

    # White-noise check. Author's note: a p-value above 0.05 is read as
    # white noise, i.e. no correlation over time.
    self.p_value = acorr_ljungbox(series, lags=1)
    print('白噪声检验p值:', self.p_value[1], '\n')

    # Unit-root test. Author's note: a p-value below 0.05 is read as stationary.
    self.ADF_value = ADF(series)

    # Seasonal differencing, then one ordinary difference on top of it.
    self.diff_period = series.diff(self.period).dropna()
    self.diff_ = self.diff_period.diff().dropna()

    fig = plt.figure(figsize=(20, 6))
    panels = (
        (311, series),            # original data
        (312, self.diff_period),  # after seasonal differencing
        (313, self.diff_),        # after the additional difference
    )
    for position, data in panels:
        fig.add_subplot(position).plot(data)
    plt.show()
def coint_period(dados, ativo_x, ativo_y, period=100, model=False):
    """Regress ``ativo_y`` on ``ativo_x`` over the last ``period`` rows.

    :param dados: DataFrame holding both asset columns.
    :param ativo_x: label of the independent column.
    :param ativo_y: label of the dependent column.
    :param period: number of trailing rows used for the regression.
    :param model: when equal to ``True``, return the fitted objects instead
        of the summary dictionary.
    :return: if ``model`` is true, the tuple ``(modelo, y_pred, residuos,
        mean(residuos), std(residuos))``. Otherwise a dict with the slope
        (``coef``), ``intercept``, residual mean and standard deviation
        (``media_res``, ``desvio_res``), the ADF statistic of the residuals
        (``adf_stats``) and whether it lies below the 1% / 5% critical values
        (``Coint_99`` / ``Coint_95``).
    """
    X = dados.loc[:, ativo_x].values[-period:].reshape(-1, 1)
    y = dados.loc[:, ativo_y].values[-period:]

    modelo = LinearRegression()
    modelo.fit(X, y)
    y_pred = modelo.predict(X)
    residuos = y - y_pred
    media_res = np.mean(residuos)
    desvio_res = np.std(residuos)

    # Equality (not truthiness) is kept so non-boolean arguments behave as before.
    if model == True:  # noqa: E712
        # The ADF test is not needed for this return shape, so it is skipped.
        return modelo, y_pred, residuos, media_res, desvio_res

    adf = ADF(residuos)
    estatistica = adf.stat
    criticos = adf.critical_values
    return {
        # coef_ is an array; .item() extracts its single element without
        # relying on float() of a non-scalar array.
        'coef': float(modelo.coef_.item()),
        'intercept': float(modelo.intercept_),
        'media_res': media_res,
        'desvio_res': desvio_res,
        'adf_stats': estatistica,
        'Coint_99': estatistica < criticos['1%'],
        'Coint_95': estatistica < criticos['5%'],
    }