def test_pp(self):
    """Check Phillips-Perron statistics on ``self.inflation`` with 12 lags.

    The statistic is compared against a reference value for the default
    test type (asserted to be 'tau') and again after the same instance is
    switched to 'rho'.  ``summary()`` is called last as a smoke check.
    """
    expected_tau = -7.8076512
    expected_rho = -108.1552688

    tester = PhillipsPerron(self.inflation, lags=12)
    assert_almost_equal(tester.stat, expected_tau, DECIMAL_4)
    assert tester.test_type == 'tau'

    # Re-use the instance: change the test type, then re-read the statistic.
    tester.test_type = 'rho'
    assert_almost_equal(tester.stat, expected_rho, DECIMAL_2)

    tester.summary()
def test_pp_auto(self):
    """Check the automatically chosen lag length and resulting statistics.

    The expected lag count is ``ceil(12 * (n / 100) ** 0.25)`` where ``n`` is
    one less than the number of rows in ``self.inflation``.
    """
    tester = PhillipsPerron(self.inflation)

    nobs = self.inflation.shape[0] - 1
    expected_lags = ceil(12.0 * (nobs / 100.0) ** 0.25)
    assert_equal(tester.lags, expected_lags)

    assert_almost_equal(tester.stat, -8.135547778, DECIMAL_4)

    # Same instance, switched to the rho form of the test.
    tester.test_type = 'rho'
    assert_almost_equal(tester.stat, -118.7746451, DECIMAL_2)
def test_pp(self):
    """Check Phillips-Perron statistics on ``self.inflation`` with 12 lags.

    The default test type is asserted to be "tau".  Assigning a new
    ``test_type`` on the existing instance is expected to emit a
    ``FutureWarning`` whose message matches "Mutating unit root"; the
    statistic is then re-checked and ``summary()`` is called as a smoke check.
    """
    expected_tau = -7.8076512
    expected_rho = -108.1552688

    tester = PhillipsPerron(self.inflation, lags=12)
    assert_almost_equal(tester.stat, expected_tau, DECIMAL_4)
    assert tester.test_type == "tau"

    # Only the mutation itself is expected to warn.
    with pytest.warns(FutureWarning, match="Mutating unit root"):
        tester.test_type = "rho"
    assert_almost_equal(tester.stat, expected_rho, DECIMAL_2)

    tester.summary()
def test_pp_auto(self):
    """Check the automatically chosen lag length and resulting statistics.

    The expected lag count is ``ceil(12 * (n / 100) ** 0.25)`` where ``n`` is
    one less than the number of rows in ``self.inflation``.  Switching the
    test type on the instance is expected to emit a ``FutureWarning``.
    """
    tester = PhillipsPerron(self.inflation)

    nobs = self.inflation.shape[0] - 1
    expected_lags = ceil(12.0 * (nobs / 100.0) ** 0.25)
    assert_equal(tester.lags, expected_lags)

    assert_almost_equal(tester.stat, -8.135547778, DECIMAL_4)

    # Only the mutation itself is expected to warn.
    with pytest.warns(FutureWarning, match="Mutating unit root"):
        tester.test_type = "rho"
    assert_almost_equal(tester.stat, -118.7746451, DECIMAL_2)
def test_stationarity(self, test: str, threshold=0.05, **kwargs):
    """
    Test ``self.series`` for stationarity with the requested unit-root test.

    Interesting ref:
    https://stats.stackexchange.com/questions/88407/adf-test-pp-test-kpss-test-which-test-to-prefer

    :param test: name of the test, matched case-insensitively as a substring.
        ADF, DFGLS and PP have a null of a unit root and an alternative of a
        stationary process. KPSS has a null of a stationary process and an
        alternative of a unit root.
        - 'adf' for the Augmented Dickey-Fuller test,
        - 'dfgls' for Elliott, Rothenberg and Stock's GLS version of the
          Dickey-Fuller test,
        - 'pp' for the Phillips-Perron test,
        - 'kpss' for the Kwiatkowski-Phillips-Schmidt-Shin test.
    :param threshold: significance level the p-value is compared against
        (typically 0.05).
    :param **kwargs: any additional arguments, forwarded unchanged to the
        selected test.
    :return: True if the p-value is below ``threshold`` (ADF, DFGLS, PP) or
        above ``threshold`` (KPSS), otherwise False.
    :raises ValueError: if ``test`` matches none of the supported names.
    """
    def _requested(name):
        return re.search(name, test, re.IGNORECASE) is not None

    if _requested('adf'):
        # Index the p-value instead of unpacking a fixed-length tuple: the
        # length of adfuller's return value depends on the kwargs passed.
        p_value = adfuller(self.series, **kwargs)[1]
        return p_value < threshold

    if _requested('kpss'):
        p_value = kpss(self.series, **kwargs)[1]
        # For KPSS the series is NOT stationary when the p-value is below
        # the threshold, hence the inverted comparison.
        return p_value > threshold

    if _requested('dfgls'):
        # Imported locally so the block does not depend on a module-level
        # import that may not exist in this file.
        from arch.unitroot import DFGLS
        return DFGLS(self.series, **kwargs).pvalue < threshold

    if _requested('pp'):
        return PhillipsPerron(self.series, **kwargs).pvalue < threshold

    raise ValueError("Invalid `test`")
def stationarity_test(self, stock_data, column):
    """Plot rolling statistics and print ADF, KPSS and Phillips-Perron results.

    A 30-period rolling mean and standard deviation of ``self.stock_data``
    are plotted next to the data itself.  Missing values are then dropped
    from ``self.stock_data`` in place and the three tests are run on
    ``self.stock_data[column]``, with their results printed.

    NOTE(review): the ``stock_data`` argument is never read; every step
    works on ``self.stock_data``.  Confirm whether that is intended.

    :param stock_data: unused.
    :param column: label of the column the tests are run on.
    """
    separator = "------------------------------------------------------------------------------"
    window = 30
    frame = self.stock_data

    rolling_mean = frame.rolling(window).mean()
    rolling_std = frame.rolling(window).std()
    curves = (
        (frame, 'blue', 'Original'),
        (rolling_mean, 'red', 'Rolling Mean'),
        (rolling_std, 'black', 'Rolling Std'),
    )
    for curve, colour, caption in curves:
        plt.plot(curve, color=colour, label=caption)
    plt.legend(loc='best')
    plt.title("Rolling Mean & Standard Deviation")
    plt.show()

    # Augmented Dickey-Fuller
    print("Results of Dickey-Fuller Test")
    frame.dropna(inplace=True)
    adf_result = adfuller(frame[column])
    print(adf_result)
    adf_report = pd.Series(
        adf_result[0:4],
        index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'],
    )
    for level, critical in adf_result[4].items():
        adf_report['Critical Value(%s)' % level] = critical
    print(adf_report)

    # KPSS
    print('Results of KPSS Test:')
    print(separator)
    kpss_result = kpss(frame[column], regression='c')
    kpss_report = pd.Series(
        kpss_result[0:3],
        index=['Test Statistic', 'p-value', 'Lags Used'],
    )
    for level, critical in kpss_result[3].items():
        kpss_report['Critical Value (%s)' % level] = critical
    print(kpss_report)
    print(separator)

    # Phillips-Perron
    print('Results of Phillips-Perron Test:')
    print(PhillipsPerron(frame[column]))
    print(separator)
def stationarity(self, y):
    """Exit the process unless both ADF and Phillips-Perron reject at 5%.

    Both tests are run on ``y`` with the 'nc' regression/trend option.  When
    both p-values are below 0.05 a confirmation is printed; otherwise
    ``sys.exit`` is called with an explanatory message.
    """
    adf_pvalue = adfuller(y, regression='nc')[1]
    pp_pvalue = PhillipsPerron(y, trend='nc').pvalue

    both_reject = adf_pvalue < 0.05 and pp_pvalue < 0.05
    if not both_reject:
        sys.exit("Data not stationary")
    print("Data is stationary")
def PhilipsPerronTest(data, printResults=True, trend=None, lags=None):
    """Run Phillips-Perron tests over every column, trend and lag combination.

    :param data: object exposing ``columns`` and column access by label;
        each column has its missing values dropped before testing.
    :param printResults: when true, print the summary of every test run.
    :param trend: trend option(s) to try: a single string or an iterable of
        strings.  Defaults to 'nc', 'c' and 'ct'.
    :param lags: lag count(s) to try: a single integer or an iterable of
        integers.  Defaults to 0 through 24.
    :return: nested dict ``results[column][trend][lag]`` holding the
        ``PhillipsPerron`` objects.
    """
    # Ordered defaults (instead of sets) keep the run order deterministic.
    if trend is None:
        options_Trend = ('nc', 'c', 'ct')
    elif isinstance(trend, str):
        # A bare string would otherwise be iterated character by character.
        options_Trend = (trend,)
    else:
        options_Trend = tuple(trend)

    if lags is None:
        options_Lags = tuple(range(25))
    elif isinstance(lags, int):
        options_Lags = (lags,)
    else:
        options_Lags = tuple(lags)

    results = {}
    for column in data.columns:
        print("Philips Perron test for column: " + str(column))
        # The cleaned series does not depend on trend or lag: compute it once.
        series = data[column].dropna()
        results_Trend = {}
        for option_Trend in options_Trend:
            results_Lag = {}
            for option_Lag in options_Lags:
                result = PhillipsPerron(series, trend=option_Trend, lags=option_Lag)
                if printResults:
                    # summary() only builds the report; it has to be printed.
                    print(result.summary())
                results_Lag[option_Lag] = result
            results_Trend[option_Trend] = results_Lag
        results[column] = results_Trend
    return results
def generate_stationarity_dataframe(potential_pairs_index, cv_spreads_train):
    """Collect ADF, KPSS, Phillips-Perron and variance-ratio results per pair.

    For every key in ``potential_pairs_index`` the spread
    ``cv_spreads_train[key]`` is run through the four tests and the test
    statistic and p-value of each are recorded.

    Warnings raised while the tests run are suppressed, but only for the
    duration of this call: the process-wide warning filters are restored
    before returning.

    :param potential_pairs_index: iterable of keys into ``cv_spreads_train``;
        also used as the index of the returned frame.
    :param cv_spreads_train: mapping/frame from pair key to spread series.
    :return: ``pandas.DataFrame`` with one row per pair and the columns
        ``adf_*``, ``kpss_*``, ``pp_*`` and ``vr_*`` (``t_stat`` / ``p_value``).
    """
    column_names = (
        'adf_t_stat', 'adf_p_value',
        'kpss_t_stat', 'kpss_p_value',
        'pp_t_stat', 'pp_p_value',
        'vr_t_stat', 'vr_p_value',
    )
    collected = {name: [] for name in column_names}

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        for pair in potential_pairs_index:
            spread = cv_spreads_train[pair]

            adf_result = ts.adfuller(spread)
            kpss_result = ts.kpss(spread)
            pp_result = PhillipsPerron(spread)
            pp_stat, pp_pvalue = pp_result.stat, pp_result.pvalue
            vr_result = VarianceRatio(spread)
            vr_stat, vr_pvalue = vr_result.stat, vr_result.pvalue

            collected['adf_t_stat'].append(adf_result[0])
            collected['adf_p_value'].append(adf_result[1])
            collected['kpss_t_stat'].append(kpss_result[0])
            collected['kpss_p_value'].append(kpss_result[1])
            collected['pp_t_stat'].append(pp_stat)
            collected['pp_p_value'].append(pp_pvalue)
            collected['vr_t_stat'].append(vr_stat)
            collected['vr_p_value'].append(vr_pvalue)

    return pd.DataFrame(collected, index=potential_pairs_index)
def unitroot_test(series):
    """Plot a series and print ADF and Phillips-Perron unit-root summaries.

    Steps:
    1. plot the series and its partial autocorrelation function;
    2. print AIC and BIC of no-trend autoregressions for 12 down to 1 lags,
       one LaTeX table row per lag count;
    3. pick the ADF lag length with ``ar_select_order`` and print the ADF
       summary;
    4. print Phillips-Perron summaries (3 lags) for the tau and rho forms.

    :param series: the time series to analyse.
    """
    # Basic statistic
    plt.figure()
    plt.plot(series)
    plot_pacf(series)

    # ADF test
    # AIC & BIC from lags 12 to 1
    print('$p$ & AIC & BIC \\\\')
    max_lags = 12
    for lags in range(max_lags, 0, -1):
        res = AutoReg(series, lags, 'n').fit()
        print(f'{lags} & {round(res.aic, 3)} & {round(res.bic, 3)} \\\\')

    # Best lags by `ar_select_order`
    sel = ar_select_order(series, max_lags, trend='n')
    selected_lags = sel.ar_lags
    # `ar_lags` is None when no lag is selected; fall back to 0 lags
    # instead of failing on the subscript.
    lags = selected_lags[-1] if selected_lags else 0
    print(f'Lags selection: {selected_lags}')

    # Start ADF test
    adf = ADF(series, lags)
    print(adf.summary())

    # PP test
    pp_tau = PhillipsPerron(series, 3, test_type='tau')  # q = 3
    pp_rho = PhillipsPerron(series, 3, test_type='rho')  # q = 3
    print(pp_tau.summary())
    print(pp_rho.summary())
def trend_test(vals):
    """Classify the trend of ``vals`` from Phillips-Perron and KPSS p-values.

    Both p-values are compared against 0.05:

    * PP rejects, KPSS does not  -> ``(True, "deterministic")``
    * KPSS rejects, PP does not  -> ``(True, "stochastic")``
    * both reject, or neither    -> ``("Unknown", "Unknown")``

    :return: 2-tuple as described above.
    """
    pp_pvalue = PhillipsPerron(vals).pvalue
    kpss_pvalue = KPSS(vals).pvalue

    # reject H0 of pp, do not reject H0 of KPSS
    if pp_pvalue <= .05 and kpss_pvalue > .05:
        return True, "deterministic"

    # reject H0 of KPSS, do not reject H0 of pp
    if pp_pvalue > .05 and kpss_pvalue <= .05:
        return True, "stochastic"

    # Either both nulls are rejected (possible long range dependence) or
    # neither is (the series is not informative enough): undecided.
    return "Unknown", "Unknown"
def PP_Test(self, timeseries, printResults=True):
    """
    Phillips-Perron (PP) Test

    Null Hypothesis is Unit Root
    Reject Null Hypothesis >> Series is stationary >> Use price levels
    Fail to Reject >> Series has a unit root >> Use price returns

    Stores the p-value in ``self.pValue`` and the verdict (p-value below
    ``self.SignificanceLevel``) in ``self.isStationary``; optionally prints
    the verdict.
    """
    self.pValue = PhillipsPerron(timeseries).pvalue
    self.isStationary = bool(self.pValue < self.SignificanceLevel)

    if not printResults:
        return

    verdict = 'Stationary' if self.isStationary else 'Not Stationary'
    print('Phillips-Perron (PP) Test Results: {}'.format(verdict))
def coint_test(self, x: np.ndarray, y: np.ndarray, alpha: float = 0.05) -> bool:
    """Performs Engle Granger co-integration test

    ``y`` is regressed on ``x`` with ``self.lr`` and a Phillips-Perron test
    is run on ``self.lr.residuals``.

    :param x: log price
    :type x: np.ndarray, shape = (n_samples,)
    :param y: log price
    :type y: np.ndarray, shape = (n_samples,)
    :param alpha: significance level, defaults to 0.05
    :type alpha: float
    :return: True if two series are co-integrated
    :rtype: bool
    """
    # Perform a regression of y on x (x is passed as a single-column 2-D array)
    self.lr.fit(x[:, np.newaxis], y)

    # Check if residuals are stationary
    pp = PhillipsPerron(self.lr.residuals)

    # Null hypothesis: process is not stationary
    return bool(pp.pvalue < alpha)
# Skewness and kurtosis: only the first entry of each column-wise result is printed.
print('Lean Hogs Future skewness is {}'.format(lh.skew(axis=0)[0]))
print('Lean Hogs Future kurtosis is {}'.format(lh.kurtosis(axis=0)[0]))
sns.distplot(lh['Close'], color='blue') # density plot of the 'Close' column
plt.title('1986–2018 Lean Hogs Future return frequency')
plt.xlabel('Possible range of data values')
# Pull up summary statistics
print(lh.describe())
# Unit-root / stationarity tests on the 'Close' column; each summary is printed as text.
adf = ADF(lh['Close'])
print(adf.summary().as_text())
kpss = KPSS(lh['Close'])
print(kpss.summary().as_text())
dfgls = DFGLS(lh['Close'])
print(dfgls.summary().as_text())
pp = PhillipsPerron(lh['Close'])
print(pp.summary().as_text())
za = ZivotAndrews(lh['Close'])
print(za.summary().as_text())
# Variance-ratio test; the second positional argument is 12.
vr = VarianceRatio(lh['Close'], 12)
print(vr.summary().as_text())
from arch import arch_model
# The model is fitted on the data scaled by 100.
X = 100 * lh
import datetime as dt
am = arch_model(X, p=4, o=0, q=0, vol='Garch', dist='StudentsT')
# Fit on observations up to 2003-12-31, then forecast one step ahead starting 2004-01-01.
res = am.fit(last_obs=dt.datetime(2003, 12, 31))
forecasts = res.forecast(horizon=1, start='2004-1-1')
# Keep the forecast means from 2004 onwards.
cond_mean = forecasts.mean['2004':]
def test_pp_bad_type(self):
    """Assigning an unsupported ``test_type`` must raise ``ValueError``."""
    tester = PhillipsPerron(self.inflation, lags=12)
    invalid_type = 'unknown'
    with pytest.raises(ValueError):
        tester.test_type = invalid_type
# for AAPL
# MA(15) fits model the best.
# stats: 3.04749309416, pvalue: 1.0
# stats: 5.14080783224, pvalue: 0.999999999468
# h0: rho == 1 is not rejected. There is unit root.
test_series = goog_df
from arch.unitroot import PhillipsPerron

# fit MA on resid.
import statsmodels.tsa.arima_model as arma

# Fit a pure MA model for each candidate order and report its AIC.
for ma_lag in [7, 8, 9, 10, 11, 12, 13, 15, 20]:
    model = arma.ARMA(test_series, (0, ma_lag)).fit()
    print('lag: {}, aic: {}'.format(ma_lag, model.aic))

# NOTE(review): the original indentation was lost; the two Phillips-Perron
# runs do not depend on `ma_lag`, so they are placed after the loop.
pp = PhillipsPerron(test_series, trend='c', lags=10, test_type='tau')
print('stats: {}, pvalue: {}'.format(pp.stat, pp.pvalue))
pp = PhillipsPerron(test_series, trend='c', lags=10, test_type='rho')
print('stats: {}, pvalue: {}'.format(pp.stat, pp.pvalue))

### using adf test
### goog and aapl t-value
# 0.876758903592
# 0.999070159449
test_series = goog_df
from arch.unitroot import ADF
adf = ADF(goog_df, lags=24)
print(adf.pvalue)
adf = ADF(aapl_df, lags=24)
print(adf.pvalue)
def test_phillips_perron_specifed_lag():
    """A 10-observation series with 12 requested lags must be infeasible.

    Reading ``stat`` is expected to raise ``InfeasibleTestException`` with a
    message matching "A minimum of 12 observations".
    """
    too_short = np.zeros(10)
    with pytest.raises(InfeasibleTestException, match="A minimum of 12 observations"):
        statistic = PhillipsPerron(too_short, lags=12).stat
        assert np.isfinite(statistic)
def pp(serie, tipo="nc", lag=False):
    """Run a Phillips-Perron test and return one field of the result.

    :param serie: series to test.
    :param tipo: value passed as the ``trend`` option of the test.
    :param lag: when truthy return the lag count used, otherwise the p-value.
    """
    outcome = PhillipsPerron(serie, trend=tipo)
    return outcome.lags if lag else outcome.pvalue
# Augmented Dickey-Fuller from statsmodels.tsa.stattools import adfuller adf1 = adfuller(df.adjClose, regression="ct") print("\n", "Teste 'Augmented Dickey-Fuller' para série por níveis (p-value):", adf1[1]) # P-value > 0.05 => série é não estacionária adf2 = adfuller(tsReturns, regression="nc") print("\n", "Teste 'Augmented Dickey-Fuller' para série de retornos (p-value):", adf2[1]) # P-value = 0 < 0.05 => série é estacionária # Phippips Perron (PP), com H_0: série é não estacionária from arch.unitroot import PhillipsPerron pp1 = PhillipsPerron(df.adjClose) print("\n", "Teste 'Phillips Perron' para série por níveis:", pp1) # P-value > 0.05 => série é não estacionária pp2 = PhillipsPerron(tsReturns) print("\n", "Teste 'Phillips Perron' para série de retornos:", pp2) # P-value = 0 < 0.05 => série é estacionária # Kwiatkowski-Phillips-Schmidt-Shin (KPSS), com H_0: série é estacionária from statsmodels.tsa.stattools import kpss kpss1 = kpss(df.adjClose, regression='ct') print("\n", "Teste KPSS para série por níveis:", kpss1[1]) # P-value < 0.05 => série é não estacionária kpss2 = kpss(tsReturns, regression='ct')
def test_pp_regression(self):
    """Check the regression exposed by a 12-lag Phillips-Perron test.

    The regression must have two parameters and its summary text must
    mention a HAC covariance estimator using 12 lags.
    """
    tester = PhillipsPerron(self.inflation, lags=12)
    fitted = tester.regression

    assert len(fitted.params) == 2

    summary_text = str(fitted.summary())
    assert "(HAC) using 12 lags" in summary_text
def get_phillips_perron(timeseries):
    """Print the text summary of a Phillips-Perron test on ``timeseries``."""
    from arch.unitroot import PhillipsPerron

    report = PhillipsPerron(timeseries).summary()
    print(report.as_text())
'CME Lean Hogs Future Close Price', 'CME Lean Hogs Future Annualized Volatility', 'CME Lean Hogs Future Return' ])
# NOTE(review): the line above is the tail of a call that starts outside this view.
#plt.savefig(files.image_path + '\LH_close_vol_return.png')
data = lh['Close'].resample('M').mean() # resample daily data to monthly data
# Keep the 1992 through 2004 slice.
data = data['1992':'2004']
data = np.log(data / data.shift(1)).dropna() * 100 # log of the ratio to the previous value, times 100, missing values dropped
# Unit-root / stationarity tests on the transformed series; each summary is printed as text.
adf = ADF(data)
print(adf.summary().as_text())
kpss = KPSS(data)
print(kpss.summary().as_text())
dfgls = DFGLS(data)
print(dfgls.summary().as_text())
pp = PhillipsPerron(data)
print(pp.summary().as_text())
za = ZivotAndrews(data)
print(za.summary().as_text())
# Variance-ratio test; the second positional argument is 12.
vr = VarianceRatio(data, 12)
print(vr.summary().as_text())
# Descriptive statistics of the transformed series.
print(data.describe())
print('Lean Hogs Future skewness is {}'.format(data.skew(axis=0)))
print('Lean Hogs Future kurtosis is {}'.format(data.kurtosis(axis=0)))
import matplotlib.gridspec as gridspec
import statsmodels.api as sm
import scipy.stats as stats
import seaborn as sns