def vector_check(y, x, n=5):
    """Print unit-root, Granger-causality and Engle-Granger diagnostics.

    Steps, in order:

    1. ``adf`` on the regressand ``y`` and then on the regressor ``x``.
    2. ``gct`` on ``[y, x]`` (labelled "x to y") and on ``[x, y]``
       (labelled "y to x"), both with ``maxlag=n``. The return value of
       ``gct`` is discarded, so any report comes from ``gct`` itself.
    3. OLS of ``y`` on ``x`` plus a constant, followed by ``adf`` on the
       regression residuals.

    Everything is written to stdout; the function returns ``None``.
    """
    unit_root_inputs = (
        ('\nunit root test for regressand\n', y),
        ('\nunit root test for regressor\n', x),
    )
    for header, series in unit_root_inputs:
        print(header, adf(series))

    print('\ngranger causality test\n')
    # Column order matters to gct, hence the two explicit orderings.
    causality_inputs = (
        ('x to y', [y, x]),
        ('\ny to x', [x, y]),
    )
    for header, columns in causality_inputs:
        print(header)
        gct(pd.concat(columns, axis=1), maxlag=n)

    print('\n\nEngle-Granger')
    design = sm.add_constant(x)
    ols_fit = sm.OLS(y, design).fit()
    print('\n', adf(ols_fit.resid))
def ADF_test(residuals, output_log=False, title="ADF Test Results"):
    """Plot the lag-1 change of *residuals* and run ``adf`` on it.

    Parameters
    ----------
    residuals
        Object supporting ``.shift()`` and subtraction (a pandas Series is
        assumed -- TODO confirm against callers).
    output_log
        When true, print *title*, the test statistic, p-value, lags used,
        number of observations and each critical value.
    title
        Heading printed before the results when *output_log* is true.

    Returns
    -------
    The object returned by ``adf`` unchanged. Elements 0-3 are read here
    as statistic, p-value, used lags and observation count; element 4 as
    a mapping of critical values.
    """
    # shift() moves every value one step later, so this is
    # previous - current; the first element is NaN and is dropped.
    shifted = residuals.shift() - residuals
    shifted.dropna(inplace=True)

    plt.plot(shifted, c='green')
    plt.show()

    # NOTE(review): recent statsmodels releases spell the no-constant
    # option 'n' instead of 'nc' -- confirm against the pinned version.
    adf_value = adf(shifted, regression='nc')

    if output_log:
        # TODO: output on the figure eventually, that looks really professional
        test_statistic, pvalue, usedlags, nobs = adf_value[:4]
        # Single-argument print(...) emits the same text on Python 2 and 3;
        # the previous print statements were a SyntaxError on Python 3.
        print(title)
        print("Test Statistic: %.4f\nP-Value: %.4f\nLags Used: %d\nObservations: %d"
              % (test_statistic, pvalue, usedlags, nobs))
        for crit, value in adf_value[4].items():
            print("%s %s" % (crit, value))

    return adf_value
def SARIMAX(df, future, m=1, n=1, o=1, lag=12):
    """Fit a seasonal ARIMA model to *df* and plot a *future*-step forecast.

    Prints ``adf`` of the differenced series, shows the PACF and ACF of
    *df*, fits ``sm.tsa.statespace.SARIMAX`` with ``order=(m, n, o)`` and
    ``seasonal_order=(1, 1, 1, lag)`` (stationarity and invertibility not
    enforced), shows the fit diagnostics, prints the summary and finally
    plots forecast, fitted and actual values with the forecast confidence
    band. Returns ``None``.
    """
    # The first diff() value is NaN; it is filled from the back-filled
    # original series before testing.
    print(adf(df.diff().fillna(df.bfill())))

    correlogram = plt.figure(figsize=(20, 20))
    sm.graphics.tsa.plot_pacf(df, ax=correlogram.add_subplot(211))
    sm.graphics.tsa.plot_acf(df, ax=correlogram.add_subplot(212))
    plt.show()

    model = sm.tsa.statespace.SARIMAX(
        df,
        order=(m, n, o),
        seasonal_order=(1, 1, 1, lag),
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    fitted = model.fit()
    fitted.plot_diagnostics(figsize=(20, 10))
    print(fitted.summary())

    forecast = fitted.get_forecast(steps=future)
    axis = plt.figure(figsize=(20, 10)).add_subplot(111)
    axis.plot(forecast.predicted_mean, label='forecast', c=pick_a_color())
    axis.plot(fitted.predict(), label='fitted', c=pick_a_color())
    axis.plot(df, label='actual', c=pick_a_color())

    band = forecast.conf_int()
    axis.fill_between(
        band.index,
        band.iloc[:, 0],
        band.iloc[:, 1],
        alpha=.25,
        color=pick_a_color(),
    )
    plt.legend(loc='best')
    plt.title('%s steps ahead forecast' % (future))
    plt.show()
def seasonality_check(df, freq='monthly'):
    """Show several seasonality diagnostics for a time series.

    Parameters
    ----------
    df
        Series to inspect. The code reads ``df.index.month`` /
        ``df.index.quarter``, so a pandas Series with a DatetimeIndex is
        assumed -- TODO confirm against callers.
    freq
        ``'monthly'`` (period 12) or ``'quarterly'`` (period 4). Any other
        value yields period 0, exactly as before; the HP-filter step also
        recognises ``'annual'``, but whether the decomposition step
        accepts period 0 depends on ``sd`` -- verify before relying on it.

    Prints and plots, in order: the ``sd`` decomposition (trend,
    seasonality, residual) with ``adf``/ACF/PACF of the input, the HP
    filter cycle and trend, a differenced series, and a seasonally
    weighted series. *df* is not modified. Returns ``None``.
    """
    # Plain ints instead of 0-d arrays from np.select; same values, same
    # 0 fallback for unrecognised frequencies.
    lag = {'monthly': 12, 'quarterly': 4}.get(freq, 0)

    print('ARIMA decomposition')
    df2 = sd(df, freq=lag)
    print(adf(df))
    sm.graphics.tsa.plot_acf(df)
    plt.show()
    sm.graphics.tsa.plot_pacf(df)
    plt.show()
    df.plot()
    plt.title('original')
    plt.show()
    for component, label in ((df2.trend, 'trend'),
                             (df2.seasonal, 'seasonality'),
                             (df2.resid, 'residual')):
        component.plot(c=pick_a_color())
        plt.title(label)
        plt.show()

    print('HP filter')
    hplag = {'monthly': 14400, 'quarterly': 1600, 'annual': 100}.get(freq, 0)
    cycle, trend = sm.tsa.filters.hpfilter(df, hplag)
    cycle.plot(c=pick_a_color())
    plt.title('cycle')
    plt.show()
    trend.plot(c=pick_a_color())
    plt.title('trend')
    plt.show()

    print('differential')
    # NOTE(review): this subtracts the change observed lag - 1 periods
    # ago; a textbook seasonal difference of the first difference would
    # use shift(lag) and shift(lag + 1). Left unchanged -- confirm intent.
    df3 = df - df.shift(1) - (df.shift(lag - 1) - df.shift(lag))
    df3.plot(c=pick_a_color())
    plt.show()

    print('weighted')
    # Bucket each observation by its position in the seasonal cycle.
    # Quarterly data must use the quarter number: bucketing by calendar
    # month only produced weights for months 1-4 and then failed with a
    # KeyError on any later month.
    if freq == 'quarterly':
        bucket = df.index.quarter
    else:
        bucket = df.index.month

    overall_mean = np.mean(df)
    seasonal_weight = {}
    for i in range(1, lag + 1):
        seasonal_weight[i] = np.mean(df[bucket == i]) / overall_mean
        print(seasonal_weight[i])

    # Build a new series rather than writing row by row into
    # pd.Series(df), which does not copy and could therefore write
    # through to the caller's data.
    divisors = np.array([seasonal_weight[b] for b in bucket])
    df_adj = df / divisors
    df_adj.plot(c=pick_a_color())
    plt.show()
def draw_picture():
    """Print the ``adf`` result of the ``data_value`` column per parameter.

    Data for all five parameter codes is fetched through ``get_data``
    before any test runs. Each result is printed next to the parameter's
    Chinese display name. Matplotlib is configured for the SimHei font;
    the plotting calls themselves are commented out. Always returns 0.
    """
    # Display names (runtime strings, kept verbatim): water temperature,
    # ammonia nitrogen, dissolved oxygen, total phosphorus, permanganate.
    wdp_mode = {
        'W01': '水温',
        '060': '氨氮',
        'W02': '溶解氧',
        '101': '总磷',
        'W07': '高锰酸盐',
    }
    parameter_type = ['W01', '060', 'W02', '101', 'W07']

    data_set = {}
    for parameter in parameter_type:
        data_set[parameter] = get_data(parameter)

    for key in data_set:
        frame = data_set[key]
        print(wdp_mode[key], adf(frame['data_value']))
        mpl.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese text correctly
        mpl.rcParams['axes.unicode_minus'] = False
        # plt.title(wdp_mode[key])  # show the figure title
        # plt.show()
    return 0
def run_test(self, num_timesteps_back, alpha=0.05):
    """Return whether every series in ``self.stats`` passes both checks.

    For each value in ``self.stats`` two conditions are evaluated on the
    last *num_timesteps_back* samples:

    * element ``[1]`` of ``adf(window)`` is below *alpha*;
    * element ``[1]`` of ``ttest(first_half, second_half)`` is above
      *alpha*, where the window is split at ``int(-num_timesteps_back / 2)``.

    A ``ValueError`` raised by either call counts as a failed check.

    Parameters
    ----------
    num_timesteps_back
        Number of trailing samples to examine.
    alpha
        Threshold both element-``[1]`` values are compared against.

    Returns
    -------
    ``np.all`` over the per-series outcomes; true for an empty
    ``self.stats``.
    """
    window_start = int(-num_timesteps_back)
    split = int(-num_timesteps_back / 2)

    results = []
    for data in self.stats.values():
        try:
            adf_res = adf(data[-num_timesteps_back:])[1] < alpha
        except ValueError:
            adf_res = None
        try:
            ttest_res = ttest(data[window_start:split], data[split:])[1] > alpha
        except ValueError:
            ttest_res = None

        if adf_res is None or ttest_res is None:
            # Record an explicit False: a None in the list turns it into
            # an object array, for which np.all may return None rather
            # than a boolean.
            results.append(False)
        else:
            results.append(adf_res and ttest_res)
    return np.all(results)
# Histogram of df['epsilon'] with the top and right plot borders hidden.
ax = plt.figure(figsize=(10, 5)).add_subplot(111)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
df['epsilon'].hist(histtype='bar', color='#ede574', width=0.007, bins=80)
plt.title('OLS vs Elastic Net', fontsize=15)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.grid(False)
plt.ylabel('Frequency')
plt.xlabel('Interval')
plt.show()

print(adf(df['epsilon']))
# Unit root test results from a previous run:
# (-2.4689818197725981, 0.12320492058022914, 2, 1286, {'1%': -3.4354451795550935, '5%': -2.863790090661305, '10%': -2.5679679660127368}, -6151.8371655225037)
# The statistic (-2.47) is above even the 10% critical value (-2.57),
# so the unit root cannot be rejected: this is not a stationary process.

# In[6]:

# Next step: compare the mean and standard deviation of the two approaches.
df['sk_residual'] = df['nok'] - df['sk_fit']
df['ols_residual'] = df['nok'] - df['ols_fit']

# Each line prints True when the 'sk' residuals have the larger value.
print(np.mean(df['sk_residual']) > np.mean(df['ols_residual']))
print(np.std(df['sk_residual']) > np.std(df['ols_residual']))
# Boolean values:
#we have to use Engle-Granger two step! #salute to Engle, mentor of my mentor Gallo #to the nobel prize winner #im not gonna explain much here #if u have checked my other codes, u sould know #details are in pair trading session # https://github.com/je-suis-tm/quant-trading/blob/master/Pair%20trading%20backtest.py x2=df['eur'][df.index<'2017-04-25'] x3=sm.add_constant(x2) model=sm.OLS(y,x3).fit() ero=model.resid print(adf(ero)) print(model.summary()) #(-2.5593457642922992, 0.10169409761939013, 0, 1030, #{'1%': -3.4367147300588341, '5%': -2.8643501440982058, '10%': -2.5682662399849185}, -1904.8360920752475) #0.731199409071 #unfortunately, the residual hasnt even reached 90% confidence interval #we cant conclude any cointegration from the test #still, from the visualization #we can tell nok and eur are somewhat correlated #our rsquared suggested euro has the power of 73% explanation on nok # In[14]:
plt.rcParams['font.sans-serif'] = ['SimHei'] #用来正确显示中文 plt.rcParams['axes.unicode_minus'] = False # 用来正确显示负号 arima_data.plot() plt.show() # In[57]: #自相关图 from statsmodels.graphics.tsaplots import plot_acf plot_acf(arima_data) # In[58]: from statsmodels.tsa.stattools import adfuller as adf print(adf(arima_data[u'销量'])) # In[59]: D_arima_data = arima_data.diff().dropna() D_arima_data.columns = [u'时间差分'] D_arima_data.plot() # In[60]: plot_acf(D_arima_data) # In[64]: from statsmodels.graphics.tsaplots import plot_pacf plot_pacf(D_arima_data) #偏自相关图
# NOTE(review): the next two statements are the tail of a function whose
# header lies outside this view (presumably ``diff``, called below);
# re-indent them under that header when merging.
diff.append(value)
return Series(diff)


# Revert dataset back from 'deseasonalization'
def inverse_diff(history, yhat, interval=12):
    """Return *yhat* plus the value *interval* positions from the end of *history*."""
    return yhat + history[-interval]


# Determine initial p,d,q values for ARIMA
station = diff(X)

# Check if stationary: element 0 of the adf result is printed as the
# statistic, element 1 as the p-value, element 4 as the critical values.
result = adf(station)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for percent, value in result[4].items():
    print('\t%s: %.3f' % (percent, value))

# P value is smaller than 1%. dataset is stationary and null hypothesis can be rejected.
# d will start with a value of 0

# Determine p,q values by plotting ACF and PACF
# distribution is not Gaussian, so ACF may be useless
plt.figure()
plt.subplot(211)
plot_acf(station, ax=plt.gca())
plt.subplot(212)
# Load the workbook and index it by the parsed 'Date' column.
df = pd.read_excel('timeseries.xlsx')
df.index = pd.to_datetime(df['Date'])
df['Person Rate'].plot()

import statsmodels.api as sm
import matplotlib.pyplot as plt

# ACF (top) and PACF (bottom) of 'Person Rate', 16 lags each.
fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(df['Person Rate'],lags = 16, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(df['Person Rate'], lags = 16, ax=ax2)
plt.show()

# Augmented Dickey-Fuller test on 'Call Rate'.
from statsmodels.tsa.stattools import adfuller as adf
x = df['Call Rate']
result = adf(x)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
# The header used to be printed with nothing after it; list the values.
for percent, value in result[4].items():
    print('\t%s: %.3f' % (percent, value))

model=sm.tsa.ARIMA(endog=df['Call Rate'],order=(0,1,6))
# `start_params=` with no value was a SyntaxError; use the default
# starting parameters.
results=model.fit()
print(results.summary())

def test_stationarity(timeseries):
    # Determine rolling statistics over a 12-observation window.
    # pd.rolling_mean / pd.rolling_std were removed from pandas; the
    # .rolling() accessor is the replacement (assumes *timeseries* is a
    # pandas object -- TODO confirm against callers).
    rolmean = timeseries.rolling(window=12).mean()
    rolstd = timeseries.rolling(window=12).std()
    # Plot rolling statistics
Y = list(training_df.loc[:, stock2]) # Use TLS to determine the hedge ratio linear = odrpack.Model(f) mydata = odrpack.RealData(X, Y) myodr = odrpack.ODR(mydata, linear, beta0=[1., 2.]) myoutput = myodr.run() intercept = myoutput.beta[1] slope = myoutput.beta[0] residual = [] for i in range(len(X)): residual.append(Y[i] - slope * X[i] - intercept) result = adf(residual) AIC_test_value = result[0] p_value = result[1] vol_dict[pairs] = np.std(residual) vol_list.append(np.std(residual)) pairs_testvalue_dict[pairs] = AIC_test_value sorted_by_value = sorted(pairs_testvalue_dict.items(), key=lambda x: x[1], reverse=False) sorted_by_vol = sorted(vol_dict.items(), key=lambda x: x[1], reverse=True) Top_pairs = [] print(max(vol_list))
# Mean and standard deviation of each variance sample, scaled by 100.
# The `error_*` names hold the standard deviations.
mean_var_Y = np.mean(var_Y) * 100
error_mean_var_Y = np.std(var_Y) * 100
mean_var_C = np.mean(var_C) * 100
error_mean_var_C = np.std(var_C) * 100
mean_var_I = np.mean(var_I) * 100
error_mean_var_I = np.std(var_I) * 100

# Variances relative to Y; the Y entry is mean_var_Y divided by itself.
relative_variance_Y = mean_var_Y / mean_var_Y
relative_variance_C = mean_var_C / mean_var_Y
relative_variance_I = mean_var_I / mean_var_Y

# Run adf on every column of Y and on the same-named columns of C and I,
# collecting the raw results per frame.
adf_Y = []
adf_C = []
adf_I = []
for column in Y:
    adf_y = adf(Y[column])
    adf_c = adf(C[column])
    adf_i = adf(I[column])
    adf_Y.append(adf_y)
    adf_C.append(adf_c)
    adf_I.append(adf_i)

# Empty result lists; presumably filled per confidence level (99/95/90)
# by code outside this view -- TODO confirm.
adf_Y_99 = []
adf_C_99 = []
adf_I_99 = []
adf_Y_95 = []
adf_C_95 = []
adf_I_95 = []
adf_Y_90 = []
adf_C_90 = []
adf_I_90 = []
def _test_adf_threshold(spp, num_timesteps_back, alpha=0.05):
    """Tell whether the tail of ``spp.Nt`` passes the ``adf`` threshold.

    Runs ``adf`` on the last *num_timesteps_back* entries of ``spp.Nt``
    and returns whether element ``[1]`` of the result is below *alpha*.
    """
    recent = spp.Nt[-num_timesteps_back:]
    p_value = adf(recent)[1]
    return p_value < alpha
# NOTE(review): `name`, `i` and `j` are set outside this view; everything
# down to the end of the `for k` loop presumably runs once per (i, j)
# pair inside loops whose headers are not visible -- confirm before
# re-indenting.
# Start the pair's result columns as all-NaN.
pADF[name] = np.nan
pHypo[name] = np.nan
pBeta0[name] = np.nan
pBeta1[name] = np.nan
print(name)
# Slide a 244-row window over the two columns.
for k in range(len(stock) - 244):
    # Only windows without any missing value are fitted.
    if stock[[i, j]][k:k + 244].isna().sum().sum() == 0:
        mdl = VECM(stock[[i, j]][k:k + 244], coint_rank=1, deterministic='co')
        res = mdl.fit()
        # Linear combination of the two columns weighted by res.beta.
        x = (res.beta[0] * stock[i][k:k + 244] + res.beta[1] * stock[j][k:k + 244])
        # NOTE(review): frame[name][key] = value is chained indexing;
        # whether the write reaches the frame depends on the pandas
        # version and settings -- confirm.
        pBeta0[name][k + 244] = res.beta[0]
        pBeta1[name][k + 244] = res.beta[1]
        # Element 0 of the adf result for the combination; x already has
        # 244 rows, so the [:244] slice keeps all of them.
        c = adf(x[:244], regression='c')[0]
        pADF[name][k + 244] = c
        # Hard-coded threshold; presumably an ADF critical value for this
        # window length -- TODO confirm its source.
        pHypo[name][k + 244] = c <= -2.8741898504150574
# Remove the 'TEMP' column from every frame.
pADF.drop(['TEMP'], axis=1, inplace=True)
pHypo.drop(['TEMP'], axis=1, inplace=True)
pBeta0.drop(['TEMP'], axis=1, inplace=True)
pBeta1.drop(['TEMP'], axis=1, inplace=True)
stock.drop(['TEMP'], axis=1, inplace=True)
# Row-wise rank of the statistics, zeroed where pHypo is false, then
# zeros replaced by the sentinel 999.
pRank = pADF.rank(axis=1, method='min') * pHypo
pRank[pRank == 0] = 999
pADF.to_csv('pADF.csv')
pHypo.to_csv('pHypo.csv')
pBeta0.to_csv('pBeta0.csv')
def test_stationarity(self, coin1, coin2, beta):
    """Report whether the spread ``coin2 - beta * coin1`` passes ``adf``.

    Returns ``True`` when element ``[1]`` of the ``adf`` result for the
    spread is below ``self.significance``, otherwise ``False``.
    """
    spread = coin2 - beta * coin1
    p_value = adf(spread)[1]
    return bool(p_value < self.significance)