def irgendwas():
    """Run the Johansen trace test on ``exp_data`` and pick the 5% critical values.

    The original body was pasted interactive history: it contained a syntax
    error (``transform_data. - 1``), an undefined name (``expdata``), a call
    with missing arguments and an import placed after its first use.  Only
    the final, working sequence of that session is kept.

    ``exp_data`` and ``pd`` / ``np`` are expected at module level.
    NOTE(review): ``exp_data`` is presumably a (4, n_obs) array, since its
    transpose is given four columns -- confirm against the caller.

    Returns:
        tuple: ``(trace_statistics, critical_values)`` where the critical
        values are the ``cvt`` column selected for ``alpha = 0.05``.
    """
    from statsmodels.tsa.vector_ar.vecm import coint_johansen

    pdf = pd.DataFrame(exp_data.T, columns=[0, 1, 2, 3])
    out = coint_johansen(pdf, -1, 5)

    alpha = 0.05
    # (0.1 - alpha) / 0.05 rounds to 0, 1, 2 for alpha = 0.10, 0.05, 0.01,
    # i.e. the column index into out.cvt.
    cvt = out.cvt[:, int(np.round((0.1 - alpha) / 0.05))]
    return out.lr1, cvt
def runCointTestBasketsJoh(etf, tickers, start, end):
    """Run a Johansen test of the ETF against every ticker basket of size >= 2.

    Close prices for ``etf`` and ``tickers`` are downloaded with ``yf`` for
    ``start``..``end`` and log-transformed.  For each combination of two or
    more tickers the basket plus the ETF is passed to ``coint_johansen``
    (``det_order=0``, ``k_ar_diff=1``).  Baskets containing any missing value
    are reported with ``'err'`` and skipped.

    Returns:
        DataFrame with columns ``ticker`` (the basket), ``critical-values``
        and ``trace-stat`` (both taken from the last row of the test output),
        sorted by ``trace-stat`` ascending.
    """
    results = pd.DataFrame(
        columns=['ticker', 'critical-values', 'trace-stat'])

    etf_close = yf.download(etf, start=start, end=end)[['Close']]
    etf_log_price = np.log(etf_close['Close'].values)
    basket_prices = yf.download(tickers, start=start, end=end)

    # Every subset of the tickers with at least two members, smallest first.
    baskets = [
        list(members)
        for size in range(2, len(tickers) + 1)
        for members in itertools.combinations(tickers, size)
    ]

    for idx, basket in enumerate(baskets):
        if idx % 500 == 0:
            print(idx, "done, out of a total of ", len(baskets))

        log_prices = basket_prices['Close'][basket].apply(np.log)
        # Positional assignment: the ETF array is not re-aligned on the index.
        log_prices['etf'] = etf_log_price
        if log_prices.isnull().values.any():
            print('err')
            continue

        test = coint_johansen(log_prices, det_order=0, k_ar_diff=1)
        results.loc[idx] = [
            basket,
            test.trace_stat_crit_vals[-1],
            test.trace_stat[-1],
        ]

    return results.sort_values(by='trace-stat', ascending=True)
def cointegration_test_result(data, num_lag_diff=2):
    """Run Johansen's cointegration test and print one table per level.

    ``coint_johansen`` is called with ``det_order=-1`` and ``num_lag_diff``
    lagged differences.  For each of the 10%, 5% and 1% levels the trace
    statistics are printed next to the matching critical values together
    with the result of ``trace > critical value``.

    Changes from the previous version:
      * the critical-value column is paired with its alpha explicitly
        instead of being looked up through ``str(1 - alpha)``, which only
        worked while the float happened to print as the dict key;
      * column labels that are not strings no longer crash the width
        computation;
      * the header no longer calls the test "granger-casuality".

    Args:
        data: DataFrame with one column per series.
        num_lag_diff: number of lagged differences passed to the test.
    """
    output_coint = coint_johansen(data, -1, num_lag_diff)
    traces_value = output_coint.lr1

    def adjust(value, length=6):
        return str(value).ljust(length)

    max_char_len = max(
        (len(str(name)) for name in data.columns), default=0) + 1

    # (alpha, column of output_coint.cvt): 0 -> 90%, 1 -> 95%, 2 -> 99%
    levels = ((0.10, 0), (0.05, 1), (0.01, 2))

    print(
        "\n Significance of cointegration (trace statistic) at different "
        "critical values level.\n "
    )
    for alpha, crit_col in levels:
        coint_crit_val = output_coint.cvt[:, crit_col]
        print(
            adjust("Name", max_char_len), " :: ",
            "Test Stat > C(%.1f%s) => Signif \n" % ((1 - alpha) * 100, "%"),
            "---" * max_char_len)
        for col_name, trace, cvt in zip(data.columns, traces_value,
                                        coint_crit_val):
            print(adjust(col_name, max_char_len), " :: ",
                  adjust(round(trace, 2), 9), ">", adjust(cvt, 8), " => ",
                  trace > cvt)
        print("\n")
def find_pairs(prices, coint_set_amount, johansen_lag):
    """Search all asset sets of a given size for cointegration.

    Every combination of ``coint_set_amount`` columns of ``prices`` is run
    through ``coint_johansen`` (``det_order=0``, ``k_ar_diff=johansen_lag``).
    A combination is kept when at least one trace statistic reaches at least
    one of its critical values.

    Changes from the previous version:
      * the boolean mask was applied to the *rows* of ``evec``; eigenvectors
        are stored as columns, so the stored weights are now the first
        column selected by the mask;
      * the progress-bar total is an exact integer instead of a float.

    Args:
        prices: DataFrame, one column per asset.
        coint_set_amount: number of assets per tested set.
        johansen_lag: lagged differences passed to the Johansen test.

    Returns:
        dict mapping each accepted combination (tuple of column labels) to
        its weight vector.
    """
    result = {}
    traded_assets = prices.columns

    # Binomial coefficient C(n, k); each intermediate product is divisible,
    # so integer division stays exact.
    total_pairs = 1
    for i in range(coint_set_amount):
        total_pairs = total_pairs * (len(traded_assets) - i) // (i + 1)

    with tqdm_notebook(total=total_pairs) as pbar:
        for combination in itertools.combinations(traded_assets,
                                                  coint_set_amount):
            combination_prices = prices[list(combination)]
            johansen_result = coint_johansen(combination_prices.values,
                                             det_order=0,
                                             k_ar_diff=johansen_lag)
            # significant[i]: trace statistic i reaches any critical value.
            significant = (johansen_result.lr1.reshape(-1, 1)
                           >= johansen_result.cvt).any(axis=1)
            if significant.any():
                result[combination] = johansen_result.evec[:, significant][:, 0]
            pbar.update(1)
    return result
def fit(self, ts: pd.DataFrame):
    """Derive portfolio weights for the instruments from the Johansen test.

    The test is run on ``ts.values`` with a constant term (``det_order=0``)
    and one lagged difference.  The first eigenvector column is stored as the
    hedge ratios, the resulting portfolio series is built, and its half-life
    is computed with ``halflife``.

    E. P. Chan: the eigenvectors (column vectors in ``evec``) are ordered by
    decreasing eigenvalue, so the first one is expected to be the "strongest"
    relation, i.e. the one with the shortest half-life of mean reversion.

    :param ts: dataframe with each column being an instrument ts
    :return: ``self``, with ``ws``, ``hl`` and ``_fitted`` set
    """
    johansen = vm.coint_johansen(ts.values, det_order=0, k_ar_diff=1)

    # Index 0 of the critical-value columns is the 90% level, index 2 the 99%
    # level.  The significance assertions these helpers were built for are
    # currently disabled, so the results are not used further.
    trace_check = sign(johansen.cvt[0, 2], johansen.cvt[0, 0])
    max_eig_check = sign(johansen.cvm[0, 2], johansen.cvm[0, 0])

    self.ws = johansen.evec[:, 0]

    # The weighted sum is the mean-reverting series; it is also the (net)
    # market value of the portfolio.
    portfolio_value = np.dot(ts.values, self.ws)
    self.hl = halflife(portfolio_value)
    self._fitted = True
    return self
def johansen_coint_(merged_s, pvalues=0.05):
    """Print the outcome of the Johansen max-eigenvalue and trace tests.

    Rows with missing values are dropped, then ``coint_johansen`` is run with
    ``det_order=0`` and one lagged difference.  Each statistic is compared
    with its three critical values (columns 0, 1, 2) and a verdict is
    printed.  ``pvalues`` is accepted but not used.

    Returns:
        The eigenvalues of the test result.
    """
    cleaned = merged_s.dropna()
    result = coint_johansen(cleaned, 0, 1)

    def crit_range(st, crits):
        for i, stat in enumerate(st):
            print("The t-stat of it is {}".format(stat))
            low, mid, high = crits[i][0], crits[i][1], crits[i][2]
            if stat <= low:
                message = 'r<{} failed being rejected.'
            elif stat <= mid:
                message = 'r<{} rejected at 90%.'
            elif stat <= high:
                message = 'r<{} rejected at 95%.'
            else:
                message = 'r<{} rejected at 99%.'
            print(message.format(i + 1))

    print("Maximum statistic testing...")
    crit_range(result.lr2, result.cvm)
    print("Tracing statistic testing...")
    crit_range(result.lr1, result.cvt)
    return result.eig
def compute_pair_metrics(security, candidates):
    """Compute spread metrics and cointegration tests for one security.

    Both inputs are rebased to their first observation.  For every candidate
    column the function reports the linear drift and volatility of the spread
    to ``security``, the level and return correlations, the Johansen trace
    statistics (lag order chosen by the VAR AIC) and Engle-Granger tests in
    both directions.

    Args:
        security: price Series; its ``name`` is used as the ``s1`` label.
        candidates: DataFrame of candidate price series.

    Returns:
        DataFrame indexed by candidate with the metric and test columns.
    """
    security = security.div(security.iloc[0])
    ticker = security.name
    candidates = candidates.div(candidates.iloc[0])
    spreads = candidates.sub(security, axis=0)

    n, _ = spreads.shape
    # Design matrix [1, t] for an OLS trend fit; row 1 of the coefficient
    # table is the slope, i.e. the drift of each spread.
    X = np.ones(shape=(n, 2))
    X[:, 1] = np.arange(1, n + 1)
    ols_coefs = np.linalg.inv(X.T @ X) @ X.T @ spreads
    drift = ols_coefs.iloc[1].to_frame('drift')

    vol = spreads.std().to_frame('vol')
    corr_ret = candidates.pct_change().corrwith(
        security.pct_change()).to_frame('corr_ret')
    corr = candidates.corrwith(security).to_frame('corr')
    metrics = drift.join(vol).join(corr).join(corr_ret).assign(n=n)

    rows = []
    for candidate, prices in candidates.items():
        pair = pd.DataFrame({'s1': security, 's2': prices})

        # Lag order selected by AIC on the unrestricted VAR.
        k_ar_diff = VAR(pair.values).select_order().selected_orders['aic']

        # Johansen test with constant term and the estimated lag order.
        cj0 = coint_johansen(pair, det_order=0, k_ar_diff=k_ar_diff)

        # Engle-Granger tests, once in each direction.
        t1, p1 = coint(security, prices, trend='c')[:2]
        t2, p2 = coint(prices, security, trend='c')[:2]

        rows.append(
            [ticker, candidate, t1, p1, t2, p2, k_ar_diff, *cj0.lr1])

    columns = [
        's1', 's2', 't1', 'p1', 't2', 'p2', 'k_ar_diff', 'trace0', 'trace1'
    ]
    tests = pd.DataFrame(rows, columns=columns).set_index('s2')
    return metrics.join(tests)
def johansen_coint(df, report=True):
    """Find the first rank whose trace hypothesis is not rejected at 99%.

    The frame is converted with ``data_frame_to_samples``, transposed into a
    new DataFrame and tested with ``coint_johansen(df, 0, 1)``.  Ranks are
    scanned from 0 upwards; the scan stops at the first trace statistic that
    is below its 99% critical value (column 2 of ``cvt``).

    Args:
        df: input frame.
        report: when true, print the intermediate statistics and the
            eigen decomposition.

    Returns:
        ``(eigenvalues[:rank], eigenvectors[:, :rank])`` or ``None`` when
        every hypothesis was rejected.
    """
    samples = data_frame_to_samples(df)
    n_series, _ = samples.shape
    frame = pandas.DataFrame(samples.T)
    result = vecm.coint_johansen(frame, 0, 1)

    trace_stats = result.lr1
    crit_vals = result.cvt  # columns: 0 = 90%, 1 = 95%, 2 = 99%

    rank = None
    for r in range(n_series):
        if report:
            print(f"Critical Value: {crit_vals[r, 2]}, Trace Statistic: {trace_stats[r]}")
        if trace_stats[r] < crit_vals[r, 2]:
            rank = r
            break

    eigenvalues = result.eig
    eigenvectors = numpy.matrix(result.evec)

    if report:
        print(f"Rank={rank}")
        print("Eigen Values\n", eigenvalues)
        print("Eigen Vectors\n", eigenvectors)

    if rank is None:
        print("Reduced Rank Solution Does Not Exist")
        return None

    return eigenvalues[:rank], eigenvectors[:, :rank]
def cointegration_test(self, df, signif=0.05):
    """Perform Johansen's cointegration test and show a summary table.

    ``coint_johansen`` is run with ``det_order=-1`` and 5 lagged differences;
    the trace statistics are compared with the critical values for the
    requested level and the result is rendered with ``st.dataframe``.

    Changes from the previous version: the level was looked up through
    ``str(1 - signif)``, so ``signif=0.1`` produced ``'0.9'`` and missed the
    ``'0.90'`` key.  The lookup is now numeric, and the critical-value column
    is labelled with the level actually used (``c(95%)`` for the default).

    Args:
        df: DataFrame with one column per series.
        signif: 0.10, 0.05 or 0.01.

    Raises:
        KeyError: if ``signif`` is not one of the supported levels.
    """
    st.subheader('cointegration test')
    out = coint_johansen(df, -1, 5)

    # Confidence level in percent -> column of out.cvt.
    crit_columns = {90: 0, 95: 1, 99: 2}
    confidence = (1 - signif) * 100
    level = int(round(confidence))
    if level not in crit_columns or abs(confidence - level) > 1e-6:
        raise KeyError(
            f"unsupported significance level {signif!r}; "
            "use 0.10, 0.05 or 0.01")

    traces = out.lr1
    cvts = out.cvt[:, crit_columns[level]]

    def adjust(val, length=6):
        return str(val).ljust(length)

    rows = list(zip(df.columns, traces, cvts))
    df_cointegration = pd.DataFrame()
    df_cointegration['name'] = [adjust(col) for col, _, _ in rows]
    df_cointegration['test'] = [
        adjust(round(trace, 2), 9) for _, trace, _ in rows]
    df_cointegration[f'c({level}%)'] = [adjust(cvt, 8) for _, _, cvt in rows]
    df_cointegration['signif'] = [trace > cvt for _, trace, cvt in rows]
    st.dataframe(df_cointegration)
def setup_class(cls):
    """Fixture: Johansen test on ``dta`` with det_order=1 and 2 lagged diffs.

    Stores the result together with hard-coded expected values.
    NOTE(review): ``res1_m`` / ``res2_m`` are presumably the trace and
    max-eigenvalue statistics followed by their three critical-value
    columns, flattened column by column -- confirm against the assertions
    that consume them.
    """
    cls.res = coint_johansen(dta, 1, 2)
    cls.nobs_r = 173 - 1 - 2

    # Each expected vector is assembled from four groups of eight values.
    res1_stat = [241.985452556075, 166.4781461662553, 110.3298006342814,
                 70.79801574443575, 44.90887371527634, 27.22385073668511,
                 11.74205493173769, 3.295435325623445]
    res1_cv_a = [169.0618, 133.7852, 102.4674, 75.1027, 51.6492, 32.0645,
                 16.1619, 2.7055]
    res1_cv_b = [175.1584, 139.278, 107.3429, 79.34220000000001, 55.2459,
                 35.0116, 18.3985, 3.8415]
    res1_cv_c = [187.1891, 150.0778, 116.9829, 87.7748, 62.5202, 41.0815,
                 23.1485, 6.6349]
    cls.res1_m = np.array(res1_stat + res1_cv_a + res1_cv_b + res1_cv_c)

    res2_stat = [75.50730638981975, 56.14834553197396, 39.5317848898456,
                 25.8891420291594, 17.68502297859124, 15.48179580494741,
                 8.446619606114249, 3.295435325623445]
    res2_cv_a = [52.5858, 46.5583, 40.5244, 34.4202, 28.2398, 21.8731,
                 15.0006, 2.7055]
    res2_cv_b = [55.7302, 49.5875, 43.4183, 37.1646, 30.8151, 24.2522,
                 17.1481, 3.8415]
    res2_cv_c = [62.1741, 55.8171, 49.4095, 42.8612, 36.193, 29.2631,
                 21.7465, 6.6349]
    cls.res2_m = np.array(res2_stat + res2_cv_a + res2_cv_b + res2_cv_c)

    # Flat eigenvector values; reshaped below in column-major order to the
    # shape of the computed eigenvector matrix.
    evec_flat = np.array([
        0.01102517075074406, -0.2185481584930077, 0.04565819524210763,
        -0.06556394587400775, 0.04711496306104131, -0.1500111976629196,
        0.03775327003706507, 0.03479475877437702,
        0.007517888890275335, -0.2014629352546497, 0.01526001455616041,
        0.0707900418057458, -0.002388919695513273, 0.04486516694838273,
        -0.02936314422571188, 0.009900554050392113,
        0.02846074144367176, 0.02021385478834498, -0.04276914888645468,
        0.1738024290422287, 0.07821155002012749, -0.1066523077111768,
        -0.3011042488399306, 0.04965189679477353,
        0.07141291326159237, -0.01406702689857725, -0.07842109866080313,
        -0.04773566072362181, -0.04768640728128824, -0.04428737926285261,
        0.4143225656833862, 0.04512787132114879,
        -0.06817130121837202, 0.2246249779872569, -0.009356548567565763,
        0.006685350535849125, -0.02040894506833539, 0.008131690308487425,
        -0.2503209797396666, 0.01560186979508953,
        0.03327070126502506, -0.263036624535624, -0.04669882107497259,
        0.0146457545413255, 0.01408691619062709, 0.1004753600191269,
        -0.02239205763487946, -0.02169291468272568,
        0.08782313160608619, -0.07696508791577318, 0.008925177304198475,
        -0.06230900392092828, -0.01548907461158638, 0.04574831652028973,
        -0.2972228156126774, 0.003469819004961912,
        -0.001868995544352928, 0.05993345996347871, 0.01213394328069316,
        0.02096614212178651, -0.08624395993789938, 0.02108183181049973,
        -0.08470307289295617, -5.135072530480897e-005,
    ])
    cls.evec_m = np.reshape(evec_flat, cls.res.evec.shape, order='F')

    cls.eig_m = np.array([
        0.3586376068088151, 0.2812806889719111, 0.2074818815675726,
        0.141259991767926, 0.09880133062878599, 0.08704563854307619,
        0.048471840356709, 0.01919823444066367,
    ])
def setup_class(cls):
    """Fixture: Johansen test on ``dta`` with det_order=0 and 9 lagged diffs.

    Stores the result together with hard-coded expected values.  The
    original notes indicate the numbers were printed with
    ``fprintf(1, '%18.16g, ', r1)`` and that ``r2 = [res.lr2 res.cvm]``.
    """
    cls.res = coint_johansen(dta, 0, 9)
    cls.nobs_r = 173 - 1 - 9

    # r1: eight statistics followed by three groups of eight critical values.
    r1_stat = [307.6888935095814, 205.3839229398245, 129.1330243009336,
               83.3101865760208, 52.51955460357912, 30.20027050520502,
               13.84158157562689, 0.4117390188204866]
    r1_cv_a = [153.6341, 120.3673, 91.109, 65.8202, 44.4929, 27.0669,
               13.4294, 2.7055]
    r1_cv_b = [159.529, 125.6185, 95.7542, 69.8189, 47.8545, 29.7961,
               15.4943, 3.8415]
    r1_cv_c = [171.0905, 135.9825, 104.9637, 77.8202, 54.6815, 35.4628,
               19.9349, 6.6349]
    cls.res1_m = np.array(r1_stat + r1_cv_a + r1_cv_b + r1_cv_c)

    # r2 = [res.lr2 res.cvm]
    r2_stat = [102.3049705697569, 76.25089863889085, 45.82283772491284,
               30.7906319724417, 22.31928409837409, 16.35868892957814,
               13.4298425568064, 0.4117390188204866]
    r2_cv_a = [49.2855, 43.2947, 37.2786, 31.2379, 25.1236, 18.8928,
               12.2971, 2.7055]
    r2_cv_b = [52.3622, 46.2299, 40.0763, 33.8777, 27.5858, 21.1314,
               14.2639, 3.8415]
    r2_cv_c = [58.6634, 52.3069, 45.8662, 39.3693, 32.7172, 25.865, 18.52,
               6.6349]
    cls.res2_m = np.array(r2_stat + r2_cv_a + r2_cv_b + r2_cv_c)
def Johansen(self, p, verbose):
    """Run the Johansen test and build the strongest portfolio series.

    The test is run on ``self.data[self.name_lyst]`` with ``det_order=0`` and
    ``p`` lagged differences.  Summary tables for the trace and
    max-eigenvalue statistics are built, and the result object is extended
    with:

      * ``trace`` / ``eigen``: boolean Series, statistic > 95% critical value;
      * ``max_eigen_ix``: position of the largest eigenvalue;
      * ``max_evec``: the eigenvector column at that position.

    Changes from the previous version: column labels were passed as sets
    (``columns={"..."}``), which pandas 2.x rejects; they are lists now.
    ``verbose`` is tested for truthiness, and unused locals are gone.

    Args:
        p: number of lagged differences.
        verbose: when truthy, print the summary tables.

    Returns:
        tuple: ``(jres, series)`` where ``series`` is ``y`` multiplied by
        ``max_evec``.
    """
    y = self.data[self.name_lyst]
    jres = coint_johansen(y, 0, p)

    tr_stats = pd.DataFrame(jres.lr1, columns=["Trace Statistic"])
    tr_stats.index.names = ["NULL: r <= "]
    tr_stats["Criti_90%"] = jres.cvt[:, 0]
    tr_stats["Criti_95%"] = jres.cvt[:, 1]
    tr_stats["Criti_99%"] = jres.cvt[:, 2]

    eign_stats = pd.DataFrame(jres.lr2, columns=["Eigen Statistic"])
    eign_stats.index.names = ["NULL: r <= "]
    eign_stats["Criti_90%"] = jres.cvm[:, 0]
    eign_stats["Criti_95%"] = jres.cvm[:, 1]
    eign_stats["Criti_99%"] = jres.cvm[:, 2]

    eigen = pd.DataFrame(jres.eig, columns=["Eigen Value"])
    EVEC = pd.DataFrame(jres.evec)

    if verbose:
        print(tr_stats, "\n")
        print(eign_stats, "\n")
        print(eigen, "\n")
        print(EVEC)

    jres.trace = tr_stats["Trace Statistic"] > tr_stats["Criti_95%"]
    jres.eigen = eign_stats["Eigen Statistic"] > eign_stats["Criti_95%"]
    jres.max_eigen_ix = np.argmax(jres.eig)
    # Column lookup: EVEC[k] is the k-th eigenvector.
    jres.max_evec = EVEC[jres.max_eigen_ix]
    return jres, np.dot(y, jres.max_evec)
def get_johansen(y, p):
    """Estimate the cointegration rank with the max-eigenvalue statistic.

    ``coint_johansen`` is run with ``det_order=0`` and ``p`` lagged
    differences.  The max-eigenvalue statistics (rounded to 4 decimals) are
    compared one after another with column 1 of ``cvm`` (the 95% values);
    the scan stops at the first statistic that does not exceed its critical
    value.

    The previous docstring described a trace test returning cointegration
    vectors, which is not what the code does.  An unused rounded copy of the
    eigenvectors was dropped, and the ``try`` now covers only the call that
    can raise.

    Args:
        y: array or DataFrame of series, one per column.
        p: number of lagged differences.

    Returns:
        ``[rank]`` for the first non-rejected hypothesis, ``[]`` when every
        hypothesis is rejected, or ``None`` if the test raises
        ``numpy.linalg.LinAlgError``.
    """
    try:
        result = coint_johansen(y, det_order=0, k_ar_diff=p)
    except np.linalg.LinAlgError:
        return None

    max_eig_stats = np.round(result.lr2, 4)
    crit_95 = result.cvm[:, 1]
    for rank, (stat, crit) in enumerate(zip(max_eig_stats, crit_95)):
        if not stat > crit:
            return [rank]
    return []
def cointegration_test(data, alpha=0.05):
    """Run Johansen's cointegration test and print a summary.

    Two or more series are cointegrated when a linear combination of them
    has a lower order of integration than the individual series.
    ``coint_johansen`` is run with ``det_order=-1`` and 5 lagged differences
    and each trace statistic is printed next to the critical value for the
    requested level.

    - https://en.wikipedia.org/wiki/Cointegration
    - http://www-stat.wharton.upenn.edu/~steele/Courses/434/434Context/Co-integration/Murray93DrunkAndDog.pdf
    - https://en.m.wikipedia.org/wiki/Johansen_test
    - https://en.wikipedia.org/wiki/Error_correction_model

    Changes from the previous version: the level was looked up through
    ``str(1 - alpha)``, so ``alpha=0.1`` produced ``'0.9'`` and missed the
    ``'0.90'`` key.  The lookup is now numeric and the header shows the level
    actually used instead of a fixed 95%.

    Args:
        data: DataFrame with one column per series.
        alpha: 0.10, 0.05 or 0.01.

    Raises:
        KeyError: if ``alpha`` is not one of the supported levels.
    """
    out = coint_johansen(data, -1, 5)

    # Confidence level in percent -> column of out.cvt.
    crit_columns = {90: 0, 95: 1, 99: 2}
    confidence = (1 - alpha) * 100
    level = int(round(confidence))
    if level not in crit_columns or abs(confidence - level) > 1e-6:
        raise KeyError(
            f"unsupported significance level {alpha!r}; "
            "use 0.10, 0.05 or 0.01")

    traces = out.lr1
    cvts = out.cvt[:, crit_columns[level]]

    def adjust(val, length=6):
        return str(val).ljust(length)

    # Summary
    print(f'Name :: Test Stat > C({level}%) => Signif \n', '--' * 20)
    for col, trace, cvt in zip(data.columns, traces, cvts):
        print(adjust(col), ':: ', adjust(round(trace, 2), 9), ">",
              adjust(cvt, 8), ' => ', trace > cvt)
def check(self):
    """Estimate the hedge ratio of the pair and test its spread.

    The log prices of ``b`` and ``a`` are stacked (in that column order) and
    run through ``coint_johansen`` with ``det_order=0`` and one lagged
    difference.  The spread ``log(b) + log(a) * beta_a / beta_b`` is then
    tested with ``adfuller`` and its p-value obtained from ``mackinnonp``.

    Fix: the weights were read from ``evec[0]``, the first *row* of the
    eigenvector matrix.  Eigenvectors are stored as columns, so the first
    eigenvector is ``evec[:, 0]``.

    Sets ``_beta_b``, ``_beta_a``, ``_beta``, ``_spread``, ``_p_value`` and
    ``_t_stats``.
    """
    _log_price_a = np.log(self._prices_a)
    _log_price_b = np.log(self._prices_b)
    _values = np.stack((_log_price_b, _log_price_a), axis=-1)

    rst = coint_johansen(_values, det_order=0, k_ar_diff=1)
    # Components follow the stacking order above: (b, a).
    beta_b, beta_a = rst.evec[:, 0]

    self._beta_b = beta_b
    self._beta_a = beta_a
    self._beta = beta_a / beta_b
    # Spread normalised so that the coefficient on log(b) is 1.
    self._spread = _log_price_b + _log_price_a * beta_a / beta_b

    res_adf = adfuller(self._spread, maxlag=1, regression='c', autolag=None)
    self._p_value = mackinnonp(res_adf[0], regression='c', N=2)
    self._t_stats = res_adf[0]
def cointegration_test(data):
    """Return the eigenvalues of a Johansen test on ``data``.

    The test is run with ``det_order=-1`` and one lagged difference.
    NOTE(review): the original author used this as a stationarity check
    ("all absolute eigenvalues below 1") -- that reading is not verified
    here.
    """
    from statsmodels.tsa.vector_ar.vecm import coint_johansen

    johansen = coint_johansen(data, -1, 1)
    return johansen.eig
def johansen_test_result(self):
    """Reload the data and append the Johansen-weighted portfolio column.

    ``self.data`` is refreshed from ``import_data``, the Johansen test is run
    with ``det_order=0`` and one lagged difference, and the first
    eigenvector column is stored as ``share_allocation``.  The row-wise sum
    of the weighted series is written to the ``port`` column.

    Returns:
        ``self.data`` including the new ``port`` column.
    """
    self.data = self.import_data()
    johansen = coint_johansen(self.data, 0, 1)
    self.share_allocation = johansen.evec[:, 0]

    weighted = self.share_allocation * self.data
    self.data['port'] = weighted.sum(axis=1)
    return self.data
def test_coint_johansen_0lag(reset_randomstate):
    """The Johansen test must accept ``k_ar_diff=0`` (GH 5731)."""
    n_obs = 1000
    # Draw order matters for reproducibility: walk increments first, then
    # the noise of the second series.
    increments = np.random.normal(0, 1, n_obs)
    walk = pd.Series(np.cumsum(increments))
    noise = np.random.normal(0, 1, n_obs)
    shifted = walk + 5 + noise

    data = pd.concat([walk, shifted], axis=1)
    result = coint_johansen(data, det_order=-1, k_ar_diff=0)
    assert result.eig.shape == (2, )
def setup_class(cls):
    """Fixture: Johansen test on ``dta`` with det_order=2 and 5 lagged diffs.

    Critical values are not available if trend > 1, so the 24 entries that
    follow the eight statistics in each expected vector are NaN.
    """
    cls.res = coint_johansen(dta, 2, 5)
    cls.nobs_r = 173 - 1 - 5

    missing_crit = np.full(24, np.nan)

    res1_stat = [270.1887263915158, 171.6870096307863, 107.8613367358704,
                 70.82424032233558, 44.62551818267534, 25.74352073857572,
                 14.17882426926978, 4.288656185006764]
    cls.res1_m = np.concatenate([res1_stat, missing_crit])

    res2_stat = [98.50171676072955, 63.82567289491584, 37.03709641353485,
                 26.19872213966024, 18.88199744409963, 11.56469646930594,
                 9.890168084263012, 4.288656185006764]
    cls.res2_m = np.concatenate([res2_stat, missing_crit])
def johansen_trace(y, p):
    """Run the Johansen test and attach the rank chosen by the trace test.

    ``coint_johansen`` is run with ``det_order=0`` and ``p`` lagged
    differences.  Hypotheses are checked in order against column 1 of
    ``cvt`` (the 95% values); the rank is the number of consecutive
    rejections starting at the first hypothesis.

    Fix: the previous loop did not stop at the first non-rejection, so a
    rejection further down could raise the rank even though an earlier
    hypothesis had been accepted.

    Args:
        y: 2-D array or DataFrame, one series per column.
        p: number of lagged differences.

    Returns:
        The test result with an added integer attribute ``r``.
    """
    n_series = y.shape[1]
    joh_trace = coint_johansen(y, 0, p)

    rank = 0
    for i in range(n_series):
        if not joh_trace.lr1[i] > joh_trace.cvt[i, 1]:
            break
        rank = i + 1

    joh_trace.r = rank
    return joh_trace
def johansen_Test(data, det_order, lagged_diff):
    """Tabulate the max-eigenvalue results of a Johansen test.

    Fixes: the third critical-value column was labelled ``"90%"`` a second
    time instead of ``"99%"``, and the index was hard-coded to two rows, so
    any input with more than two series failed.

    Args:
        data: series to test, one per column.
        det_order: deterministic-term order passed to the test.
        lagged_diff: number of lagged differences.

    Returns:
        DataFrame with columns ``eig``, ``max eig``, ``90%``, ``95%``,
        ``99%`` and one row ``H(i)`` per tested hypothesis.
    """
    results = vecm.coint_johansen(data, det_order, lagged_diff)
    table = np.column_stack((results.eig, results.lr2, results.cvm))
    df = pd.DataFrame(table, columns=["eig", "max eig", "90%", "95%", "99%"])
    df.index = [f"H({rank})" for rank in range(len(df))]
    return df
def setup_class(cls):
    """Fixture: Johansen test on ``dta`` with det_order=2 and 5 lagged diffs.

    ``HypothesisTestWarning`` is silenced while the test runs.  Critical
    values are not available if trend > 1, so the 24 entries that follow the
    eight statistics in each expected vector are NaN.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=HypothesisTestWarning)
        cls.res = coint_johansen(dta, 2, 5)
    cls.nobs_r = 173 - 1 - 5

    missing_crit = np.full(24, np.nan)

    res1_stat = [270.1887263915158, 171.6870096307863, 107.8613367358704,
                 70.82424032233558, 44.62551818267534, 25.74352073857572,
                 14.17882426926978, 4.288656185006764]
    cls.res1_m = np.concatenate([res1_stat, missing_crit])

    res2_stat = [98.50171676072955, 63.82567289491584, 37.03709641353485,
                 26.19872213966024, 18.88199744409963, 11.56469646930594,
                 9.890168084263012, 4.288656185006764]
    cls.res2_m = np.concatenate([res2_stat, missing_crit])
def cointegration_test(df, alpha=0.05):
    """Run Johansen's cointegration test and print a summary.

    ``coint_johansen`` is run with ``det_order=-1`` and 5 lagged differences
    and each trace statistic is printed next to the critical value for the
    requested level.

    Changes from the previous version: the level was looked up through
    ``str(1 - alpha)``, so ``alpha=0.1`` produced ``'0.9'`` and missed the
    ``'0.90'`` key.  The lookup is now numeric and the header shows the level
    actually used instead of a fixed 95%.

    Args:
        df: DataFrame with one column per series.
        alpha: 0.10, 0.05 or 0.01.

    Raises:
        KeyError: if ``alpha`` is not one of the supported levels.
    """
    out = coint_johansen(df, -1, 5)

    # Confidence level in percent -> column of out.cvt.
    crit_columns = {90: 0, 95: 1, 99: 2}
    confidence = (1 - alpha) * 100
    level = int(round(confidence))
    if level not in crit_columns or abs(confidence - level) > 1e-6:
        raise KeyError(
            f"unsupported significance level {alpha!r}; "
            "use 0.10, 0.05 or 0.01")

    traces = out.lr1
    cvts = out.cvt[:, crit_columns[level]]

    def adjust(val, length=6):
        return str(val).ljust(length)

    # Summary
    print(f'Name :: Test Stat > C({level}%) => Signif \n', '--' * 20)
    for col, trace, cvt in zip(df.columns, traces, cvts):
        print(adjust(col), ':: ', adjust(round(trace, 2), 9), ">",
              adjust(cvt, 8), ' => ', trace > cvt)
def coint_Johansen(data, det_order, k_ar_diff, return_pvalue=True):
    """Return the first trace statistic, optionally with a coarse p-value.

    The p-value is a bound, not an exact figure: it is just below 0.10, 0.05
    or 0.01 depending on the highest critical value (first row of ``cvt``)
    the statistic exceeds, and ``1.`` when it exceeds none.

    Args:
        data: series to test.
        det_order: deterministic-term order passed to the test.
        k_ar_diff: number of lagged differences.
        return_pvalue: when false, only the statistic is returned.

    Returns:
        ``stat`` or ``(stat, pvalue)``.
    """
    johansen = coint_johansen(data, det_order, k_ar_diff)
    stat = johansen.lr1[0]
    if not return_pvalue:
        return stat

    levels = (0.1 - 1e-6, 0.05 - 1e-6, 0.01 - 1e-6)
    exceeded = (stat > johansen.cvt[0]).nonzero()[0]
    pvalue = levels[exceeded[-1]] if len(exceeded) else 1.
    return stat, pvalue
def calculate_cointegration_johansen2(dataframe, k=1):
    """Print the Johansen test output for two or more data series.

    The test is run on ``dataframe`` as given (no NaN handling) with
    ``det_order=0`` and ``k`` lagged differences.  Nothing is returned.
    """
    johansen_test = coint_johansen(dataframe, det_order=0, k_ar_diff=k)

    # (label, attribute) pairs, printed in this order.
    report = (
        ('Trace Stat: ', 'trace_stat'),
        ('Trace critical values: \n ', 'trace_stat_crit_vals'),
        ('Max EigenVectors : ', 'max_eig_stat'),
        ('Max EigenVectors critical values: \n', 'max_eig_stat_crit_vals'),
        ('EigenValues: \n', 'eig'),
    )
    print('Johansen Test')
    for label, attribute in report:
        print(label, getattr(johansen_test, attribute))
def calculate_cointegration_johansen(depseries, indepseries, k=1):
    """Print the Johansen test output for a dependent/independent pair.

    The two inputs are concatenated column-wise (outer join), rows with
    missing values are dropped, and the combined data is written to
    ``cointdata.csv`` under ``store_dir`` before the test is run with
    ``det_order=0`` and ``k`` lagged differences.  Nothing is returned.
    """
    combined = pd.concat(
        [depseries, indepseries],
        axis=1,
        keys=['<DEPSERIES>', '<INDEPSERIES>'],
        join='outer',
    ).dropna()
    combined.to_csv(store_dir + '/' + 'cointdata.csv')

    johansen_test = coint_johansen(combined, det_order=0, k_ar_diff=k)

    # (label, attribute) pairs, printed in this order.
    report = (
        ('Trace Stat: ', 'trace_stat'),
        ('Trace critical values: \n ', 'trace_stat_crit_vals'),
        ('Max EigenVectors : ', 'max_eig_stat'),
        ('Max EigenVectors critical values: \n', 'max_eig_stat_crit_vals'),
    )
    print('Johansen Test')
    for label, attribute in report:
        print(label, getattr(johansen_test, attribute))
def run_johansen_test(data):
    """Evaluate the trace test for "no cointegration" on ``data``.

    r = 0 means no cointegration, r <= 1 means up to one cointegration
    relationship, and so on; there are m hypothesised numbers of
    cointegrated equations.

    * ``cvt`` -- critical values (90%, 95%, 99%) of the trace statistic
    * ``lr1`` -- trace statistic

    Trace test: H0: 0 cointegration equations; H1: more than 0 exist.
    Explanation: https://www.youtube.com/watch?v=TB4m9M1sIJ0

    Only the first hypothesis is evaluated; the max-eigenvalue variant is
    not used.

    Returns:
        Whatever ``trace_results`` returns for the first trace statistic and
        its critical values.
    """
    johansen = coint_johansen(data, det_order=0, k_ar_diff=1)
    first_stat = johansen.lr1[0]
    first_crits = johansen.cvt[0]
    return trace_results(first_stat, first_crits)
def _search_best_coint_vec(self, comb):
    """Find, per rolling window, the Johansen vector with the lowest ADF p-value.

    For every window of ``self._term`` rows ending at a value date, the
    Johansen test is run on the currencies in ``comb`` and each eigenvector
    is used to build a portfolio series that is tested with ``adfuller``.
    The eigenvector with the smallest p-value is kept.

    Changes from the previous version:
      * ``DataFrame.append`` (removed in pandas 2.0, quadratic in a loop) is
        replaced by collecting the per-date frames and one ``pd.concat``;
      * the bare ``except:`` is narrowed to ``except Exception`` so that
        ``KeyboardInterrupt`` / ``SystemExit`` are no longer swallowed;
      * eigenvectors are taken from the columns of ``evec`` (they were read
        as rows).

    Args:
        comb: iterable of column names in ``self._fx_rate_df``.

    Returns:
        tuple: ``(pvalue_df, weight_df)``; ``pvalue_df`` has one column named
        after the sorted, comma-joined combination, ``weight_df`` has columns
        ``Portfolio``, ``Ccy``, ``Weight`` and is indexed by ``ValueDate``.
    """
    self._logger.info("Processing {0}...".format(",".join(comb)))
    comb = np.sort(comb).tolist()
    portfolio = ','.join(comb)

    weight_frames = []
    pvalue_list = []
    for i in tqdm(range(self._term, self._fx_rate_df.shape[0])):
        # NOTE: the query string below resolves @start_date / @value_date
        # from these local names -- do not rename them.
        value_date = self._fx_rate_df.index[i]
        start_date = self._fx_rate_df.index[i - self._term]
        target_fx = self._fx_rate_df[list(comb)].query(
            "index>@start_date & index<=@value_date")

        min_pvalue = 1.0
        target_vec = []
        eigen_vec = coint_johansen(endog=target_fx,
                                   det_order=self._order,
                                   k_ar_diff=self._ar_diff).evec
        for j in range(eigen_vec.shape[1]):
            candidate = eigen_vec[:, j]
            try:
                pvalue = sm.tsa.stattools.adfuller(
                    (target_fx * candidate).sum(axis=1),
                    regression=self._reg)[1]
            except Exception:
                # Best effort: a failed ADF test counts as "not stationary".
                pvalue = 1.0
            if min_pvalue >= pvalue:
                min_pvalue = pvalue
                target_vec = candidate
        pvalue_list.append(min_pvalue)

        n_weights = len(target_vec)
        weight_frames.append(
            pd.DataFrame(
                np.array([np.repeat(portfolio, n_weights), comb,
                          target_vec]).T,
                index=np.repeat(value_date, n_weights),
                columns=['Portfolio', 'Ccy', 'Weight']))

    # pd.concat rejects an empty list, so keep the empty-frame result.
    weight_df = pd.concat(weight_frames) if weight_frames else pd.DataFrame()
    weight_df.index.name = 'ValueDate'
    pvalue_df = pd.DataFrame(pvalue_list,
                             columns=[portfolio],
                             index=self._fx_rate_df.index[self._term:])
    return pvalue_df, weight_df
def get_johansen(self, y, p):
    """Get the cointegration vectors at the 95% level of the trace test.

    ``coint_johansen`` is run with ``det_order=0`` and ``p`` lagged
    differences; the trace statistics and critical values are printed.
    Hypotheses are checked in order against column 1 of ``cvt`` (the 95%
    values) and the rank is the number of consecutive rejections.

    Fixes:
      * ``r`` was never assigned when no hypothesis was rejected, which
        raised ``UnboundLocalError``; it now defaults to 0;
      * the scan stops at the first non-rejection instead of letting a later
        rejection raise the rank.

    Args:
        y: 2-D array or DataFrame, one series per column.
        p: number of lagged differences.

    Returns:
        The test result with added attributes ``r`` (rank) and ``evecr``
        (the first ``r`` eigenvector columns; empty when ``r`` is 0).
    """
    n_series = y.shape[1]
    jres = coint_johansen(y, 0, p)
    trstat = jres.lr1  # trace statistic
    tsignf = jres.cvt  # critical values, columns: 0 = 90%, 1 = 95%, 2 = 99%
    print(trstat)
    print(tsignf)

    r = 0
    for i in range(n_series):
        if not trstat[i] > tsignf[i, 1]:
            break
        r = i + 1

    jres.r = r
    jres.evecr = jres.evec[:, :r]
    return jres
def get_hedge_ratio(self, pair_prices):
    """Compute the hedge ratio of a pair with the Johansen test.

    This is applied to the pair prices on a rolling basis in
    ``prices_to_signals``.

    Fix: the weights were read from ``evec[0]``, the first *row* of the
    eigenvector matrix, although the accompanying comment (and the layout of
    ``evec``) call for the first *column*.

    Args:
        pair_prices: DataFrame with one column per leg of the pair.

    Returns:
        Series of weights indexed by the columns of ``pair_prices``; all
        zeros when fewer than 75% of ``LOOKBACK_WINDOW`` observations remain
        after dropping missing values.
    """
    pair_prices = pair_prices.dropna()

    # Skip if we don't have at least 75% of the expected observations.
    if len(pair_prices) < self.LOOKBACK_WINDOW * 0.75:
        return pd.Series(0, index=pair_prices.columns)

    # The second and third arguments select a constant term and a lag of 1.
    # See Chan, Algorithmic Trading, chapter 2.
    result = coint_johansen(pair_prices, 0, 1)

    # The first column of eigenvectors contains the best weights.
    weights = list(result.evec[:, 0])
    return pd.Series(weights, index=pair_prices.columns)
def generate_hedge_ratio_from_df(df):
    """Compute a hedge ratio for ``df`` with the Johansen test.

    The frame is converted to a float matrix (one column per security) and
    tested with ``det_order=0`` and one lagged difference.

    :param df: pd.DataFrame to generate the hedge ratio for
    :type df: pd.DataFrame
    :return: first eigenvector column of the test result
    :rtype: numpy.ndarray
    """
    matrix = df.to_numpy(dtype=float)
    return jh.coint_johansen(matrix, 0, 1).evec[:, 0]
def setup_class(cls):
    """Fixture: Johansen test on ``dta`` with det_order=-1 and 8 lagged diffs.

    Stores the result together with hard-coded expected values.
    NOTE(review): ``res1_m`` / ``res2_m`` are presumably the trace and
    max-eigenvalue statistics followed by their three critical-value
    columns, flattened column by column -- confirm against the assertions
    that consume them.
    """
    cls.res = coint_johansen(dta, -1, 8)
    cls.nobs_r = 173 - 1 - 8

    # Each expected vector is assembled from four groups of eight values.
    res1_stat = [260.6786029744658, 162.7966072512681, 105.8253545950566,
                 71.16133060790817, 47.68490211260372, 28.11843682526138,
                 13.03968537077271, 2.25398078597622]
    res1_cv_a = [137.9954, 106.7351, 79.5329, 56.2839, 37.0339, 21.7781,
                 10.4741, 2.9762]
    res1_cv_b = [143.6691, 111.7797, 83.9383, 60.0627, 40.1749, 24.2761,
                 12.3212, 4.1296]
    res1_cv_c = [154.7977, 121.7375, 92.7136, 67.63670000000001, 46.5716,
                 29.5147, 16.364, 6.9406]
    cls.res1_m = np.array(res1_stat + res1_cv_a + res1_cv_b + res1_cv_c)

    res2_stat = [97.88199572319769, 56.97125265621156, 34.66402398714837,
                 23.47642849530445, 19.56646528734234, 15.07875145448866,
                 10.7857045847965, 2.25398078597622]
    res2_cv_a = [45.893, 39.9085, 33.9271, 27.916, 21.837, 15.7175, 9.4748,
                 2.9762]
    res2_cv_b = [48.8795, 42.7679, 36.6301, 30.4428, 24.1592, 17.7961,
                 11.2246, 4.1296]
    res2_cv_c = [55.0335, 48.6606, 42.2333, 35.7359, 29.0609, 22.2519,
                 15.0923, 6.9406]
    cls.res2_m = np.array(res2_stat + res2_cv_a + res2_cv_b + res2_cv_c)