def get_johansen(y, p):
    """
    Get the cointegration vectors at the 95% significance level
    given by the trace-statistic test.
    """
    N, l = y.shape
    jres = coint_johansen(y, 0, p)
    trstat = jres.lr1                      # trace statistics
    tsignf = jres.cvt                      # critical values
    r = 0                                  # cointegration rank
    for i in range(l):
        if trstat[i] > tsignf[i, 1]:       # columns: 0 -> 90%, 1 -> 95%, 2 -> 99%
            r = i + 1
    jres.r = r
    jres.evecr = jres.evec[:, :r]
    return jres
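# A minimal usage sketch for get_johansen(), run on synthetic cointegrated
# series. It assumes coint_johansen from statsmodels.tsa.vector_ar.vecm as a
# stand-in for the johansen module used elsewhere in this repo; its result
# exposes the same lr1, cvt and evec attributes. The data is illustrative only.
import numpy as np
import pandas as pd
from statsmodels.tsa.vector_ar.vecm import coint_johansen

rng = np.random.default_rng(0)
common = np.cumsum(rng.normal(size=1000))            # shared random walk
prices = pd.DataFrame({
    'A': common + rng.normal(scale=0.5, size=1000),
    'B': 0.8 * common + rng.normal(scale=0.5, size=1000),
})

jres = get_johansen(prices, p=1)
print('Cointegration rank at 95%:', jres.r)
print('Cointegrating vectors:\n', jres.evecr)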
if __name__ == '__main__':
    # Use in-sample data to calculate the eigenvector.
    symbols = ['FED/RXI_N_B_JA', 'LBMA/GOLD']
    num_assets = len(symbols)
    FX_data = quandl_stocks(symbols, [1, 2], (2010, 6, 10), (2015, 6, 9))
    FX_data.fillna(method='ffill', inplace=True)
    FX_data.iloc[:, 0] = 1 / FX_data.iloc[:, 0]      # invert the JPY quote so both series are in USD terms
    FX_log_data = np.log(FX_data)

    # The inputs are log price levels.
    jres = coint_johansen(FX_log_data, 0, 1)
    # The cointegrating vector is the first eigenvector (a column of evec),
    # normalised by its first component.
    weights = jres.evec[:, 0] / jres.evec[0, 0]

    # Use out-of-sample data to calculate the stationary series that
    # generates the trading signals.
    FX_data = quandl_stocks(symbols, [1, 2], (2015, 6, 10), (2018, 6, 10))
    FX_data.fillna(method='ffill', inplace=True)
    FX_data.iloc[:, 0] = 1 / FX_data.iloc[:, 0]
    FX_log_data = np.log(FX_data)

    coint_series = FX_log_data.dot(weights)          # stationary spread
    coint_series_data = pd.DataFrame(index=FX_data.index.copy())
    coint_series_data['value'] = coint_series
    coint_series_data.plot()

    z = zscore(coint_series_data, 26)
    z.fillna(0, inplace=True)
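# The zscore() helper called above is defined elsewhere in the repo and not
# shown in this excerpt; a minimal rolling-window sketch (the name, signature
# and window semantics are assumptions, not the original implementation):
def zscore(series, window):
    """Rolling z-score: (value - rolling mean) / rolling std over `window` bars."""
    mean = series.rolling(window).mean()
    std = series.rolling(window).std()
    return (series - mean) / std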
def main():
    ###########################################################################
    # import data from CSV file
    ###########################################################################
    path = my_path('PC')
    data = pd.read_csv(path + 'CAD_AUD.csv', index_col='Date')
    CAD = 1 / pd.DataFrame(data['USDCAD'])           # invert USDCAD to quote CAD in USD
    AUD = pd.DataFrame(data['AUDUSD'], index=data.index)
    y = AUD.join(CAD)

    # strategy parameters
    lookback = 20
    trainlen = 250

    hedgeratio = pd.DataFrame(np.zeros([len(data), 2]),
                              columns=y.columns, index=y.index)
    numunits = pd.DataFrame(np.zeros([len(data), 1]),
                            columns=['numunits'], index=y.index)

    for t in range(trainlen + 1, len(data)):
        # estimate the hedge ratio on the trailing training window of the
        # log prices of the two traded series
        results = coint_johansen(np.log(y.iloc[t - trainlen:t]), 0, 1,
                                 print_on_console=False)
        hedgeratio.iloc[t] = results.evec[0, 0], results.evec[1, 0]

        # we apply the t+0 hedge ratio to all the data in the lookback period
        # for the calculation of yport
        yport = np.sum(y.iloc[t - lookback:t] * repmat(hedgeratio.iloc[t], lookback, 1),
                       axis=1)
        ma = np.mean(yport)
        std = np.std(yport)
        numunits.iloc[t] = -(yport.iloc[-1] - ma) / std

    positions = repmat(numunits, 1, 2) * hedgeratio * y
    pnl = np.sum(positions.shift(1) * y.pct_change(1), axis=1)
    rtn = pnl / np.sum(np.abs(positions.shift(1)), axis=1)
    rtn = rtn[trainlen + 2:]
    #rtn.to_csv(path + 'rtn.csv')
    cumret = pd.DataFrame(np.cumprod(1 + rtn) - 1, index=rtn.index)
    #print(rtn.head())
    print(cumret.head())
    print(cumret.tail())
    #cumret = cumret.fillna(method='pad')

    # compute performance statistics
    sharpe = (np.sqrt(252) * np.mean(rtn)) / np.std(rtn)
    APR = np.prod(1 + rtn)**(252 / len(rtn)) - 1

    ##################################################
    # print the results
    ##################################################
    print('Sharpe: {:.4}'.format(sharpe))
    print('APR: {:.4%}'.format(APR))

    ###########################################################################
    # plotting the chart
    ###########################################################################
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(cumret)
    ax.set_title('C')
    ax.set_xlabel('Data points')
    ax.set_ylabel('Cumulative return')
    ax.text(1000, -0.05, 'Sharpe: {:.4}'.format(sharpe))
    ax.text(1000, 0, 'APR: {:.4%}'.format(APR))
    plt.show()
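# The my_path() helper is not included in this excerpt. Judging from the
# machine-specific directories noted in the GLD/SLV script below, a minimal
# sketch (the keys and trailing slashes are assumptions) might be:
def my_path(machine):
    """Return the market-data directory for the given machine."""
    paths = {
        'PC': 'C:/Users/javgar119/Documents/Python/Data/',
        'MAC': '/Users/Javi/Documents/MarketData/',
    }
    return paths[machine]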
# MAC: '/Users/Javi/Documents/MarketData/'
# WIN: 'C:/Users/javgar119/Documents/Python/Data'
filename = 'GLD_SLV_daily.csv'
full_path = root_path + filename
data = pd.read_csv(full_path, index_col='Date')

# restrict the data to the requested date range
#start_date = '2010-01-13'
#end_date = '2014-05-13'
#data = subset_dataframe(data, start_date, end_date)
#print('data import is {} lines'.format(str(len(data))))
#print(data.head(10))
#print(data.tail(5))

# Johansen test with non-zero offset but zero drift, and with lag k=1.
results = coint_johansen(data, 0, 1)

# These are the weights of the portfolio: we take the first eigenvector
# because it shows the strongest cointegration relationship.
w = results.evec[:, 0]
print('Best eigenvector is: {}.'.format(str(w)))

# (Net) market value of the portfolio. This is the synthetic asset we are
# going to trade: a new mean-reverting series composed of the assets in the
# proportions given by the eigenvector.
yport = (w * data).sum(axis=1)
#print('yport')
#print(yport.tail(10))
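# A possible continuation (a sketch, not part of the original script): turn
# yport into a trading signal with a rolling z-score, mirroring the linear
# mean-reversion sizing used in the AUD/CAD example above. The 20-day
# lookback is an arbitrary illustrative choice.
lookback = 20
zscore_yport = (yport - yport.rolling(lookback).mean()) / yport.rolling(lookback).std()
numunits = -zscore_yport      # short the portfolio when rich, long when cheap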
stks_list = list(stks.columns)
isCoint = pd.DataFrame(np.zeros([1, len(stks_list)], dtype=bool),
                       columns=stks_list, index=['isCoint'])

# set the confidence level (x is the column index into the critical-value table)
confidence = 0.90
if confidence == 0.95:
    x = 1
elif confidence == 0.99:
    x = 2
else:
    x = 0

for col in isCoint:
    # join the SPY to each of the index member stocks in an Nx2 dataframe
    # and clean for missing values
    y2 = etf_train.join(stks_train[col]).dropna()
    # run the Johansen test for each of the N stocks vs SPY
    if len(y2) > 250:
        results = coint_johansen(y2, 0, 1, print_on_console=False)
        if results.lr1[0] > results.cvt[0, x]:
            isCoint[col] = True

print('Johansen Test results for each stock vs SPY')
print('--------------------------------------------------')
print('Universe is {} stocks in the index'.format(len(stks_list)))
print('Johansen Test over {} data points'.format(len(stks_train)))
print('There are {} stocks that cointegrate with {:.2%} confidence'.format(
    np.count_nonzero(isCoint), confidence))
print('--------------------------------------------------')
print()
print()

# capital allocation
yN = repmat(isCoint, len(stks_train), 1) * stks_train
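# A hedged sketch of one way the capital allocation could continue (this is
# an assumption about the next step, not code from the original script):
# keep only the stocks flagged as cointegrating, allocate equal capital to
# each, and form an aggregate log-price series for that long basket so it
# can later be paired against log(SPY).
coint_cols = [col for col in stks_list if bool(isCoint[col].iloc[0])]
coint_stks = stks_train[coint_cols]
log_mkt_val_long = np.log(coint_stks).sum(axis=1)   # aggregate log price of the basket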