def setup_class(cls):
    """Run the Johansen test on ``dta`` and attach reference values to the class.

    ``coint_johansen`` is called with ``2`` as its second argument and a lag
    count of 5. ``res1_m`` and ``res2_m`` are 32-element reference arrays;
    presumably the first 8 entries are test statistics and the remaining 24
    are critical-value slots (all zero here, see the note below) -- confirm
    against the test methods that consume them.
    """
    n_lags = 5
    cls.res = coint_johansen(dta, 2, n_lags)
    cls.nobs_r = 173 - 1 - n_lags

    # Note: critical values not available if trend>1
    cls.res1_m = np.array([
        270.1887263915158, 171.6870096307863, 107.8613367358704,
        70.82424032233558, 44.62551818267534, 25.74352073857572,
        14.17882426926978, 4.288656185006764,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])

    cls.res2_m = np.array([
        98.50171676072955, 63.82567289491584, 37.03709641353485,
        26.19872213966024, 18.88199744409963, 11.56469646930594,
        9.890168084263012, 4.288656185006764,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])
def cointegrated_series(Ar, statTest = True):
    """Backtest a z-score strategy on the first Johansen eigenvector of ``Ar``.

    The function runs ``coint_johansen(Ar, 0, 2)``, passes the first
    eigenvector to ``halfLife_coint`` to obtain a lookback, the portfolio
    market value and the weights, sizes positions by the negative z-score of
    the market value, and accumulates the resulting returns once the
    lookback period has elapsed.

    Parameters
    ----------
    Ar : 2-D array
        Price matrix laid out as
            [[TS(1)E(1), TS(2)E(1), ...TS(n)E(1)]
             [TS(1)E(2), TS(2)E(2), ...TS(n)E(2)]
             ...
             [TS(1)E(n), TS(2)E(n), ...TS(n)E(n)]]
    statTest : bool
        When equal to ``True`` the lookback is printed and the cumulative
        P&L is plotted.

    Returns
    -------
    None
    """
    johansenTest = coint_johansen(Ar, 0, 2)
    lookback, marketVal, evec = halfLife_coint(Ar, johansenTest.evec[:, 0])

    # Rolling mean / standard deviation of the portfolio value.
    marketVal = pd.DataFrame(marketVal)
    rolling_mean = pd.DataFrame(
        np.transpose(movingAve(marketVal.values, lookback)))
    rolling_sd = pd.DataFrame(
        np.transpose(movingStd(marketVal.values, lookback)))
    numUnits = -(marketVal - rolling_mean) / rolling_sd

    # Broadcast units and weights to the shape of the price matrix.
    units_grid = repmat(numUnits, 1, len(np.transpose(Ar)))
    weighted_prices = np.multiply(repmat(evec, len(Ar), 1), Ar)
    positions = np.multiply(units_grid, weighted_prices)

    held = positions[:-1]
    price_moves = np.diff(Ar, axis = 0)
    pnl = np.sum(np.divide(np.multiply(held, price_moves), Ar[:-1]), 1)
    returns = np.divide(pnl, np.roll(np.sum(abs(positions[:-1]), 1), -1))

    warmup = int(lookback)
    pnlCumSum = [0] * len(pnl)
    for step, step_return in enumerate(returns):
        if step < warmup:
            # Nothing is accumulated before a full lookback window exists.
            pnlCumSum[step] = 0
            continue
        pnlCumSum[step] += pnlCumSum[step - 1] + step_return

    # NOTE(review): the plot calls are grouped under the flag together with
    # the print -- confirm against the original (un-collapsed) layout.
    if statTest == True:
        print('Lookback:\t', lookback)
        plt.plot(pnlCumSum)
        plt.show()
def setup_class(cls):
    """Run the Johansen test on ``dta`` and attach reference values to the class.

    ``coint_johansen`` is called with ``0`` as its second argument and a lag
    count of 9. Each reference array has 32 entries; per the inline note
    ``res2_m`` is ``[res.lr2 res.cvm]`` flattened, and ``res1_m`` presumably
    follows the same layout for the trace statistic -- confirm against the
    test methods that consume them.
    """
    n_lags = 9
    cls.res = coint_johansen(dta, 0, n_lags)
    cls.nobs_r = 173 - 1 - n_lags

    #fprintf(1, '%18.16g, ', r1)
    cls.res1_m = np.array([
        307.6888935095814, 205.3839229398245, 129.1330243009336,
        83.3101865760208, 52.51955460357912, 30.20027050520502,
        13.84158157562689, 0.4117390188204866,
        153.6341, 120.3673, 91.109, 65.8202,
        44.4929, 27.0669, 13.4294, 2.7055,
        159.529, 125.6185, 95.7542, 69.8189,
        47.8545, 29.7961, 15.4943, 3.8415,
        171.0905, 135.9825, 104.9637, 77.8202,
        54.6815, 35.4628, 19.9349, 6.6349])

    #r2 = [res.lr2 res.cvm]
    cls.res2_m = np.array([
        102.3049705697569, 76.25089863889085, 45.82283772491284,
        30.7906319724417, 22.31928409837409, 16.35868892957814,
        13.4298425568064, 0.4117390188204866,
        49.2855, 43.2947, 37.2786, 31.2379,
        25.1236, 18.8928, 12.2971, 2.7055,
        52.3622, 46.2299, 40.0763, 33.8777,
        27.5858, 21.1314, 14.2639, 3.8415,
        58.6634, 52.3069, 45.8662, 39.3693,
        32.7172, 25.865, 18.52, 6.6349])
def setup_class(cls):
    """Run the Johansen test on ``dta`` and attach reference values to the class.

    ``coint_johansen`` is called with ``1`` as its second argument and a lag
    count of 2. Besides the two 32-element reference arrays this fixture
    stores reference eigenvectors (``evec_m``, reshaped column-major to the
    shape of ``cls.res.evec``) and reference eigenvalues (``eig_m``).
    """
    n_lags = 2
    cls.res = coint_johansen(dta, 1, n_lags)
    cls.nobs_r = 173 - 1 - n_lags

    cls.res1_m = np.array([
        241.985452556075, 166.4781461662553, 110.3298006342814,
        70.79801574443575, 44.90887371527634, 27.22385073668511,
        11.74205493173769, 3.295435325623445,
        169.0618, 133.7852, 102.4674, 75.1027,
        51.6492, 32.0645, 16.1619, 2.7055,
        175.1584, 139.278, 107.3429, 79.34220000000001,
        55.2459, 35.0116, 18.3985, 3.8415,
        187.1891, 150.0778, 116.9829, 87.7748,
        62.5202, 41.0815, 23.1485, 6.6349])

    cls.res2_m = np.array([
        75.50730638981975, 56.14834553197396, 39.5317848898456,
        25.8891420291594, 17.68502297859124, 15.48179580494741,
        8.446619606114249, 3.295435325623445,
        52.5858, 46.5583, 40.5244, 34.4202,
        28.2398, 21.8731, 15.0006, 2.7055,
        55.7302, 49.5875, 43.4183, 37.1646,
        30.8151, 24.2522, 17.1481, 3.8415,
        62.1741, 55.8171, 49.4095, 42.8612,
        36.193, 29.2631, 21.7465, 6.6349,
    ])

    # 64 reference eigenvector entries, stored flat in column-major order.
    evec_flat = np.array([
        0.01102517075074406, -0.2185481584930077, 0.04565819524210763,
        -0.06556394587400775, 0.04711496306104131, -0.1500111976629196,
        0.03775327003706507, 0.03479475877437702,
        0.007517888890275335, -0.2014629352546497, 0.01526001455616041,
        0.0707900418057458, -0.002388919695513273, 0.04486516694838273,
        -0.02936314422571188, 0.009900554050392113,
        0.02846074144367176, 0.02021385478834498, -0.04276914888645468,
        0.1738024290422287, 0.07821155002012749, -0.1066523077111768,
        -0.3011042488399306, 0.04965189679477353,
        0.07141291326159237, -0.01406702689857725, -0.07842109866080313,
        -0.04773566072362181, -0.04768640728128824, -0.04428737926285261,
        0.4143225656833862, 0.04512787132114879,
        -0.06817130121837202, 0.2246249779872569, -0.009356548567565763,
        0.006685350535849125, -0.02040894506833539, 0.008131690308487425,
        -0.2503209797396666, 0.01560186979508953,
        0.03327070126502506, -0.263036624535624, -0.04669882107497259,
        0.0146457545413255, 0.01408691619062709, 0.1004753600191269,
        -0.02239205763487946, -0.02169291468272568,
        0.08782313160608619, -0.07696508791577318, 0.008925177304198475,
        -0.06230900392092828, -0.01548907461158638, 0.04574831652028973,
        -0.2972228156126774, 0.003469819004961912,
        -0.001868995544352928, 0.05993345996347871, 0.01213394328069316,
        0.02096614212178651, -0.08624395993789938, 0.02108183181049973,
        -0.08470307289295617, -5.135072530480897e-005])
    cls.evec_m = evec_flat.reshape(cls.res.evec.shape, order='F')

    cls.eig_m = np.array([
        0.3586376068088151, 0.2812806889719111, 0.2074818815675726,
        0.141259991767926, 0.09880133062878599, 0.08704563854307619,
        0.048471840356709, 0.01919823444066367])
def generate_positions(self, long_only, beg_date, end_date=None, max_holding_period=20):
    """Build Johansen-cointegration weights for the securities of this object.

    The price history of every security in ``self._securities`` is fetched,
    the histories are inner-joined on their dates, and a Johansen test
    (``jn.coint_johansen(price_matrix, 0, 1)``) is run on the aligned prices.
    When the trace test finds cointegration, the eigenvector selected by the
    maximum-eigenvalue statistic is used as weights, expressed relative to
    the first security, and the half life of the weighted price series is
    reported.

    Args:
        long_only: Not used by this method.
        beg_date: Start date passed to each security's ``get_dates`` and
            ``get_prices``.
        end_date: End date passed to the same calls. ``None`` (the default)
            means today's date, evaluated when the method is called rather
            than once at import time.
        max_holding_period: Not used by this method.

    Returns:
        A list ``[names, weights, extra_output]``: the column names of the
        aligned price table, one weight per name (all zeros when no
        cointegrating combination is detected), and a list of human-readable
        report lines.

    Raises:
        Exception: if ``self._securities`` is not a list or is empty.
    """
    if end_date is None:
        end_date = datetime.date.today()

    if not isinstance(self._securities, list) or not self._securities:
        raise Exception("Error: for cointegration strategy you have to choose more than one security.")
    securities_list = self._securities

    # Collect [dates, prices] for every security.
    prices = []
    for i, security in enumerate(securities_list):
        print("I:{}".format(i))
        prices.append([security.get_dates(beg_date, end_date),
                       security.get_prices(beg_date, end_date)])
    print(prices[0])

    # Start from the security with the shortest history (first one on ties)
    # and inner-join the others onto it by date.
    shortest_dates_index = min(range(len(prices)),
                               key=lambda idx: len(prices[idx][0]))
    df = pd.DataFrame({
        "dates": prices[shortest_dates_index][0],
        securities_list[shortest_dates_index].get_name(): prices[shortest_dates_index][1],
    })
    print("::::::::::::::::::::::::::::::")
    print(df)
    for i, security in enumerate(securities_list):
        print("current i is {}".format(i))
        print("Name is {}".format(security.get_name()))
        if i == shortest_dates_index:
            continue
        df_new = pd.DataFrame({"dates": prices[i][0],
                               security.get_name(): prices[i][1]})
        df = df.merge(df_new, on='dates', how="inner")
    # df now holds only the dates shared by all securities, plus their prices.
    print("..................................................")
    print(df)

    # .iloc / .values replace irow / as_matrix, which current pandas no
    # longer provides.
    extra_output = [
        "The beginning of the analyzed period is {}".format(df['dates'].iloc[0]),
        "The end of the analyzed period is {}".format(df['dates'].iloc[-1]),
    ]
    df = df.drop("dates", axis=1)
    price_matrix = df.values
    print(price_matrix.shape)
    print("Price matrix is this:")
    print(price_matrix)

    res = jn.coint_johansen(price_matrix, 0, 1)
    # Fields of the Johansen result (m = number of variables):
    #   res.eig  - eigenvalues (m x 1)
    #   res.evec - eigenvectors (m x m); the first r columns are normalized
    #              cointegrating vectors
    #   res.lr1  - likelihood-ratio trace statistic for r = 0 .. m-1, where r
    #              is the number of cointegrating combinations. If r = m all
    #              variables are stationary by themselves and cointegration
    #              is not needed.
    #   res.lr2  - maximum-eigenvalue statistic for r = 0 .. m-1
    #   res.cvt  - critical values for the trace statistic, (m x 3) with
    #              columns [90% 95% 99%], one row per test
    #   res.cvm  - critical values for the max-eigenvalue statistic, (m x 3)
    #   res.ind  - index of cointegrating variables ordered by eigenvalue
    #              size, large to small
    print("trace statistic: \n{}".format(res.lr1))
    print("trace statistic crit values: \n{}".format(res.cvt))
    print("Eigenvalue statistic: \n{}".format(res.lr2))
    print("Eigenvalue statistic crit values: \n{}".format(res.cvm))
    print("Eigenvalues: \n{}".format(res.eig))
    print("Eigen vectors: \n{}".format(res.evec))

    names = df.columns.values
    extra_output.append("\n")
    extra_output.append("Trace Value statistics:\n {}".format(res.lr1))
    extra_output.append("Trace Critical Values:\n {}".format(res.cvt))
    extra_output.append("\n")
    extra_output.append("Eigen Value statistics:\n {}".format(res.lr2))
    extra_output.append("Versus Critical Values:\n {}".format(res.cvm))
    extra_output.append("\n")

    # Count cointegrating combinations: the first r whose trace statistic
    # falls below the critical value in column 1 of res.cvt.
    # NOTE(review): when every statistic exceeds its critical value the loop
    # never breaks and the count stays 0 -- confirm that is intended.
    num_combinations = 0
    for i in range(len(res.lr1)):
        print(res.lr1[i])
        print(res.cvt[i, 1])
        print("_____")
        if res.lr1[i] < res.cvt[i, 1]:
            num_combinations = i
            break

    if num_combinations == 0:
        return [names, [0] * len(names), extra_output]

    # Pick the eigenvector column whose max-eigenvalue statistic is both
    # significant and the largest; column 0 is the fallback.
    best_index = 0
    for i in range(1, len(res.lr2)):
        if res.lr2[i] > res.cvm[i, 1] and res.lr2[i] > res.lr2[best_index]:
            best_index = i
    weights = [res.evec[j, best_index] for j in range(len(names))]

    # Express weights relative to the first security.
    divisor = weights[0]
    weights = [weight / divisor for weight in weights]

    # Weighted (aggregate) price of the portfolio per row, indexed 0..n-1,
    # for the half-life analysis.
    price_series = pd.Series(price_matrix.dot(weights))
    half_life = BasicMR.get_half_life(price_series)
    extra_output.append("Half life period: {}".format(half_life))
    return [names, weights, extra_output]
def run_coint_test(self):
    """Return ``coint_johansen(self.merged_df, 0, 1)``."""
    return coint_johansen(self.merged_df, 0, 1)
# Plot the spread, run CADF / Hurst / ADF on it, then build a three-asset
# Johansen portfolio and its rolling mean.
df["coint"].plot()
# plt.show()

import pyconometrics
# Single-argument print(...) produces the same output on Python 2 and 3.
print(pyconometrics.cadf(np.matrix(df["ewa"]).H, np.matrix(df["ewc"]).H, 0, 1))

import statsmodels.tsa.stattools as st
import hurst
print("hurst %s" % (hurst.hurst(df["coint"]),))
print(st.adfuller(df["coint"], maxlag=1))

from johansen import coint_johansen, print_johan_stats
res = coint_johansen(df[["ewa", "ewc"]], 0, 1)
print_johan_stats(res)

cols = ["ewc", "ewa", "ige"]
res3 = coint_johansen(df[cols], 0, 1)
print_johan_stats(res3)

# Portfolio value using the first eigenvector as weights.
df["yport"] = np.dot(df[cols], res3.evec[:, 0])
# NOTE(review): plt.hold was removed in matplotlib 3.0 -- kept for the
# library version this script targets.
plt.hold(False)
df["yport"].plot()
# plt.show()

import halflife
hf = halflife.halflife(df, "yport")[1]
# NOTE(review): pd.rolling_mean was removed from newer pandas
# (Series.rolling(...).mean() is the replacement) -- kept as-is.
data_mean = pd.rolling_mean(df["yport"], window=hf)
# Keep rows with tday > 20090101 and hhmm == 1659, indexed by (tday, hhmm).
usdcad2 = usdcad[(usdcad.tday > 20090101) & (usdcad.hhmm == 1659)]
audusd2 = audusd[(audusd.tday > 20090101) & (audusd.hhmm == 1659)]
usdcad2 = usdcad2.set_index(['tday', 'hhmm'])
audusd2 = audusd2.set_index(['tday', 'hhmm'])

# Invert USD/CAD closes so both columns are quoted against USD.
cad = 1 / usdcad2.cl
aud = audusd2.cl
y = pd.concat([aud, cad], axis=1)

from johansen import coint_johansen

trainlen = 250
lookback = 20
numUnits = np.ones(len(y)) * np.nan
hedgeRatio = np.ones(y.shape) * np.nan

for t in range(trainlen, len(y)):
    # Hedge ratio: first Johansen eigenvector over the trailing window.
    df = y[t - trainlen:t]
    hedgeRatio[t] = coint_johansen(df, 0, 1).evec[:, 0]

    # Portfolio value over the lookback window with the current ratio.
    tmp1 = np.array(y[t - lookback:t])
    tmp2 = np.kron(np.ones((lookback, 1)), hedgeRatio[t])
    yport = np.sum(tmp1 * tmp2, axis=1)

    ma = np.mean(yport)
    mstd = np.std(yport)
    zScore = (yport[-1] - ma) / mstd
    # Units held are the negative z-score of the latest portfolio value.
    numUnits[t] = -zScore

# copy positions in multiple columns. positions are market values of
# AUDUSD and CADUSD in portfolio expressed in US.
tmp1 = np.kron(np.ones((y.shape[1], 1)), numUnits)
positions = tmp1.T * hedgeRatio * y

pnl = positions.shift(1) * (y - y.shift(1)) / y.shift(1)
pnl = pnl.sum(axis=1)
ret = pnl / np.sum(np.abs(positions.shift(1)), axis=1)
# import hurst
# print 'hurst', hurst.hurst(df['coint'])
# print st.adfuller(df['coint'],maxlag=1)
#===============================================================================
from johansen import coint_johansen, print_johan_stats
#===============================================================================
# res = coint_johansen(df[['ewa','ewc']], 0, 1)
# print 'Now it is time to see the johan stats'
# print_johan_stats(res)
#===============================================================================

cols = ['ewc', 'ewa', 'ige']
# Single-argument print(...) produces the same output on Python 2 and 3.
print(type(df[cols]))
print(df[cols])

res3 = coint_johansen(df[cols], 0, 1)
print_johan_stats(res3)

# Portfolio value using the first eigenvector as weights.
df['yport'] = np.dot(df[cols], res3.evec[:, 0])
#plt.hold(False)
df['yport'].plot()
plt.show()

#########
import halflife
hf = halflife.halflife(df, 'yport')[1]
print('hhhhhhhhhhhhhhhh')
print(hf)
# Load the price table, split it into a 2007 training window and a later
# test window, and flag every column that is cointegrated with SPY.
dfspy3 = pd.read_csv(z.open('SPY3.csv'), sep=',')
dfspy3 = dfspy3.set_index('Date')
train = dfspy3[(dfspy3.index >= 20070101) & (dfspy3.index <= 20071231)]
testspy3 = dfspy3[(dfspy3.index > 20071231)]

resdf = pd.DataFrame(index=dfspy3.columns)
resdf['isCoint'] = np.nan

from johansen import coint_johansen, print_johan_stats

for s in dfspy3.columns:
    if s == 'SPY':
        continue
    # We use the Johansen call so that we do not have to choose which
    # series is y and which is x.
    data = train[[s, 'SPY']].dropna()
    if len(data) < 250:
        continue
    res = coint_johansen(data, 0, 1)
    # Trace statistic for r=0 against the first critical-value column.
    if res.lr1[0] > res.cvt[0][0]:
        resdf.loc[s, 'isCoint'] = True

# Single-argument print(...) produces the same output on Python 2 and 3.
print(resdf.isCoint.sum())

coint_cols = list(resdf[resdf.isCoint == True].index)
yN = train[coint_cols]
logMktVal_long = np.log(yN).sum(axis=1)
ytest = pd.concat([logMktVal_long, np.log(train.SPY)], axis=1)
res = coint_johansen(ytest, 0, 1)
print_johan_stats(res)

# One weight column per cointegrated stock plus one for SPY. The column
# count comes from len(coint_cols), which is always a plain int, rather
# than from a pandas sum over a NaN/True column.
tmp1 = np.ones((len(testspy3), len(coint_cols))) * res.evec[0, 0]
tmp2 = np.ones((len(testspy3), 1)) * res.evec[1, 0]
weights = np.hstack((tmp1, tmp2))
yNplus = testspy3[coint_cols + ['SPY']]
import johansen
import quantstats as qs
import sp500

# Download one CSV per S&P 500 ticker into TS/ and load those that exist.
if not os.path.exists('TS'):
    os.makedirs('TS')

ts = []
for stock in sp500.sp500:
    filename = 'TS/' + stock + '.csv'
    qs.download(stock, filename)
    if os.path.isfile(filename):
        ts.append(qs.load(filename))
# shutil.rmtree("TS", True)

# Length of the shortest series. sys.maxsize exists on Python 2.6+ and 3,
# unlike sys.maxint, which is Python 2 only.
minimum = sys.maxsize
for item in ts:
    if len(item) < minimum:
        minimum = len(item)

# Trim every series to its last `minimum` observations.
for index in enumerate(ts):
    ts[index[0]] = ts[index[0]][len(ts[index[0]]) - minimum:]

jresults = johansen.coint_johansen(ts, 0, 1)
for item in jresults.keys():
    # Same text as the Python 2 statement `print '%s: ' % item, value`
    # (the comma inserted a second space), valid on Python 2 and 3.
    print('%s:  %s' % (item, jresults[item]))

# print 'Creation date: %s UTC' % str(time.strftime('%Y-%m-%d'))
# print 'Pair Member 1,Pair Member 2,p-value'
# Plot both price series against the date.
# NOTE(review): x, y1 and y2 are taken before the pd.to_numeric conversion
# below -- confirm the columns are already numeric at this point.
x = eurusd_gbpusd['Date']
y1 = eurusd_gbpusd['Price_x']
y2 = eurusd_gbpusd['Price_y']

# NOTE(review): ax2 is created but both series are drawn on ax1 -- confirm
# whether the second plot was meant for ax2.
ax2 = ax1.twinx()
ax1.plot(x, y1, 'g-')
ax1.plot(x, y2, 'b-')

eurusd_gbpusd.Price_x = pd.to_numeric(eurusd_gbpusd.Price_x)
eurusd_gbpusd.Price_y = pd.to_numeric(eurusd_gbpusd.Price_y)

# Stationarity / cointegration tests; the adfuller and Johansen results
# are not stored.
ts.adfuller(eurusd_gbpusd['Price_y'])
result = ts.coint(eurusd_gbpusd['Price_x'], eurusd_gbpusd['Price_y'])
eurusd_gbpusd_test = eurusd_gbpusd[['Price_x', 'Price_y']]
coint_johansen(eurusd_gbpusd_test, -1, 1)

### Analyzing spreads and ratios ###
eurusd_gbpusd['Price_diff'] = eurusd_gbpusd.Price_x - eurusd_gbpusd.Price_y
eurusd_gbpusd['Log_ret'] = np.log(eurusd_gbpusd.Price_x / eurusd_gbpusd.Price_y)
eurusd_gbpusd['ret'] = eurusd_gbpusd.Price_x / eurusd_gbpusd.Price_y
eurusd_gbpusd.plot('Date', 'ret')
plt.show()

########################################
eurusd_gbpusd_norm = MR_one.lag_function(eurusd_gbpusd, 60)
matr, Regimes = MR_one.mixture_gauging(eurusd_gbpusd_norm)
df = MR_one.mat_cal(eurusd_gbpusd, eurusd_gbpusd_norm, Regimes)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader.data as web
from johansen import coint_johansen

# johansen test has a null hypothesis that time series are not cointegrated.

start = '2006-4-26'
end = '2012-4-9'

symbol1 = 'EWA'
symbol2 = 'EWC'
symbol3 = 'IGE'

# Fetch the first two symbols and plot their adjusted closes.
ps1 = web.DataReader(symbol1, 'yahoo', start, end)
ps2 = web.DataReader(symbol2, 'yahoo', start, end)

plt.figure()
plt.plot(ps1.index, ps1['Adj Close'].values, 'r')
plt.plot(ps2.index, ps2['Adj Close'].values, 'b')
plt.legend(['EWA', 'EWC'])
plt.show()

ps3 = web.DataReader(symbol3, 'yahoo', start, end)

# One column of adjusted closes per symbol, aligned on the first
# symbol's index.
df = pd.DataFrame(
    {
        symbol1: ps1['Adj Close'],
        symbol2: ps2['Adj Close'],
        symbol3: ps3['Adj Close'],
    },
    index=ps1.index,
)

print(coint_johansen(df, 0, 1))
def setup_class(cls):
    """Run the Johansen test on ``dta`` and attach reference values to the class.

    ``coint_johansen`` is called with ``-1`` as its second argument and a
    lag count of 8. ``res1_m`` and ``res2_m`` are 32-element reference
    arrays; presumably 8 test statistics followed by 3 x 8 critical values
    -- confirm against the test methods that consume them.
    """
    n_lags = 8
    cls.res = coint_johansen(dta, -1, n_lags)
    cls.nobs_r = 173 - 1 - n_lags

    cls.res1_m = np.array([
        260.6786029744658, 162.7966072512681, 105.8253545950566,
        71.16133060790817, 47.68490211260372, 28.11843682526138,
        13.03968537077271, 2.25398078597622,
        137.9954, 106.7351, 79.5329, 56.2839,
        37.0339, 21.7781, 10.4741, 2.9762,
        143.6691, 111.7797, 83.9383, 60.0627,
        40.1749, 24.2761, 12.3212, 4.1296,
        154.7977, 121.7375, 92.7136, 67.63670000000001,
        46.5716, 29.5147, 16.364, 6.9406])

    cls.res2_m = np.array([
        97.88199572319769, 56.97125265621156, 34.66402398714837,
        23.47642849530445, 19.56646528734234, 15.07875145448866,
        10.7857045847965, 2.25398078597622,
        45.893, 39.9085, 33.9271, 27.916,
        21.837, 15.7175, 9.4748, 2.9762,
        48.8795, 42.7679, 36.6301, 30.4428,
        24.1592, 17.7961, 11.2246, 4.1296,
        55.0335, 48.6606, 42.2333, 35.7359,
        29.0609, 22.2519, 15.0923, 6.9406])
account_id = '5640873'
oanda = oandapy.API(environment="practice", access_token=token)

trainlen = 96
lookback = 8
filename = 'data.csv'

# Historical prices; USD/CAD is inverted so both legs are quoted in USD.
initial_prices_CAD = 1 / getHistPrices('USD_CAD', trainlen)
initial_prices_AUD = getHistPrices('AUD_USD', trainlen)
# zip() is a one-shot iterator on Python 3; materialise it so the
# DataFrame constructor receives a list of row tuples on every version.
initial_prices = pd.DataFrame(
    list(zip(initial_prices_CAD, initial_prices_AUD)),
    columns=['CAD_USD', 'AUD_USD'])
initial_prices.to_csv(filename, index=False)

# Portfolio value from the first Johansen eigenvector.
res = coint_johansen(initial_prices[['CAD_USD', 'AUD_USD']], 0, 1)
initial_prices['yport'] = (initial_prices['CAD_USD'] * res.evec[0][0]
                           + initial_prices['AUD_USD'] * res.evec[1][0])

# z-score of the latest portfolio value against the lookback window.
ma = initial_prices['yport'][-lookback:].mean()
mstd = initial_prices['yport'][-lookback:].std()
# .iloc[-1] yields the last value as a scalar; calling float() on a
# one-element Series is deprecated in recent pandas.
zscore = (float(initial_prices['yport'].iloc[-1]) - ma) / mstd
#print_johan_stats(res)

# Current quote; mid price of the inverted pair.
response = oanda.get_prices(instruments="USD_CAD")
USD_CAD_ASK = response.get("prices")[0].get("ask")
USD_CAD_BID = response.get("prices")[0].get("bid")
CAD_USD_MID = ((1 / USD_CAD_ASK) + (1 / USD_CAD_BID)) / 2
# Keep rows with tday > 20090101 and hhmm == 1659, indexed by (tday, hhmm).
usdcad2 = usdcad[(usdcad.tday > 20090101) & (usdcad.hhmm == 1659)]
audusd2 = audusd[(audusd.tday > 20090101) & (audusd.hhmm == 1659)]
usdcad2 = usdcad2.set_index(['tday', 'hhmm'])
audusd2 = audusd2.set_index(['tday', 'hhmm'])

# Invert USD/CAD closes so both columns are quoted against USD.
cad = 1 / usdcad2.cl
aud = audusd2.cl
y = pd.concat([aud, cad], axis=1)

from johansen import coint_johansen

trainlen = 250
lookback = 20
numUnits = np.ones(len(y)) * np.nan
hedgeRatio = np.ones(y.shape) * np.nan

for t in range(trainlen, len(y)):
    # Hedge ratio: first Johansen eigenvector over the trailing window.
    df = y[t - trainlen:t]
    hedgeRatio[t] = coint_johansen(df, 0, 1).evec[:, 0]

    # Portfolio value over the lookback window with the current ratio.
    tmp1 = np.array(y[t - lookback:t])
    tmp2 = np.kron(np.ones((lookback, 1)), hedgeRatio[t])
    yport = np.sum(tmp1 * tmp2, axis=1)

    ma = np.mean(yport)
    mstd = np.std(yport)
    zScore = (yport[-1] - ma) / mstd
    # Units held are the negative z-score of the latest portfolio value.
    numUnits[t] = -zScore

# copy positions in multiple columns. positions are market values of AUDUSD
# and CADUSD in portfolio expressed in US.
tmp1 = np.kron(np.ones((y.shape[1], 1)), numUnits)
positions = tmp1.T * hedgeRatio * y

pnl = positions.shift(1) * (y - y.shift(1)) / y.shift(1)
pnl = pnl.sum(axis=1)
# Build the hedged spread, run CADF / Hurst / ADF on it, then build a
# three-asset Johansen portfolio and its rolling statistics.
df['coint'] = df['ewc'] - hedgeRatio * df['ewa']
# NOTE(review): plt.hold was removed in matplotlib 3.0 -- kept for the
# library version this script targets.
plt.hold(False)
df['coint'].plot()
#plt.show()

import pyconometrics
# Single-argument print(...) produces the same output on Python 2 and 3.
print(pyconometrics.cadf(np.matrix(df['ewa']).H, np.matrix(df['ewc']).H, 0, 1))

import statsmodels.tsa.stattools as st
import hurst
print('hurst %s' % (hurst.hurst(df['coint']),))
print(st.adfuller(df['coint'], maxlag=1))

from johansen import coint_johansen, print_johan_stats
res = coint_johansen(df[['ewa', 'ewc']], 0, 1)
print_johan_stats(res)

cols = ['ewc', 'ewa', 'ige']
res3 = coint_johansen(df[cols], 0, 1)
print_johan_stats(res3)

# Portfolio value using the first eigenvector as weights.
df['yport'] = np.dot(df[cols], res3.evec[:, 0])
plt.hold(False)
df['yport'].plot()
#plt.show()

import halflife
hf = halflife.halflife(df, 'yport')[1]
# NOTE(review): pd.rolling_mean / pd.rolling_std were removed from newer
# pandas (Series.rolling(...) is the replacement) -- kept as-is.
data_mean = pd.rolling_mean(df['yport'], window=hf)
data_std = pd.rolling_std(df['yport'], window=hf)
# to save a matlab matrix
# save('dosya','A')
import pandas as pd
import sys
from scipy import io as spio
import numpy as np
import os

# expanduser('~') also works where the HOME variable is not set.
base = '%s/Dropbox/Public/data' % os.path.expanduser('~')
a = spio.loadmat(base + '/inputData_USDCAD_20120426.mat')

# One DataFrame column per MATLAB variable.
cols = ['cl', 'lo', 'hi', 'hhmm', 'op']
df = pd.concat([pd.DataFrame(a[x]) for x in cols], axis=1)
df.columns = cols
# Single-argument print(...) produces the same output on Python 2 and 3.
print(df)

sys.path.append('/home/burak/Documents/classnotes/tser/tser_coint')
from johansen import coint_johansen
# NOTE(review): df only has the columns listed in `cols`; selecting
# 'ewa'/'ewc' raises KeyError. The intended columns are not visible
# here -- confirm and correct.
res = coint_johansen(df[['ewa', 'ewc']], 0, 1)
# to save a matlab matrix
# save('dosya','A')
import pandas as pd
import sys
from scipy import io as spio
import numpy as np
import os

# expanduser('~') also works where the HOME variable is not set.
base = '%s/Dropbox/Public/data' % os.path.expanduser('~')
a = spio.loadmat(base + '/inputData_USDCAD_20120426.mat')

# One DataFrame column per MATLAB variable.
cols = ['cl', 'lo', 'hi', 'hhmm', 'op']
df = pd.concat([pd.DataFrame(a[x]) for x in cols], axis=1)
df.columns = cols
# Single-argument print(...) produces the same output on Python 2 and 3.
print(df)

sys.path.append('/home/burak/Documents/classnotes/tser/tser_coint')
from johansen import coint_johansen
# NOTE(review): df only has the columns listed in `cols`; selecting
# 'ewa'/'ewc' raises KeyError. The intended columns are not visible
# here -- confirm and correct.
res = coint_johansen(df[['ewa', 'ewc']], 0, 1)
# Build the hedged spread, run CADF / Hurst / ADF on it, then build a
# three-asset Johansen portfolio and its rolling statistics.
df['coint'] = df['ewc'] - hedgeRatio * df['ewa']
# NOTE(review): plt.hold was removed in matplotlib 3.0 -- kept for the
# library version this script targets.
plt.hold(False)
df['coint'].plot()
#plt.show()

import pyconometrics
# Single-argument print(...) produces the same output on Python 2 and 3.
print(pyconometrics.cadf(np.matrix(df['ewa']).H, np.matrix(df['ewc']).H, 0, 1))

import statsmodels.tsa.stattools as st
import hurst
print('hurst %s' % (hurst.hurst(df['coint']),))
print(st.adfuller(df['coint'], maxlag=1))

from johansen import coint_johansen, print_johan_stats
res = coint_johansen(df[['ewa', 'ewc']], 0, 1)
print_johan_stats(res)

cols = ['ewc', 'ewa', 'ige']
res3 = coint_johansen(df[cols], 0, 1)
print_johan_stats(res3)

# Portfolio value using the first eigenvector as weights.
df['yport'] = np.dot(df[cols], res3.evec[:, 0])
plt.hold(False)
df['yport'].plot()
#plt.show()

import halflife
hf = halflife.halflife(df, 'yport')[1]
# NOTE(review): pd.rolling_mean / pd.rolling_std were removed from newer
# pandas (Series.rolling(...) is the replacement) -- kept as-is.
data_mean = pd.rolling_mean(df['yport'], window=hf)
data_std = pd.rolling_std(df['yport'], window=hf)
if __name__ == '__main__':
    # Demo: simulate six series driven by random-walk factors and run the
    # Johansen test on them.
    np.random.seed(9642567)
    nobs = 500

    # Four random walks with drift 0.2.
    fact = np.cumsum(0.2 + np.random.randn(nobs, 4), 0)

    # MA(2)-filtered noise, then add the factors so columns 0-1 share
    # factor 0, columns 2-3 share factor 1, and columns 4-5 get the last
    # two factors.
    xx = np.random.randn(nobs + 2, 6)
    xx = xx[2:] + 0.6 * xx[1:-1] + 0.25 * xx[:-2]
    xx[:, :2] += fact[:, 0][:, None]
    #xx[:,2:3] += fact[:,1][:,None]
    xx[:, 2:4] += fact[:, 1][:, None]
    xx[:, 4:] += fact[:, -2:]

    p, k = 1, 2
    result = coint_johansen(xx, p, k)

    # Single-argument print(...) produces the same output on Python 2
    # and 3.
    print(result.lr1 > result.cvt.T)
    print(result.lr2 > result.cvm.T)
    print(np.round(result.evec, 4))
    print(result.eig)

    #I guess test statistic looks good, but
    #print np.round(result.evec,4)
    #looks strange, I don't see the interpretation
    #changed the DGP: I had some I(0) not integrated series included
    # now all series are I(1) in the DGP
    # -> evec looks better now
    import matplotlib.pyplot as plt
# Load the price table, split it into a 2007 training window and a later
# test window, and flag every column that is cointegrated with SPY.
dfspy3 = pd.read_csv(z.open('SPY3.csv'), sep=',')
dfspy3 = dfspy3.set_index('Date')
train = dfspy3[(dfspy3.index >= 20070101) & (dfspy3.index <= 20071231)]
testspy3 = dfspy3[(dfspy3.index > 20071231)]

resdf = pd.DataFrame(index=dfspy3.columns)
resdf['isCoint'] = np.nan

from johansen import coint_johansen, print_johan_stats

for s in dfspy3.columns:
    if s == 'SPY':
        continue
    # We use the Johansen call so that we do not have to choose which
    # series is y and which is x.
    data = train[[s, 'SPY']].dropna()
    if len(data) < 250:
        continue
    res = coint_johansen(data, 0, 1)
    # Trace statistic for r=0 against the first critical-value column.
    if res.lr1[0] > res.cvt[0][0]:
        resdf.loc[s, 'isCoint'] = True

# Single-argument print(...) produces the same output on Python 2 and 3.
print(resdf.isCoint.sum())

coint_cols = list(resdf[resdf.isCoint == True].index)
yN = train[coint_cols]
logMktVal_long = np.log(yN).sum(axis=1)
ytest = pd.concat([logMktVal_long, np.log(train.SPY)], axis=1)
res = coint_johansen(ytest, 0, 1)
print_johan_stats(res)

# One weight column per cointegrated stock plus one for SPY. The column
# count comes from len(coint_cols), which is always a plain int, rather
# than from a pandas sum over a NaN/True column.
tmp1 = np.ones((len(testspy3), len(coint_cols))) * res.evec[0, 0]
tmp2 = np.ones((len(testspy3), 1)) * res.evec[1, 0]
weights = np.hstack((tmp1, tmp2))