Ejemplo n.º 1
0
    def setup_class(cls):
        """Run ``coint_johansen(dta, 2, 5)`` once and store reference values on the class.

        Sets ``cls.res`` (the test result), ``cls.nobs_r`` and the reference
        arrays ``cls.res1_m`` / ``cls.res2_m``.
        """
        cls.res = coint_johansen(dta, 2, 5)
        # The trailing 5 matches the third argument passed to coint_johansen above.
        # NOTE(review): 173 is presumably the number of observations in dta - confirm.
        cls.nobs_r = 173 - 1 - 5

        # Note: critical values are not available if trend > 1, which is why
        # every entry after the leading statistics is zero in both arrays.
        # NOTE(review): presumably res1_m holds the trace statistics and res2_m
        # the max-eigenvalue statistics, each followed by critical-value slots - confirm.
        cls.res1_m = np.array([270.1887263915158,  171.6870096307863,  107.8613367358704,  70.82424032233558,  44.62551818267534,  25.74352073857572,  14.17882426926978,  4.288656185006764,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0])
        cls.res2_m = np.array([98.50171676072955,  63.82567289491584,  37.03709641353485,  26.19872213966024,  18.88199744409963,  11.56469646930594,  9.890168084263012,  4.288656185006764,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0])
Ejemplo n.º 2
0
def cointegrated_series(Ar, statTest = True):   #Ar =  [[TS(1)E(1), TS(2)E(1), ...TS(n)E(1)]
                                                #       [TS(1)E(2), TS(2)E(2), ...TS(n)E(2)]
                                                #       ...
                                                #       [TS(1)E(n), TS(2)E(n), ...TS(n)E(n)]]
    """Plot the cumulative return of a z-score strategy on a Johansen portfolio.

    Runs ``coint_johansen(Ar, 0, 2)``, hands the first eigenvector column to
    ``halfLife_coint`` and uses the returned look-back, portfolio value and
    eigenvector to size positions against a moving average and moving standard
    deviation. The cumulative return is plotted with matplotlib.

    Parameters:
        Ar: 2-D price array. Per the layout sketched next to the signature,
            presumably one row per observation and one column per time
            series - confirm against the caller.
        statTest: when equal to ``True`` the look-back is printed.

    Returns:
        None. The function only prints and plots.
    """
    johansenTest = coint_johansen(Ar,0,2)
    # Only the first eigenvector column is passed on to the half-life helper.
    lookback, marketVal, evec = halfLife_coint(Ar, johansenTest.evec[:,0])

    marketVal = pd.DataFrame(marketVal)
    # Moving statistics of the portfolio value over the look-back window.
    MA = pd.DataFrame(np.transpose(movingAve(marketVal.values, lookback)))
    SD = pd.DataFrame(np.transpose(movingStd(marketVal.values, lookback)))
    # Negative z-score: positive when the portfolio value is below its moving average.
    numUnits = -(marketVal-MA)/SD
    # Replicate the unit count once per column of Ar, and the eigenvector once per row.
    AA = repmat(numUnits,1,len(np.transpose(Ar)))
    BB = np.multiply(repmat(evec,len(Ar),1),Ar)
    positions = np.multiply(AA,BB)
    # Each position (all rows but the last) times the relative change to the next row, summed across columns.
    pnl = np.sum(np.divide(np.multiply(positions[:-1],np.diff(Ar,axis = 0)), Ar[:-1]),1)
    # NOTE(review): the denominator is rolled by -1, so each pnl entry is divided
    # by the gross position of the following row (the last one wraps around to
    # the first) - confirm this shift is intended.
    returns = np.divide(pnl,np.roll(np.sum(abs(positions[:-1]),1),-1))
    
    # Running sum of returns; entries before int(lookback) are held at zero.
    pnlCumSum = [0]*len(pnl)
    for count, pnlsum in enumerate(returns):
        if count>=int(lookback):
            pnlCumSum[count]+=pnlCumSum[count-1]+pnlsum
        else:
            pnlCumSum[count]=0

    if statTest==True:
        print('Lookback:\t',lookback)
        
    plt.plot(pnlCumSum)
    plt.show()
Ejemplo n.º 3
0
 def setup_class(cls):
     """Run ``coint_johansen(dta, 0, 9)`` once and store reference values on the class.

     Sets ``cls.res`` (the test result), ``cls.nobs_r`` and the reference
     arrays ``cls.res1_m`` / ``cls.res2_m``.
     """
     cls.res = coint_johansen(dta, 0, 9)
     # The trailing 9 matches the third argument passed to coint_johansen above.
     # NOTE(review): 173 is presumably the number of observations in dta - confirm.
     cls.nobs_r = 173 - 1 - 9
     # The reference values were printed with: fprintf(1, '%18.16g, ', r1)
     # NOTE(review): by analogy with r2 below, r1 is presumably [res.lr1 res.cvt] - confirm.
     cls.res1_m = np.array([307.6888935095814,  205.3839229398245,  129.1330243009336,   83.3101865760208,  52.51955460357912,  30.20027050520502,  13.84158157562689, 0.4117390188204866,           153.6341,           120.3673,             91.109,            65.8202,            44.4929,            27.0669,            13.4294,             2.7055,            159.529,           125.6185,            95.7542,            69.8189,            47.8545,            29.7961,            15.4943,             3.8415,           171.0905,           135.9825,           104.9637,            77.8202,            54.6815,            35.4628,            19.9349,             6.6349])
     # r2 = [res.lr2 res.cvm]: the statistics followed by their critical values.
     cls.res2_m = np.array([102.3049705697569,  76.25089863889085,  45.82283772491284,   30.7906319724417,  22.31928409837409,  16.35868892957814,   13.4298425568064, 0.4117390188204866,            49.2855,            43.2947,            37.2786,            31.2379,            25.1236,            18.8928,            12.2971,             2.7055,            52.3622,            46.2299,            40.0763,            33.8777,            27.5858,            21.1314,            14.2639,             3.8415,            58.6634,            52.3069,            45.8662,            39.3693,            32.7172,             25.865,              18.52,             6.6349])
Ejemplo n.º 4
0
    def setup_class(cls):
        """Run ``coint_johansen(dta, 1, 2)`` once and store reference values on the class.

        Sets ``cls.res`` (the test result), ``cls.nobs_r`` and the reference
        arrays ``cls.res1_m``, ``cls.res2_m``, ``cls.evec_m`` and ``cls.eig_m``.
        """
        cls.res = coint_johansen(dta, 1, 2)
        # The trailing 2 matches the third argument passed to coint_johansen above.
        # NOTE(review): 173 is presumably the number of observations in dta - confirm.
        cls.nobs_r = 173 - 1 - 2

        # NOTE(review): presumably the trace statistics (res1_m) and max-eigenvalue
        # statistics (res2_m), each followed by their critical values - confirm.
        cls.res1_m = np.array([241.985452556075,  166.4781461662553,  110.3298006342814,  70.79801574443575,  44.90887371527634,  27.22385073668511,  11.74205493173769,  3.295435325623445,           169.0618,           133.7852,           102.4674,            75.1027,            51.6492,            32.0645,            16.1619,             2.7055,           175.1584,            139.278,           107.3429,  79.34220000000001,            55.2459,            35.0116,            18.3985,             3.8415,           187.1891,           150.0778,           116.9829,            87.7748,            62.5202,            41.0815,            23.1485,             6.6349])
        cls.res2_m = np.array([75.50730638981975,  56.14834553197396,   39.5317848898456,   25.8891420291594,  17.68502297859124,  15.48179580494741,  8.446619606114249,  3.295435325623445,            52.5858,            46.5583,            40.5244,            34.4202,            28.2398,            21.8731,            15.0006,             2.7055,            55.7302,            49.5875,            43.4183,            37.1646,            30.8151,            24.2522,            17.1481,             3.8415,            62.1741,            55.8171,            49.4095,            42.8612,             36.193,            29.2631,            21.7465,             6.6349,])

        # Flat list of reference eigenvector entries; reshaped below in
        # column-major (Fortran) order to the shape of the computed eigenvectors.
        evec = np.array([0.01102517075074406, -0.2185481584930077, 0.04565819524210763, -0.06556394587400775, 0.04711496306104131, -0.1500111976629196, 0.03775327003706507, 0.03479475877437702, 0.007517888890275335, -0.2014629352546497, 0.01526001455616041, 0.0707900418057458, -0.002388919695513273, 0.04486516694838273, -0.02936314422571188, 0.009900554050392113, 0.02846074144367176, 0.02021385478834498, -0.04276914888645468, 0.1738024290422287, 0.07821155002012749, -0.1066523077111768, -0.3011042488399306, 0.04965189679477353, 0.07141291326159237, -0.01406702689857725, -0.07842109866080313, -0.04773566072362181, -0.04768640728128824, -0.04428737926285261, 0.4143225656833862, 0.04512787132114879, -0.06817130121837202, 0.2246249779872569, -0.009356548567565763, 0.006685350535849125, -0.02040894506833539, 0.008131690308487425, -0.2503209797396666, 0.01560186979508953, 0.03327070126502506, -0.263036624535624, -0.04669882107497259, 0.0146457545413255, 0.01408691619062709, 0.1004753600191269, -0.02239205763487946, -0.02169291468272568, 0.08782313160608619, -0.07696508791577318, 0.008925177304198475, -0.06230900392092828, -0.01548907461158638, 0.04574831652028973, -0.2972228156126774, 0.003469819004961912, -0.001868995544352928, 0.05993345996347871, 0.01213394328069316, 0.02096614212178651, -0.08624395993789938, 0.02108183181049973, -0.08470307289295617, -5.135072530480897e-005])
        cls.evec_m = evec.reshape(cls.res.evec.shape, order='F')

        # Reference eigenvalues, listed in decreasing order.
        cls.eig_m = np.array([0.3586376068088151, 0.2812806889719111, 0.2074818815675726,  0.141259991767926, 0.09880133062878599, 0.08704563854307619,  0.048471840356709, 0.01919823444066367])
Ejemplo n.º 5
0
    def generate_positions(self, long_only, beg_date, end_date=None, max_holding_period=20):
        """Derive portfolio weights for ``self._securities`` from a Johansen test.

        The prices of every security between ``beg_date`` and ``end_date`` are
        inner-joined on their dates and ``jn.coint_johansen(price_matrix, 0, 1)``
        is run on the joined prices. If the trace test indicates at least one
        cointegrating combination, one eigenvector column is selected, scaled so
        that the first security has weight 1, and the half life of the weighted
        price series is computed with ``BasicMR.get_half_life``.

        Args:
            long_only: Accepted for interface compatibility; not used here.
            beg_date: Start of the period, forwarded to ``get_dates`` and
                ``get_prices`` of every security.
            end_date: End of the period. ``None`` (the default) means today,
                resolved when the method is called.
            max_holding_period: Accepted for interface compatibility; not used here.

        Returns:
            A list ``[names, weights, extra_output]`` where ``names`` are the
            price column names, ``weights`` the matching weights (all zero when
            no combination was detected) and ``extra_output`` a list of
            human-readable report strings.

        Raises:
            Exception: If ``self._securities`` is not a list.
        """
        if end_date is None:
            # Resolved per call: a ``datetime.date.today()`` default in the
            # signature would be evaluated once, when the method is defined.
            end_date = datetime.date.today()
        if not isinstance(self._securities, list):
            raise Exception("Error: for cointegration strategy you have to choose more than one security.")
        securities_list = self._securities

        # One [dates, prices] pair per security.
        prices = []
        for i, security in enumerate(securities_list):
            print("I:{}".format(i))
            prices.append([security.get_dates(beg_date, end_date), security.get_prices(beg_date, end_date)])
        print(prices[0]) #<<<<<<<<<<<<<<<<<<<<<<<<<<

        # Start the merge from the security with the fewest dates (first one on ties).
        shortest_dates_index = min(range(len(prices)), key=lambda idx: len(prices[idx][0]))
        df = pd.DataFrame({"dates": prices[shortest_dates_index][0],
                           securities_list[shortest_dates_index].get_name(): prices[shortest_dates_index][1]})
        print("::::::::::::::::::::::::::::::")
        print(df)
        for i, security in enumerate(securities_list):
            print("current i is {}".format(i))
            print("Name is {}".format(security.get_name()))
            if i == shortest_dates_index:
                continue
            df_new = pd.DataFrame({"dates": prices[i][0], security.get_name(): prices[i][1]})
            # The inner join keeps only the dates present for every security.
            df = df.merge(df_new, on='dates', how="inner")
        print("..................................................")
        print(df)

        # ``iloc`` replaces the removed ``Series.irow`` accessor.
        extra_output = [
            "The beginning of the analyzed period is {}".format(df['dates'].iloc[0]),
            "The end of the analyzed period is {}".format(df['dates'].iloc[-1]),
        ]
        df = df.drop("dates", axis = 1)
        # ``values`` replaces the removed ``DataFrame.as_matrix``.
        price_matrix = df.values
        print(price_matrix.shape)
        print("Price matrix is this:")
        print(price_matrix)
        res = jn.coint_johansen(price_matrix, 0, 1)
        # Fields of the Johansen result, per the notes kept with this method
        # (m = number of variables, r = number of cointegrating combinations):
        #   res.eig  - eigenvalues (m x 1)
        #   res.evec - eigenvectors (m x m); the first r columns are the
        #              normalized cointegrating vectors
        #   res.lr1  - trace statistic for r = 0 .. m-1 (m x 1); if r = m every
        #              variable is stationary by itself and cointegration is
        #              not needed
        #   res.lr2  - maximum eigenvalue statistic for r = 0 .. m-1 (m x 1)
        #   res.cvt  - trace critical values (m x 3), columns [90% 95% 99%],
        #              one row per test
        #   res.cvm  - maximum eigenvalue critical values (m x 3), [90% 95% 99%]
        #   res.ind  - index of cointegrating variables ordered by eigenvalue
        #              size, from large to small
        print("trace statistic: \n{}".format(res.lr1))
        print("trace statistic crit values: \n{}".format(res.cvt))
        print("Eigenvalue statistic: \n{}".format(res.lr2))
        print("Eigenvalue statistic crit values: \n{}".format(res.cvm))
        print("Eigenvalues: \n{}".format(res.eig))
        print("Eigen vectors: \n{}".format(res.evec))
        names = df.columns.values
        extra_output.append("\n")
        extra_output.append("Trace Value statistics:\n {}".format(res.lr1))
        extra_output.append("Trace Critical Values:\n {}".format(res.cvt))
        extra_output.append("\n")
        extra_output.append("Eigen Value statistics:\n {}".format(res.lr2))
        extra_output.append("Versus Critical Values:\n {}".format(res.cvm))
        extra_output.append("\n")

        # Count cointegrating combinations: the index of the first trace test
        # whose statistic falls below the critical value in column 1 of cvt.
        # NOTE(review): if every statistic exceeds its critical value the loop
        # never breaks and the count stays 0, so that case is reported as
        # "no combination" - confirm this is intended.
        num_combinations = 0
        for i in range(len(res.lr1)):
            print(res.lr1[i])
            print(res.cvt[i, 1])
            print("_____")
            if res.lr1[i] < res.cvt[i, 1]:
                num_combinations = i
                break

        if num_combinations == 0:
            weights = [0] * len(names)
            return [names, weights, extra_output]

        # Pick the eigenvector column: column 0 unless a later column has a
        # max-eigenvalue statistic above both its critical value and the
        # statistic of the current pick.
        best_index = 0
        for i in range(1, len(res.lr2)):
            if res.lr2[i] > res.cvm[i, 1] and res.lr2[i] > res.lr2[best_index]:
                best_index = i

        # Express the weights relative to the first security.
        divisor = res.evec[0, best_index]
        weights = [res.evec[j, best_index] / divisor for j in range(len(names))]

        # Weighted portfolio price per date, accumulated column by column.
        aggregated = []
        for row in price_matrix:
            agg_price = 0
            for k in range(len(row)):
                agg_price = agg_price + row[k] * weights[k]
            aggregated.append(agg_price)
        # Built in one go; ``Series.set_value`` no longer exists in pandas.
        price_series = pd.Series(aggregated, dtype=float)
        half_life = BasicMR.get_half_life(price_series)
        extra_output.append("Half life period: {}".format(half_life))
        # For further backtesting, weights and half life could be stored on this object.
        return [names, weights, extra_output]
Ejemplo n.º 6
0
    def run_coint_test(self):
        """Run the Johansen cointegration test on ``self.merged_df``.

        Returns whatever ``coint_johansen(self.merged_df, 0, 1)`` produces,
        unchanged.
        """
        return coint_johansen(self.merged_df, 0, 1)
Ejemplo n.º 7
0
# Script fragment: df, np, pd and plt are expected to exist already.
# Plot the "coint" column of df.
df["coint"].plot()
# plt.show()

import pyconometrics

# CADF test on ewa against ewc; .H turns each 1 x n matrix into a column vector.
print pyconometrics.cadf(np.matrix(df["ewa"]).H, np.matrix(df["ewc"]).H, 0, 1)

import statsmodels.tsa.stattools as st
import hurst

# Hurst exponent and ADF test (at most one lag) on the "coint" column.
print "hurst", hurst.hurst(df["coint"])
print st.adfuller(df["coint"], maxlag=1)

from johansen import coint_johansen, print_johan_stats

# Johansen test on the two-series system.
res = coint_johansen(df[["ewa", "ewc"]], 0, 1)
print_johan_stats(res)

# Johansen test on the three-series system.
cols = ["ewc", "ewa", "ige"]
res3 = coint_johansen(df[cols], 0, 1)
print_johan_stats(res3)

# Portfolio value: the three price columns weighted by the first eigenvector column.
df["yport"] = np.dot(df[cols], res3.evec[:, 0])
# NOTE(review): plt.hold and pd.rolling_mean (below) are legacy APIs that were
# dropped from later matplotlib / pandas releases - confirm the versions in use.
plt.hold(False)
df["yport"].plot()
# plt.show()

import halflife

# Element 1 of the halflife result is used as the rolling window.
# NOTE(review): presumably the half life in periods - confirm.
hf = halflife.halflife(df, "yport")[1]
data_mean = pd.rolling_mean(df["yport"], window=hf)
Ejemplo n.º 8
0
# Script fragment: usdcad, audusd, np and pd are expected to exist already.
# Keep only the 16:59 bar of each day after 2009-01-01.
usdcad2 = usdcad[(usdcad.tday > 20090101) & (usdcad.hhmm == 1659)]
audusd2 = audusd[(audusd.tday > 20090101) & (audusd.hhmm == 1659)]
usdcad2 = usdcad2.set_index(['tday', 'hhmm'])
audusd2 = audusd2.set_index(['tday', 'hhmm'])
# Invert USD/CAD so both series are quoted in the same direction.
cad = 1 / usdcad2.cl
aud = audusd2.cl
y = pd.concat([aud, cad], axis=1)

from johansen import coint_johansen
trainlen = 250
lookback = 20
# Rows before trainlen are never filled and stay NaN.
numUnits = np.ones(len(y)) * np.nan
hedgeRatio = np.ones(y.shape) * np.nan
for t in range(trainlen, len(y)):
    # Hedge ratio: first eigenvector column of the Johansen test on the
    # trainlen rows preceding t.
    df = y[t - trainlen:t]
    hedgeRatio[t] = coint_johansen(df, 0, 1).evec[:, 0]
    # Portfolio value over the lookback rows preceding t.
    tmp1 = np.array(y[t - lookback:t])
    tmp2 = np.kron(np.ones((lookback, 1)), hedgeRatio[t])
    yport = np.sum(tmp1 * tmp2, axis=1)
    ma = np.mean(yport)
    mstd = np.std(yport)
    # z-score of the latest portfolio value; the position is its negative.
    zScore = (yport[-1] - ma) / mstd
    numUnits[t] = -zScore

# Copy positions into multiple columns. Positions are the market values of
# AUDUSD and CADUSD in the portfolio, expressed in USD.
tmp1 = np.kron(np.ones((y.shape[1], 1)), numUnits)
positions = tmp1.T * hedgeRatio * y
# P&L: the previous row's position times the relative price change.
pnl = positions.shift(1) * (y - y.shift(1)) / y.shift(1)
pnl = pnl.sum(axis=1)
# Return on the gross market value held in the previous row.
ret = pnl / np.sum(np.abs(positions.shift(1)), axis=1)
Ejemplo n.º 9
0
# Script fragment: df, np and plt are expected to exist already.
# import hurst 
# print 'hurst', hurst.hurst(df['coint'])
# print st.adfuller(df['coint'],maxlag=1)
#===============================================================================

from johansen import coint_johansen, print_johan_stats
#===============================================================================
# res = coint_johansen(df[['ewa','ewc']], 0, 1)
# print 'Now it is time to see the johan stats'
# print_johan_stats(res)
#===============================================================================

# Johansen test on the three-series system; the two prints are debug output
# showing the type and contents of the input.
cols = ['ewc','ewa','ige']
print type(df[cols])
print df[cols]
res3 = coint_johansen(df[cols], 0, 1)
print_johan_stats(res3)

# Portfolio value: the three price columns weighted by the first eigenvector column.
df['yport'] = np.dot(df[cols], res3.evec[:,0])
#plt.hold(False)
df['yport'].plot()
plt.show()


#########
import halflife
# Element 1 of the halflife result.
# NOTE(review): presumably the half life in periods - confirm.
hf = halflife.halflife(df, 'yport')[1]
# Debug marker that makes the printed value easy to spot.
print 'hhhhhhhhhhhhhhhh'
print hf


Ejemplo n.º 10
0
    # Fragment: this indented line belongs to an enclosing block that is not
    # visible here; z is expected to provide open() for the archive member.
    dfspy3 = pd.read_csv(z.open('SPY3.csv'), sep=',')
dfspy3 = dfspy3.set_index('Date')

# Split on the integer Date index: calendar year 2007 for training, later rows for testing.
train = dfspy3[(dfspy3.index >= 20070101) & (dfspy3.index <= 20071231)]
testspy3 = dfspy3[(dfspy3.index > 20071231)]
# One row per column of dfspy3; isCoint stays NaN unless set to True below.
resdf = pd.DataFrame(index=dfspy3.columns)
resdf['isCoint'] = np.nan

from johansen import coint_johansen, print_johan_stats
for s in dfspy3.columns:
    if s == 'SPY': continue
    # We use the Johansen call so that we do not have to choose which
    # series is y and which is x.
    data = train[[s, 'SPY']].dropna()
    # Skip columns with fewer than 250 complete training rows.
    if len(data) < 250: continue
    res = coint_johansen(data, 0, 1)
    # First trace statistic against the first critical value of the first row.
    # NOTE(review): presumably the 90% critical value - confirm.
    if res.lr1[0] > res.cvt[0][0]:
        resdf.loc[s, 'isCoint'] = True
print resdf.isCoint.sum()

# Long side: sum of log prices of every flagged column, tested against log SPY.
coint_cols = list(resdf[resdf.isCoint == True].index)
yN = train[coint_cols]
logMktVal_long = np.log(yN).sum(axis=1)
ytest = pd.concat([logMktVal_long, np.log(train.SPY)], axis=1)
res = coint_johansen(ytest, 0, 1)
print_johan_stats(res)

# Weight matrix over the test rows: evec[0, 0] for each flagged column, then
# evec[1, 0] for SPY in the last column.
tmp1 = np.ones((len(testspy3), resdf.isCoint.sum())) * res.evec[0, 0]
tmp2 = np.ones((len(testspy3), 1)) * res.evec[1, 0]
weights = np.hstack((tmp1, tmp2))
yNplus = testspy3[coint_cols + ['SPY']]
Ejemplo n.º 11
0
import johansen
import quantstats as qs
import sp500

# Download one CSV per S&P 500 constituent into TS/, load the files that
# exist, trim every series to the length of the shortest one and run the
# Johansen test on the result.
if not os.path.exists('TS'):
    os.makedirs('TS')

ts = []

for stock in sp500.sp500:
    filename = 'TS/' + stock + '.csv'
    qs.download(stock, filename)
    # Only load series whose file is actually present after the download.
    if os.path.isfile(filename):
        ts.append(qs.load(filename))
# shutil.rmtree("TS", True)

# Length of the shortest loaded series (0 when nothing was loaded).
minimum = min(len(item) for item in ts) if ts else 0

# Keep only the last `minimum` entries of every series. The explicit start
# index is kept because item[-0:] would return the whole series.
for position, item in enumerate(ts):
    ts[position] = item[len(item) - minimum:]

jresults = johansen.coint_johansen(ts, 0, 1)

for item in jresults.keys():
    # A single pre-formatted string prints the same text under Python 2 and 3.
    print('%s:  %s' % (item, jresults[item]))
# print 'Creation date: %s UTC' % str(time.strftime('%Y-%m-%d'))
# print 'Pair Member 1,Pair Member 2,p-value'
# Script fragment: eurusd_gbpusd, ax1, ts, coint_johansen, MR_one, np, pd and
# plt are expected to exist already.
x=eurusd_gbpusd['Date']
y1=eurusd_gbpusd['Price_x']
y2=eurusd_gbpusd['Price_y']
# NOTE(review): ax2 is created but both series are drawn on ax1 - confirm
# whether y2 was meant to go on the twin axis.
ax2 = ax1.twinx()
ax1.plot(x, y1, 'g-')
ax1.plot(x, y2, 'b-')

# Make sure both price columns are numeric before testing.
eurusd_gbpusd.Price_x = pd.to_numeric(eurusd_gbpusd.Price_x)
eurusd_gbpusd.Price_y = pd.to_numeric(eurusd_gbpusd.Price_y)

# NOTE(review): the adfuller result is discarded - confirm it is not needed.
ts.adfuller(eurusd_gbpusd['Price_y'])
result = ts.coint(eurusd_gbpusd['Price_x'],eurusd_gbpusd['Price_y'])

eurusd_gbpusd_test = eurusd_gbpusd[['Price_x','Price_y']]

# NOTE(review): the Johansen result is discarded as well.
coint_johansen(eurusd_gbpusd_test, -1, 1)

### Analyzing spreads and ratios ###

# Price difference, log of the price ratio, and the plain price ratio.
eurusd_gbpusd['Price_diff'] = eurusd_gbpusd.Price_x - eurusd_gbpusd.Price_y
eurusd_gbpusd['Log_ret'] = np.log(eurusd_gbpusd.Price_x/eurusd_gbpusd.Price_y)
eurusd_gbpusd['ret'] = eurusd_gbpusd.Price_x/eurusd_gbpusd.Price_y

eurusd_gbpusd.plot('Date','ret')
plt.show()

########################################

# Hand the data to the MR_one helpers; 60 is the argument given to lag_function.
eurusd_gbpusd_norm = MR_one.lag_function(eurusd_gbpusd,60)
matr,Regimes = MR_one.mixture_gauging(eurusd_gbpusd_norm)
df = MR_one.mat_cal(eurusd_gbpusd,eurusd_gbpusd_norm,Regimes)
Ejemplo n.º 13
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader.data as web
from johansen import coint_johansen
"""
johansen test has a null hypothesis that time series are not cointegrated.
"""

# Sample period and the three tickers under study.
start, end = '2006-4-26', '2012-4-9'
symbol1, symbol2, symbol3 = 'EWA', 'EWC', 'IGE'

# Fetch the first two series and plot their adjusted closes together.
ps1 = web.DataReader(symbol1, 'yahoo', start, end)
ps2 = web.DataReader(symbol2, 'yahoo', start, end)

plt.figure()
for frame, colour in ((ps1, 'r'), (ps2, 'b')):
    plt.plot(frame.index, frame['Adj Close'].values, colour)
plt.legend(['EWA', 'EWC'])
plt.show()

# The third series is fetched only after the plot has been shown.
ps3 = web.DataReader(symbol3, 'yahoo', start, end)

# One adjusted-close column per ticker, aligned on the dates of the first series.
adj_close = {
    symbol1: ps1['Adj Close'],
    symbol2: ps2['Adj Close'],
    symbol3: ps3['Adj Close'],
}
df = pd.DataFrame(adj_close, index=ps1.index)

# Johansen test on the three-series system.
print(coint_johansen(df, 0, 1))
Ejemplo n.º 14
0
    def setup_class(cls):
        """Run ``coint_johansen(dta, -1, 8)`` once and store reference values on the class.

        Sets ``cls.res`` (the test result), ``cls.nobs_r`` and the reference
        arrays ``cls.res1_m`` / ``cls.res2_m``.
        """
        cls.res = coint_johansen(dta, -1, 8)
        # The trailing 8 matches the third argument passed to coint_johansen above.
        # NOTE(review): 173 is presumably the number of observations in dta - confirm.
        cls.nobs_r = 173 - 1 - 8

        # NOTE(review): presumably the trace statistics (res1_m) and max-eigenvalue
        # statistics (res2_m), each followed by their critical values - confirm.
        cls.res1_m = np.array([260.6786029744658,  162.7966072512681,  105.8253545950566,  71.16133060790817,  47.68490211260372,  28.11843682526138,  13.03968537077271,   2.25398078597622,           137.9954,           106.7351,            79.5329,            56.2839,            37.0339,            21.7781,            10.4741,             2.9762,           143.6691,           111.7797,            83.9383,            60.0627,            40.1749,            24.2761,            12.3212,             4.1296,           154.7977,           121.7375,            92.7136,  67.63670000000001,            46.5716,            29.5147,             16.364,             6.9406])
        cls.res2_m = np.array([97.88199572319769,  56.97125265621156,  34.66402398714837,  23.47642849530445,  19.56646528734234,  15.07875145448866,   10.7857045847965,   2.25398078597622,             45.893,            39.9085,            33.9271,             27.916,             21.837,            15.7175,             9.4748,             2.9762,            48.8795,            42.7679,            36.6301,            30.4428,            24.1592,            17.7961,            11.2246,             4.1296,            55.0335,            48.6606,            42.2333,            35.7359,            29.0609,            22.2519,            15.0923,             6.9406])
Ejemplo n.º 15
0
# Script fragment: token, oandapy, getHistPrices, coint_johansen and pd are
# expected to exist already.
# NOTE(review): the account id is hard-coded - consider loading it from configuration.
account_id = '5640873'

oanda = oandapy.API(environment="practice", access_token=token)

# Number of historical prices requested, and the window for the mean / std below.
trainlen = 96
lookback = 8

filename = 'data.csv'

# Invert USD/CAD so both series are quoted against USD in the same direction.
initial_prices_CAD = 1 / getHistPrices('USD_CAD', trainlen) 
initial_prices_AUD = getHistPrices('AUD_USD', trainlen) 

# Pair the two series row by row and persist them to data.csv.
initial_prices = pd.DataFrame(zip(initial_prices_CAD, initial_prices_AUD), columns=['CAD_USD', 'AUD_USD'])
initial_prices.to_csv(filename, index=False)

# Portfolio value: both series weighted by the first eigenvector column.
res = coint_johansen(initial_prices[['CAD_USD','AUD_USD']], 0, 1)
initial_prices['yport'] = initial_prices['CAD_USD'] * res.evec[0][0] + initial_prices['AUD_USD'] * res.evec[1][0]
# Mean and standard deviation over the last `lookback` rows.
ma = initial_prices['yport'][-lookback:].mean()
mstd = initial_prices['yport'][-lookback:].std()

# z-score of the most recent portfolio value.
zscore = (float(initial_prices['yport'].tail(1)) - ma) / mstd

#print_johan_stats(res)

   

# Current quote: the CAD/USD mid is the average of the inverted ask and bid.
response = oanda.get_prices(instruments="USD_CAD")
USD_CAD_ASK = response.get("prices")[0].get("ask")
USD_CAD_BID = response.get("prices")[0].get("bid")
CAD_USD_MID = ((1 / USD_CAD_ASK) + (1 / USD_CAD_BID)) / 2
Ejemplo n.º 16
0
# Script fragment: usdcad, audusd, np and pd are expected to exist already.
# Keep only the 16:59 bar of each day after 2009-01-01.
usdcad2 = usdcad[(usdcad.tday > 20090101) & (usdcad.hhmm == 1659)]
audusd2 = audusd[(audusd.tday > 20090101) & (audusd.hhmm == 1659)]
usdcad2 = usdcad2.set_index(['tday', 'hhmm'])
audusd2 = audusd2.set_index(['tday', 'hhmm'])
# Invert USD/CAD so both series are quoted in the same direction.
cad = 1 / usdcad2.cl
aud = audusd2.cl
y = pd.concat([aud, cad], axis=1)

from johansen import coint_johansen
trainlen = 250
lookback = 20
# Rows before trainlen are never filled and stay NaN.
numUnits = np.ones(len(y)) * np.nan
hedgeRatio = np.ones(y.shape) * np.nan
for t in range(trainlen, len(y)):
    # Hedge ratio: first eigenvector column of the Johansen test on the
    # trainlen rows preceding t.
    df = y[t - trainlen:t]
    hedgeRatio[t] = coint_johansen(df, 0, 1).evec[:, 0]
    # Portfolio value over the lookback rows preceding t.
    tmp1 = np.array(y[t - lookback:t])
    tmp2 = np.kron(np.ones((lookback, 1)), hedgeRatio[t])
    yport = np.sum(tmp1 * tmp2, axis=1)
    ma = np.mean(yport)
    mstd = np.std(yport)
    # z-score of the latest portfolio value; the position is its negative.
    zScore = (yport[-1] - ma) / mstd
    numUnits[t] = -zScore


# Copy positions into multiple columns. Positions are the market values of
# AUDUSD and CADUSD in the portfolio, expressed in USD.
tmp1 = np.kron(np.ones((y.shape[1], 1)), numUnits)
positions = tmp1.T * hedgeRatio * y
# P&L: the previous row's position times the relative price change.
pnl = positions.shift(1) * (y - y.shift(1)) / y.shift(1)
pnl = pnl.sum(axis=1)
Ejemplo n.º 17
0
# Script fragment: df, hedgeRatio, np, pd and plt are expected to exist already.
# Spread between ewc and the hedge-ratio-weighted ewa.
df['coint'] = df['ewc']-hedgeRatio*df['ewa']
# NOTE(review): plt.hold, pd.rolling_mean and pd.rolling_std are legacy APIs
# that were dropped from later matplotlib / pandas releases - confirm the
# versions in use.
plt.hold(False)
df['coint'].plot()
#plt.show()

import pyconometrics
# CADF test on ewa against ewc; .H turns each 1 x n matrix into a column vector.
print pyconometrics.cadf(np.matrix(df['ewa']).H,
                         np.matrix(df['ewc']).H,0,1)

import statsmodels.tsa.stattools as st
import hurst 
# Hurst exponent and ADF test (at most one lag) on the spread.
print 'hurst', hurst.hurst(df['coint'])
print st.adfuller(df['coint'],maxlag=1)

from johansen import coint_johansen, print_johan_stats
# Johansen test on the two-series system.
res = coint_johansen(df[['ewa','ewc']], 0, 1)
print_johan_stats(res)

# Johansen test on the three-series system.
cols = ['ewc','ewa','ige']
res3 = coint_johansen(df[cols], 0, 1)
print_johan_stats(res3)

# Portfolio value: the three price columns weighted by the first eigenvector column.
df['yport'] = np.dot(df[cols], res3.evec[:,0])
plt.hold(False)
df['yport'].plot()
#plt.show()

import halflife
# Element 1 of the halflife result is used as the rolling window.
# NOTE(review): presumably the half life in periods - confirm.
hf = halflife.halflife(df, 'yport')[1]
data_mean = pd.rolling_mean(df['yport'], window=hf)
data_std = pd.rolling_std(df['yport'], window=hf)
Ejemplo n.º 18
0
Archivo: tmp.py Proyecto: zjmirving/kod
# to save a matlab matrix
# save('dosya','A')
import pandas as pd, sys
from scipy import io as spio
import numpy as np, os

# Data directory under the current user's home folder.
base = '%s/Dropbox/Public/data' % os.environ['HOME']
a = spio.loadmat(base + '/inputData_USDCAD_20120426.mat')
# Build one frame from the listed MATLAB variables, side by side.
cols = ['cl','lo','hi','hhmm','op']
df = pd.concat([pd.DataFrame(a[x]) for x in cols], axis=1)
df.columns = cols
print df

# Make the local johansen module importable from a hard-coded path.
sys.path.append('/home/burak/Documents/classnotes/tser/tser_coint')
from johansen import coint_johansen
# NOTE(review): df only has the columns listed in cols above, so selecting
# 'ewa' and 'ewc' looks like a leftover from another script - confirm.
res = coint_johansen(df[['ewa','ewc']], 0, 1)

Ejemplo n.º 19
0
# to save a matlab matrix
# save('dosya','A')
import pandas as pd, sys
from scipy import io as spio
import numpy as np, os

# Data directory under the current user's home folder.
base = '%s/Dropbox/Public/data' % os.environ['HOME']
a = spio.loadmat(base + '/inputData_USDCAD_20120426.mat')
# Build one frame from the listed MATLAB variables, side by side.
cols = ['cl', 'lo', 'hi', 'hhmm', 'op']
df = pd.concat([pd.DataFrame(a[x]) for x in cols], axis=1)
df.columns = cols
print df

# Make the local johansen module importable from a hard-coded path.
sys.path.append('/home/burak/Documents/classnotes/tser/tser_coint')
from johansen import coint_johansen
# NOTE(review): df only has the columns listed in cols above, so selecting
# 'ewa' and 'ewc' looks like a leftover from another script - confirm.
res = coint_johansen(df[['ewa', 'ewc']], 0, 1)
Ejemplo n.º 20
0
# Script fragment: df, hedgeRatio, np, pd and plt are expected to exist already.
# Spread between ewc and the hedge-ratio-weighted ewa.
df['coint'] = df['ewc'] - hedgeRatio * df['ewa']
# NOTE(review): plt.hold, pd.rolling_mean and pd.rolling_std are legacy APIs
# that were dropped from later matplotlib / pandas releases - confirm the
# versions in use.
plt.hold(False)
df['coint'].plot()
#plt.show()

import pyconometrics
# CADF test on ewa against ewc; .H turns each 1 x n matrix into a column vector.
print pyconometrics.cadf(np.matrix(df['ewa']).H, np.matrix(df['ewc']).H, 0, 1)

import statsmodels.tsa.stattools as st
import hurst
# Hurst exponent and ADF test (at most one lag) on the spread.
print 'hurst', hurst.hurst(df['coint'])
print st.adfuller(df['coint'], maxlag=1)

from johansen import coint_johansen, print_johan_stats
# Johansen test on the two-series system.
res = coint_johansen(df[['ewa', 'ewc']], 0, 1)
print_johan_stats(res)

# Johansen test on the three-series system.
cols = ['ewc', 'ewa', 'ige']
res3 = coint_johansen(df[cols], 0, 1)
print_johan_stats(res3)

# Portfolio value: the three price columns weighted by the first eigenvector column.
df['yport'] = np.dot(df[cols], res3.evec[:, 0])
plt.hold(False)
df['yport'].plot()
#plt.show()

import halflife
# Element 1 of the halflife result is used as the rolling window.
# NOTE(review): presumably the half life in periods - confirm.
hf = halflife.halflife(df, 'yport')[1]
data_mean = pd.rolling_mean(df['yport'], window=hf)
data_std = pd.rolling_std(df['yport'], window=hf)
Ejemplo n.º 21
0
# Demo run: simulate six series that share drifting random-walk factors and
# print the Johansen test results for them.
if __name__ == '__main__':

    # Fixed seed so the simulated data is reproducible.
    np.random.seed(9642567)
    nobs = 500
    # Four random walks with drift 0.2 (cumulative sums down the rows).
    fact = np.cumsum(0.2 + np.random.randn(nobs, 4),0)

    # Six noise series, each a weighted sum of the current and two previous
    # draws (weights 1, 0.6, 0.25); the two extra rows feed the lags.
    xx = np.random.randn(nobs+2, 6)
    xx = xx[2:] + 0.6 * xx[1:-1] + 0.25 * xx[:-2]
    # Columns 0-1 share factor 0, columns 2-3 share factor 1, and columns 4-5
    # get the last two factors, one each.
    xx[:,:2] += fact[:,0][:,None]
    #xx[:,2:3] += fact[:,1][:,None]
    xx[:,2:4] += fact[:,1][:,None]
    xx[:,4:] += fact[:,-2:]

    # Second and third arguments for coint_johansen.
    p, k = 1, 2

    result = coint_johansen(xx, p, k)

    # Element-wise comparison of each statistic with its critical values.
    print result.lr1 > result.cvt.T
    print result.lr2 > result.cvm.T
    print np.round(result.evec,4)
    print result.eig


    #I guess test statistic looks good, but
    #print np.round(result.evec,4)
    #looks strange, I don't see the interpretation
    #changed the DGP: I had some I(0) not integrated series included
    #      now all series are I(1) in the DGP
    # -> evec looks better now

    import matplotlib.pyplot as plt
Ejemplo n.º 22
0
    # Fragment: this indented line belongs to an enclosing block that is not
    # visible here; z is expected to provide open() for the archive member.
    dfspy3 =  pd.read_csv(z.open('SPY3.csv'),sep=',')
dfspy3 = dfspy3.set_index('Date')

# Split on the integer Date index: calendar year 2007 for training, later rows for testing.
train = dfspy3[(dfspy3.index>=20070101) & (dfspy3.index<=20071231)]
testspy3 = dfspy3[(dfspy3.index > 20071231)]
# One row per column of dfspy3; isCoint stays NaN unless set to True below.
resdf = pd.DataFrame(index=dfspy3.columns)
resdf['isCoint'] = np.nan

from johansen import coint_johansen, print_johan_stats
for s in dfspy3.columns: 
   if s == 'SPY': continue
   # We use the Johansen call so that we do not have to choose which
   # series is y and which is x.
   data = train[[s,'SPY']].dropna()
   # Skip columns with fewer than 250 complete training rows.
   if len(data) < 250: continue
   res = coint_johansen(data, 0, 1)
   # First trace statistic against the first critical value of the first row.
   # NOTE(review): presumably the 90% critical value - confirm.
   if res.lr1[0] > res.cvt[0][0]: 
       resdf.loc[s,'isCoint'] = True
print resdf.isCoint.sum()


# Long side: sum of log prices of every flagged column, tested against log SPY.
coint_cols = list(resdf[resdf.isCoint==True].index)
yN = train[coint_cols]
logMktVal_long = np.log(yN).sum(axis=1)
ytest = pd.concat([logMktVal_long, np.log(train.SPY)],axis=1)
res = coint_johansen(ytest, 0, 1)
print_johan_stats(res)

# Weight matrix over the test rows: evec[0, 0] for each flagged column, then
# evec[1, 0] for SPY in the last column.
tmp1 = np.ones((len(testspy3),resdf.isCoint.sum()))*res.evec[0,0]
tmp2 = np.ones((len(testspy3),1))*res.evec[1,0]
weights = np.hstack((tmp1,tmp2))