예제 #1
0
def gradient_boosting_result(_ticker='SP500'):
    """Print residual diagnostics of rolling regressions for every stored signal.

    For ``_ticker``, loads the exchange-open flags, the ``vol_pb_120``
    volatility series and the close series, keeps only rows where the
    exchange-open flag equals 1, and gathers every signal named
    ``<price>_<filter>_<transform>_<horizon>`` into one frame.

    For each signal and each lookback in 120/240/360/480 it runs
    ``qreg.roll_e_ladreg_1d`` twice (raw and volatility-scaled inputs) and
    prints the median absolute residual of both fits.

    :param _ticker: column name to pull from every retrieved frame.
    :return: None; results are printed.
    """
    open_flags = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')[_ticker].values
    z = list(np.where(open_flags.astype('int') == 1)[0])

    price_tags = ['CLS', 'A2D', 'A3D', 'A4D', 'A2W', 'A3W', 'A4W', 'A2M', 'A3M', 'A4M']
    transform_tags = ['LVL', 'CH0', 'CH1', 'CH2']
    filter_tags = ['LRB', 'QRB', 'QRG']
    # horizons 5, 10, ..., 300 as zero-padded three-character strings
    horizon_tags = [str(h).zfill(3) for h in range(5, 305, 5)]

    # volatility forecast on open days
    vol = cr_vol_all_adj.retrieve(univ_ib_eqidx_ext + 'vol_pb_120')[_ticker].values[z]

    # close series on open days, passed through filt.ret
    ret = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')[_ticker].values[z]
    ret = filt.ret(ret)

    sig_frame = None
    for trn in transform_tags:
        for prc in price_tags:
            for fil in filter_tags:
                for hoz in horizon_tags:
                    col = prc + '_' + fil + '_' + trn + '_' + hoz
                    sig = cr_sig_mr_sg.retrieve(univ_ib_eqidx_ext + col)[_ticker].values[z]
                    piece = DataFrame({col: sig})
                    if sig_frame is None:
                        sig_frame = piece
                    else:
                        sig_frame.col_bind(piece)

    # optional snapshot of the inputs, left disabled:
    # snapshot = DataFrame({'Close': ret, 'Volatility': vol})
    # snapshot.col_bind(sig_frame)
    # cr_cret.store(univ_ib_eqidx_ext + 'GBM', snapshot)

    windows = [120, 240, 360, 480]

    for col in list(sig_frame.columns):
        stats = np.empty(0)
        for window in windows:
            # the signal is lagged by one row before being paired with returns
            lagged_sig = filt.lag(sig_frame[col].values, 1)
            uic, uis, uiv = reduce_nonnan(ret, lagged_sig, vol)
            beta_raw = qreg.roll_e_ladreg_1d(uic, uis, window)
            beta_scaled = qreg.roll_e_ladreg_1d(uic/uiv, uis/uiv, window)

            # residuals use the coefficient lagged by one row
            resid_raw = uic - filt.lag(beta_raw)*uis
            resid_scaled = uic - filt.lag(beta_scaled)*uis
            resid_raw, resid_scaled = reduce_nonnan(resid_raw, resid_scaled)
            pair = np.array([np.median(abs(resid_raw)), np.median(abs(resid_scaled))])
            stats = np.hstack((stats, pair))
        print(col+' : '+np_to_str(stats))
예제 #2
0
def response_curve(x1, _ticker='SP500', f=None, md=True, sigd=False):
    """Plot binned response curves of scaled returns against a lagged, scaled signal.

    The close series (after ``filt.ret``) and the signal ``x1`` (lagged one
    row) are both divided by the one-row-lagged ``vol_pb_120`` series, using
    only rows where the exchange-open flag equals 1. For each ``i`` in 0..15
    the signal is lagged a further ``i`` rows and paired with
    ``filt.sma(returns, i+1)``; the pairs are bucketed into 21 signal bins
    and the per-bin centre of the returns is drawn in subplot ``i+1`` of a
    4x4 grid.

    :param x1: signal name appended to ``univ_ib_eqidx_ext`` for retrieval.
    :param _ticker: column to read from every retrieved frame.
    :param f: matplotlib figure number; figure 1 is used when ``None``.
    :param md: use ``np.nanmedian`` per bin if true, else ``np.nanmean``.
    :param sigd: if true, divide each bin's centre by the bin's ``np.nanstd``.
    :return: None; the curves are plotted.
    """
    univ_ib_gd = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')[_ticker].values
    univ_ib_vl = cr_vol_all_adj.retrieve(univ_ib_eqidx_ext + 'vol_pb_120')[_ticker].values
    univ_ib_s1 = cr_sig_mr_sg.retrieve(univ_ib_eqidx_ext + x1)[_ticker].values
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')[_ticker].values
    z = list(np.where(univ_ib_gd.astype('int') == 1)[0])
    univ_ib_s1 = univ_ib_s1[z]
    univ_ib_cl = univ_ib_cl[z]
    univ_ib_vl = univ_ib_vl[z]

    univ_ib_cl = filt.ret(univ_ib_cl)/filt.lag(univ_ib_vl, 1)
    univ_ib_s1 = filt.lag(univ_ib_s1, 1)/filt.lag(univ_ib_vl, 1)

    univ_ib_cl, univ_ib_s1 = reduce_nonnan(univ_ib_cl, univ_ib_s1)

    _bins = 20
    # symmetric plotting range from the 1st/99th signal percentiles
    _range = np.maximum(np.percentile(univ_ib_s1, 99), -np.percentile(univ_ib_s1, 1))
    # half the spacing between bin centres, so neighbouring bins do not overlap
    _delta = _range/_bins
    # bin centres do not depend on the lag, so build them once
    uis1_b = np.linspace(-_range, _range, num=_bins+1)
    centre = np.nanmedian if md else np.nanmean

    pyl.figure(1 if f is None else f)
    for i in range(0, 16):
        if i == 0:
            uis1 = univ_ib_s1
            uic1 = univ_ib_cl
        else:
            uis1 = filt.lag(univ_ib_s1, i)
            uic1 = filt.sma(univ_ib_cl, i+1)
        uis1, uic1 = reduce_nonnan(uis1, uic1)
        uic1_b = np.full(_bins+1, np.nan)
        for j in range(0, _bins+1):
            below_upper = uis1 <= uis1_b[j]+_delta
            above_lower = uis1 > uis1_b[j]-_delta
            if j == 0:
                # first bin is open on the left: it collects the whole lower tail
                members = below_upper
            elif j == _bins:
                # last bin is open on the right: it collects the whole upper tail.
                # (The previous test `j == _bins+1` could never be true, so the
                # upper tail was silently dropped.)
                members = above_lower
            else:
                members = below_upper & above_lower
            tmp__ = np.where(members)[0]
            if tmp__.shape[0] > 0:
                value = centre(uic1[tmp__])
                if sigd:
                    value = value/np.nanstd(uic1[tmp__])
                uic1_b[j] = value
        pyl.subplot(4, 4, i+1)
        pyl.plot(uis1_b, uic1_b)
예제 #3
0
def return_stats_for_various_vol():
    """Print Spearman correlations between lagged volatility and absolute returns.

    Loads the ``Close`` frame, converts every ticker column with ``filt.ret``
    and flattens it to absolute values. For every name in ``vol_names`` and
    every horizon 30, 60, ..., 300 it loads the matching volatility frame,
    lags each column with ``filt.lag``, and prints the Spearman correlation
    with the absolute returns, restricted to observations where both values
    are non-NaN and the sign of the lagged 27-period ``filt.ret`` is negative.

    :return: None; one line is printed per volatility name and horizon.
    """
    import warnings

    univ_ib_cl = cr_cret.retrieve(univ_ib_ext + "Close")
    univ_ib_rt1 = univ_ib_cl.copy()
    univ_ib_rt2 = univ_ib_cl.copy()
    univ_ib_rt3 = univ_ib_cl.copy()
    for i in univ_ib_cl.tick_cols():
        univ_ib_cl[i] = filt.ret(univ_ib_cl[i].values)
    # lagged multi-period versions of the same series, used as conditioning signs
    for frame, horizon in ((univ_ib_rt1, 27), (univ_ib_rt2, 80), (univ_ib_rt3, 240)):
        for i in frame.tick_cols():
            frame[i] = filt.ret(frame[i].values, horizon)
            frame[i] = filt.lag(frame[i].values)
    univ_ib_cl_data = univ_ib_cl[univ_ib_cl.tick_cols()].values.reshape(-1)
    univ_ib_rt1_data = univ_ib_rt1[univ_ib_rt1.tick_cols()].values.reshape(-1)
    univ_ib_rt2_data = univ_ib_rt2[univ_ib_rt2.tick_cols()].values.reshape(-1)
    univ_ib_rt3_data = univ_ib_rt3[univ_ib_rt3.tick_cols()].values.reshape(-1)
    univ_ib_cl_data = np.abs(univ_ib_cl_data)

    # Silence NaN-related RuntimeWarnings only for this section and restore the
    # caller's warning filters afterwards, instead of resetting them globally.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        univ_ib_rt1_data = np.sign(univ_ib_rt1_data)
        # NOTE: the 80- and 240-period signs are computed but not used by the
        # mask below; they are kept for swapping the conditioning variable.
        univ_ib_rt2_data = np.sign(univ_ib_rt2_data)
        univ_ib_rt3_data = np.sign(univ_ib_rt3_data)
        # part of the selection mask that does not depend on the volatility frame
        base_mask = ~np.isnan(univ_ib_cl_data) & (univ_ib_rt1_data < 0)

    for j in vol_names:
        for k in range(30, 330, 30):
            # horizons below 100 carry one leading zero in the stored name
            tag = j + ("_0" if k < 100 else "_") + str(k) + "D"
            univ_ib_vol = cr_vol_all.retrieve(univ_ib_ext + tag)
            for i in univ_ib_vol.tick_cols():
                univ_ib_vol[i] = filt.lag(univ_ib_vol[i].values)
            univ_ib_vol_data = univ_ib_vol[univ_ib_vol.tick_cols()].values.reshape(-1)

            univ_ib_nn = base_mask & ~np.isnan(univ_ib_vol_data)

            rho = ss.spearmanr(univ_ib_vol_data[univ_ib_nn], univ_ib_cl_data[univ_ib_nn]).correlation
            print(tag + ":\t", rho)
        print("\n")
예제 #4
0
def autocorr(x, m=1):
    """Return the mean product of the standardised series and its ``m``-lagged copy.

    ``x`` is paired with ``filt.lag(x, m)``, both are passed through
    ``reduce_nonnan``, each is centred by its mean and divided by its
    ``np.std``, and the mean of the element-wise product is returned.

    :param x: input series.
    :param m: lag passed to ``filt.lag``.
    :return: mean of the product of the two standardised series.
    """
    def _standardise(values):
        return (values - np.mean(values))/np.std(values)

    lagged, current = reduce_nonnan(filt.lag(x, m), np.copy(x))
    z_lagged = _standardise(lagged)
    z_current = _standardise(current)
    return np.mean(z_lagged*z_current)
예제 #5
0
def spline_curve(x1, _ticker='SP500', f=None, x1_=None, x2_=None, scat=True):
    """Fit and plot ``rpf2.cubic_fit_linreg`` of scaled returns on a scaled signal.

    Returns (``filt.ret`` of the close) and the one-row-lagged signal ``x1``
    are both divided by the one-row-lagged ``vol_pb_120`` series, using only
    rows where the exchange-open flag equals 1. The Spearman correlation of
    the two is printed, the fit is evaluated on 101 points in [-1, 1] and
    plotted together with a zero line and, optionally, the raw scatter.

    :param x1: signal name appended to ``univ_ib_eqidx_ext`` for retrieval.
    :param _ticker: column to read from every retrieved frame.
    :param f: accepted but not used by this function.
    :param x1_: lower knot; defaults to the 25th percentile of the signal.
    :param x2_: upper knot; defaults to the 75th percentile of the signal.
    :param scat: if true, overlay a scatter of signal versus return.
    :return: None.
    """
    open_flags = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')[_ticker].values
    vol = cr_vol_all_adj.retrieve(univ_ib_eqidx_ext + 'vol_pb_120')[_ticker].values
    sig = cr_sig_mr_sg.retrieve(univ_ib_eqidx_ext + x1)[_ticker].values
    close = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')[_ticker].values
    z = list(np.where(open_flags.astype('int') == 1)[0])
    sig, close, vol = sig[z], close[z], vol[z]

    ret = filt.ret(close) / filt.lag(vol, 1)
    sig = filt.lag(sig, 1) / filt.lag(vol, 1)

    ret, sig = reduce_nonnan(ret, sig)

    print(spearmanr(ret, sig).correlation)

    # equal observation weights summing to one
    n_obs = ret.shape[0]
    weights = np.ones(n_obs) / n_obs

    knot_lo = np.percentile(sig, 25) if x1_ is None else x1_
    knot_hi = np.percentile(sig, 75) if x2_ is None else x2_

    coef = rpf2.cubic_fit_linreg(ret, sig, knot_lo, knot_hi, weights)
    b0, b1, b2, b3, a0, c0 = (coef[idx] for idx in range(6))

    grid = np.linspace(-1, 1, num=101)
    below = grid < knot_lo
    above = grid > knot_hi
    # the four terms are summed left to right, in the same order as before
    const_term = b0+a0*below+c0*above
    lin_term = (b1-3*(a0/grid)*below-3*(c0/grid)*above)*grid
    quad_term = (b2+3*(a0/(grid**2))*below+3*(c0/(grid**2))*above)*(grid**2)
    cube_term = (b3-(a0/(grid**3))*below-(c0/(grid**3))*above)*(grid**3)
    curve = const_term+lin_term+quad_term+cube_term
    # divisions by a zero grid point yield NaN; those points are set to b0
    curve[np.where(np.isnan(curve))[0]] = b0

    pyl.plot(grid, curve)
    pyl.plot(grid, np.zeros(len(curve)))
    if scat:
        pyl.scatter(sig, ret, c='c')
    return None
예제 #6
0
def test_measures():
    """Print rank correlations between SP500 returns and every stored signal.

    Uses rows where the SP500 exchange-open flag equals 1 and only the last
    2780 of them. For each ``<price>_<filter>_<transform>_<horizon>`` signal
    it prints four Spearman correlations, multiplied by 100: returns against
    the signal lagged one and two rows, and the same on ``np.sign`` of both.

    :return: None; one line is printed per signal.
    """
    open_frame = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')
    price_tags = ['CLS', 'A2D', 'A3D', 'A4D', 'A2W', 'A3W', 'A4W', 'A2M', 'A3M', 'A4M']
    transform_tags = ['LVL', 'CH0', 'CH1', 'CH2']
    filter_tags = ['LRB', 'QRB', 'QRG']
    # horizons 5, 10, ..., 300 as zero-padded three-character strings
    horizon_tags = [str(h).zfill(3) for h in range(5, 305, 5)]

    _ticker = 'SP500'

    z = list(np.where(open_frame[_ticker].values.astype('int') ==1)[0])
    ret = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')[_ticker].values[z]
    ret = filt.ret(ret)
    ret = ret[-2780:]

    for prc in price_tags:
        for fil in filter_tags:
            for trn in transform_tags:
                for hoz in horizon_tags:
                    fn = univ_ib_eqidx_ext+prc+'_'+fil+'_'+trn+'_'+hoz
                    sig = cr_sig_mr_sg.retrieve(fn)[_ticker].values[z]
                    sig_lag1 = filt.lag(sig)[-2780:]
                    sig_lag2 = filt.lag(sig, 2)[-2780:]
                    level_1 = spearmanr(ret, sig_lag1).correlation
                    level_2 = spearmanr(ret, sig_lag2).correlation
                    sign_1 = spearmanr(np.sign(ret), np.sign(sig_lag1)).correlation
                    sign_2 = spearmanr(np.sign(ret), np.sign(sig_lag2)).correlation
                    scaled = np.array([level_1, level_2, sign_1, sign_2])*100
                    print(','.join((prc, fil, trn, hoz))+' :\t', np_to_str(scaled))
    return None
예제 #7
0
def pred_stats_for_various_vol():
    """Print Spearman correlations between changes in inverse volatility and returns.

    Loads the ``Close`` frame and converts every ticker column with
    ``filt.ret``. For every name in ``vol_names_sm`` and every horizon
    30, 60, ..., 300 it loads the matching volatility frame, replaces each
    column by ``filt.chg(filt.lag(1 / vol))``, and prints the Spearman
    correlation with the (signed) returns over observations where both
    values are non-NaN.

    :return: None; one line is printed per volatility name and horizon.
    """
    import warnings

    univ_ib_cl = cr_cret.retrieve(univ_ib_ext + "Close")
    univ_ib_rt = univ_ib_cl.copy()
    for i in univ_ib_cl.tick_cols():
        univ_ib_cl[i] = filt.ret(univ_ib_cl[i].values)
    for i in univ_ib_rt.tick_cols():
        univ_ib_rt[i] = filt.ret(univ_ib_rt[i].values, 30)
        univ_ib_rt[i] = filt.lag(univ_ib_rt[i].values)
    univ_ib_cl_data = univ_ib_cl[univ_ib_cl.tick_cols()].values.reshape(-1)
    univ_ib_rt_data = univ_ib_rt[univ_ib_rt.tick_cols()].values.reshape(-1)

    # Silence NaN-related RuntimeWarnings only for this statement and restore
    # the caller's warning filters afterwards, instead of resetting them globally.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        # NOTE: the sign of the lagged 30-period series is not used by the mask
        # below; it is kept so the `(univ_ib_rt_data < 0)` condition can be
        # switched back on.
        univ_ib_rt_data = np.sign(univ_ib_rt_data)

    # part of the selection mask that does not depend on the volatility frame
    cl_ok = ~np.isnan(univ_ib_cl_data)

    for j in vol_names_sm:
        for k in range(30, 330, 30):
            # horizons below 100 carry one leading zero in the stored name
            tag = j + ("_0" if k < 100 else "_") + str(k) + "D"
            univ_ib_vol = cr_vol_all.retrieve(univ_ib_ext + tag)
            for i in univ_ib_vol.tick_cols():
                univ_ib_vol[i] = filt.chg(filt.lag(1 / univ_ib_vol[i].values))
            univ_ib_vol_data = univ_ib_vol[univ_ib_vol.tick_cols()].values.reshape(-1)

            univ_ib_nn = cl_ok & ~np.isnan(univ_ib_vol_data)  # & (univ_ib_rt_data < 0)

            rho = ss.spearmanr(univ_ib_vol_data[univ_ib_nn], univ_ib_cl_data[univ_ib_nn]).correlation
            print(tag + ":\t", rho)
        print("\n")
예제 #8
0
def get_vol_adj():
    """Rescale, smooth and store every volatility series in ``cr_vol_all_adj``.

    For each volatility name (indexed by ``range(len(vol_list))`` into
    ``vol_names``) and each lookback in ``vol_lookbacks`` the stored frame is
    loaded and, per ticker, restricted to rows where the exchange-open flag
    equals 1. On those rows the series is multiplied by the output of
    ``qreg4.roll_e_ladreg_1d`` (absolute ``filt.ret`` of the close against
    the lagged volatility, window ``adj_lookback``) and by ``_lapl_mult``,
    then smoothed with three ``filt.lrma`` passes. The result is written back
    on the open rows, the remaining rows are filled by ``filt.fill``, and the
    frame is stored under the same key. Tickers with no open rows become NaN.

    :return: None.
    """
    close_frame = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')
    open_frame = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')
    tick_cols = close_frame.tick_cols()
    for k in range(0, len(vol_list)):
        vol_name = vol_names[k]
        print('Processing volatility %s' % vol_name)
        for j in vol_lookbacks:
            print('Currently working on lookback %s' % str(j))
            # lookbacks below 100 carry one leading zero in the stored key
            key = univ_ib_eqidx_ext + vol_name + ('_0' if j < 100 else '_') + str(j)
            vol_frame = cr_vol_all.retrieve(key)
            for i in tick_cols:
                z = np.where(open_frame[i].values.astype('int') == 1)[0]
                if not z.shape[0] > 0:
                    vol_frame[i] = np.nan
                    continue
                raw_vol = vol_frame[i].values[z]
                close_ = close_frame[i].values[z]
                # rolling coefficient of absolute returns on lagged volatility
                lagged_vol = filt.lag(raw_vol)
                abs_ret = np.abs(filt.ret(close_))
                regp = qreg4.roll_e_ladreg_1d(abs_ret, lagged_vol, adj_lookback)
                # coefficient-adjusted volatility
                scaled = raw_vol * regp * _lapl_mult
                # slow pass plus a smoothed (fast - slow) correction
                slow = filt.lrma(scaled, 61, lg=True)
                fast = filt.lrma(scaled, 7, lg=True)
                smooth = slow + filt.lrma(fast - slow, 16)
                # write back on open rows only, then fill the remaining rows
                vol_frame[i] = np.nan
                vol_frame[list(z), i] = smooth
                vol_frame[i] = filt.fill(vol_frame[i].values)
            cr_vol_all_adj.store(key, vol_frame)
    return None
예제 #9
0
from CrazyCod.Utilities.smth_price_results import smth_param
import warnings
from CrazyCod.Utilities.frames import DataFrame
import matplotlib.pyplot as plt
import CrazyCod.Utilities.boosting as bst

# Market analysed by this script; passed to every mkt_retrieve call below.
i = 'SP500'


# pure momentum set of signals

# Volatility table for the market; one column per volatility estimator.
testv1 = mkt_retrieve(i, 'Stats', 'Volatility')
# Single-element list so other volatility columns can be added to the sweep.
for vv in ['vol_gk240']:
    # vv = 'vol_gk240'
    # lag the volatility
    testv = filt.lag(testv1[vv].values)
    test1 = mkt_retrieve(i, 'Stats', 'Returns')

    # lag each signal column and divide it by the lagged volatility
    test2 = mkt_retrieve(i, 'MovReg', 'Signals')
    tcl2 = test2.tick_cols()
    for k in tcl2:
        test2[k] = filt.lag(test2[k].values)/testv

    # compute filt.ret of the close and divide it by the lagged volatility
    test1_ = test1[['Date', 'Close']]
    test1_['Returns'] = filt.ret(test1_['Close'].values)
    fret1 = test1_['Returns'].values / testv

    # one zero-initialised slot per signal column
    # NOTE(review): presumably filled with per-signal correlations further on — confirm
    num_col = len(tcl2)
    correl_vec = np.zeros(num_col)
예제 #10
0
# Lookback labels kept as strings; not referenced in the lines visible here.
lookbacks = ['120', '240', '360']

np_nice_options(linelen=250, numpres=8)

# Scaling constants.
# NOTE(review): sqrt(2/pi) and 1/sqrt(2) look like the mean-absolute-deviation
# to standard-deviation ratios of the normal and Laplace distributions — confirm.
_norm_mult = np.sqrt(2/np.pi)
_lapl_mult = 1/np.sqrt(2)
_rand_mult = 0.60

tickers = ['SP500', 'DAX', 'Nikkei225', 'ESTX50', 'SMI', 'RDX', 'MSCIEM']

# Only SP500 is analysed below; `tickers` is not iterated in the visible lines.
i = 'SP500'
univ_ib_cl = mkt_retrieve(i, 'Stats', 'Returns')['Close'].values
univ_ib_vl = mkt_retrieve(i, 'Stats', 'Volatility')['vol_pb240'].values

# Target: absolute value of filt.ret of the close. Regressor: lagged volatility.
univ_ib_cl = np.abs(filt.ret(univ_ib_cl))
univ_ib_vl = filt.lag(univ_ib_vl)

univ_ib_cl, univ_ib_vl = reduce_nonnan(univ_ib_cl, univ_ib_vl )

# Second regressor: square root of the scaled volatility, rescaled so that its
# med_abs_dev equals that of the scaled volatility itself.
univ_ib_vl *= _lapl_mult
univ_ib_vl2 = np.sqrt(univ_ib_vl)
univ_ib_vl2 = univ_ib_vl2 * med_abs_dev(univ_ib_vl)/med_abs_dev(univ_ib_vl2)

# Rolling one-regressor fit with a window of 240.
b41 = qreg.roll_e_ladreg_1d(univ_ib_cl, univ_ib_vl, 240)

# Rolling two-regressor fit on (volatility, sqrt-volatility) with a window of 240.
b51 = qreg.roll_e_ladreg_2d(univ_ib_cl, mcc(univ_ib_vl, univ_ib_vl2), 240)

# Residuals: unadjusted volatility, one-regressor fit, two-regressor fit.
# Coefficients are lagged with filt.lag before being applied.
resid0 = univ_ib_cl - univ_ib_vl
resid1 = univ_ib_cl - univ_ib_vl * filt.lag(b41)
resid2 = univ_ib_cl - univ_ib_vl * filt.lag(np.ascontiguousarray(b51[:, 0])) - univ_ib_vl2 * filt.lag(np.ascontiguousarray(b51[:, 1]))
예제 #11
0
def autocorr(x, m=1):
    """Return ``smart_kendall`` between ``x`` lagged by ``m`` and ``x`` itself.

    The lagged series and a copy of the original are passed through
    ``reduce_nonnan`` before being compared.

    :param x: input series.
    :param m: lag passed to ``filt.lag``.
    :return: whatever ``smart_kendall`` returns for the cleaned pair.
    """
    lagged, current = reduce_nonnan(filt.lag(x, m), np.copy(x))
    return smart_kendall(lagged, current)
예제 #12
0
def test_measures2():
    """Show pooled signal/return rank correlations for every signal family.

    Returns are computed per ticker with ``filt.ret`` on rows where the
    exchange-open flag equals 1; the other rows are set through
    ``filt.fill1(..., 0)``. For a fixed list of eleven tickers, only open
    rows whose index exceeds a per-ticker cutoff are used. Every signal is
    lagged two rows and the first two rows of each ticker are dropped.

    For each transform (LVL/CH0/CH1/CH2) one table is displayed with one row
    per horizon and one column per ``<price>_<filter>`` pair. Each cell holds
    100 times the Spearman correlation pooled across the tickers.

    :return: None; four tables are shown via ``DataFrame.show_all``.
    """
    pd.set_option('display.max_columns', 30)
    pd.set_option('display.max_rows', 100)
    univ_ib_gd = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')
    _prc_names = ['CLS', 'A2D', 'A3D', 'A4D', 'A2W', 'A3W', 'A4W', 'A2M', 'A3M', 'A4M']
    _trn_names = ['LVL', 'CH0', 'CH1', 'CH2']
    _fil_names = ['LRB', 'QRB', 'QRG']
    # horizons 5, 10, ..., 300 as zero-padded three-character strings
    _hoz_names = [str(j).zfill(3) for j in range(5, 305, 5)]

    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')
    for n in univ_ib_cl.tick_cols():
        z = list(np.where(univ_ib_gd[n].values.astype('int') == 1)[0])
        univ_ib_cl_ = filt.ret(univ_ib_cl[n].values[z])
        univ_ib_cl[n] = np.nan
        univ_ib_cl[z, n] = univ_ib_cl_
        univ_ib_cl[n] = filt.fill1(univ_ib_cl[n].values, 0)

    tick_cols = ['SP500', 'SP400Mid', 'Nikkei225', 'R2000', 'FTSE100', 'DAX',
                 'SMI', 'CAC40', 'AEX', 'MIBFTSE', 'IBEX35', 'OMXS30']
    siz_ = [5330, 7115, 6757, 7349, 5324, 6785, 6794, 6199, 7232, 7936, 7180, 7295]
    # siz = [2784, 999, 1357, 765, 2790, 1329, 1320, 1915, 882, 178, 934, 819]

    # the first ticker (SP500) is left out of the pooled sample
    tick_cols = tick_cols[1:]
    siz_ = siz_[1:]

    # Row selection and pooled returns do not depend on the signal, so they are
    # built once instead of once per signal file.
    rows = {}
    for n, cutoff in zip(tick_cols, siz_):
        open_rows = np.where(univ_ib_gd[n].values.astype('int') == 1)[0]
        # keep only open rows whose index is strictly beyond the ticker's cutoff
        rows[n] = [z_ for z_ in open_rows if z_ > cutoff]
    # the first two rows per ticker are dropped to match the two-row signal lag
    ret_ = np.hstack([np.empty(0)] + [univ_ib_cl[n].values[rows[n]][2:] for n in tick_cols])
    ret_ok = ~np.isnan(ret_)

    dummy_cols = [i + '_' + j for i in _prc_names for j in _fil_names]
    n_hoz = len(_hoz_names)

    for k in _trn_names:
        # Fresh NaN column per table column; no shared buffer and no RNG draw.
        dummy_df = DataFrame({dummy_cols[0]: np.full(n_hoz, np.nan),
                              dummy_cols[1]: np.full(n_hoz, np.nan)})
        for col in dummy_cols[2:]:
            dummy_df[col] = np.full(n_hoz, np.nan)

        for col in dummy_cols:
            for mi, m in enumerate(_hoz_names):
                fn = univ_ib_eqidx_ext + col + '_' + k + '_' + m
                univ_ib_sig = cr_sig_mr_sg.retrieve(fn)
                sig_ = np.hstack(
                    [np.empty(0)]
                    + [filt.lag(univ_ib_sig[n].values[rows[n]], 2)[2:] for n in tick_cols]
                )
                gidx = np.where(ret_ok & ~np.isnan(sig_))[0]
                dummy_df[mi, col] = 100*spearmanr(ret_[gidx], sig_[gidx]).correlation
        dummy_df.show_all()
예제 #13
0
        univ_ib_vol7 = cr_vol_all.retrieve(univ_ib_ext+'vol_yz_'+lookback)[['Date', ticker]]

        z = list(np.where(univ_ib_gd[ticker].values.astype('int') == 1)[0])

        univ_ib_cl = univ_ib_cl[z, :]
        univ_ib_vol0 = univ_ib_vol0[z, :]
        univ_ib_vol1 = univ_ib_vol1[z, :]
        univ_ib_vol2 = univ_ib_vol2[z, :]
        univ_ib_vol3 = univ_ib_vol3[z, :]
        univ_ib_vol4 = univ_ib_vol4[z, :]
        univ_ib_vol5 = univ_ib_vol5[z, :]
        univ_ib_vol6 = univ_ib_vol6[z, :]
        univ_ib_vol7 = univ_ib_vol7[z, :]

        univ_ib_cl[ticker] = np.abs(filt.ret(univ_ib_cl[ticker].values))
        univ_ib_vol0[ticker] = filt.lag(univ_ib_vol0[ticker].values)
        univ_ib_vol1[ticker] = filt.lag(univ_ib_vol1[ticker].values)
        univ_ib_vol2[ticker] = filt.lag(univ_ib_vol2[ticker].values)
        univ_ib_vol3[ticker] = filt.lag(univ_ib_vol3[ticker].values)
        univ_ib_vol4[ticker] = filt.lag(univ_ib_vol4[ticker].values)
        univ_ib_vol5[ticker] = filt.lag(univ_ib_vol5[ticker].values)
        univ_ib_vol6[ticker] = filt.lag(univ_ib_vol6[ticker].values)
        univ_ib_vol7[ticker] = filt.lag(univ_ib_vol7[ticker].values)

        univ_ib_dt_ = univ_ib_cl['Date'].values
        univ_ib_cl_ = univ_ib_cl[ticker].values
        univ_ib_vol0_ = univ_ib_vol0[ticker].values
        univ_ib_vol1_ = univ_ib_vol1[ticker].values
        univ_ib_vol2_ = univ_ib_vol2[ticker].values
        univ_ib_vol3_ = univ_ib_vol3[ticker].values
        univ_ib_vol4_ = univ_ib_vol4[ticker].values
예제 #14
0
def check_closest_volatility():
    """Print median-regression coefficients of absolute returns on lagged volatility.

    Loads the ``Close`` frame, converts every ticker column with ``filt.ret``
    and takes absolute values. For every name in ``vol_names_sm`` and every
    horizon in ``range(_min_range, _max_range, _step_range * 3)`` it loads the
    matching volatility frame, lags each column with ``filt.lag``, multiplies
    by ``_norm_mult`` and fits ``QuantReg(...).fit(q=0.5)`` on seven samples:

    0. all observations where both series are non-NaN
    1-3. those where the sign of the lagged 30/60/120-period series is negative
    4-6. those where that sign is positive

    The seven parameter vectors are printed on one tab-separated line.

    :return: None; one line is printed per volatility name and horizon.
    """
    import warnings

    univ_ib_cl = cr_cret.retrieve(univ_ib_ext + "Close")
    univ_ib_rt1 = univ_ib_cl.copy()
    univ_ib_rt2 = univ_ib_cl.copy()
    univ_ib_rt3 = univ_ib_cl.copy()

    for i in univ_ib_cl.tick_cols():
        univ_ib_cl[i] = filt.ret(univ_ib_cl[i].values)
        univ_ib_rt1[i] = filt.ret(univ_ib_rt1[i].values, 30)
        univ_ib_rt1[i] = filt.lag(univ_ib_rt1[i].values)
        univ_ib_rt2[i] = filt.ret(univ_ib_rt2[i].values, 60)
        univ_ib_rt2[i] = filt.lag(univ_ib_rt2[i].values)
        univ_ib_rt3[i] = filt.ret(univ_ib_rt3[i].values, 120)
        univ_ib_rt3[i] = filt.lag(univ_ib_rt3[i].values)
    univ_ib_cl_data = univ_ib_cl[univ_ib_cl.tick_cols()].values.reshape(-1)
    univ_ib_rt1_data = univ_ib_rt1[univ_ib_rt1.tick_cols()].values.reshape(-1)
    univ_ib_rt2_data = univ_ib_rt2[univ_ib_rt2.tick_cols()].values.reshape(-1)
    univ_ib_rt3_data = univ_ib_rt3[univ_ib_rt3.tick_cols()].values.reshape(-1)
    univ_ib_cl_data = np.abs(univ_ib_cl_data)

    # Silence NaN-related RuntimeWarnings only while the sign masks are built,
    # and restore the caller's warning filters afterwards instead of resetting
    # them globally.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        signs = [np.sign(d) for d in (univ_ib_rt1_data, univ_ib_rt2_data, univ_ib_rt3_data)]
        cl_ok = ~np.isnan(univ_ib_cl_data)
        # Sample definitions in output order: all, three "down", three "up".
        # None of them depends on the volatility frame, so they are built once.
        conditions = (
            [cl_ok]
            + [cl_ok & (s < 0) for s in signs]
            + [cl_ok & (s > 0) for s in signs]
        )

    # vol_names_ = [i for i in vol_names if 'reg' not in i]

    for j in vol_names_sm:
        for k in range(_min_range, _max_range, _step_range * 3):
            # horizons below 100 carry one leading zero in the stored name
            label = j + ("_0" if k < 100 else "_") + str(k)
            univ_ib_vol = cr_vol_all.retrieve(univ_ib_ext + label + "D")
            for i in univ_ib_vol.tick_cols():
                univ_ib_vol[i] = filt.lag(univ_ib_vol[i].values)
            univ_ib_vol_data = univ_ib_vol[univ_ib_vol.tick_cols()].values.reshape(-1)
            vol_ok = ~np.isnan(univ_ib_vol_data)

            fields = []
            for condition in conditions:
                univ_ib_nn = condition & vol_ok
                univ_ib_vol_data_ = univ_ib_vol_data[univ_ib_nn] * _norm_mult
                univ_ib_cl_data_ = univ_ib_cl_data[univ_ib_nn]
                res = QuantReg(univ_ib_cl_data_, univ_ib_vol_data_).fit(q=0.5).params
                # each result is followed by a tab field, as in the printed layout
                fields.extend((np_to_str(res), "\t"))

            print(label + "\t", *fields)
        print("\n")
예제 #15
0
def get_smart_measures(n_measures=20, start=5220):
    """Greedily pick signals by ``smart_kendall`` against the running residual.

    Loads the stored ``GBM`` frame, splits off ``Close`` and ``Volatility``,
    and lags every remaining column and the volatility with ``filt.lag``.
    Two selections run side by side: one on the raw values and one where
    target and signals are divided by the lagged volatility. Columns whose
    name contains ``'QRG'`` are divided by 1000 in both sets.

    Each round picks, among columns not yet chosen, the one with the largest
    absolute ``smart_kendall`` against the current residual (the target
    itself in the first round). Its coefficient comes from
    ``find_more_accurate_beta`` on the residual, its contribution is added to
    the running fit, and the residual is recomputed. Progress is printed
    after every round.

    :param n_measures: number of selection rounds (previously fixed at 20).
    :param start: first row used for every statistic (previously fixed at 5220).
    :return: None; results are printed.
    :raises IndexError: if no remaining column has a non-zero, non-NaN
        ``smart_kendall`` against the residual.
    """
    univ_ib_data = cr_cret.retrieve(univ_ib_eqidx_ext + 'GBM')

    univ_ib_cl = univ_ib_data['Close'].values
    univ_ib_vl = univ_ib_data['Volatility'].values
    del univ_ib_data['Close']
    del univ_ib_data['Volatility']

    new_col_names = list(univ_ib_data.columns)
    for i in new_col_names:
        univ_ib_data[i] = filt.lag(univ_ib_data[i].values)
    univ_ib_vl = filt.lag(univ_ib_vl)

    univ_ib_clv = univ_ib_cl/univ_ib_vl
    univ_ib_datav = univ_ib_data.copy()
    for i in new_col_names:
        univ_ib_datav[i] = univ_ib_datav[i].values/univ_ib_vl

    # decrease the magnitude of QRG signals for stability
    for i in new_col_names:
        if 'QRG' in i:
            univ_ib_data[i] = univ_ib_data[i].values/1000
            univ_ib_datav[i] = univ_ib_datav[i].values/1000

    def _best_column(frame, resid, taken):
        # Name of the unused column with the largest |smart_kendall| vs. resid.
        best_val, best_name = 0.0, None
        for name in new_col_names:
            if name in taken:
                continue
            val = smart_kendall(frame[name].values[start:], resid[start:])
            # a NaN statistic compares false and is therefore skipped
            if abs(val) > abs(best_val):
                best_val, best_name = val, name
        if best_name is None:
            raise IndexError('no remaining column has a non-zero smart_kendall '
                             'against the residual')
        return best_name

    def _absorb(values, target, resid, fitted):
        # Add the chosen column's contribution; return (new fit, new residual).
        beta = find_more_accurate_beta(resid[start:], values[start:])
        contrib = beta * values
        fitted = contrib if fitted is None else fitted + contrib
        return fitted, target - fitted

    meas = []
    measv = []
    _collect = None
    _collectv = None
    # before anything is selected, the residual is the target itself
    _err = univ_ib_cl
    _errv = univ_ib_clv
    for i in range(0, n_measures):
        name = _best_column(univ_ib_data, _err, meas)
        namev = _best_column(univ_ib_datav, _errv, measv)
        meas.append(name)
        measv.append(namev)

        _collect, _err = _absorb(univ_ib_data[name].values, univ_ib_cl, _err, _collect)
        _collectv, _errv = _absorb(univ_ib_datav[namev].values, univ_ib_clv, _errv, _collectv)

        print(i)
        print(meas)
        print(measv)
        print(smart_kendall(_collect[start:], univ_ib_cl[start:]),
              np.std(univ_ib_cl[start:]-_collect[start:]))
        print(smart_kendall(_collectv[start:], univ_ib_clv[start:]),
              np.std(univ_ib_clv[start:]-_collectv[start:]))
예제 #16
0
test1 = mkt_retrieve(i, 'Stats', 'Returns')
for j in rlbck:
    # j = rlbck[0]
    test2 = mkt_retrieve(i, 'MovReg', 'Signals_Pct_' + str(j))
    tcl2 = test2.tick_cols()
    for lmb in [100]:
        # lmb = 0.01
        print('Using lookback of %s with lambda of %s' % (str(j), str(lmb)))

        for k in range(2, 3):
            # k = 1
            # lag the signal data
            test2_ = test2.copy()
            for tcl2_ in tcl2:
                test2_[tcl2_] = filt.lag(test2_[tcl2_].values, k)

            # get the average of returns
            test1_ = test1[['Date', 'Close']]
            test1_['Returns'] = filt.ret(test1_['Close'].values)
            test1_['MultiReturns'] = filt.ret(test1_['Close'].values, k)/k
            test1_['MultiReturns'] = filt.mpc(test1_['MultiReturns'].values, j)
            test1_['ReturnPct'] = filt.mpc(test1_['Returns'].values, j)



            # get beta
            # beta1 = qreg5.roll_s_ladreg_2d_l2n(test1_['MultiReturns'].values,
            #                                    np.ascontiguousarray(test2_[test2_.tick_cols()].values),
            #                                    j, lmb, 30)
            beta2 = qreg5.roll_w_ladreg_2d_l2n(test1_['MultiReturns'].values,
예제 #17
0
def get_smart_measures():
    """Greedily select signal columns by rank agreement with the close series.

    Loads the 'GBM' frame, lags every signal column and the volatility with
    ``filt.lag`` and builds a second, volatility-scaled copy of the target and
    the signals.  Columns whose name contains 'QRG' are divided by 1000 for
    stability, and columns whose name contains 'M_' or 'W_' are left out of the
    candidate set.

    For up to 20 rounds, and independently for the raw and the
    volatility-scaled data, every unused candidate is scaled by
    ``find_more_accurate_beta`` fitted on the current residual; the candidate
    whose addition gives the largest absolute ``smart_kendall`` against the
    target is added to the running fit.  Fitting and scoring only use rows from
    index 5220 onwards.

    The round number, the names chosen so far and the current scores are
    printed after each round.  The search stops early (instead of failing on
    the former ``99999`` sentinel index) when no remaining candidate has a
    non-zero score.  Nothing is returned.
    """
    fit_start = 5220   # first row used for fitting and scoring
    n_rounds = 20      # maximum number of signals picked per data set

    univ_ib_data = cr_cret.retrieve(univ_ib_eqidx_ext + 'GBM')

    univ_ib_cl = univ_ib_data['Close'].values
    univ_ib_vl = univ_ib_data['Volatility'].values
    del univ_ib_data['Close']
    del univ_ib_data['Volatility']

    # everything that is left in the frame is a signal column
    new_col_names = list(univ_ib_data.columns)
    for name in new_col_names:
        univ_ib_data[name] = filt.lag(univ_ib_data[name].values)
    univ_ib_vl = filt.lag(univ_ib_vl)

    # volatility-scaled twin of the target and of every signal
    univ_ib_clv = univ_ib_cl/univ_ib_vl
    univ_ib_datav = univ_ib_data.copy()
    for name in new_col_names:
        univ_ib_datav[name] = univ_ib_datav[name].values/univ_ib_vl

    # decrease the magnitude of QRG signals for stability
    for name in new_col_names:
        if 'QRG' in name:
            univ_ib_data[name] = univ_ib_data[name].values/1000
            univ_ib_datav[name] = univ_ib_datav[name].values/1000

    # _new_col_names = new_col_names  # type 1
    _new_col_names = [c for c in new_col_names if ('M_' not in c) and ('W_' not in c)]

    def _best_candidate(target, frame, fitted, taken):
        """Return (name, scaled column) of the best unused candidate, or (None, None)."""
        best_score, best_name, best_col = 0.0, None, None
        for name in _new_col_names:
            if name in taken:
                continue
            values = frame[name].values
            if fitted is None:
                # first round: fit and score against the target itself
                col = find_more_accurate_beta(target[fit_start:], values[fit_start:]) * values
                score = smart_kendall(target[fit_start:], col[fit_start:])
            else:
                # later rounds: fit on the residual, score the updated total fit
                col = find_more_accurate_beta(target[fit_start:]-fitted[fit_start:],
                                              values[fit_start:]) * values
                score = smart_kendall(target[fit_start:], fitted[fit_start:]+col[fit_start:])
            if abs(score) > abs(best_score):
                best_score, best_name, best_col = score, name, col
        return best_name, best_col

    # find the measures
    meas = []
    measv = []
    _collect = None
    _collectv = None
    for i in range(0, n_rounds):
        pick, pick_col = _best_candidate(univ_ib_cl, univ_ib_data, _collect, meas)
        pickv, pickv_col = _best_candidate(univ_ib_clv, univ_ib_datav, _collectv, measv)
        if pick_col is None or pickv_col is None:
            # candidates exhausted, or every remaining score is zero/NaN
            print('no further candidate improves the fit; stopping')
            break

        meas.append(pick)
        measv.append(pickv)
        _collect = pick_col if _collect is None else _collect + pick_col
        _collectv = pickv_col if _collectv is None else _collectv + pickv_col

        print(i)
        print(meas)
        print(measv)
        print(smart_kendall(_collect[fit_start:], univ_ib_cl[fit_start:]))
        print(smart_kendall(_collectv[fit_start:], univ_ib_clv[fit_start:]))
예제 #18
0
def get_cs_factor_portfolios():
    """Backtest cross-sectional rank portfolios built from one signal.

    Steps, all on the frames retrieved from ``cr_cret`` / ``cr_sret``:

    * closes are converted with ``filt.ret``; volatility, signal and the two
      PCA beta frames are lagged with ``filt.lag``;
    * the backtest starts at the first row on which at least 20 markets have
      both a signal and a PCA1 beta;
    * on every row the signal/volatility ratio is ranked cross-sectionally with
      ``half_norm_rankit`` (H series) and ``full_norm_rankit`` (F series);
    * for each ranking three weight vectors are formed - the ranks themselves,
      the ranks minus their projection on the PCA1 betas, and that result minus
      the projection of the ranks on the PCA2 betas - each scaled to a unit sum
      of absolute weights;
    * the weights are applied to returns levered to ``0.005 / volatility`` and
      stored in columns H1-H3 and F1-F3 of ``bk_test``.

    Prints a ``[sharpe, drawdown ratio]`` pair per column, plots the price
    paths on figures 1 and 2 and prints the correlation matrix of the six
    return series.  Nothing is returned.
    """
    min_live = 20  # markets that must have data before the backtest starts

    # just testing here
    # read the closing prices, convert it to returns
    _cls = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')
    tcl = _cls.tick_cols()
    for j in tcl:
        _cls[j] = filt.ret(_cls[j].values)

    # read the volatility, signal, pca beta, lag them
    _vol = cr_cret.retrieve(univ_ib_eqidx_ext + 'vol_gk240')
    _sig = cr_sret.retrieve(univ_ib_eqidx_ext + 'D10S26_521_QRB_LVL')
    # _sig = cr_sret.retrieve(univ_ib_eqidx_ext + _all_signals_p1_55[2])
    _pca1 = cr_cret.retrieve(univ_ib_eqidx_ext + 'PCA1_Beta')
    _pca2 = cr_cret.retrieve(univ_ib_eqidx_ext + 'PCA2_Beta')
    for j in tcl:
        _sig[j] = filt.lag(_sig[j].values)
        _pca1[j] = filt.lag(_pca1[j].values)
        _pca2[j] = filt.lag(_pca2[j].values)
        _vol[j] = filt.lag(_vol[j].values)

    # find starting point, at least 20 markets are live (from both beta, sig)
    nc = len(tcl)
    ny = np.zeros(nc)
    for j in range(0, nc):
        ny[j] = np.maximum(filt.fst_nan(_sig[tcl[j]].values), filt.fst_nan(_pca1[tcl[j]].values))
    # A market is live from its own start row onwards, so the first row with
    # `min_live` live markets is the min_live-th smallest start.  The previous
    # search counted `ny >= j`, which holds for every market at the first j and
    # therefore always returned the earliest start.
    # NOTE(review): assumes filt.fst_nan returns the first usable row index - confirm.
    if nc >= min_live:
        _ny = int(np.sort(ny)[min_live - 1])
    else:
        _ny = int(np.min(ny))

    # cross-sectionally normalize the signal
    # Only one frame per ranking is needed: the former _sign2/_sign3 and
    # _sign5/_sign6 frames were exact copies of _sign1 and _sign4.
    _sign_half = _sig.copy()
    _sign_full = _sig.copy()
    for j in tcl:
        _sign_half[j] = np.nan
        _sign_full[j] = np.nan
    for j in range(_ny, _sig.shape[0]):
        _tmp = _sig[j, tcl] / _vol[j, tcl]
        _sign_half[j, tcl] = half_norm_rankit(_tmp)
        _sign_full[j, tcl] = full_norm_rankit(_tmp)

    # calculate returns using risk parity portfolio approaches
    half_cols = ['H1', 'H2', 'H3']
    full_cols = ['F1', 'F2', 'F3']
    bk_test = dummy_df(_vol)
    for col in half_cols + full_cols:
        bk_test[col] = np.nan

    def _unit_weights(ranks, pca1, pca2):
        """Return the raw, PCA1-adjusted and PCA1+PCA2-adjusted weights, each with unit gross."""
        pca1_sq = np.dot(pca1, pca1)
        pca2_sq = np.dot(pca2, pca2)
        less_pca1 = ranks - pca1 * np.dot(ranks, pca1) / pca1_sq
        # the PCA2 loading is measured on the raw ranks, as before
        less_pca12 = less_pca1 - pca2 * np.dot(ranks, pca2) / pca2_sq
        return [w / np.sum(np.abs(w)) for w in (ranks, less_pca1, less_pca12)]

    for j in range(_ny, _sig.shape[0]):
        # j = _ny
        _half_tmp = _sign_half[j:j, tcl].values
        _full_tmp = _sign_full[j:j, tcl].values
        _cls_tmp = _cls[j:j, tcl].values
        _vol_tmp = _vol[j:j, tcl].values
        _pca1_tmp = _pca1[j:j, tcl].values
        _pca2_tmp = _pca2[j:j, tcl].values

        # keep only the markets for which every input is available on this row
        _half_tmp, _full_tmp, _cls_tmp, _vol_tmp, _pca1_tmp, _pca2_tmp = reduce_nonnan(
            _half_tmp, _full_tmp, _cls_tmp, _vol_tmp, _pca1_tmp, _pca2_tmp)

        # returns levered to 0.005 / volatility
        _lev_tmp = 0.005 / _vol_tmp
        _ret_tmp = _cls_tmp * _lev_tmp

        for col, w in zip(half_cols, _unit_weights(_half_tmp, _pca1_tmp, _pca2_tmp)):
            bk_test[j, col] = np.dot(w, _ret_tmp)
        for col, w in zip(full_cols, _unit_weights(_full_tmp, _pca1_tmp, _pca2_tmp)):
            bk_test[j, col] = np.dot(w, _ret_tmp)
    print('done')

    _dt = bk_test['Date'].values[_ny:]

    def _plot_group(fig_num, cols):
        """Print [sharpe, drawdown ratio] and plot the price path for each column."""
        fig = plt.figure(fig_num)
        for jidx, j in enumerate(cols):
            _all = bk_test[j].values
            _live = bk_test[_ny:, j].values
            _sh = np.nanmean(_all)*16/np.nanstd(_all)
            if _sh > 0:
                _mx = avg_drawdown(_live) / (16 * np.std(_live))
                testh = conv_to_price(_all)
            else:
                # a losing series is reported and plotted with its sign flipped
                _mx = avg_drawdown(-_live) / (16 * np.std(_live))
                testh = conv_to_price(-_all)
                _sh = -_sh
            print([_sh, _mx])
            plt.subplot(3, 1, jidx+1)
            plot_ts_new(_dt, testh[_ny:])
        return fig

    f1 = _plot_group(1, half_cols)
    f2 = _plot_group(2, full_cols)
    _bk_test = bk_test[_ny:, bk_test.tick_cols()].values
    print(np.corrcoef(_bk_test.T))

    # bk_test3 = bk_test.copy()

    f1.clear()
    f2.clear()

    # NOTE(review): bk_test1, bk_test2 and bk_test3 are not defined anywhere in
    # this function.  The lines below only work if they exist as module-level
    # names (e.g. saved copies of bk_test from earlier runs with other signals);
    # otherwise they raise NameError.  Kept unchanged pending confirmation.
    plt.subplot(3, 1, 1)
    tmp = bk_test1[:, 'F1'].values
    plot_ts_new(_dt, conv_to_price(tmp)[_ny:])
    print(np.nanmean(tmp)*16/np.nanstd(tmp))
    plt.subplot(3, 1, 2)
    tmp = bk_test2[:, 'F1'].values
    plot_ts_new(_dt, conv_to_price(tmp)[_ny:])
    print(np.nanmean(tmp)*16/np.nanstd(tmp))
    plt.subplot(3, 1, 3)
    tmp = 0.5 * -bk_test3[:, 'F1'].values + 0.5 * bk_test2[:, 'F2'].values
    plot_ts_new(_dt, conv_to_price(tmp)[_ny:])
    print(np.nanmean(tmp) * 16 / np.nanstd(tmp))
                   'D05S08_005_QRB', 'D01S08_038_QRG', 'D09S29_005_QRG', 'D05S23_349_LRB', 'D10S29_008_QRG',
                   'D10S14_032_QRG', 'D03S17_129_LRB', 'D02S17_005_QRB', 'D02S11_349_QRG', 'D10S14_012_QRG',
                   'D04S08_009_QRB', 'D05S14_236_QRG', 'D01S20_070_QRG', 'D10S29_005_QRG', 'D10S11_009_QRG',
                   'D07S08_005_QRG', 'D05S08_012_QRB', 'D08S29_086_QRG', 'D10S08_007_QRG', 'D09S14_129_QRB',
                   'D10S08_005_QRG', 'D01S08_008_QRB', 'D04S11_349_QRG', 'D02S14_156_LRB', 'D04S20_236_QRB',
                   'D01S11_429_QRG', 'D03S17_032_QRB', 'D07S29_005_QRG', 'D10S23_021_QRG', 'D08S20_005_QRG',
                   'D08S08_007_QRG', 'D09S17_129_QRG', 'D09S08_009_QRG', 'D01S08_017_QRG', 'D01S20_048_QRG',
                   'D01S08_027_QRG', 'D07S08_012_QRG', 'D06S11_038_QRG', 'D10S08_012_QRG', 'D03S11_349_QRB',
                   'D07S14_005_QRG', 'D03S20_349_LRB', 'D10S08_236_QRG', 'D02S08_009_QRG', 'D02S26_027_QRB',
                   'D06S29_005_QRG', 'D04S26_005_QRG', 'D08S17_021_QRG', 'D07S23_015_QRG', 'D09S29_129_QRG',
                   'D08S26_236_LRB', 'D10S11_005_QRG', 'D09S29_521_QRG']

# For every market in _eq_idx: load the lagged volatility and assemble one wide
# signal table by merging the level and change signal sets on 'Date'.
# The loop body is cut off in this excerpt after the 'Changes5' rename.
for i in _eq_idx:
    # lag the volatility
    # i = _eq_idx[0]
    testv = filt.lag(mkt_retrieve(i, 'Stats', 'Volatility')['vol_gk240'].values)

    # lag the signal
    # NOTE(review): no lag is applied to the signals in the visible lines;
    # presumably it happens further down - confirm.
    # Level signals: keep 'Date' plus the _sig_set_95_lvl columns and suffix
    # each signal name with '_LVL' so it stays unique after the merges.
    test2 = mkt_retrieve(i, 'MovReg', 'Signals')[['Date']+_sig_set_95_lvl]
    test2.set_columns(['Date']+[k + '_LVL' for k in _sig_set_95_lvl])

    # Same treatment for the 'Changes1' table ('_CH1' suffix), merged on 'Date'.
    test2_ = mkt_retrieve(i, 'MovReg', 'Changes1')[['Date']+_sig_set_95_ch1]
    test2_.set_columns(['Date']+[k + '_CH1' for k in _sig_set_95_ch1])
    test2 = DataFrame.merge(test2, test2_, on='Date')

    # 'Changes3' table ('_CH3' suffix), merged on 'Date'.
    test2_ = mkt_retrieve(i, 'MovReg', 'Changes3')[['Date']+_sig_set_95_ch3]
    test2_.set_columns(['Date']+[k + '_CH3' for k in _sig_set_95_ch3])
    test2 = DataFrame.merge(test2, test2_, on='Date')

    # 'Changes5' table ('_CH5' suffix); its merge is not visible in this excerpt.
    test2_ = mkt_retrieve(i, 'MovReg', 'Changes5')[['Date']+_sig_set_95_ch5]
    test2_.set_columns(['Date']+[k + '_CH5' for k in _sig_set_95_ch5])
예제 #20
0
def check_closest_volatility():
    """Compare volatility estimators by regressing absolute returns on them.

    For every estimator in ``vol_names`` and every lookback ``k`` in
    ``range(_min_range, _max_range + _step_range, _step_range)`` the lagged
    volatility frame is flattened across all tick columns and passed, scaled by
    ``_norm_mult``, to ``simple_quant_reg`` together with the absolute
    ``filt.ret`` of the close.  Seven fits are made per series: the full
    sample, then the observations whose lagged 30-, 60- and 120-period return
    is negative, then those where it is positive.

    One tab-separated line per series is printed (series label followed by the
    seven ``np_to_str`` results) and a blank block after each estimator.
    Nothing is returned.
    """
    import warnings

    univ_ib_cl = cr_cret.retrieve(univ_ib_ext+'Close')
    # one copy of the closes per trailing-return horizon, taken before the
    # closes themselves are overwritten with returns
    trailing = [(univ_ib_cl.copy(), horizon) for horizon in (30, 60, 120)]

    for i in univ_ib_cl.tick_cols():
        # i = 'SP500'
        univ_ib_cl[i] = filt.ret(univ_ib_cl[i].values)
        for frame, horizon in trailing:
            frame[i] = filt.ret(frame[i].values, horizon)
            frame[i] = filt.lag(frame[i].values)

    univ_ib_cl_data = np.abs(univ_ib_cl[univ_ib_cl.tick_cols()].values.reshape(-1, ))

    # Sample selectors: None = full sample, then negative and positive trailing
    # return for each horizon.  NaN handling can emit RuntimeWarning; silence it
    # only inside this block so the caller's warning filters are restored
    # afterwards (the previous simplefilter('default') call overwrote them).
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', RuntimeWarning)
        trail_sign = [np.sign(frame[frame.tick_cols()].values.reshape(-1, ))
                      for frame, _ in trailing]
        regimes = [None]
        regimes += [sgn < 0 for sgn in trail_sign]
        regimes += [sgn > 0 for sgn in trail_sign]

    # vol_names_ = [i for i in vol_names if 'reg' not in i]

    for j in vol_names:
        for k in range(_min_range, _max_range+_step_range, _step_range):
            # stored series names carry the lookback with a leading zero below 100
            label = j + ('_0' if k < 100 else '_') + str(k)
            univ_ib_vol = cr_vol_all.retrieve(univ_ib_ext + label)
            for i in univ_ib_vol.tick_cols():
                univ_ib_vol[i] = filt.lag(univ_ib_vol[i].values)
            univ_ib_vol_data = univ_ib_vol[univ_ib_vol.tick_cols()].values.reshape(-1, )

            univ_ib_valid = ~np.isnan(univ_ib_cl_data) & ~np.isnan(univ_ib_vol_data)
            fields = [label + '\t']
            for regime in regimes:
                univ_ib_nn = univ_ib_valid if regime is None else univ_ib_valid & regime
                univ_ib_vol_data_ = univ_ib_vol_data[univ_ib_nn]*_norm_mult
                univ_ib_cl_data_ = univ_ib_cl_data[univ_ib_nn]
                res = simple_quant_reg(univ_ib_cl_data_, univ_ib_vol_data_)
                # res = QuantReg(univ_ib_cl_data_, univ_ib_vol_data_).fit(q=0.5).params
                fields += [np_to_str(res), '\t']
            print(*fields)
        print('\n')
예제 #21
0
def check_price_forecast():
    """Score the adjusted SP500 series against the direction of price changes.

    Restricts all data to the rows where the SP500 'ExchOpen' flag is 1, loads
    the 'AdjSerD<dd>S<ss>' series for days 1-10 and smoothing 5-32, and drops
    the leading rows up to the largest ``filt.fst_nan`` of close, volatility
    and all series.  For every series the ``smart_kendall`` against the sign of
    the ``nlag``-period ``filt.chg`` of the close is printed.

    Afterwards one series (grid position 167, i.e. name '06_32') is plotted
    with the close, and cumulative paths of the close returns, the series
    returns and the close returns traded against the sign of the lagged series
    return are drawn on figure 1.  Nothing is returned.
    """
    exch_open = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')
    univ_ib_dt = exch_open['Date'].values
    univ_ib_gd = exch_open['SP500'].values
    z = np.where(univ_ib_gd.astype('int') == 1)[0]
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')['SP500'].values[z]
    univ_ib_vl = cr_vol_all_adj.retrieve(univ_ib_eqidx_ext + 'vol_pb_120')['SP500'].values[z]
    univ_ib_dt = univ_ib_dt[z]

    nlag = 2
    focus = 167  # row of the series inspected in the plots: names[167] == '06_32'

    n1 = filt.fst_nan(univ_ib_cl)
    n1 = np.maximum(n1, filt.fst_nan(univ_ib_vl))

    names = []
    series = []
    for days in range(1, 11):
        for smth in range(5, 33):
            # both grid values are zero-padded to two digits in the stored names
            dd = str(days).zfill(2)
            ss = str(smth).zfill(2)
            tmp = cr_aret.retrieve(univ_ib_eqidx_ext + 'AdjSerD' + dd + 'S' + ss)['SP500'].values[z]
            # get the change in the signal
            # tmp = np.sign(filt.chg(tmp))

            # lag the data
            # tmp = filt.lag(tmp, nlag)

            names.append(dd + '_' + ss)
            series.append(tmp)
            n1 = np.maximum(n1, filt.fst_nan(tmp))
    # stack once instead of growing the matrix inside the loop
    univ_ib_sig = np.vstack(series)

    univ_ib_cl_ = np.sign(filt.chg(univ_ib_cl, nlag))

    univ_ib_cl_ = univ_ib_cl_[n1:]
    univ_ib_sig = univ_ib_sig[:, n1:]

    for name, row in zip(names, univ_ib_sig):
        print(name, ':', smart_kendall(univ_ib_cl_, row))

    plot_ts_new(univ_ib_dt, univ_ib_cl)
    # univ_ib_sig has lost its first n1 columns, so the dates and the close
    # returns used with it must be trimmed the same way; with n1 == 0 this is
    # identical to the untrimmed version, with n1 > 0 the lengths used to differ.
    plot_ts_new(univ_ib_dt[n1:], univ_ib_sig[focus, :])

    t1 = filt.ret(filt.lag(univ_ib_sig[focus, :]))
    t1_ = filt.ret(univ_ib_sig[focus, :])
    t2 = filt.ret(univ_ib_cl)[n1:]
    t3 = t2*np.sign(t1)

    t1_, t2, t3 = reduce_nonnan(t1_, t2, t3)
    ct1_ = np.cumprod(1+t1_)*100
    ct2 = np.cumprod(1+t2)*100
    # note the minus: the position is taken against the sign of the series return
    ct3 = np.cumprod(1-t3)*100

    f = pyl.figure(1)
    f.clear()
    pyl.subplot(3, 1, 1)
    pyl.semilogy(ct2[-5000:])
    pyl.subplot(3, 1, 2)
    pyl.semilogy(ct1_[-5000:])
    pyl.subplot(3, 1, 3)
    pyl.semilogy(ct3[-5000:])
예제 #22
0
def get_smart_measures():
    """Screen signal columns by rank correlation with the close series.

    Loads the 'GBM' frame, lags every signal column and the volatility by 2
    with ``filt.lag``, divides the signals and two targets (the close series
    and the average of the close series and its one-step lag) by the lagged
    volatility, and divides each by its ``med_abs_dev`` over rows 5220 onwards.

    Each column is scored by the sum of its ``smart_kendall`` with the two
    targets (rows 5220 onwards).  Three score variants are processed: all
    columns, columns without 'M_' in the name, and columns with neither 'M_'
    nor 'W_' in the name (excluded columns get a NaN score).

    For each variant up to 20 highest-scoring ("momentum") and up to 40
    lowest-scoring ("reversal") columns are picked greedily.  After the first
    pick a column is only eligible if its ``smart_kendall`` with every earlier
    pick is at most 0.6, and picking stops once the best eligible score is not
    beyond +/-0.015.  Progress is printed after every pick; nothing is
    returned.
    """
    fit_start = 5220       # first row used for scaling and scoring
    max_pair_corr = 0.6    # columns above this Kendall with a pick are dropped
    min_abs_score = 0.015  # later picks must score beyond this magnitude

    univ_ib_data = cr_cret.retrieve(univ_ib_eqidx_ext + 'GBM')

    univ_ib_cl = univ_ib_data['Close'].values
    univ_ib_vl = univ_ib_data['Volatility'].values
    del univ_ib_data['Close']
    del univ_ib_data['Volatility']

    new_col_names = list(univ_ib_data.columns)
    univ_ib_data = univ_ib_data.values
    num_col = len(new_col_names)

    # targets: the close series and its two-observation average
    univ_ib_vl = filt.lag(univ_ib_vl, 2)
    univ_ib_cl1 = univ_ib_cl.copy()
    univ_ib_cl2 = (univ_ib_cl+filt.lag(univ_ib_cl))/2

    # divide by the volatility
    univ_ib_cl1 = univ_ib_cl1/univ_ib_vl
    univ_ib_cl2 = univ_ib_cl2/univ_ib_vl

    # per column: change data to appropriate lags, divide by the volatility,
    # standardize (each column is independent, so the three passes are fused)
    for i in range(0, num_col):
        univ_ib_data[:, i] = filt.lag(univ_ib_data[:, i], 2)
        univ_ib_data[:, i] = univ_ib_data[:, i]/univ_ib_vl
        univ_ib_data[:, i] = univ_ib_data[:, i]/med_abs_dev(univ_ib_data[fit_start:, i])
    univ_ib_cl1 = univ_ib_cl1/med_abs_dev(univ_ib_cl1[fit_start:])
    univ_ib_cl2 = univ_ib_cl2/med_abs_dev(univ_ib_cl2[fit_start:])

    # get the correlations with univ_ib_cl
    univ_ib_correl1 = np.zeros(num_col)
    univ_ib_correl2 = np.zeros(num_col)
    for i in range(0, num_col):
        univ_ib_correl1[i] = smart_kendall(univ_ib_data[fit_start:, i], univ_ib_cl1[fit_start:])
        univ_ib_correl2[i] = smart_kendall(univ_ib_data[fit_start:, i], univ_ib_cl2[fit_start:])

    # score variants: NaN marks a column that is excluded from that variant
    _no_m = np.array(['M_' not in name for name in new_col_names], dtype=bool)
    _no_mw = np.array([('M_' not in name) and ('W_' not in name) for name in new_col_names],
                      dtype=bool)
    univ_ib_correl1a = np.where(_no_m, univ_ib_correl1, np.nan)
    univ_ib_correl2a = np.where(_no_m, univ_ib_correl2, np.nan)
    univ_ib_correl1b = np.where(_no_mw, univ_ib_correl1, np.nan)
    univ_ib_correl2b = np.where(_no_mw, univ_ib_correl2, np.nan)

    def _pick_diverse(score, direction, n_max):
        """Greedily pick up to n_max columns; direction +1 takes the highest scores, -1 the lowest."""
        best_of = argmax_fixed if direction > 0 else argmin_fixed
        picked_names, picked_idx, picked_val = [], [], []
        # Uses the real column count; the screen used to assume exactly 7200 columns.
        eligible = np.ones(num_col, dtype=bool)
        for i in range(0, n_max):
            if i == 0:
                # the first pick is unconditional
                _idx = best_of(score)
            else:
                # A column stays excluded once it correlates with any pick, so
                # only the newest pick has to be tested against the survivors.
                newest = picked_idx[-1]
                eligible[newest] = False
                for j1 in np.where(eligible)[0]:
                    if smart_kendall(univ_ib_data[fit_start:, j1],
                                     univ_ib_data[fit_start:, newest]) > max_pair_corr:
                        eligible[j1] = False
                _scorrel_idx = np.where(eligible)[0]
                if _scorrel_idx.shape[0] == 0:
                    break
                _idx = _scorrel_idx[best_of(score[_scorrel_idx])]
                # momentum needs score > 0.015, reversal needs score < -0.015
                if not (direction * score[_idx] > min_abs_score):
                    break
            picked_names.append(new_col_names[_idx])
            picked_idx.append(_idx)
            picked_val.append(score[_idx])
            print(i)
            print(picked_names)
            print(picked_val)
        return picked_names, picked_idx, picked_val

    variants = [(univ_ib_correl1, univ_ib_correl2),
                (univ_ib_correl1a, univ_ib_correl2a),
                (univ_ib_correl1b, univ_ib_correl2b)]
    for k, (univ_ib_correl1_, univ_ib_correl2_) in enumerate(variants):
        _correl = univ_ib_correl1_ + univ_ib_correl2_

        _mom_sig, _mom_sig_idx, _mom_val = _pick_diverse(_correl, 1, 20)
        _rev_sig, _rev_sig_idx, _rev_val = _pick_diverse(_correl, -1, 40)