Example #1
0
def generate_final_tl():
    """Write portfolio and trade-list CSV files for the five latest dates.

    For each of the last five entries listed by ``d_bt_fw[0]`` the positions
    of the eight clusters are loaded (``arr_0`` = tickers, ``arr_1`` =
    weights), scaled by the per-cluster weight, converted to share counts
    with ``weight * cap * 1000 / close`` and concatenated into one book.

    The combined book is rescaled so that the sum of absolute weights lies
    in ``[1, 8]``, share counts are rounded to integers and zero-share rows
    are dropped (unless every row is zero).

    Side effects: writes ``Portfolio_<date>.csv`` for every date and
    ``Tradelist_<date>.csv`` (new shares minus previous date's shares) for
    every date after the first, under ``crsf.cr_tl.name``.  Relies on the
    module-level names ``d_bt_fw``, ``d_comb``, ``cap``, ``crup``, ``cruf``,
    ``crtf`` and ``crsf``.
    """
    clus_wgt = np.array([0.15, 0.075, 0.21, 0.075, 0.15, 0.06, 0.14, 0.14])
    # Sort explicitly: "latest five" and the first/last comparisons below
    # are only meaningful when the date keys are in ascending order.
    last_dt = sorted(d_bt_fw[0].listdir())[-5:]
    for i in last_dt:
        tic_parts = []
        wgt_parts = []
        shr_parts = []
        for k in range(0, 8):
            last_dt_pos = d_bt_fw[k].load(i)
            tic_ = last_dt_pos["arr_0"]
            wgt_ = last_dt_pos["arr_1"] * clus_wgt[k]
            # closing prices of this cluster's tickers on date i
            cl_ = d_comb[k].retrieve("Close")
            cl_ = cl_[["Date"] + list(tic_)]
            cl_idx = [ii for ii, jj in enumerate(crup.pd_dt_to_str(cl_["Date", list])) if jj == i][0]
            cl_ = cl_[cl_idx, list(tic_)].values.astype("float64")
            tic_parts.append(tic_)
            wgt_parts.append(wgt_)
            shr_parts.append(wgt_ * cap * 1000 / cl_)
        tic = np.concatenate(tic_parts)
        wgt = np.concatenate(wgt_parts)
        shr = np.concatenate(shr_parts)

        wgt_chk = np.sum(np.abs(wgt))
        print(wgt_chk)
        if wgt_chk > 8:
            wgt = wgt * 8 / wgt_chk
            shr = shr * 8 / wgt_chk
        elif 0 < wgt_chk < 1:
            # The lower bound avoids 0/0 -> NaN when every weight is zero;
            # NaN share counts would turn into garbage after the int cast.
            wgt = wgt * 1 / wgt_chk
            shr = shr * 1 / wgt_chk
        shr = np.round(shr).astype("int64")

        # remove zero shr values (kept untouched when every row is zero)
        shr_not_zero = np.where(shr != 0)[0]
        if shr_not_zero.shape[0] > 0:
            tic = tic[shr_not_zero]
            wgt = wgt[shr_not_zero]
            shr = shr[shr_not_zero]

        tl = cruf.DataFrame({"Tick": tic, "Shares": shr, "Weight": wgt})
        # NOTE(review): the return value is discarded, so this only orders
        # the rows if cruf.DataFrame.sort works in place -- confirm.
        tl.sort("Weight")
        del tl["Weight"]
        tl = tl[["Tick", "Shares"]]

        if i > last_dt[0]:
            # trade = today's shares minus the previous date's shares;
            # tickers present on only one side are filled with 0
            tl_comb = cruf.DataFrame.merge(tl, tl_old, on="Tick", how="outer")
            tl_comb["Shares_x"] = crtf.fill1(tl_comb["Shares_x"].values, 0)
            tl_comb["Shares_y"] = crtf.fill1(tl_comb["Shares_y"].values, 0)
            tl_comb["Trade"] = tl_comb["Shares_x"] - tl_comb["Shares_y"]
            tl_comb = tl_comb[["Tick", "Trade"]]
            tl_comb.to_csv(crsf.cr_tl.name + "\\Tradelist_" + i + ".csv", index_label=None, index=False)
        if i < last_dt[-1]:
            tl_old = tl
        tl.to_csv(crsf.cr_tl.name + "\\Portfolio_" + i + ".csv", index_label=None, index=False)
Example #2
0
def conv_stk_to_ami_format(tick):
    """Export the price history of ``tick`` as ``<tick>.csv``.

    The table is read from ``crsf.cr_pr_cl``, the ``Turnover`` column is
    dropped, and the ``Date`` column is replaced by strings rebuilt from
    the ``crup.pd_dt_to_str`` output as ``chars[6:]-chars[4:6]-chars[0:4]``.
    The columns Date, Open, High, Low, Close, Volume are then written
    (comma separated, no index) into the directory ``crff.cr_info.name``.
    """
    prices = crsf.cr_pr_cl.retrieve(tick)
    del prices['Turnover']

    # re-order the pieces of each date string and join them with dashes
    raw_dates = crup.pd_dt_to_str(prices['Date', list])
    ami_dates = ['-'.join((d[6:], d[4:6], d[0:4])) for d in raw_dates]

    # swap the original Date column for the reformatted strings
    del prices['Date']
    prices['Date'] = ami_dates

    prices = prices[['Date', 'Open', 'High', 'Low', 'Close', 'Volume']]
    out_path = opj(crff.cr_info.name, tick+'.csv')
    prices.to_csv(out_path, sep=',', index_label=False, index=False)
Example #3
0
def _refresh_correl(x):
    """Compute rolling pairwise correlations and save one matrix per date.

    Parameters
    ----------
    x : tuple
        ``(_d_comb, _d_cor_dy, _d_cor_st)``.  ``_d_comb`` supplies the
        ``'Close'`` table; the 240-row window matrices are saved to
        ``_d_cor_st`` and the 120-row window matrices to ``_d_cor_dy``.

    For every unordered ticker pair ``crtf.cor_cc`` is evaluated over the
    whole price history for both windows.  For each date whose string is
    ``>= st_dt`` (module-level) a symmetric ``ntc x ntc`` matrix with a
    unit diagonal is assembled and saved under that date string.

    Returns
    -------
    None
    """
    _d_comb, _d_cor_dy, _d_cor_st = x
    comb_prices = _d_comb.retrieve('Close')
    tc = comb_prices.tick_cols()
    ndt = comb_prices.shape[0]
    ntc = len(tc)
    _prc = comb_prices[tc].values.astype('float64')
    window1 = 240
    window2 = 120
    # Number of unordered pairs.  Floor division keeps this an int so it is
    # a valid array dimension under true division as well; the product of
    # two consecutive integers is even, so nothing is lost.
    ntc1 = ntc*(ntc-1)//2
    _cor1 = np.full([ndt, ntc1], np.nan, dtype='float32')
    _cor2 = np.full([ndt, ntc1], np.nan, dtype='float32')

    # calculate the correlations; pairs are enumerated row by row over the
    # strict upper triangle: (0,1), (0,2), ..., (1,2), ...
    cnt = 0
    for i in range(0, ntc-1):
        for j in range(i+1, ntc):
            _cor1[:, cnt] = crtf.cor_cc(_prc[:, i], _prc[:, j], window1, zl=False)
            _cor2[:, cnt] = crtf.cor_cc(_prc[:, i], _prc[:, j], window2, zl=False)
            cnt += 1

    # only dates on or after st_dt are written out
    cp_dt = crup.pd_dt_to_str(comb_prices['Date', list])
    cp_dt_idx = [pos for pos, dt in enumerate(cp_dt) if dt >= st_dt]

    # Strict-upper-triangle indices in the same row-major order that the
    # pair loop above used, so column ``cnt`` maps to (up_r[cnt], up_c[cnt]).
    up_r, up_c = np.triu_indices(ntc, k=1)
    for i in cp_dt_idx:
        _cor_mat1 = np.eye(ntc)
        _cor_mat1[up_r, up_c] = _cor1[i]
        _cor_mat1[up_c, up_r] = _cor1[i]

        _cor_mat2 = np.eye(ntc)
        _cor_mat2[up_r, up_c] = _cor2[i]
        _cor_mat2[up_c, up_r] = _cor2[i]

        _d_cor_st.save(cp_dt[i], _cor_mat1)
        _d_cor_dy.save(cp_dt[i], _cor_mat2)
    return None
Example #4
0
def _refresh_base_data(x):
    """Assemble and save the per-date base data set.

    Parameters
    ----------
    x : tuple
        ``(_d_bt_bs, _d_bt_ft, _d_comb, _d_cor_st, _d_cor_dy)``.
        ``_d_comb`` supplies the input tables, ``_d_cor_st`` / ``_d_cor_dy``
        the per-date correlation matrices; results go to ``_d_bt_bs`` and
        ``_d_bt_ft``.

    For every row (date) of the reduced tables the function

    1. selects the tickers flagged in the ``'Univ'`` table,
    2. stacks 31 characteristic rows (order given by ``feature_keys``),
    3. drops tickers with any missing characteristic,
    4. drops tickers whose off-diagonal correlations are all NaN in both
       matrices (rows and columns),
    5. derives two weight vectors from the inverse row sums of the
       absolute correlations and appends them as two extra rows,
    6. saves ``xi, ci1, ci2, sci1, sci2, tickers`` to ``_d_bt_bs`` and,
       for all but the last two dates, the ``'Returns'`` row two steps
       ahead to ``_d_bt_ft``.

    Returns
    -------
    None
    """
    _d_bt_bs, _d_bt_ft, _d_comb, _d_cor_st, _d_cor_dy = x

    fun = _reduce_data(_d_comb.retrieve('Univ'))
    frt = _reduce_data(_d_comb.retrieve('Returns'))

    # Tables that make up the rows of the saved matrix, in row order.
    feature_keys = (
        'Vol_CC_240D',                                                  # 0
        'Adv',                                                          # 1
        'PCA0_Dy', 'PCA0_St',                                           # 2, 3
        'PCA1_Dy', 'PCA1_St',                                           # 4, 5
        'PCA2_Dy', 'PCA2_St',                                           # 6, 7
        'PCA3_Dy', 'PCA3_St',                                           # 8, 9
        'PCA4_Dy', 'PCA4_St',                                           # 10, 11
        'PCA5_Dy', 'PCA5_St',                                           # 12, 13
        'SPY_Beta_3m', 'SPY_Beta_6m', 'SPY_Beta_9m', 'SPY_Beta_12m',    # 14-17
        'MDY_Beta_3m', 'MDY_Beta_6m', 'MDY_Beta_9m', 'MDY_Beta_12m',    # 18-21
        'MOM_Beta_3m', 'MOM_Beta_6m', 'MOM_Beta_9m', 'MOM_Beta_12m',    # 22-25
        'LVOL_Beta_3m', 'LVOL_Beta_6m', 'LVOL_Beta_9m', 'LVOL_Beta_12m',  # 26-29
        'CompSig1',                                                     # 30
    )
    feats = [_reduce_data(_d_comb.retrieve(key)) for key in feature_keys]
    fvol = feats[0]

    n = fvol.shape[0]
    tc = fvol.tick_cols()
    tc_list = list(tc)

    for i in range(0, n):
        # The target is taken two rows ahead, so the last two dates have
        # none.  Evaluated once up front so nothing below can disturb it.
        has_fwd = i < n-2

        _dt = crup.pd_dt_to_str([fvol[i, 'Date', list]])[0]
        funi = [k for k, flag in enumerate(fun[i, tc].values.astype('bool')) if flag]
        tci = [tc_list[k] for k in funi]

        xi = np.vstack([f[i, tci].values.astype('float32') for f in feats])

        if has_fwd:
            yi = frt[i+2, tci].values.astype('float64')

        # restrict both correlation matrices to the universe members
        ci1 = _d_cor_st.load(_dt)
        ci1 = ci1[:, funi]
        ci1 = ci1[funi, :]

        ci2 = _d_cor_dy.load(_dt)
        ci2 = ci2[:, funi]
        ci2 = ci2[funi, :]

        # remove stocks with missing data (any NaN characteristic)
        v_idx = np.where(~np.isnan(xi).any(axis=0))[0]
        xi = xi[:, v_idx]
        ci1 = ci1[:, v_idx]
        ci1 = ci1[v_idx, :]
        ci2 = ci2[:, v_idx]
        ci2 = ci2[v_idx, :]
        tci = [tci[k] for k in v_idx]
        if has_fwd:
            yi = yi[v_idx]

        # remove stocks with no correlation values: a stock is dropped only
        # when its off-diagonal entries are all NaN in the row AND column
        # of both matrices (the diagonal is ignored by OR-ing it in).
        n_ = ci1.shape[0]
        diag = np.eye(n_, dtype=bool)
        nan1 = np.isnan(ci1) | diag
        nan2 = np.isnan(ci2) | diag
        no_cor = (nan1.all(axis=1) & nan1.all(axis=0) &
                  nan2.all(axis=1) & nan2.all(axis=0))
        c_idx = np.where(~no_cor)[0]

        if len(c_idx) != n_:
            xi = xi[:, c_idx]
            ci1 = ci1[:, c_idx]
            ci1 = ci1[c_idx, :]
            ci2 = ci2[:, c_idx]
            ci2 = ci2[c_idx, :]
            tci = [tci[k] for k in c_idx]
            if has_fwd:
                yi = yi[c_idx]

        # Size the ones vector from the *filtered* matrix; using the
        # pre-filter count breaks the dot product whenever a stock was
        # dropped in the step above.
        ei = np.ones(ci1.shape[0], dtype='float64')
        bi1 = 1/np.dot(np.abs(ci1), ei)
        bi1 /= np.sum(bi1)

        bi2 = 1/np.dot(np.abs(ci2), ei)
        bi2 /= np.sum(bi2)

        # NOTE(review): shrink_correl_fast receives the weight vectors
        # bi1/bi2 rather than the matrices ci1/ci2 -- confirm this is the
        # intended argument.
        sci1 = np.linalg.inv(crts.shrink_correl_fast(bi1)).astype('float32')
        sci2 = np.linalg.inv(crts.shrink_correl_fast(bi2)).astype('float32')

        # rows 31 and 32 of the saved matrix are the two weight vectors
        xi = np.vstack((xi, bi1.astype('float32'), bi2.astype('float32')))
        _d_bt_bs.save(_dt, xi, ci1, ci2, sci1, sci2, np.array(tci))
        if has_fwd:
            _d_bt_ft.save(_dt, yi)
    return None
Example #5
0
def _reduce_data(x):
    """Return the rows of ``x`` whose date string is ``>= _dt_st``.

    Dates are taken from the ``'Date'`` column and converted with
    ``crup.pd_dt_to_str``; ``_dt_st`` is a module-level cut-off value.
    """
    date_strs = crup.pd_dt_to_str(x['Date', list])
    keep_rows = [pos for pos, dt in enumerate(date_strs) if dt >= _dt_st]
    return x[keep_rows, :]
Example #6
0
def _collect_pca_factors(x):
    """Collect per-date PCA rows into twelve tables and store them.

    ``x`` is ``(_d_pcl_dy, _d_pcl_st, _d_comb)``.  The sorted entries of
    ``_d_pcl_dy`` drive the loop; the same entry name is loaded from both
    ``_d_pcl_dy`` and ``_d_pcl_st``.  Rows 0-5 of each loaded array are
    multiplied by that row's volatility values and written into row ``i``
    of the matching table, where ``i`` is the entry's position in the
    sorted list.  The tables are stored in ``_d_comb`` under
    ``PCA0_Dy`` .. ``PCA5_Dy`` and ``PCA0_St`` .. ``PCA5_St``.
    """
    _d_pcl_dy, _d_pcl_st, _d_comb = x
    f_files = sorted(_d_pcl_dy.listdir())

    # Volatility table, cut down to the dates from the first entry onwards.
    # NOTE(review): this key is spelled 'VOL_CC_240D' here -- confirm the
    # store treats it the same as any differently-cased spelling.
    comb_vol = _d_comb.retrieve('VOL_CC_240D')
    fst_dt = f_files[0]
    all_dt = crup.pd_dt_to_str(comb_vol['Date', list])
    ret_idx = [pos for pos, dt in enumerate(all_dt) if dt >= fst_dt]
    comb_vol = comb_vol[ret_idx, :]
    tc = comb_vol.tick_cols()

    dy_keys = ('PCA0_Dy', 'PCA1_Dy', 'PCA2_Dy', 'PCA3_Dy', 'PCA4_Dy', 'PCA5_Dy')
    st_keys = ('PCA0_St', 'PCA1_St', 'PCA2_St', 'PCA3_St', 'PCA4_St', 'PCA5_St')

    # every output table starts as a copy of the volatility table and has
    # its ticker columns overwritten row by row below
    dy_scores = [comb_vol.copy() for _ in dy_keys]
    st_scores = [comb_vol.copy() for _ in st_keys]

    for row, fname in enumerate(f_files):
        vol_val = comb_vol[row, tc].values.astype('float64')
        for source, scores in ((_d_pcl_dy, dy_scores), (_d_pcl_st, st_scores)):
            _pca_s = source.load(fname)
            for comp, score in enumerate(scores):
                score[row, tc] = _pca_s[comp, :]*vol_val

    for key, score in zip(dy_keys + st_keys, dy_scores + st_scores):
        _d_comb.store(key, score)