def _refresh_old_pca2(x):
    """Recompute rolling-window PCA snapshots and save one per date.

    Parameters
    ----------
    x : tuple
        ``(_d_comb, _d_ocor, _d_opca)``: the store holding the ``'Returns'``
        frame, the store whose listing supplies the first date to process,
        and the store that receives the PCA output.

    Returns
    -------
    None
        Output is written through ``_d_opca.save(date, ticks, wgt, ret)``.

    Raises
    ------
    IndexError
        If the listing is empty or its first entry is not a date of the
        ``'Returns'`` frame.
    """
    _d_comb, _d_ocor, _d_opca = x
    f_list_c = _d_ocor.listdir()
    window = 200

    comb_ret = _d_comb.retrieve('Returns')
    all_dt = crup.pd_dt_to_str(comb_ret['Date', list])
    # NOTE(review): element 0 of the listing is taken as the first date to
    # process; this presumes listdir() is sorted ascending -- confirm.
    first_dt = f_list_c[0]
    # Start `window` rows earlier so that the first date has a look-back.
    start = [pos - window for pos, dt in enumerate(all_dt) if dt == first_dt][0]
    comb_ret = comb_ret[start:, :]
    n = comb_ret.shape[0]
    tick_cols = comb_ret.tick_cols()
    tick_list = list(tick_cols)

    for i in range(window + 1, n + 1):
        # Rows i-window .. i-2: the window ends one row before row i-1,
        # whose date labels the saved snapshot.
        hist = comb_ret[i - window:i - 1, tick_cols].values
        # Keep only the columns without any NaN inside the window.
        full_cols = np.where(np.all(~np.isnan(hist), axis=0))[0]
        pca_wgt, pca_ret = crtp.compute_fast_pca(hist[:, full_cols].T)
        # Direct indexing instead of testing every ticker position for
        # membership in the index array.
        ticks = [tick_list[k] for k in full_cols]
        dt_key = crup.pd_dt_to_str([comb_ret[i - 1, 'Date', list]])[0]
        _d_opca.save(dt_key, ticks, pca_wgt, pca_ret)
    return None
def _refresh_old_correl(x):
    """Save a rolling sample-correlation matrix per date from ``cor_st_dt`` on.

    NOTE(review): this file defines ``_refresh_old_correl`` a second time
    further down; that later definition replaces this one when the module
    is loaded.

    Parameters
    ----------
    x : tuple
        ``(_d_comb, _d_ocor)``: the store holding the ``'Returns'`` frame and
        the store that receives one matrix per date.

    Returns
    -------
    None
        Matrices are written through ``_d_ocor.save(date, matrix)``.

    Raises
    ------
    IndexError
        If ``cor_st_dt`` is not one of the dates of the ``'Returns'`` frame.
    """
    _d_comb, _d_ocor = x
    window = 200
    comb_ret = _d_comb.retrieve('Returns')
    tick_cols = comb_ret.tick_cols()

    all_dt = crup.pd_dt_to_str(comb_ret['Date', list])
    # Keep `window` rows ahead of the start date as look-back history.
    start = [pos for pos, dt in enumerate(all_dt) if dt == cor_st_dt][0] - window
    comb_ret = comb_ret[start:, :]
    dates = crup.pd_dt_to_str(comb_ret['Date', list])

    for i, dt in enumerate(dates):
        if i < window:
            continue
        # Rows i-window+1 .. i-1: the rows strictly before the date row.
        hist = comb_ret[i - window + 1:i, :]
        hist = hist[:, tick_cols].values
        n_rows, n_ticks = hist.shape
        # Columns with a value on every row of the window.
        full_cols = np.where(np.sum(~np.isnan(hist), axis=0) == n_rows)[0]

        cor_ = np.full((n_ticks, n_ticks), np.nan)
        if full_cols.size:
            # np.corrcoef returns a 0-d value when given a single series;
            # atleast_2d keeps the (1, 1) shape so the scatter below works.
            y = np.atleast_2d(np.corrcoef(hist[:, full_cols].T))
            cor_[np.ix_(full_cols, full_cols)] = y
        np.fill_diagonal(cor_, 1)
        _d_ocor.save(dt, cor_)
    return None
def _refresh_old_correl(x):
    """Save a rolling sample-correlation matrix per date from ``cor_st_dt`` on.

    For every date at or after ``cor_st_dt`` the correlation of the tickers
    that have a value on each row of the look-back window is computed with
    ``np.corrcoef``; the remaining off-diagonal entries stay NaN and the
    diagonal is set to 1.

    NOTE(review): an earlier definition with the same name exists in this
    file; being the later one, this definition is the one that stays bound.

    Parameters
    ----------
    x : tuple
        ``(_d_comb, _d_ocor)``: the store holding the ``'Returns'`` frame and
        the store that receives one matrix per date.

    Returns
    -------
    None
        Matrices are written through ``_d_ocor.save(date, matrix)``.

    Raises
    ------
    IndexError
        If ``cor_st_dt`` is not one of the dates of the ``'Returns'`` frame.
    """
    _d_comb, _d_ocor = x
    window = 200
    comb_ret = _d_comb.retrieve('Returns')
    tick_cols = comb_ret.tick_cols()

    all_dt = crup.pd_dt_to_str(comb_ret['Date', list])
    # Keep `window` rows ahead of the start date as look-back history.
    start = [pos for pos, dt in enumerate(all_dt) if dt == cor_st_dt][0] - window
    comb_ret = comb_ret[start:, :]
    dates = crup.pd_dt_to_str(comb_ret['Date', list])

    for i, dt in enumerate(dates):
        if i < window:
            # Leading rows only serve as history for later dates.
            continue
        # Rows i-window+1 .. i-1: the rows strictly before the date row.
        hist = comb_ret[i - window + 1:i, :]
        hist = hist[:, tick_cols].values
        n_rows, n_ticks = hist.shape
        # Columns with a value on every row of the window.
        full_cols = np.where(np.sum(~np.isnan(hist), axis=0) == n_rows)[0]

        cor_ = np.full((n_ticks, n_ticks), np.nan)
        if full_cols.size:
            # np.corrcoef returns a 0-d value when given a single series;
            # atleast_2d keeps the (1, 1) shape so the scatter below works.
            y = np.atleast_2d(np.corrcoef(hist[:, full_cols].T))
            cor_[np.ix_(full_cols, full_cols)] = y
        np.fill_diagonal(cor_, 1)
        _d_ocor.save(dt, cor_)
    return None
def _refresh_old_exposure_matrix2(x):
    """Store, per date, the beta rows of the five selected PCA components.

    For every entry listed by ``_d_opca_cl`` the first five values of its
    ``'arr_3'`` array are read as 0-based component numbers.  The matching
    rows of the ``PCAxx_Beta`` frames are bound together (first component
    first), the ``'Date'`` column is dropped and the result is stored under
    the same entry name.  An entry is skipped when the row of its first
    component holds no non-NaN ticker value.

    Parameters
    ----------
    x : tuple
        ``(_d_opca_cl, _d_opca_wgt, _d_opca_bt)``: store with the per-date
        component selection, store with the ``PCA01_Beta`` .. ``PCA20_Beta``
        frames, and the store that receives the result.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If a selected component number falls outside 1..20.
    """
    _d_opca_cl, _d_opca_wgt, _d_opca_bt = x
    f_files = _d_opca_cl.listdir()

    # Beta frames keyed by their 1-based component number; this replaces the
    # former lookup of local variable names through locals().
    betas = {k: _d_opca_wgt.retrieve('PCA%02d_Beta' % k) for k in range(1, 21)}

    p_dt = crup.pd_dt_to_str(betas[1]['Date', list])
    tick_cols = betas[1].tick_cols()

    # Row positions of every date, built once instead of rescanning all
    # dates for each entry.
    rows_by_date = {}
    for row, dt in enumerate(p_dt):
        rows_by_date.setdefault(dt, []).append(row)

    for entry in f_files:
        i_loc = list(rows_by_date.get(entry, ()))
        # retain the first 5
        top5 = np.array(_d_opca_cl.load(entry)['arr_3'][0:5], dtype=np.int64)

        # first PCA
        lead = betas[int(top5[0]) + 1][i_loc, :]
        if not any(~np.isnan(lead[0, tick_cols].values.astype(np.float64))):
            continue

        beta = lead
        for j in range(1, 5):
            beta.row_bind(betas[int(top5[j]) + 1][i_loc, :])
        del beta['Date']
        _d_opca_bt.store(entry, beta)
    return None
def _refresh_lev_stats_new(x):
    """Rescale the stored frames by leverage and write them to ``_d_bt_pp``.

    Only the rows whose date is listed by ``_d_cor`` and is not before
    ``_dt_st`` are kept.  On those rows:

    * ``'Lev'`` is stored unchanged;
    * ``'Univ'`` is multiplied by the indicator ``leverage < 5``;
    * ``'Adv'`` is divided by the leverage;
    * every frame named in ``_d_comb_data``, ``_d_sig_data1`` and
      ``_d_sig_data2`` is multiplied by the leverage;
    * ``'Returns'`` is multiplied, ticker by ticker, by
      ``crtf.lag(leverage, 2)``.

    Parameters
    ----------
    x : tuple
        ``(_d_comb, _d_sig, _d_bt_pp, _d_cor, _d_flag)``.  ``_d_flag`` is
        concatenated into the key ``'Lev_CC_' + _d_flag + 'D'`` and must
        therefore be a string.

    Returns
    -------
    None
    """
    _d_comb, _d_sig, _d_bt_pp, _d_cor, _d_flag = x

    # A set makes the per-date membership test constant time.
    cor_dates = set(_d_cor.listdir())
    flev = _d_comb.retrieve('Lev_CC_' + _d_flag + 'D')
    lev_dates = crup.pd_dt_to_str(flev['Date', list])
    flev_idx = [pos for pos, dt in enumerate(lev_dates)
                if (dt in cor_dates) and (dt >= _dt_st)]  # change to PCA Beta
    flev = flev[flev_idx, :]
    _d_bt_pp.store('Lev', flev)
    flev_mult = flev[flev.tick_cols()].values.astype('float64')

    fun = _d_comb.retrieve('Univ')
    fun = fun[flev_idx, :]
    fun[:, fun.tick_cols()] = (fun[fun.tick_cols()].values.astype(int)
                               * (flev_mult < 5).astype(int))
    _d_bt_pp.store('Univ', fun)

    fliq = _d_comb.retrieve('Adv')
    fliq = fliq[flev_idx, :]
    fliq[:, fliq.tick_cols()] = fliq[fliq.tick_cols()].values.astype('float64')/flev_mult
    _d_bt_pp.store('Adv', fliq)

    # The three groups receive the same treatment and differ only in the
    # store they are read from.
    groups = ((_d_comb, _d_comb_data),
              (_d_sig, _d_sig_data1),
              (_d_sig, _d_sig_data2))
    for source, names in groups:
        for name in names:
            fi = source.retrieve(name)
            fi = fi[flev_idx, :]
            fi[:, fi.tick_cols()] = fi[fi.tick_cols()].values.astype('float64')*flev_mult
            _d_bt_pp.store(name, fi)

    # Returns uses lagged leverage (!)
    fret = _d_comb.retrieve('Returns')
    fret = fret[flev_idx, :]
    for tick in fret.tick_cols():
        fret[tick] = fret[tick].values * crtf.lag(flev[tick].values, 2)
    _d_bt_pp.store('Returns', fret)
    return None
def _refresh_correl(x):
    """Update the recursive correlation estimate and save it per date.

    Returns are divided by ``'Vol_CC_240D'``; only the dates with at least
    ten non-NaN ratios are kept.  Each ratio ``z`` is then damped to
    ``z * exp(-z**2 / 18 + 0.5) / 3``.  Day by day the state ``yf1`` is
    updated with step ``lmb1 = 2 / (window1 + 1)`` and mapped to
    ``yr1 = 2 / (1 + exp(-yf1)) - 1``.  A pair is reported once it has been
    non-NaN on at least ``window1`` days; reported values are limited to
    +/-0.99, the diagonal is set to 1 and the matrix is saved as float32
    for dates not before ``cor_st_dt``.

    Parameters
    ----------
    x : tuple
        ``(_d_comb, _d_cor)``: store holding ``'Returns'`` and
        ``'Vol_CC_240D'``, and the store that receives the matrices.

    Returns
    -------
    None
        Matrices are written through ``_d_cor.save(date, matrix)``.
    """
    _d_comb, _d_cor = x
    window1 = 200
    lmb1 = 2/(window1+1)

    comb_ret = _d_comb.retrieve('Returns')
    comb_vol = _d_comb.retrieve('Vol_CC_240D')  # change it to one version for 250, one for 167
    tick_cols = comb_ret.tick_cols()

    comb_zs = comb_vol.copy()
    for tick in tick_cols:
        comb_zs[tick] = comb_ret[tick].values/comb_vol[tick].values

    valid_cnt = np.sum(~np.isnan(comb_zs[tick_cols].values.astype('float64')), axis=1)
    comb_zs = comb_zs[np.where(valid_cnt >= 10)[0], :]  # use a start date
    n = comb_zs.shape[0]
    f_list_c = crup.pd_dt_to_str(comb_zs['Date', list])

    comb_zsm = comb_zs.copy()
    for tick in tick_cols:
        comb_zsm[tick] = comb_zsm[tick]*np.exp(-(comb_zsm[tick].values**2)/18+0.5)/3

    for i in range(n):
        all_u = comb_zsm[i, tick_cols].values.astype('float64')
        nn = all_u.shape[0]
        all_uy = np.outer(all_u, all_u)
        # all_ux[j, k] = u_j**2 + u_k**2, built in one vectorised call in
        # place of the former element-by-element double loop.
        u_sq = all_u**2
        all_ux = np.add.outer(u_sq, u_sq)

        if i == 0:
            yf1 = np.sign(all_uy)
            yr1 = 2/(1+np.exp(-yf1))-1
            yct1 = (~np.isnan(yr1)).astype('uint16')
        else:
            # Tickers with a value today that _cor_idx(yf1) does not list
            # (presumably the ones not tracked yet -- confirm); must be
            # determined before yf1 is updated.
            new_idx = list(set(np.where(~np.isnan(all_u))[0]).difference(set(_cor_idx(yf1))))
            yf1 += lmb1*(2/(1-yr1**2))*(all_uy-yr1-yr1*(all_ux-2)/(1+yr1**2))
            if len(new_idx) > 0:
                # Seed the rows/columns of the newcomers with today's sign.
                yf1[new_idx, :] = np.sign(all_uy[new_idx, :])
                yf1[:, new_idx] = np.sign(all_uy[:, new_idx])
            yr1 = 2/(1+np.exp(-yf1))-1
            yct1 += (~np.isnan(yr1)).astype('uint16')

        # Report only the pairs that have enough observations.
        ready = yct1 >= window1
        yr = np.full((nn, nn), np.nan)
        yr[ready] = yr1[ready]
        if np.any(~np.isnan(yr)) and (f_list_c[i] >= cor_st_dt):
            too_big = abs(yr) > 0.99
            yr[too_big] = 0.99*np.sign(yr[too_big])
            np.fill_diagonal(yr, 1)
            _d_cor.save(f_list_c[i], yr.astype('float32'))
    return None
def _generate_tradelist2(x, prm=4, eiflag=False):
    """Build and save a neutralised weight vector for every signal date.

    For each entry listed by ``_d_bt_sig`` the return estimates are divided
    by leverage and by 1000, a shrunk correlation of the preceding return
    rows is combined with ``16 * vol`` and inverted, and
    ``crts.neut_corr_mat`` is applied with the twelve beta rows (EQIDX, SP
    and MOM at 3m/6m/9m/12m).  The weights are rescaled so that the larger
    of the long and short sums lies between 0.5 and 4, then saved.

    Parameters
    ----------
    x : tuple
        ``(_d_bt_sig, _d_tl_id, _d_comb)``: store of per-date return
        estimates, store that receives the weights, and the store holding
        the returns, leverage, volatility and beta frames.
    prm : number, optional
        Shrinkage weight: the sample correlation is blended with the
        identity as ``(corr + prm * I) / (1 + prm)``.
    eiflag : bool, optional
        When true, a row of ones is placed above the beta rows.

    Returns
    -------
    None
        Weights go to ``_d_tl_id.save(date, tickers, weights)``.  The sum
        of ``weight * return`` taken two matched dates later is stored in
        ``_d_comb`` under ``'Signal_Returns'`` for all but the last two
        entries.
    """
    _d_bt_sig, _d_tl_id, _d_comb = x
    ret = _d_comb.retrieve('Returns')
    lev = _d_comb.retrieve('Lev_CC_120D')
    vol = _d_comb.retrieve('Vol_CC_120D')
    beta_keys = ('EQIDX_Beta_3m', 'EQIDX_Beta_6m', 'EQIDX_Beta_9m', 'EQIDX_Beta_12m',
                 'SP_Beta_3m', 'SP_Beta_6m', 'SP_Beta_9m', 'SP_Beta_12m',
                 'MOM_Beta_3m', 'MOM_Beta_6m', 'MOM_Beta_9m', 'MOM_Beta_12m')
    betas = [_d_comb.retrieve(key) for key in beta_keys]

    f_list = _d_bt_sig.listdir()
    listed = set(f_list)  # constant-time membership for the date filter
    p_dt = crup.pd_dt_to_str(lev['Date', list])
    p_dt_idx = [pos for pos, dt in enumerate(p_dt) if dt in listed]
    # `ret` stays unfiltered: it is addressed through p_dt_idx below.
    lev = lev[p_dt_idx, :]
    vol = vol[p_dt_idx, :]
    betas = [frame[p_dt_idx, :] for frame in betas]

    window = 200
    n = len(f_list)
    ret_coll = np.full(n - 2, np.nan)
    # NOTE(review): row i of the filtered frames is paired with the i-th
    # listed entry; this presumes the listing is in date order and that
    # every entry is a date of the leverage frame -- confirm.
    for i, j in enumerate(f_list):
        # read the return estimates
        ret_est = _d_bt_sig.load(j)
        reti = ret_est['arr_1']
        tci = list(ret_est['arr_0'])

        levi = lev[i, tci].values.astype('float64')
        reti_un = (reti/levi)/1000
        voli = vol[i, tci].values.astype('float64')
        beta_rows = [frame[i, tci].values.astype('float64') for frame in betas]

        ret_mini = ret[p_dt_idx[i]-window+1:p_dt_idx[i], tci].values.astype('float64')
        ret_mini[np.isnan(ret_mini)] = 0  # hygiene
        nn = len(tci)
        cori = (np.corrcoef(ret_mini.T)+prm*np.identity(nn))/(1+prm)
        dvoli = 16*np.diag(voli)
        # Despite its name this holds the inverse of dvoli * cori * dvoli.
        covi = np.linalg.inv(np.dot(dvoli, np.dot(cori, dvoli)))

        if eiflag:
            mat_a = np.vstack([np.ones(nn)] + beta_rows)
        else:
            mat_a = np.vstack(beta_rows)

        wgt_a = 1.5*np.dot(crts.neut_corr_mat(covi, mat_a), reti_un)
        wgt_a_sm = np.maximum(np.sum(wgt_a[wgt_a > 0]), -np.sum(wgt_a[wgt_a < 0]))
        if wgt_a_sm > 4:
            wgt_a = wgt_a*4/wgt_a_sm
        elif 0 < wgt_a_sm < 0.5:
            # A gross of exactly zero is left alone: scaling it up would
            # divide by zero and turn every weight into NaN.
            wgt_a = wgt_a*0.5/wgt_a_sm
        _d_tl_id.save(j, tci, wgt_a)

        if i < n-2:
            fwd = ret[p_dt_idx[i+2], tci].values.astype('float64')
            ret_coll[i] = np.sum(wgt_a*fwd)

    f_list = f_list[:-2]
    f_list_ = crup.str_to_pd_dt(f_list)
    ret_coll = cruf.DataFrame({'Date': f_list_, 'Returns': ret_coll})
    _d_comb.store('Signal_Returns', ret_coll)
    return None
def _generate_tradelist(x):
    """Compute neutralised weights per signal date and print diagnostics.

    Same pipeline as ``_generate_tradelist2`` with fixed settings: the
    correlation is blended as ``(corr + 2 * I) / 3``, a row of ones is
    always placed above the twelve beta rows, and the weights are rescaled
    so that the sum of their absolute values lies between 1 and 8.  Nothing
    is saved; per date the gross, maximum and minimum weight (and the
    realised sum of ``weight * return`` where available) are printed,
    followed by ``16 * mean / std`` of the collected sums.

    Parameters
    ----------
    x : tuple
        ``(_d_bt_sig, _d_tl_id, _d_comb)``.  ``_d_tl_id`` is unpacked but
        not used.

    Returns
    -------
    None
    """
    _d_bt_sig, _d_tl_id, _d_comb = x
    ret = _d_comb.retrieve('Returns')
    lev = _d_comb.retrieve('Lev_CC_120D')
    vol = _d_comb.retrieve('Vol_CC_120D')
    beta_keys = ('EQIDX_Beta_3m', 'EQIDX_Beta_6m', 'EQIDX_Beta_9m', 'EQIDX_Beta_12m',
                 'SP_Beta_3m', 'SP_Beta_6m', 'SP_Beta_9m', 'SP_Beta_12m',
                 'MOM_Beta_3m', 'MOM_Beta_6m', 'MOM_Beta_9m', 'MOM_Beta_12m')
    betas = [_d_comb.retrieve(key) for key in beta_keys]

    f_list = _d_bt_sig.listdir()
    listed = set(f_list)  # constant-time membership for the date filter
    p_dt = crup.pd_dt_to_str(lev['Date', list])
    p_dt_idx = [pos for pos, dt in enumerate(p_dt) if dt in listed]
    # `ret` stays unfiltered: it is addressed through p_dt_idx below.
    lev = lev[p_dt_idx, :]
    vol = vol[p_dt_idx, :]
    betas = [frame[p_dt_idx, :] for frame in betas]

    window = 200
    n = len(f_list)
    ret_coll = np.full(n - 2, np.nan)
    # NOTE(review): row i of the filtered frames is paired with the i-th
    # listed entry; this presumes the listing is in date order and that
    # every entry is a date of the leverage frame -- confirm.
    for i, j in enumerate(f_list):
        # read the return estimates
        ret_est = _d_bt_sig.load(j)
        reti = ret_est['arr_1']
        tci = list(ret_est['arr_0'])

        levi = lev[i, tci].values.astype('float64')
        reti_un = (reti/levi)/1000
        voli = vol[i, tci].values.astype('float64')
        beta_rows = [frame[i, tci].values.astype('float64') for frame in betas]

        ret_mini = ret[p_dt_idx[i]-window+1:p_dt_idx[i], tci].values.astype('float64')
        ret_mini[np.isnan(ret_mini)] = 0  # hygiene
        nn = len(tci)
        cori = (np.corrcoef(ret_mini.T)+2*np.identity(nn))/3
        dvoli = 16*np.diag(voli)
        # Despite its name this holds the inverse of dvoli * cori * dvoli.
        covi = np.linalg.inv(np.dot(dvoli, np.dot(cori, dvoli)))
        mat_a = np.vstack([np.ones(nn)] + beta_rows)

        wgt_a = 1.5*np.dot(crts.neut_corr_mat(covi, mat_a), reti_un)
        wgt_a_sm = np.sum(np.abs(wgt_a))
        if wgt_a_sm > 8:
            wgt_a = wgt_a*8/wgt_a_sm
        elif 0 < wgt_a_sm < 1:
            # A gross of exactly zero is left alone: scaling it up would
            # divide by zero and turn every weight into NaN.
            wgt_a = wgt_a/wgt_a_sm

        if i < n-2:
            fwd = ret[p_dt_idx[i+2], tci].values.astype('float64')
            ret_coll[i] = np.sum(wgt_a*fwd)
            print(j, np.array([np.sum(np.abs(wgt_a)), np.max(wgt_a), np.min(wgt_a), ret_coll[i]]))
        else:
            print(j, np.array([np.sum(np.abs(wgt_a)), np.max(wgt_a), np.min(wgt_a)]))
    print(16*np.mean(ret_coll)/np.std(ret_coll))
def _rows_f32(frames, row, cols):
    """Stack ``frame[row, cols]`` of every frame into one float32 2-D array."""
    return np.vstack([frame[row, cols].values.astype('float32') for frame in frames])


def _offdiag_all_nan(mat):
    """Flag the rows and the columns whose off-diagonal entries are all NaN."""
    nan_mask = np.isnan(mat)
    np.fill_diagonal(nan_mask, True)  # take the diagonal out of the test
    return nan_mask.all(axis=1), nan_mask.all(axis=0)


def _refresh_base_data_new(x):
    """Assemble and save the per-date model inputs.

    For every row of the ``'Lev'`` frame the tickers flagged in ``'Univ'``
    are selected and three blocks are stacked (one row per frame, one
    column per ticker):

    * ``xi``  -- rows 0-1 ``Lev`` and ``Adv``; rows 2-5 EQIDX, 6-9 SP,
      10-13 MOM and 14-17 LVOL betas at 3m/6m/9m/12m; rows 18-19 the two
      risk budgets appended at the end;
    * ``si1`` -- ``LrB_10D`` .. ``LrB_500D`` in steps of 10;
    * ``si2`` -- ``QrB_10D`` .. ``QrB_500D`` in steps of 10.

    Tickers with a NaN anywhere in the three blocks are dropped, as are
    tickers without any off-diagonal value in both correlation matrices.
    Remaining NaN correlations are set to 0.5 and the diagonals to 1.

    Parameters
    ----------
    x : tuple
        ``(_d_bt_pp, _d_cor, _d_ocor, _d_bt_bs, _d_bt_fr)``: store with the
        input frames, the two stores with one correlation matrix per date,
        the store receiving the inputs and the store receiving the target.

    Returns
    -------
    None
        ``_d_bt_bs.save(date, xi, si1, si2, sci1, sci2, tickers)`` is
        called for every date.  For all but the last two dates
        ``_d_bt_fr.save(date, yi)`` stores the ``'Returns'`` row two rows
        later.
    """
    _d_bt_pp, _d_cor, _d_ocor, _d_bt_bs, _d_bt_fr = x

    # read the characteristics
    flev = _d_bt_pp.retrieve('Lev')
    fadv = _d_bt_pp.retrieve('Adv')
    fun = _d_bt_pp.retrieve('Univ')
    frt = _d_bt_pp.retrieve('Returns')

    # read the betas (families are retrieved in this order)
    horizons = ('3m', '6m', '9m', '12m')
    beta = {family: [_d_bt_pp.retrieve('%s_Beta_%s' % (family, h)) for h in horizons]
            for family in ('SP', 'MOM', 'EQIDX', 'LVOL')}
    # Row order of the characteristics block.
    x_frames = [flev, fadv] + beta['EQIDX'] + beta['SP'] + beta['MOM'] + beta['LVOL']

    # read the signals
    lookbacks = range(10, 501, 10)
    lr_frames = [_d_bt_pp.retrieve('LrB_%dD' % days) for days in lookbacks]
    qr_frames = [_d_bt_pp.retrieve('QrB_%dD' % days) for days in lookbacks]

    n = flev.shape[0]
    tc = flev.tick_cols()
    tc_list = list(tc)
    for i in range(n):
        _dt = crup.pd_dt_to_str([flev[i, 'Date', list]])[0]
        has_target = i < n-2  # a target row exists two rows further down

        # Positions and names of the tickers in today's universe.
        funi = [k for k, flag in enumerate(fun[i, tc].values.astype('bool')) if flag]
        tci = [tc_list[k] for k in funi]

        xi = _rows_f32(x_frames, i, tci)
        si1 = _rows_f32(lr_frames, i, tci)
        si2 = _rows_f32(qr_frames, i, tci)
        yi = frt[i+2, tci].values.astype('float64') if has_target else None

        ci = _d_cor.load(_dt)
        ci = ci[np.ix_(funi, funi)]
        oci = _d_ocor.load(_dt)
        oci = oci[np.ix_(funi, funi)]

        # remove stocks with missing data
        # All three blocks are tested.  The former
        # np.minimum(v_idx1, v_idx2, v_idx3) passed the third array as the
        # ufunc's `out` argument, so NaNs in si2 were never detected.
        complete = (~np.isnan(xi).any(axis=0)
                    & ~np.isnan(si1).any(axis=0)
                    & ~np.isnan(si2).any(axis=0))
        v_idx = np.flatnonzero(complete)
        xi = xi[:, v_idx]
        si1 = si1[:, v_idx]
        si2 = si2[:, v_idx]
        ci = ci[np.ix_(v_idx, v_idx)]
        oci = oci[np.ix_(v_idx, v_idx)]
        tci = [tci[k] for k in v_idx]
        if has_target:
            yi = yi[v_idx]

        # remove stocks with no correlation values (hygiene)
        # A ticker goes only when its row and its column are empty in
        # both matrices.
        ci_row, ci_col = _offdiag_all_nan(ci)
        oci_row, oci_col = _offdiag_all_nan(oci)
        c_idx = np.flatnonzero(~(ci_row & ci_col & oci_row & oci_col))
        xi = xi[:, c_idx]
        si1 = si1[:, c_idx]
        si2 = si2[:, c_idx]
        ci = ci[np.ix_(c_idx, c_idx)]
        oci = oci[np.ix_(c_idx, c_idx)]
        tci = [tci[k] for k in c_idx]
        if has_target:
            yi = yi[c_idx]
        n_ = len(c_idx)

        # hygiene incase there is something weird in cluster S
        ci[np.isnan(ci)] = 0.5
        oci[np.isnan(oci)] = 0.5
        np.fill_diagonal(ci, 1)
        np.fill_diagonal(oci, 1)

        # get the risk budgets
        ei = np.ones(n_, dtype='float64')
        bi1 = 1/np.dot(np.abs(ci), ei)
        bi1 /= np.sum(bi1)
        bi2 = 1/np.dot(np.abs(oci), ei)
        bi2 /= np.sum(bi2)

        # NOTE(review): shrink_correl_fast is handed the risk-budget
        # vector, not the correlation matrix -- confirm this is intended.
        sci1 = np.linalg.inv(crts.shrink_correl_fast(bi1)).astype('float32')
        sci2 = np.linalg.inv(crts.shrink_correl_fast(bi2)).astype('float32')

        xi = np.vstack((xi, bi1.astype('float32'), bi2.astype('float32')))  # 18, 19
        _d_bt_bs.save(_dt, xi, si1, si2, sci1, sci2, np.array(tci))
        if has_target:
            _d_bt_fr.save(_dt, yi)
    return None
def _collect_old_pca_factors(x):
    """Turn the stored old-PCA loadings into four factor return series.

    Every file listed by ``_d_opca_cl`` is matched, in sorted order, with
    one row of the ``'Returns'`` frame (the rows dated on or after the first
    file name).  The loadings are applied to that row and each component is
    divided by the sum of its absolute loadings.  ``_d_flag`` selects which
    four components are recorded.  The daily values are stored as
    ``OPCA0_Returns`` .. ``OPCA3_Returns`` and their cumulative products of
    ``1 + value`` as ``OPCA0`` .. ``OPCA3``.

    Parameters
    ----------
    x : tuple
        ``(_d_opca_cl, _d_comb, _d_flag)``: store with one loading matrix
        per date, the store holding ``'Returns'`` (which also receives the
        output), and the selector compared against 0..6 (anything else uses
        the last component set).

    Returns
    -------
    None

    Raises
    ------
    NotImplementedError
        If the number of files differs from the number of matched rows.
    """
    _d_opca_cl, _d_comb, _d_flag = x

    # Components recorded as factor 0..3, per flag value.
    by_flag = (
        (0, (0, 1, 10, 3)),
        (1, (0, 1, 2, 16)),
        (2, (0, 1, 2, 11)),
        (3, (0, 16, 8, 2)),
        (4, (0, 8, 3, 18)),
        (5, (0, 5, 17, 8)),
        (6, (0, 2, 17, 16)),
    )
    fallback = (0, 4, 1, 3)  # _d_flag == 7

    f_files = sorted(_d_opca_cl.listdir())
    comb_ret = _d_comb.retrieve('Returns')

    # OLD PCA used time-series directly!
    first_dt = f_files[0]
    all_dt = crup.pd_dt_to_str(comb_ret['Date', list])
    ret_idx = [pos for pos, dt in enumerate(all_dt) if dt >= first_dt]
    comb_ret = comb_ret[ret_idx, :]
    if len(f_files) != len(ret_idx):
        raise NotImplementedError('Something is wrong')

    factor_frames = [_dummy_df(comb_ret) for _ in range(4)]
    comb_ret_val = comb_ret[:, comb_ret.tick_cols()].values.astype('float64')
    picks = next((comps for flag, comps in by_flag if _d_flag == flag), fallback)

    for row, fname in enumerate(f_files):
        loadings = _d_opca_cl.load(fname).astype('float64')
        day_ret = comb_ret_val[row, :]
        # Columns that carry a loading for at least one component.
        live = np.where(np.sum(~np.isnan(loadings), axis=0) > 0)[0]
        loadings = loadings[:, live]
        day_ret = day_ret[live]
        gross = np.dot(abs(loadings), np.ones(loadings.shape[1]))
        factor_ret = np.dot(loadings, day_ret)/gross
        for frame, comp in zip(factor_frames, picks):
            frame[row, 'PCA'] = factor_ret[comp]

    daily_keys = ('OPCA0_Returns', 'OPCA1_Returns', 'OPCA2_Returns', 'OPCA3_Returns')
    for key, frame in zip(daily_keys, factor_frames):
        _d_comb.store(key, frame)

    # The same frames are then overwritten with their compounded level.
    for frame in factor_frames:
        frame['PCA'] = np.cumprod(1+frame['PCA'].values)

    level_keys = ('OPCA0', 'OPCA1', 'OPCA2', 'OPCA3')
    for key, frame in zip(level_keys, factor_frames):
        _d_comb.store(key, frame)
    return None
def _collect_pca_factors(x):
    """Turn the stored PCA loadings into four factor return series.

    Returns are first divided by ``'VOL_CC_240D'``.  Every file listed by
    ``_d_pca_cl`` is matched, in sorted order, with one row of the scaled
    returns (the rows dated on or after the first file name).  Each
    component's loadings are divided by the sum of their absolute values,
    applied to that row and multiplied by 0.01.  ``_d_flag`` selects which
    four components are recorded.  The daily values are stored as
    ``PCA0_Returns`` .. ``PCA3_Returns`` and their cumulative products of
    ``1 + value`` as ``PCA0`` .. ``PCA3``.

    Parameters
    ----------
    x : tuple
        ``(_d_pca_cl, _d_comb, _d_flag)``: store with one loading matrix per
        date, the store holding the input frames (which also receives the
        output), and the selector compared against 0..6 (anything else uses
        the last component set).

    Returns
    -------
    None

    Raises
    ------
    NotImplementedError
        If the number of files differs from the number of matched rows.
    """
    _d_pca_cl, _d_comb, _d_flag = x

    # Components recorded as factor 0..3, per flag value.
    by_flag = (
        (0, (0, 1, 2, 8)),
        (1, (0, 1, 18, 2)),
        (2, (0, 1, 2, 3)),
        (3, (0, 1, 2, 19)),
        (4, (0, 1, 2, 3)),
        (5, (0, 1, 2, 4)),
        (6, (0, 18, 15, 4)),
    )
    fallback = (0, 1, 2, 16)  # _d_flag == 7

    f_files = sorted(_d_pca_cl.listdir())
    comb_ret = _d_comb.retrieve('Returns')

    # newly added
    # NOTE(review): other functions in this file read 'Vol_CC_240D'; the
    # upper-case spelling here only matches if keys are case-insensitive --
    # confirm.
    comb_vol = _d_comb.retrieve('VOL_CC_240D')
    for tick in comb_ret.tick_cols():
        comb_ret[tick] = comb_ret[tick].values/comb_vol[tick].values

    first_dt = f_files[0]
    all_dt = crup.pd_dt_to_str(comb_ret['Date', list])
    ret_idx = [pos for pos, dt in enumerate(all_dt) if dt >= first_dt]
    comb_ret = comb_ret[ret_idx, :]
    if len(f_files) != len(ret_idx):
        raise NotImplementedError('Something is wrong')

    factor_frames = [_dummy_df(comb_ret) for _ in range(4)]
    comb_ret_val = comb_ret[:, comb_ret.tick_cols()].values.astype('float64')
    picks = next((comps for flag, comps in by_flag if _d_flag == flag), fallback)

    for row, fname in enumerate(f_files):
        loadings = _d_pca_cl.load(fname).astype('float64')
        day_ret = comb_ret_val[row, :]
        # Columns that carry a loading for at least one component.
        live = np.where(np.sum(~np.isnan(loadings), axis=0) > 0)[0]
        loadings = loadings[:, live]
        # Scale every component so its absolute loadings add up to one.
        gross = np.sum(np.abs(loadings), axis=1)
        loadings = loadings/gross[:, np.newaxis]
        day_ret = day_ret[live]
        factor_ret = np.dot(loadings, day_ret)
        factor_ret *= 0.01
        for frame, comp in zip(factor_frames, picks):
            frame[row, 'PCA'] = factor_ret[comp]

    daily_keys = ('PCA0_Returns', 'PCA1_Returns', 'PCA2_Returns', 'PCA3_Returns')
    for key, frame in zip(daily_keys, factor_frames):
        _d_comb.store(key, frame)

    # The same frames are then overwritten with their compounded level.
    for frame in factor_frames:
        frame['PCA'] = np.cumprod(1+frame['PCA'].values)

    level_keys = ('PCA0', 'PCA1', 'PCA2', 'PCA3')
    for key, frame in zip(level_keys, factor_frames):
        _d_comb.store(key, frame)
    return None