def generate_final_tl():
    """Write portfolio and trade-list CSVs for the five most recent dates.

    For each of the last five entries listed by ``d_bt_fw[0]`` the function
    loads, for each of the eight clusters, the tickers (``arr_0``) and weights
    (``arr_1``) stored under that date, scales the weights by the fixed
    per-cluster weight, and converts them to share counts using the ``Close``
    price on that date and the module-level ``cap``.

    Output, written into the directory ``crsf.cr_tl.name``:

    * ``Portfolio_<date>.csv`` for every processed date.
    * ``Tradelist_<date>.csv`` for every date after the earliest one, holding
      the share difference against the previous date's portfolio.

    Returns None.
    """
    import os  # local import so the block does not depend on file-level aliases

    clus_wgt = np.array([0.15, 0.075, 0.21, 0.075, 0.15, 0.06, 0.14, 0.14])

    # Sort before slicing: the date comparisons below (and the hand-over of
    # ``tl_old`` from one date to the next) are only valid in ascending order.
    last_dt = sorted(d_bt_fw[0].listdir())[-5:]
    out_dir = crsf.cr_tl.name

    for i in last_dt:
        tic_parts = []
        wgt_parts = []
        shr_parts = []
        for k in range(len(clus_wgt)):
            last_dt_pos = d_bt_fw[k].load(i)
            tic_ = last_dt_pos["arr_0"]
            wgt_ = last_dt_pos["arr_1"] * clus_wgt[k]

            # closing prices of this cluster's tickers on date ``i``
            cl_ = d_comb[k].retrieve("Close")
            cl_ = cl_[["Date"] + list(tic_)]
            cl_dates = crup.pd_dt_to_str(cl_["Date", list])
            cl_idx = [ii for ii, jj in enumerate(cl_dates) if jj == i][0]
            cl_ = cl_[cl_idx, list(tic_)].values.astype("float64")

            tic_parts.append(tic_)
            wgt_parts.append(wgt_)
            shr_parts.append(wgt_ * cap * 1000 / cl_)

        tic = np.concatenate(tic_parts)
        wgt = np.concatenate(wgt_parts)
        shr = np.concatenate(shr_parts)

        # Keep the gross (sum of absolute) weight inside [1, 8].
        wgt_chk = np.sum(np.abs(wgt))
        print(wgt_chk)
        if wgt_chk > 8:
            wgt = wgt * 8 / wgt_chk
            shr = shr * 8 / wgt_chk
        # A gross weight of exactly zero cannot be scaled up; dividing by it
        # would turn every share count into NaN before the integer cast.
        if 0 < wgt_chk < 1:
            wgt = wgt * 1 / wgt_chk
            shr = shr * 1 / wgt_chk
        shr = np.round(shr).astype("int64")

        # remove zero shr values (left untouched when every value is zero)
        shr_not_zero = np.where(shr != 0)[0]
        if shr_not_zero.shape[0] > 0:
            tic = tic[shr_not_zero]
            wgt = wgt[shr_not_zero]
            shr = shr[shr_not_zero]

        tl = cruf.DataFrame({"Tick": tic, "Shares": shr, "Weight": wgt})
        # NOTE(review): the return value of sort() is discarded; this only has
        # an effect if cruf.DataFrame.sort works in place - confirm.
        tl.sort("Weight")
        del tl["Weight"]
        tl = tl[["Tick", "Shares"]]

        if i > last_dt[0]:
            # trades = today's shares minus the previous date's shares
            tl_comb = cruf.DataFrame.merge(tl, tl_old, on="Tick", how="outer")
            tl_comb["Shares_x"] = crtf.fill1(tl_comb["Shares_x"].values, 0)
            tl_comb["Shares_y"] = crtf.fill1(tl_comb["Shares_y"].values, 0)
            tl_comb["Trade"] = tl_comb["Shares_x"] - tl_comb["Shares_y"]
            tl_comb = tl_comb[["Tick", "Trade"]]
            tl_comb.to_csv(os.path.join(out_dir, "Tradelist_" + i + ".csv"),
                           index_label=None, index=False)
        if i < last_dt[-1]:
            tl_old = tl
        tl.to_csv(os.path.join(out_dir, "Portfolio_" + i + ".csv"),
                  index_label=None, index=False)
def conv_stk_to_ami_format(tick):
    """Export the stored price frame for ``tick`` as ``<tick>.csv``.

    The frame is read from ``crsf.cr_pr_cl``, its ``Turnover`` column is
    dropped, and each date string is rearranged as
    ``chars[6:]-chars[4:6]-chars[0:4]`` (presumably YYYYMMDD -> DD-MM-YYYY;
    confirm against ``crup.pd_dt_to_str``).  The columns Date, Open, High, Low,
    Close, Volume are then written, comma separated and without an index, into
    the directory ``crff.cr_info.name``.
    """
    prices = crsf.cr_pr_cl.retrieve(tick)
    del prices['Turnover']

    raw_dates = crup.pd_dt_to_str(prices['Date', list])
    ami_dates = ['-'.join((d[6:], d[4:6], d[0:4])) for d in raw_dates]

    # swap the original Date column for the reformatted strings
    del prices['Date']
    prices['Date'] = ami_dates

    prices = prices[['Date', 'Open', 'High', 'Low', 'Close', 'Volume']]
    out_path = opj(crff.cr_info.name, tick + '.csv')
    prices.to_csv(out_path, sep=',', index_label=False, index=False)
def _refresh_correl(x):
    """Compute pairwise correlations of close prices and save one matrix per date.

    ``x`` is a 3-tuple ``(_d_comb, _d_cor_dy, _d_cor_st)``.  For every pair of
    ticker columns of the ``Close`` frame, ``crtf.cor_cc`` is evaluated with a
    240-row and a 120-row window.  For every date on or after the module-level
    ``st_dt`` the pair values are expanded into a symmetric ``ntc x ntc``
    matrix with a unit diagonal; the 240-window matrix is saved to
    ``_d_cor_st`` and the 120-window matrix to ``_d_cor_dy``, keyed by the
    date string.

    Returns None.
    """
    _d_comb, _d_cor_dy, _d_cor_st = x
    comb_prices = _d_comb.retrieve('Close')
    tc = comb_prices.tick_cols()
    ndt = comb_prices.shape[0]
    ntc = len(tc)
    _prc = comb_prices[tc].values.astype('float64')

    window1 = 240
    window2 = 120

    # Number of unordered pairs.  Floor division keeps it an int: it is used
    # as an array dimension, which true division (a float) is rejected for.
    ntc1 = ntc * (ntc - 1) // 2
    _cor1 = np.full((ndt, ntc1), np.nan, dtype='float32')
    _cor2 = np.full((ndt, ntc1), np.nan, dtype='float32')

    # calculate the correlations, one column per pair (i, j) with i < j
    cnt = 0
    for i in range(0, ntc - 1):
        for j in range(i + 1, ntc):
            _cor1[:, cnt] = crtf.cor_cc(_prc[:, i], _prc[:, j], window1, zl=False)
            _cor2[:, cnt] = crtf.cor_cc(_prc[:, i], _prc[:, j], window2, zl=False)
            cnt += 1

    # remove correlations before date st_dt
    cp_dt = crup.pd_dt_to_str(comb_prices['Date', list])
    cp_dt_idx = [i for i, j in enumerate(cp_dt) if j >= st_dt]

    # Strict upper-triangle positions in row-major order, i.e. the same
    # (i, j) order in which the pair columns were filled above.
    upper = np.triu_indices(ntc, k=1)
    lower = (upper[1], upper[0])

    for i in cp_dt_idx:
        _cor_mat1 = np.eye(ntc)
        _cor_mat1[upper] = _cor1[i]
        _cor_mat1[lower] = _cor1[i]

        _cor_mat2 = np.eye(ntc)
        _cor_mat2[upper] = _cor2[i]
        _cor_mat2[lower] = _cor2[i]

        _d_cor_st.save(cp_dt[i], _cor_mat1)
        _d_cor_dy.save(cp_dt[i], _cor_mat2)
    return None
def _refresh_base_data(x):
    """Assemble and save the per-date feature and correlation arrays.

    ``x`` is a 5-tuple ``(_d_bt_bs, _d_bt_ft, _d_comb, _d_cor_st, _d_cor_dy)``.

    For every row (date) of the reduced frames:

    1. Keep the tickers whose ``Univ`` flag is true on that date.
    2. Stack the 31 characteristic rows (see ``feature_names``) into ``xi``.
    3. Load the two correlation matrices saved under the date and restrict
       them to the same tickers.
    4. Drop tickers with any NaN characteristic, then tickers whose
       off-diagonal correlations are NaN everywhere in both matrices.
    5. Append two weight rows derived from the absolute correlation row sums,
       and save ``xi``, both correlation matrices, two inverted matrices and
       the ticker names to ``_d_bt_bs``.
    6. For all but the last two dates, save the ``Returns`` row two dates
       ahead to ``_d_bt_ft``.

    Returns None.
    """
    _d_bt_bs, _d_bt_ft, _d_comb, _d_cor_st, _d_cor_dy = x

    def _load(name):
        # retrieve a frame from the combined store and trim it by date
        return _reduce_data(_d_comb.retrieve(name))

    def _square_subset(mat, idx):
        # keep the same positions on both axes of a square matrix
        mat = mat[:, idx]
        return mat[idx, :]

    # Row order of ``xi``; downstream consumers index rows by position.
    # NOTE(review): 'Vol_CC_240D' differs in case from the 'VOL_CC_240D' key
    # read in _collect_pca_factors - confirm the store is case-insensitive.
    feature_names = (
        'Vol_CC_240D',                                                  # 0
        'Adv',                                                          # 1
        'PCA0_Dy', 'PCA0_St',                                           # 2-3
        'PCA1_Dy', 'PCA1_St',                                           # 4-5
        'PCA2_Dy', 'PCA2_St',                                           # 6-7
        'PCA3_Dy', 'PCA3_St',                                           # 8-9
        'PCA4_Dy', 'PCA4_St',                                           # 10-11
        'PCA5_Dy', 'PCA5_St',                                           # 12-13
        'SPY_Beta_3m', 'SPY_Beta_6m', 'SPY_Beta_9m', 'SPY_Beta_12m',    # 14-17
        'MDY_Beta_3m', 'MDY_Beta_6m', 'MDY_Beta_9m', 'MDY_Beta_12m',    # 18-21
        'MOM_Beta_3m', 'MOM_Beta_6m', 'MOM_Beta_9m', 'MOM_Beta_12m',    # 22-25
        'LVOL_Beta_3m', 'LVOL_Beta_6m', 'LVOL_Beta_9m', 'LVOL_Beta_12m',  # 26-29
        'CompSig1',                                                     # 30
    )

    # read the characteristics and betas
    features = [_load(name) for name in feature_names]
    fun = _load('Univ')
    frt = _load('Returns')

    fvol = features[0]
    n = fvol.shape[0]
    tc = fvol.tick_cols()
    tc_names = list(tc)

    for i in range(0, n):
        _dt = crup.pd_dt_to_str([fvol[i, 'Date', list]])[0]
        # the forward return two rows ahead exists for all but the last two rows
        has_fwd = i < n - 2

        # positions and names of the tickers in the universe on this date
        in_univ = fun[i, tc].values.astype('bool')
        funi = [pos for pos, flag in enumerate(in_univ) if flag]
        tci = [tc_names[pos] for pos in funi]

        xi = np.vstack([f[i, tci].values.astype('float32') for f in features])
        if has_fwd:
            yi = frt[i + 2, tci].values.astype('float64')

        ci1 = _square_subset(_d_cor_st.load(_dt), funi)
        ci2 = _square_subset(_d_cor_dy.load(_dt), funi)

        # remove stocks with missing data
        complete = ~np.isnan(xi).any(axis=0)
        v_idx = np.flatnonzero(complete).tolist()
        xi = xi[:, v_idx]
        ci1 = _square_subset(ci1, v_idx)
        ci2 = _square_subset(ci2, v_idx)
        tci = [tci[pos] for pos in v_idx]
        if has_fwd:
            yi = yi[v_idx]

        # remove stocks with no correlation values: a stock is dropped only
        # when its off-diagonal row AND column are all-NaN in both matrices
        n_ = ci1.shape[0]
        diag = np.eye(n_, dtype=bool)
        blank1 = np.isnan(ci1) | diag
        blank2 = np.isnan(ci2) | diag
        no_correl = (blank1.all(axis=1) & blank1.all(axis=0)
                     & blank2.all(axis=1) & blank2.all(axis=0))
        c_idx = np.flatnonzero(~no_correl).tolist()
        if len(c_idx) != n_:
            xi = xi[:, c_idx]
            ci1 = _square_subset(ci1, c_idx)
            ci2 = _square_subset(ci2, c_idx)
            tci = [tci[pos] for pos in c_idx]
            if has_fwd:
                yi = yi[c_idx]

        # Size the ones-vector from the matrices as they are now; the count
        # taken before the filter above no longer matches once a stock is
        # dropped.
        ei = np.ones(ci1.shape[0], dtype='float64')
        bi1 = 1 / np.dot(np.abs(ci1), ei)
        bi1 /= np.sum(bi1)
        bi2 = 1 / np.dot(np.abs(ci2), ei)
        bi2 /= np.sum(bi2)

        # NOTE(review): shrink_correl_fast receives the weight vectors
        # (bi1/bi2), not the correlation matrices (ci1/ci2) - confirm intent.
        sci1 = np.linalg.inv(crts.shrink_correl_fast(bi1)).astype('float32')
        sci2 = np.linalg.inv(crts.shrink_correl_fast(bi2)).astype('float32')

        xi = np.vstack((xi, bi1.astype('float32'), bi2.astype('float32')))
        _d_bt_bs.save(_dt, xi, ci1, ci2, sci1, sci2, np.array(tci))
        if has_fwd:
            _d_bt_ft.save(_dt, yi)
    return None
def _reduce_data(x):
    """Return the rows of ``x`` whose date string is on or after ``_dt_st``.

    Dates are taken from the ``Date`` column and converted with
    ``crup.pd_dt_to_str``; ``_dt_st`` is a module-level cut-off compared
    against those strings.  Row order is preserved.
    """
    date_strs = crup.pd_dt_to_str(x['Date', list])
    keep_rows = []
    for pos, date_str in enumerate(date_strs):
        if date_str >= _dt_st:
            keep_rows.append(pos)
    return x[keep_rows, :]
def _collect_pca_factors(x):
    """Collect per-date PCA rows into twelve frames and store them.

    ``x`` is a 3-tuple ``(_d_pcl_dy, _d_pcl_st, _d_comb)``.  The sorted entries
    listed by ``_d_pcl_dy`` are walked in order; entry number ``r`` is paired
    with row ``r`` of the ``VOL_CC_240D`` frame after that frame has been
    trimmed to dates on or after the first entry.  For both stores, rows 0-5
    of the array loaded for the entry are multiplied by that row's volatility
    values and written into the matching frame.  The frames are stored in
    ``_d_comb`` under ``PCA0_Dy``..``PCA5_Dy`` and ``PCA0_St``..``PCA5_St``.

    Assumes both stores hold an array with at least six rows for every listed
    entry, and that entries line up one-to-one with the trimmed frame rows -
    TODO confirm.
    """
    _d_pcl_dy, _d_pcl_st, _d_comb = x
    f_files = sorted(_d_pcl_dy.listdir())

    # volatility frame, trimmed to start at the first listed entry
    comb_vol = _d_comb.retrieve('VOL_CC_240D')
    fst_dt = f_files[0]
    all_dt = crup.pd_dt_to_str(comb_vol['Date', list])
    ret_idx = [pos for pos, dt in enumerate(all_dt) if dt >= fst_dt]
    comb_vol = comb_vol[ret_idx, :]
    tc = comb_vol.tick_cols()

    dy_keys = ('PCA0_Dy', 'PCA1_Dy', 'PCA2_Dy', 'PCA3_Dy', 'PCA4_Dy', 'PCA5_Dy')
    st_keys = ('PCA0_St', 'PCA1_St', 'PCA2_St', 'PCA3_St', 'PCA4_St', 'PCA5_St')

    # one output frame per key, each starting as a copy of the volatility frame
    scores_dy = [comb_vol.copy() for _ in dy_keys]
    scores_st = [comb_vol.copy() for _ in st_keys]

    for row, entry in enumerate(f_files):
        vol_val = comb_vol[row, tc].values.astype('float64')

        _pca_s = _d_pcl_dy.load(entry)
        for comp, frame in enumerate(scores_dy):
            frame[row, tc] = _pca_s[comp, :] * vol_val

        _pca_s = _d_pcl_st.load(entry)
        for comp, frame in enumerate(scores_st):
            frame[row, tc] = _pca_s[comp, :] * vol_val

    for key, frame in zip(dy_keys, scores_dy):
        _d_comb.store(key, frame)
    for key, frame in zip(st_keys, scores_st):
        _d_comb.store(key, frame)