__author__ = 'PB'
__copyright__ = 'Copyright 2015, Compugene LLC'
__status__ = 'Development'

import time
import timeit

import numpy as np

import crtech.shrink as crts

np.set_printoptions(precision=4, linewidth=250)

# Ad-hoc timing comparison of crts.shrink_correl vs crts.shrink_correl2 on
# random inputs.
# NOTE(review): this runs at import time, before the definitions below.

# Random positive weights normalised to sum to one, and zero-centred values.
b = np.random.random(200)
b /= b.sum()
x = np.random.random(200) - 0.5

# One untimed call to each implementation before measuring.
c1 = crts.shrink_correl(b, x)
c2 = crts.shrink_correl2(b, x)

# timeit.default_timer is the clock intended for benchmarking; time.time()
# is a wall clock and may be coarse or adjusted while the loop runs.
t0 = timeit.default_timer()
for _ in range(500):
    c1 = crts.shrink_correl(b, x)
print(timeit.default_timer() - t0)

t0 = timeit.default_timer()
for _ in range(500):
    # Stored in c2 so the shrink_correl result in c1 is not overwritten.
    c2 = crts.shrink_correl2(b, x)
print(timeit.default_timer() - t0)

#
# x, lmb = crts.shrink_correl_fast(b)
#
def _signal_field_names():
    """Return the field names stacked below 'Lev' and 'Adv', in row order."""
    horizons = list(range(10, 250, 10))  # 10D, 20D, ..., 240D
    names = ['PCA%d_Beta' % k for k in range(1, 6)]
    names += ['SP_Beta', 'MOM_12m1']
    names += ['LrB_%dD' % h for h in horizons]
    names += ['QrB_%dD' % h for h in horizons]
    return names


def _row_as_float64(frame, row, cols):
    """Return ``frame[row, cols]`` converted to a float64 array."""
    return frame[row, cols].values.astype('float64')


def _risk_budget(corr, ones):
    """Return 1 / (row sums of ``|corr|``), normalised to sum to one."""
    budget = 1/np.dot(np.abs(corr), ones)
    budget /= np.sum(budget)
    return budget


def _refresh_base_data(x):
    """Rebuild and save the per-date base data set.

    For every row ``i`` of the 'Lev' table this builds a feature matrix for
    the names that pass the universe/leverage filter, drops names with
    incomplete features or no usable correlations, appends three risk-budget
    rows, and saves the result together with three shrunk correlation
    matrices. For all but the last two rows it also saves the 'Returns' row
    two steps ahead.

    Args:
        x: 4-tuple ``(_d_bt_pp, _d_cor, _d_bt_bs, _d_bt_fr)``:
            ``_d_bt_pp`` provides ``retrieve(field_name)``; the returned
            tables are indexed as ``table[row, cols]`` and expose ``.values``.
            ``_d_cor`` provides ``load(date_str)`` returning the correlation
            matrix for that date (assumed to be a square 2-D ndarray aligned
            with ``tick_cols()`` -- TODO confirm).
            ``_d_bt_bs`` and ``_d_bt_fr`` provide ``save(date_str, ...)``.

    Returns:
        None. Results are written through ``_d_bt_bs`` and ``_d_bt_fr``.
    """
    _d_bt_pp, _d_cor, _d_bt_bs, _d_bt_fr = x

    flev = _d_bt_pp.retrieve('Lev')
    fadv = _d_bt_pp.retrieve('Adv')
    fun = _d_bt_pp.retrieve('Univ')
    frt = _d_bt_pp.retrieve('Returns')
    # Row order of the feature matrix: Lev (row 0), Adv (row 1), then the
    # signal fields. The risk-budget code below relies on rows 0 and 1.
    features = [flev, fadv]
    features += [_d_bt_pp.retrieve(name) for name in _signal_field_names()]

    n = flev.shape[0]
    tc = flev.tick_cols()
    tickers = list(tc)  # positional lookup of column labels

    for i in range(n):
        _dt = conv_ts_to_str(flev[i, 'Date'])
        # The saved target is the 'Returns' row two steps ahead, so the last
        # two rows have none.
        has_fwd_ret = i < n - 2

        # Keep names flagged in 'Univ' whose leverage is <= 10
        # (remove less liquid names and very high leverage).
        in_univ = fun[i, tc].values.astype(bool)
        lev_all = flev[i, tc].values.astype('float64')
        funi = [k for k, (flag, lev) in enumerate(zip(in_univ, lev_all))
                if flag and lev <= 10.0]
        tci = [tickers[k] for k in funi]

        xi = np.vstack([_row_as_float64(f, i, tci) for f in features])
        if has_fwd_ret:
            yi = _row_as_float64(frt, i + 2, tci)

        cori = _d_cor.load(_dt)
        cori = cori[:, funi]
        cori = cori[funi, :]
        cori = cori.astype('float64')

        # Remove stocks with missing alpha data: keep only columns in which
        # every feature row is non-NaN.
        v_idx = np.flatnonzero(~np.isnan(xi).any(axis=0))
        xi = xi[:, v_idx]
        cori = cori[:, v_idx]
        cori = cori[v_idx, :]
        tci = [tci[k] for k in v_idx]
        if has_fwd_ret:
            yi = yi[v_idx]

        # Remove stocks with no correlation values: drop a stock only when
        # both its row and its column are NaN everywhere off the diagonal.
        # The diagonal is forced to "NaN" in the mask so it is ignored.
        n_ = cori.shape[0]
        offdiag_nan = np.isnan(cori) | np.eye(n_, dtype=bool)
        isolated = offdiag_nan.all(axis=1) & offdiag_nan.all(axis=0)
        c_idx = np.flatnonzero(~isolated)
        if len(c_idx) != n_:
            xi = xi[:, c_idx]
            cori = cori[:, c_idx]
            cori = cori[c_idx, :]
            if has_fwd_ret:
                yi = yi[c_idx]
            tci = [tci[k] for k in c_idx]
            n_ = len(c_idx)

        # Risk budgets from the raw correlation and from two rescaled
        # versions of it (scaling built from the Adv and Lev feature rows).
        ei = np.ones(n_, dtype='float64')
        bi1 = _risk_budget(cori, ei)

        adv_root8 = np.sqrt(np.sqrt(np.sqrt(xi[1, :])))  # Adv ** (1/8)
        advi = np.diag(1/adv_root8)
        cori_ = np.dot(advi, np.dot(cori, advi))
        bi2 = _risk_budget(cori_, ei)

        # exp(log(Lev) * 2/3) is Lev ** (2/3).
        levi = np.diag(np.exp(np.log(xi[0, :])*2/3)/adv_root8)
        cori__ = np.dot(levi, np.dot(cori, levi))
        bi3 = _risk_budget(cori__, ei)

        # Shrunk correlation for each budget.
        scori = crts.shrink_correl(bi1, ei).astype('float32')
        scori_ = crts.shrink_correl(bi2, ei).astype('float32')
        scori__ = crts.shrink_correl(bi3, ei).astype('float32')

        # Append the three budgets as extra feature rows before saving.
        xi = np.vstack((xi, bi1, bi2, bi3))
        _d_bt_bs.save(_dt, xi.astype('float32'), scori, scori_, scori__,
                      np.array(tci))
        if has_fwd_ret:
            _d_bt_fr.save(_dt, yi)
    return None