def find_more_accurate_beta(y, x):
    """Refine a robust beta of ``y`` on ``x`` with secant steps.

    The candidate fit is always ``_yxrat * corr * x`` where ``_yxrat`` is
    ``med_abs_dev(y) / med_abs_dev(x)`` and ``corr`` is a correlation-like
    coefficient.  Starting from the Spearman and Kendall estimates of
    ``corr``, the function repeatedly applies a secant step that looks for
    the ``corr`` at which the correlation between the residual
    ``y - fit`` and the fit itself is zero.

    Args:
        y: dependent series (must support ``y - scalar * x``).
        x: explanatory series, same length as ``y``.

    Returns:
        ``_yxrat * corr`` for the most refined ``corr`` that could be
        computed.  Whenever two consecutive residual correlations differ
        by no more than ``1e-8`` (so the secant step is ill-conditioned),
        the best estimate available at that point is returned instead.
    """
    tol = 1e-8
    _yxrat = med_abs_dev(y) / med_abs_dev(x)

    def _residual_and_fit(corr):
        # Split y into (residual, fitted part) for the given coefficient.
        fit = _yxrat * corr * x
        return y - fit, fit

    def _residual_kendall(corr):
        # Kendall correlation between the residual and the fit.
        err, fit = _residual_and_fit(corr)
        return smart_kendall(err, fit)

    def _secant_root(c0, e0, c1, e1):
        # Zero of the line through (c0, e0) and (c1, e1).
        return c0 - e0 * (c1 - c0) / (e1 - e0)

    # Two starting coefficients and the residual correlations they leave.
    _yxcors = smart_spearman(y, x)
    _yxcork = smart_kendall(y, x)
    _errs, _collects = _residual_and_fit(_yxcors)
    _errk, _collectk = _residual_and_fit(_yxcork)
    _excors = smart_kendall(_errs, _collects)
    _excork = smart_kendall(_errk, _collectk)
    _excors_ = smart_spearman(_errs, _collects)
    _excork_ = smart_spearman(_errk, _collectk)

    # ``not (a > tol)`` rather than ``a <= tol`` so that NaN falls back too.
    if not abs(_excork - _excors) > tol:
        # Both starting fits leave the same residual correlation, so no
        # secant step is possible.  Return the Kendall-based beta; the
        # previous code returned ``_yxrat * _excork`` here, i.e. the
        # residual correlation scaled as if it were the beta.
        return _yxrat * _yxcork

    # adjust 1: secant step on the Kendall residual correlations
    _yxcorn = _secant_root(_yxcors, _excors, _yxcork, _excork)
    _excorn = _residual_kendall(_yxcorn)

    # adjust 2: secant step on the Spearman residual correlations
    # NOTE(review): this denominator (_excork_ - _excors_) is not guarded;
    # confirm whether a near-zero difference can occur in practice.
    _yxcorn2 = _secant_root(_yxcors, _excors_, _yxcork, _excork_)
    _excorn2 = _residual_kendall(_yxcorn2)

    if not abs(_excorn - _excorn2) > tol:
        return _yxrat * _yxcorn

    # adjust 3: secant step between adjust 1 and adjust 2
    _yxcorn3 = _secant_root(_yxcorn, _excorn, _yxcorn2, _excorn2)
    _excorn3 = _residual_kendall(_yxcorn3)

    if not abs(_excorn - _excorn3) > tol:
        return _yxrat * _yxcorn3

    # adjust 4: secant step between adjust 1 and adjust 3
    _yxcorn4 = _secant_root(_yxcorn, _excorn, _yxcorn3, _excorn3)
    _excorn4 = _residual_kendall(_yxcorn4)

    # Keep whichever of adjust 1 / adjust 3 has the smaller residual
    # correlation in absolute value as the partner for the last step.
    if abs(_excorn) < abs(_excorn3):
        _excorn1, _yxcorn1 = _excorn, _yxcorn
    else:
        _excorn1, _yxcorn1 = _excorn3, _yxcorn3

    if not abs(_excorn1 - _excorn4) > tol:
        return _yxrat * _yxcorn4

    # adjust 5: final secant step between adjust 4 and the better of 1 / 3
    _yxcorn5 = _secant_root(_yxcorn4, _excorn4, _yxcorn1, _excorn1)
    return _yxrat * _yxcorn5
def _select_decorrelated(scores, window, names, pick, accept, max_picks,
                         max_pair_corr=0.6):
    """Greedily pick columns by score while keeping the picks decorrelated.

    Args:
        scores: 1-D array with one score per column of ``window``.
        window: 2-D array (rows x columns) used for pairwise correlations.
        names: column names, indexable by column position.
        pick: callable returning the position of the preferred entry of a
            1-D score array (``argmax_fixed`` or ``argmin_fixed``).
        accept: predicate on a score; the search stops at the first
            preferred candidate that fails it.  It is not applied to the
            very first pick.
        max_picks: maximum number of columns to pick.
        max_pair_corr: a candidate is dropped once its ``smart_kendall``
            correlation with any picked column exceeds this value.

    Returns:
        ``(picked_names, picked_scores, last_step)`` where ``last_step`` is
        the index of the last loop iteration that was started.
    """
    # One flag per column; sized from the data rather than a fixed count.
    eligible = np.ones(window.shape[1], dtype=bool)
    picked_idx = []
    picked_names = []
    picked_scores = []
    step = 0
    for step in range(max_picks):
        if step == 0:
            idx = pick(scores)
        else:
            # Only the newest pick can disqualify further candidates: all
            # earlier picks were already applied to ``eligible``.
            newest = picked_idx[-1]
            eligible[newest] = False
            for cand in np.where(eligible)[0]:
                if smart_kendall(window[:, cand], window[:, newest]) > max_pair_corr:
                    eligible[cand] = False
            remaining = np.where(eligible)[0]
            if remaining.shape[0] == 0:
                break
            idx = remaining[pick(scores[remaining])]
            if not accept(scores[idx]):
                break
        picked_idx.append(idx)
        picked_names.append(names[idx])
        picked_scores.append(scores[idx])
    return picked_names, picked_scores, step


def get_smart_measures():
    """Print the best momentum-like and reversal-like signal columns.

    Loads the frame ``cr_cret.retrieve(univ_ib_eqidx_ext + 'GBM')``, takes
    its ``Close`` and ``Volatility`` columns as target and scale, and treats
    every other column as a candidate signal.  Signals are lagged by 2,
    divided by the lagged volatility and scaled by their median absolute
    deviation over rows ``5220:``.  Each signal is scored by the sum of its
    ``smart_kendall`` correlation with two scaled targets (``Close`` and the
    average of ``Close`` with ``filt.lag(Close)``).

    Three passes are made: all columns, columns without ``'M_'`` in the
    name, and columns without ``'M_'`` or ``'W_'`` in the name.  In every
    pass up to 20 highest-scoring and up to 40 lowest-scoring mutually
    decorrelated columns are selected and printed.

    Returns:
        None.  Results are written to stdout only.
    """
    start_row = 5220        # rows before this are excluded from all statistics
    score_threshold = 0.015  # minimum |score| for every pick after the first

    univ_ib_data = cr_cret.retrieve(univ_ib_eqidx_ext + 'GBM')
    univ_ib_cl = univ_ib_data['Close'].values
    univ_ib_vl = univ_ib_data['Volatility'].values
    del univ_ib_data['Close']
    del univ_ib_data['Volatility']
    new_col_names = list(univ_ib_data.columns)
    univ_ib_data = univ_ib_data.values
    num_col = len(new_col_names)

    # change data to appropriate lags
    for col in range(num_col):
        univ_ib_data[:, col] = filt.lag(univ_ib_data[:, col], 2)
    univ_ib_vl = filt.lag(univ_ib_vl, 2)
    univ_ib_cl1 = univ_ib_cl.copy()
    univ_ib_cl2 = (univ_ib_cl + filt.lag(univ_ib_cl)) / 2

    # divide the targets by the volatility
    univ_ib_cl1 = univ_ib_cl1 / univ_ib_vl
    univ_ib_cl2 = univ_ib_cl2 / univ_ib_vl

    # divide each signal by the volatility, then standardize it
    for col in range(num_col):
        univ_ib_data[:, col] = univ_ib_data[:, col] / univ_ib_vl
        univ_ib_data[:, col] = (
            univ_ib_data[:, col] / med_abs_dev(univ_ib_data[start_row:, col])
        )
    univ_ib_cl1 = univ_ib_cl1 / med_abs_dev(univ_ib_cl1[start_row:])
    univ_ib_cl2 = univ_ib_cl2 / med_abs_dev(univ_ib_cl2[start_row:])

    window = univ_ib_data[start_row:]

    # get the correlations with univ_ib_cl
    univ_ib_correl1 = np.zeros(num_col)
    univ_ib_correl2 = np.zeros(num_col)
    for col in range(num_col):
        univ_ib_correl1[col] = smart_kendall(window[:, col], univ_ib_cl1[start_row:])
        univ_ib_correl2[col] = smart_kendall(window[:, col], univ_ib_cl2[start_row:])

    # NaN out the scores of columns excluded from the restricted passes.
    # presumably 'M_' / 'W_' tag monthly / weekly signals - TODO confirm
    has_m = np.array(['M_' in name for name in new_col_names], dtype=bool)
    has_m_or_w = np.array(
        [('M_' in name) or ('W_' in name) for name in new_col_names], dtype=bool
    )
    univ_ib_correl1a = np.where(has_m, np.nan, univ_ib_correl1)
    univ_ib_correl2a = np.where(has_m, np.nan, univ_ib_correl2)
    univ_ib_correl1b = np.where(has_m_or_w, np.nan, univ_ib_correl1)
    univ_ib_correl2b = np.where(has_m_or_w, np.nan, univ_ib_correl2)

    passes = (
        (univ_ib_correl1, univ_ib_correl2),
        (univ_ib_correl1a, univ_ib_correl2a),
        (univ_ib_correl1b, univ_ib_correl2b),
    )
    for univ_ib_correl1_, univ_ib_correl2_ in passes:
        _correl = univ_ib_correl1_ + univ_ib_correl2_

        # highest combined scores
        _mom_sig, _mom_val, last_step = _select_decorrelated(
            _correl, window, new_col_names, argmax_fixed,
            lambda score: score > score_threshold, 20,
        )
        print(last_step)
        print(_mom_sig)
        print(_mom_val)

        # lowest combined scores
        _rev_sig, _rev_val, last_step = _select_decorrelated(
            _correl, window, new_col_names, argmin_fixed,
            lambda score: score < -score_threshold, 40,
        )
        print(last_step)
        print(_rev_sig)
        print(_rev_val)