def test_q_anl_impl(self):
    """er_quantile_analysis must match a pandas groupby-mean baseline."""
    bins = 5
    factor = self.x[:, 0]
    labels = quantile(factor, bins)
    # Baseline: mean return of each quantile bucket, computed via pandas.
    baseline = pd.Series(self.r, index=labels).groupby(level=0).mean()
    actual = er_quantile_analysis(factor, bins, self.r)
    np.testing.assert_array_almost_equal(baseline.values, actual)
def test_quantile(self):
    """quantile must assign each sample the bucket index implied by its rank."""
    n, bins = 5000, 10
    sample = np.random.randn(n)
    labels = quantile(sample, bins)
    # Double argsort yields the 0-based rank of each element.
    ranks = sample.argsort().argsort()
    width = float(n) / bins
    edges = [int(i * width) for i in range(1, bins + 1)]
    lo = 0
    for bucket, hi in enumerate(edges):
        in_bucket = (ranks >= lo) & (ranks < hi)
        self.assertTrue(np.all(labels[in_bucket] == bucket))
        lo = hi
def er_quantile_analysis(er: np.ndarray, n_bins: int, dx_return: np.ndarray) -> np.ndarray:
    """De-trended mean return per expected-return quantile.

    Buckets ``er`` into ``n_bins`` quantile groups, averages ``dx_return``
    within each group, then removes the cross-sectional trend so the
    de-trended group returns sum to zero.

    :param er: expected returns; flattened to 1-D.
    :param n_bins: number of quantile buckets (must be > 1 — the de-trend
        weight is 1 / (n_bins - 1)).
    :param dx_return: per-asset realized returns, 1-D or (n, k).
    :return: 1-D array of de-trended group mean returns.
    """
    er = er.flatten()
    q_groups = quantile(er, n_bins)
    if dx_return.ndim < 2:
        # BUG FIX: was `dx_return.shape = -1, 1`, which mutated the
        # caller's array in place; reshape returns a view instead.
        dx_return = dx_return.reshape(-1, 1)
    group_return = agg_mean(q_groups, dx_return).flatten()
    total_return = group_return.sum()
    # Vectorized form of the original per-element loop:
    # ret[i] = (1 + w) * group_return[i] - w * total_return
    res_weight = 1. / (n_bins - 1)
    return (1. + res_weight) * group_return - res_weight * total_return
def test_q_anl_impl(self):
    """er_quantile_analysis(de_trend=True) must match a hand-rolled baseline."""
    bins = 5
    factor = self.x[:, 0]
    labels = quantile(factor, bins)
    # Per-bucket mean returns via pandas, then de-trend them by hand.
    means = pd.Series(self.r, index=labels).groupby(level=0).mean().values.flatten()
    weight = 1. / (bins - 1)
    expected = (1. + weight) * means - weight * means.sum()
    actual = er_quantile_analysis(factor, bins, self.r, de_trend=True)
    np.testing.assert_array_almost_equal(expected, actual)
def er_quantile_analysis(
    er: np.ndarray,
    n_bins: int,
    dx_return: np.ndarray,
    benchmark: Optional[np.ndarray] = None,
) -> np.ndarray:
    """Mean return per expected-return quantile, optionally benchmark-relative.

    Buckets ``er`` into ``n_bins`` quantile groups and averages ``dx_return``
    within each group. When ``benchmark`` weights are supplied, the group
    returns are scaled by the total benchmark weight and the benchmark
    return is subtracted.

    :param er: expected returns; flattened to 1-D.
    :param n_bins: number of quantile buckets.
    :param dx_return: per-asset realized returns, 1-D or (n, k).
    :param benchmark: optional benchmark weight vector over the same assets.
    :return: 1-D array of (possibly benchmark-adjusted) group mean returns.
    """
    er = er.flatten()
    q_groups = quantile(er, n_bins)
    if dx_return.ndim < 2:
        # BUG FIX: was `dx_return.shape = -1, 1`, which mutated the
        # caller's array in place; reshape returns a view instead.
        dx_return = dx_return.reshape(-1, 1)
    group_return = agg_mean(q_groups, dx_return).flatten()
    if benchmark is None:
        return group_return
    b_ret = np.dot(benchmark, dx_return)
    b_total = benchmark.sum()
    return group_return * b_total - b_ret
def basic_info(destsession, session, factor_name, task_id, fac_rets_series,
               ic_series, cum_df, total_data, kwargs):
    """Compute back-test summary statistics for a factor and persist them.

    Writes one row to the ``basic_info`` destination table containing the
    cumulative factor return and its t-statistic, IC statistics, the
    annualized return of quantile 0 and the turnover rate of the
    better-performing extreme quantile.
    """
    # Cumulative factor return over the whole sample.
    fac_rets = fac_rets_series.cumsum().values[-1]
    # t-statistic of the per-period factor returns against zero.
    t_rets = ttest_1samp(fac_rets_series, 0).statistic

    ic_mean = ic_series.mean()
    ic_std = ic_series.std()
    # Share of periods with |IC| above the 0.02 threshold.
    ic_marked = len(ic_series[ic_series.abs() > 0.02]) / len(ic_series)
    ir = ic_mean / ic_std

    # Annualized return of quantile 0, assuming 250 trading days per year.
    annualized = cum_df.q0.values[-1] / len(cum_df.q0) * 250

    # Turnover: re-bucket the factor into quintiles on each trade date.
    n_bins = 5
    bucketed = []
    for _, g in total_data.groupby('trade_date'):
        g = g.copy()  # avoid SettingWithCopyWarning on the groupby slice
        er = g['prc_factor'].values
        g['group'] = quantile(er.flatten(), n_bins)
        bucketed.append(g)
    turnover_df = pd.concat(bucketed).reset_index()[['trade_date', 'code', 'group']]

    # Follow the better-performing extreme quantile (the long leg).
    if cum_df.q0.cumsum().values[-1] > cum_df.q4.cumsum().values[-1]:
        group_df = turnover_df.set_index('group').loc[0].reset_index()
    else:
        group_df = turnover_df.set_index('group').loc[4].reset_index()

    last_code = None
    diff_count = 0
    sum_count = 0
    for _, g in group_df.groupby('trade_date'):
        codes = g.code.values
        if last_code is None:
            sum_count = len(codes)
        else:
            kept = set(codes) & set(last_code)
            diff_count += len(codes) - len(kept)
            sum_count += len(codes)
        # BUG FIX: last_code was only set on the first date, so every later
        # period was diffed against the initial holdings instead of the
        # previous period's holdings.
        last_code = codes
    turnover_rate = diff_count / sum_count

    info = {
        'fac_rets': fac_rets,
        't_rets': t_rets,
        'ic_mean': ic_mean,
        'ic_std': ic_std,
        'ic_marked': ic_marked,
        'ir': ir,
        'annualized': annualized,
        'turnover_rate': turnover_rate,
        # BUG FIX: format was '%H:%S:%M' (hour:second:minute).
        'update_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'params': str(json.dumps(kwargs)),
        'remark': '中证500股票池',
    }
    basic_df = pd.DataFrame([info])
    basic_df['session'] = session
    basic_df['factor_name'] = factor_name
    basic_df['task_id'] = task_id
    update_destdb(destsession, 'basic_info', basic_df, is_trade=False)