def weighted_mean_return(factor_data: pd.DataFrame, grouper: list):
    """
    Compute the weighted mean forward return and its standard error per group.

    Parameters
    ---
    :param factor_data: frame containing the forward-return columns (as
        selected by ``get_forward_returns_columns``) and a ``weights`` column
    :param grouper: keys handed to ``DataFrame.groupby``

    Returns
    ---
    :return mean_ret: weighted mean of each forward-return column per group
    :return std_error_ret: weighted standard deviation of each column divided
        by the square root of the number of rows in the group
    """
    ret_cols = get_forward_returns_columns(factor_data.columns)

    def _column_stats(values, weights):
        # Weighted mean / std / row count of a single forward-return column.
        n_obs = len(values)
        w_mean = np.average(values, weights=weights, axis=0)
        squared_dev = (values - w_mean) ** 2
        # Rescale by n / (n - 1); max(..., 1) keeps single-row groups from
        # dividing by zero.
        w_var = (np.average(squared_dev, weights=weights, axis=0)
                 * n_obs / max(n_obs - 1, 1))
        return pd.Series(
            [w_mean, np.sqrt(w_var), n_obs],
            index=["mean", "std", "count"],
        )

    def _group_stats(frame):
        # Rows with a missing weight contribute with zero weight.
        group_weights = frame["weights"].fillna(0.0).values
        return frame[ret_cols].apply(_column_stats, weights=group_weights)

    selected = ret_cols.append(pd.Index(["weights"]))
    stats_by_group = factor_data.groupby(grouper)[selected].apply(_group_stats)

    # The innermost index level carries the "mean" / "std" / "count" labels.
    mean_ret = stats_by_group.xs("mean", level=-1)
    std_ret = stats_by_group.xs("std", level=-1)
    n_rows = stats_by_group.xs("count", level=-1)
    std_error_ret = std_ret / np.sqrt(n_rows)
    return mean_ret, std_error_ret
def demean_forward_returns(factor_data: pd.DataFrame, grouper: list = None) -> pd.DataFrame:
    """
    Subtract the weighted group mean from the forward returns.

    Parameters
    ---
    :param factor_data: forward-return data; must contain the forward-return
        columns and a ``weights`` column. The original documentation states
        the index is a (datetime, asset) MultiIndex.
    :param grouper: grouping keys; when empty or None the data is grouped
        by ``"datetime"``

    Returns
    ---
    :return adjust_forward_returns: copy of ``factor_data`` whose
        forward-return columns have had the weighted group mean removed
    """
    result = factor_data.copy()
    group_keys = grouper if grouper else ["datetime"]
    ret_cols = get_forward_returns_columns(result.columns)

    def _subtract_weighted_mean(frame):
        # Missing weights are treated as zero weight.
        group_weights = frame["weights"].fillna(0.0).values
        weighted_mean = np.average(frame[ret_cols], axis=0, weights=group_weights)
        return frame[ret_cols].subtract(weighted_mean, axis=1)

    selected = ret_cols.append(pd.Index(["weights"]))
    grouped = result.groupby(group_keys, as_index=False)[selected]
    result[ret_cols] = grouped.apply(_subtract_weighted_mean)
    return result
def factor_returns(
    factor_data: pd.DataFrame,
    demeaned: bool = True,
    group_adjust: bool = False,
    equal_weight: bool = False,
    by_asset: bool = False,
):
    """
    Compute the returns of a portfolio weighted by factor values.

    Parameters
    ---
    :param factor_data: factor data containing the forward-return columns
    :param demeaned: forwarded to ``factor_weights`` (original docs: whether
        to build a long-short portfolio)
    :param group_adjust: forwarded to ``factor_weights`` (original docs:
        whether the long-short portfolio is built per group)
    :param equal_weight: forwarded to ``factor_weights``
    :param by_asset: when True return the weighted returns row by row;
        otherwise sum them per ``datetime`` index level. Defaults to False.

    Returns
    ---
    :return returns: weighted forward returns, per row or summed per datetime
    """
    weights = factor_weights(factor_data, demeaned, group_adjust, equal_weight)
    ret_cols = get_forward_returns_columns(factor_data.columns)
    weighted_returns = factor_data[ret_cols].multiply(weights, axis=0)

    if by_asset:
        return weighted_returns
    return weighted_returns.groupby(level="datetime").sum()
def src_ic(group):
    """
    Compute the Spearman rank correlation between the factor and each forward return.

    Parameters
    ---
    :param group: slice of the factor data; must contain a ``factor`` column
        and the forward-return columns recognised by
        ``get_forward_returns_columns``

    Returns
    ---
    :return _ic: one correlation coefficient per forward-return column
    """
    factor_values = group["factor"]
    # Take the forward-return columns from the group itself instead of an
    # outer ``factor_data`` variable, so the function does not depend on a
    # name that only exists in some enclosing scope.
    # NOTE(review): assumes get_forward_returns_columns selects purely by
    # column name, so the group's columns give the same result — confirm.
    ret_cols = get_forward_returns_columns(group.columns)
    _ic = group[ret_cols].apply(
        lambda col: stats.spearmanr(col, factor_values)[0]
    )
    return _ic
def factor_alpha_beta(
    factor_data: pd.DataFrame,
    returns: pd.DataFrame = None,
    demeaned: bool = True,
    group_adjust: bool = False,
    equal_weight: bool = False,
):
    """
    Regress factor portfolio returns on the universe mean return.

    For every period column an OLS fit ``returns ~ const + universe_mean`` is
    run; the intercept is annualized and reported as "Ann. alpha", the slope
    is reported as "beta".

    Parameters
    ---
    :param factor_data: factor data whose index has a ``datetime`` level and
        whose columns include the forward-return columns
    :param returns: factor portfolio returns (DataFrame, or a Series for a
        single period). When None they are computed with ``factor_returns``.
        The passed object is not modified.
    :param demeaned: forwarded to ``factor_returns`` when ``returns`` is None
    :param group_adjust: forwarded to ``factor_returns`` when ``returns`` is None
    :param equal_weight: forwarded to ``factor_returns`` when ``returns`` is None

    Returns
    ---
    :return alpha_beta: DataFrame with rows "Ann. alpha" and "beta" and one
        column per period; both are NaN for a period whose fit does not yield
        exactly two parameters
    """
    if returns is None:
        returns = factor_returns(
            factor_data, demeaned, group_adjust, equal_weight
        )

    ret_cols = get_forward_returns_columns(factor_data.columns)
    # Mean forward return per datetime, aligned to the portfolio returns.
    universe_ret = (
        factor_data.groupby(level="datetime")[ret_cols].mean().loc[returns.index]
    )

    if isinstance(returns, pd.Series):
        # to_frame builds a new object, so the caller's Series keeps its
        # original name instead of being renamed in place.
        returns = returns.to_frame(name=universe_ret.columns.values[0])

    alpha_beta = pd.DataFrame()
    for period in returns.columns.values:
        x = add_constant(universe_ret[period].values)
        y = returns[period].values
        reg_fit = OLS(y, x).fit()
        try:
            alpha, beta = reg_fit.params
        except ValueError:
            # The fit did not produce exactly (intercept, slope).
            alpha_beta.loc["Ann. alpha", period] = np.nan
            alpha_beta.loc["beta", period] = np.nan
        else:
            # Number of periods of this length that fit into one year.
            freq_adjust = pd.Timedelta(days=DAYS_PER_YEAR) / pd.Timedelta(
                utils.get_period(period.replace("period_", ""))
            )
            alpha_beta.loc["Ann. alpha", period] = (1 + alpha)**freq_adjust - 1.0
            alpha_beta.loc["beta", period] = beta
    return alpha_beta
def create_summary_tear_sheet(factor_data: pd.DataFrame,
                              long_short: bool = True,
                              group_neutral: bool = False):
    """
    Compute summary return statistics for a factor and plot the quantile tables.

    Parameters
    ---
    :param factor_data: factor data containing the forward-return columns and
        a ``factor_quantile`` column
    :param long_short: passed as ``demeaned`` to the performance functions
    :param group_neutral: passed as ``group_adjust`` to the performance functions
    """
    # Return Analysis
    mean_quant_ret, std_quantiles = perf.mean_return_by_quantile(
        factor_data,
        by_group=False,
        demeaned=long_short,
        group_adjust=group_neutral)
    mean_quant_rateret = mean_quant_ret.apply(
        utils.rate_of_return,
        axis=0,
        base_period=mean_quant_ret.columns[0])

    mean_quant_ret_bydatetime, std_quant_bydatetime = perf.mean_return_by_quantile(
        factor_data,
        by_datetime=True,
        by_group=False,
        demeaned=long_short,
        group_adjust=group_neutral,
    )
    mean_quant_rateret_bydatetime = mean_quant_ret_bydatetime.apply(
        utils.rate_of_return,
        axis=0,
        base_period=mean_quant_ret_bydatetime.columns[0])
    std_quant_bydatetime = std_quant_bydatetime.apply(
        utils.std_conversion,
        axis=0,
        base_period=std_quant_bydatetime.columns[0])

    # NOTE(review): alpha_beta and the spread statistics are computed but not
    # rendered by this function.
    alpha_beta = perf.factor_alpha_beta(factor_data,
                                        demeaned=long_short,
                                        group_adjust=group_neutral)
    mean_ret_spread_quant, std_spread_quant = perf.mean_returns_spread(
        mean_quant_rateret_bydatetime,
        factor_data["factor_quantile"].max(),
        factor_data["factor_quantile"].min(),
        std_err=std_quant_bydatetime,
    )

    periods = utils.get_forward_returns_columns(factor_data.columns)
    fr_cols = len(periods)
    vertical_sections = 2 + fr_cols * 3
    # Bind the grid to ``gf``: the plotting call below takes its axis from
    # ``gf.next_row()``, so any other name raises NameError.
    gf = GridFigure(rows=vertical_sections, cols=1)

    plot_quantile_statistics_table(factor_data)
    plot_quantile_returns_bar(mean_quant_rateret,
                              by_group=False,
                              ylim_percentiles=None,
                              ax=gf.next_row())
def create_summary_tear_sheet(
    self,
    by_datetime=True,
    by_group: bool = False,
    long_short: bool = True,
    group_neutral: bool = False,
):
    """
    Create a compact summary tear sheet: return analysis, IC and turnover.

    Reads the factor data from ``self._clean_factor_data`` and the holding
    periods from ``self.periods``.

    Parameters
    ---
    :param by_datetime: accepted for interface compatibility.
        NOTE(review): currently unused, the per-datetime statistics are
        always computed with ``by_datetime=True``.
    :param by_group: whether the overall quantile returns are computed per group
    :param long_short: passed as ``demeaned`` to the return calculations
    :param group_neutral: passed as ``group_adjust`` to the return calculations
    """
    # Returns Analysis
    mean_quant_ret, std_quant = self.calc_mean_return_by_quantile(
        by_group=by_group, demeaned=long_short, group_adjust=group_neutral
    )
    mean_quant_rateret = mean_quant_ret.apply(
        utils.rate_of_return, axis=0, base_period=mean_quant_ret.columns[0]
    )

    mean_quant_ret_bydatetime, std_quant_bydatetime = self.calc_mean_return_by_quantile(
        by_datetime=True, demeaned=long_short, group_adjust=group_neutral
    )
    mean_quant_rateret_bydatetime = mean_quant_ret_bydatetime.apply(
        utils.rate_of_return,
        axis=0,
        base_period=mean_quant_ret_bydatetime.columns[0],
    )
    # Standard errors are rescaled with std_conversion, not rate_of_return,
    # which is meant for mean returns.
    std_quant_rate_bydatetime = std_quant_bydatetime.apply(
        utils.std_conversion,
        axis=0,
        base_period=std_quant_bydatetime.columns[0],
    )

    alpha_beta = self.calc_factor_alpha_beta(
        demeaned=long_short, group_adjust=group_neutral
    )
    mean_ret_spread_quant, std_spread_quant = self.calc_mean_returns_spread()

    fr_cols = utils.get_forward_returns_columns(
        self._clean_factor_data.columns
    )
    vertical_sections = 2 + len(fr_cols) * 3
    gf = GridFigure(rows=vertical_sections, cols=1)

    plotting.plot_quantile_statistics_table(self._clean_factor_data)
    plotting.plot_returns_table(
        alpha_beta, mean_quant_rateret, mean_ret_spread_quant
    )
    # NOTE(review): by_group is fixed to False here although
    # mean_quant_rateret is computed with the caller's by_group — confirm.
    plotting.plot_quantile_returns_bar(
        mean_quant_rateret,
        by_group=False,
        ylim_percentiles=None,
        ax=gf.next_row(),
    )

    # Information Analysis
    ic = perf.factor_information_coefficient(self._clean_factor_data)
    plotting.plot_information_table(ic)

    # Turnover Analysis
    # FIXME: stocks trade T+1, so the frequency can only be daily or coarser
    quantile_factor = self._clean_factor_data["factor_quantile"]
    quantile_turnover = {
        p: pd.concat(
            [
                perf.quantile_turnover(quantile_factor, q, p)
                for q in range(1, int(quantile_factor.max()) + 1)
            ],
            axis=1,
        )
        for p in self.periods
    }

    autocorrelation = pd.concat(
        [
            perf.factor_rank_autocorrelation(self._clean_factor_data, period)
            for period in self.periods
        ],
        axis=1,
    )

    plotting.plot_turnover_table(autocorrelation, quantile_turnover)

    plt.show()
    gf.close()
def create_summary_tear_sheet(
        factor_data: pd.DataFrame,
        by_datetime: bool = True,  # compute per datetime
        by_group: bool = False,  # compute per group
        long_short: bool = True,  # long-short portfolio
        group_neutral: bool = False,  # group neutral
        periods: Union[int, Tuple[int], List[int]] = 1,
        frequence: str = '1d'):
    """
    Create a compact summary tear sheet: return analysis, IC and turnover.

    Parameters
    ---
    :param factor_data: factor data containing the forward-return columns and
        a ``factor_quantile`` column
    :param by_datetime: passed to the second ``mean_return_by_quantile`` call
    :param by_group: whether quantile returns are computed (and plotted) per group
    :param long_short: passed as ``demeaned`` to the performance functions
    :param group_neutral: passed as ``group_adjust`` to the performance functions
    :param periods: holding period(s) for the turnover analysis; a single
        int is wrapped into a list
    :param frequence: NOTE(review): not used by this function
    """
    if isinstance(periods, int):
        periods = [periods]

    def _rebase(frame, converter):
        # Convert every column to the horizon of the frame's first column.
        return frame.apply(converter, axis=0, base_period=frame.columns[0])

    quantile_kwargs = dict(
        by_group=by_group,
        demeaned=long_short,
        group_adjust=group_neutral,
    )

    # Return analysis
    overall_mean, overall_std = perf.mean_return_by_quantile(
        factor_data, **quantile_kwargs)
    overall_mean_rate = _rebase(overall_mean, utils.rate_of_return)
    overall_std_rate = _rebase(overall_std, utils.std_conversion)

    dated_mean, dated_std = perf.mean_return_by_quantile(
        factor_data, by_datetime=by_datetime, **quantile_kwargs)
    # Computed for parity with the overall statistics; not consumed below.
    dated_mean_rate = _rebase(dated_mean, utils.rate_of_return)
    dated_std_rate = _rebase(dated_std, utils.std_conversion)

    alpha_beta = perf.factor_alpha_beta(
        factor_data=factor_data,
        demeaned=long_short,
        group_adjust=group_neutral,
    )

    # NOTE(review): the spread is built from the overall (not per-datetime)
    # quantile returns — confirm this is intended.
    quantiles = factor_data.factor_quantile
    spread_mean, spread_std = perf.mean_returns_spread(
        overall_mean_rate,
        upper_quant=quantiles.max(),
        lower_quant=quantiles.min(),
        std_err=overall_std_rate,
    )

    n_return_cols = len(utils.get_forward_returns_columns(factor_data.columns))
    gf = GridFigure(rows=2 + n_return_cols * 3, cols=1)

    plotting.plot_quantile_statistics_table(factor_data)
    plotting.plot_returns_table(alpha_beta, overall_mean_rate, spread_mean)
    plotting.plot_quantile_returns_bar(
        overall_mean_rate,
        by_group=by_group,
        ylim_percentiles=None,
        ax=gf.next_row(),
    )

    # Information Analysis
    ic = perf.factor_information_coefficient(factor_data)
    plotting.plot_information_table(ic)

    # Turnover Analysis
    # FIXME: stocks trade T+1, so the frequency can only be daily or coarser
    quantile_factor = factor_data["factor_quantile"]
    quantile_turnover = {}
    for p in periods:
        per_quantile = [
            perf.quantile_turnover(quantile_factor, q, p)
            for q in range(1, int(quantile_factor.max()) + 1)
        ]
        quantile_turnover[p] = pd.concat(per_quantile, axis=1)

    rank_autocorr = [
        perf.factor_rank_autocorrelation(factor_data, period)
        for period in periods
    ]
    autocorrelation = pd.concat(rank_autocorr, axis=1)

    plotting.plot_turnover_table(autocorrelation, quantile_turnover)

    plt.show()
    gf.close()