Exemple #1
0
def weighted_mean_return(factor_data: pd.DataFrame, grouper: list):
    """
    Compute the weighted mean forward return and its standard error per group.

    Parameters
    ---
    :param factor_data: frame holding the forward-return columns (as selected
        by `get_forward_returns_columns`) and a "weights" column
    :param grouper: keys handed to `DataFrame.groupby`

    Returns
    ---
    :return mean_ret: weighted mean of every forward-return column per group
    :return std_error_ret: weighted standard deviation of every
        forward-return column per group, divided by the square root of the
        group's row count
    """
    return_cols = get_forward_returns_columns(factor_data.columns)
    selected_cols = return_cols.append(pd.Index(["weights"]))

    def _column_stats(values, weights):
        # Statistics of one forward-return column inside one group.
        n_obs = len(values)
        mean = np.average(values, weights=weights, axis=0)
        squared_dev = (values - mean)**2
        weighted_var = np.average(squared_dev, weights=weights, axis=0)
        # Rescale by n / (n - 1); the max() guards the single-row group.
        variance = weighted_var * n_obs / max((n_obs - 1), 1)
        return pd.Series([mean, np.sqrt(variance), n_obs],
                         index=["mean", "std", "count"])

    def _group_stats(group):
        # Missing weights count as zero weight.
        group_weights = group["weights"].fillna(0.0).values
        return group[return_cols].apply(_column_stats, weights=group_weights)

    stats_by_group = factor_data.groupby(grouper)[selected_cols].apply(
        _group_stats)

    # The statistic label ("mean" / "std" / "count") is the innermost level.
    mean_ret = stats_by_group.xs("mean", level=-1)
    std_ret = stats_by_group.xs("std", level=-1)
    n_ret = stats_by_group.xs("count", level=-1)
    std_error_ret = std_ret / np.sqrt(n_ret)

    return mean_ret, std_error_ret
Exemple #2
0
def demean_forward_returns(factor_data: pd.DataFrame,
                           grouper: list = None) -> pd.DataFrame:
    """
    Subtract the weighted group mean from every forward-return column.

    Parameters
    ---
    :param factor_data: frame holding the forward-return columns (as selected
        by `get_forward_returns_columns`) and a "weights" column; the original
        notes describe it as indexed by a ['date', 'asset'] MultiIndex
    :param grouper: keys handed to `DataFrame.groupby`; when empty or None the
        data is grouped by "datetime"

    Returns
    ---
    :return adjust_forward_returns: a copy of `factor_data` whose
        forward-return columns have had the weighted group mean removed
    """
    # Work on a copy so the caller's frame is left untouched.
    factor_data = factor_data.copy()

    if not grouper:
        grouper = ["datetime"]

    cols = get_forward_returns_columns(factor_data.columns)

    def _demean(group):
        # Missing weights count as zero weight.
        weighted_mean = np.average(
            group[cols], axis=0, weights=group["weights"].fillna(0.0).values)
        return group[cols].subtract(weighted_mean, axis=1)

    # group_keys=False keeps the result on the original row index. Without it
    # pandas >= 2.0 prepends a group-key level to the result of a
    # transform-like apply, and the assignment below would no longer align
    # with factor_data.
    factor_data[cols] = factor_data.groupby(
        grouper, as_index=False,
        group_keys=False)[cols.append(pd.Index(["weights"]))].apply(_demean)

    return factor_data
Exemple #3
0
def factor_returns(
    factor_data: pd.DataFrame,
    demeaned: bool = True,
    group_adjust: bool = False,
    equal_weight: bool = False,
    by_asset: bool = False,
):
    """
    Compute the returns of a portfolio weighted by factor values.

    Parameters
    ---
    :param factor_data: factor data containing the forward-return columns
    :param demeaned: forwarded to `factor_weights` (per the original notes:
        whether to build a long-short portfolio)
    :param group_adjust: forwarded to `factor_weights` (per the original
        notes: whether the long-short portfolio is built per group)
    :param equal_weight: forwarded to `factor_weights` (per the original
        notes: build the long and short legs around the factor median)
    :param by_asset: when True return the per-row weighted returns instead of
        summing them per "datetime" index level; defaults to False

    Returns
    ---
    :return returns: weighted forward returns, per row when `by_asset` is
        truthy, otherwise summed over the "datetime" index level
    """
    weights = factor_weights(factor_data, demeaned, group_adjust, equal_weight)

    return_cols = get_forward_returns_columns(factor_data.columns)
    weighted = factor_data[return_cols].multiply(weights, axis=0)

    if not by_asset:
        return weighted.groupby(level="datetime").sum()

    return weighted
Exemple #4
0
 def src_ic(group):
     """
     Spearman rank correlation between the "factor" column of `group` and
     each of its forward-return columns.

     The forward-return columns are looked up from the enclosing scope's
     `factor_data`.
     """
     factor_values = group["factor"]
     return_cols = get_forward_returns_columns(factor_data.columns)

     def _rank_corr(forward_return):
         # Element 0 of the spearmanr result is the correlation coefficient.
         return stats.spearmanr(forward_return, factor_values)[0]

     return group[return_cols].apply(_rank_corr)
Exemple #5
0
def factor_alpha_beta(
        factor_data: pd.DataFrame,
        returns: pd.DataFrame = None,
        demeaned: bool = True,
        group_adjust: bool = False,
        equal_weight: bool = False,
):
    """
    Regress factor portfolio returns on the mean universe return to obtain
    the annualised alpha and the beta for every forward-return period.

    Parameters
    ---
    :param factor_data: factor data containing the forward-return columns;
        the original notes describe it as indexed by a ['date', 'asset']
        MultiIndex
    :param returns: portfolio returns; when None they are computed with
        `factor_returns`. A Series is accepted and is treated as belonging to
        the first forward-return column. The object passed in is not modified.
    :param demeaned: forwarded to `factor_returns` (per the original notes:
        whether the portfolio is long-short)
    :param group_adjust: forwarded to `factor_returns` (per the original
        notes: whether to apply industry-neutral handling)
    :param equal_weight: forwarded to `factor_returns`

    Returns
    ---
    :return alpha_beta: frame with rows "Ann. alpha" and "beta" and one
        column per period; both entries are NaN for a period whose fitted
        parameters cannot be unpacked into exactly (alpha, beta)
    """
    if returns is None:
        returns = factor_returns(
            factor_data,
            demeaned,
            group_adjust,
            equal_weight
        )

    # Mean forward return across the universe per "datetime", restricted to
    # the dates present in the portfolio returns.
    universe_ret = (
        factor_data.groupby(level="datetime")[get_forward_returns_columns(
            factor_data.columns
        )].mean().loc[returns.index]
    )

    if isinstance(returns, pd.Series):
        # Build the one-column frame directly instead of assigning to
        # `returns.name`, which would rename the caller's Series in place.
        returns = returns.to_frame(name=universe_ret.columns.values[0])

    alpha_beta = pd.DataFrame()
    for period in returns.columns.values:
        x = universe_ret[period].values
        y = returns[period].values
        x = add_constant(x)

        reg_fit = OLS(y, x).fit()
        try:
            alpha, beta = reg_fit.params
        except ValueError:
            # The fit did not yield exactly two parameters.
            alpha_beta.loc["Ann. alpha", period] = np.nan
            alpha_beta.loc["beta", period] = np.nan
        else:
            # Number of such periods per year, used to compound alpha.
            freq_adjust = pd.Timedelta(days=DAYS_PER_YEAR) / pd.Timedelta(
                utils.get_period(period.replace("period_",
                                                ""))
            )
            alpha_beta.loc["Ann. alpha",
                           period] = (1 + alpha)**freq_adjust - 1.0
            alpha_beta.loc["beta", period] = beta
    return alpha_beta
Exemple #6
0
def create_summary_tear_sheet(factor_data: pd.DataFrame,
                              long_short: bool = True,
                              group_neutral: bool = False):
    """
    Compute the return statistics for a summary tear sheet and draw the
    quantile statistics table and the quantile returns bar chart.

    Parameters
    ---
    :param factor_data: factor data; must contain a "factor_quantile" column
        and the forward-return columns
    :param long_short: passed as `demeaned` to the `perf` helpers
    :param group_neutral: passed as `group_adjust` to the `perf` helpers
    """
    # Return Analysis
    mean_quant_ret, std_quantiles = perf.mean_return_by_quantile(
        factor_data,
        by_group=False,
        demeaned=long_short,
        group_adjust=group_neutral)

    # Rescale every period's mean return to the first period column.
    mean_quant_rateret = mean_quant_ret.apply(
        utils.rate_of_return, axis=0, base_period=mean_quant_ret.columns[0])

    mean_quant_ret_bydatetime, std_quant_bydatetime = perf.mean_return_by_quantile(
        factor_data,
        by_datetime=True,
        by_group=False,
        demeaned=long_short,
        group_adjust=group_neutral,
    )

    mean_quant_rateret_bydatetime = mean_quant_ret_bydatetime.apply(
        utils.rate_of_return,
        axis=0,
        base_period=mean_quant_ret_bydatetime.columns[0])
    std_quant_bydatetime = std_quant_bydatetime.apply(
        utils.std_conversion,
        axis=0,
        base_period=std_quant_bydatetime.columns[0])
    # NOTE(review): alpha_beta and the spread statistics below are computed
    # but not consumed in the visible part of this function.
    alpha_beta = perf.factor_alpha_beta(factor_data,
                                        demeaned=long_short,
                                        group_adjust=group_neutral)
    mean_ret_spread_quant, std_spread_quant = perf.mean_returns_spread(
        mean_quant_rateret_bydatetime,
        factor_data["factor_quantile"].max(),
        factor_data["factor_quantile"].min(),
        std_err=std_quant_bydatetime,
    )

    periods = utils.get_forward_returns_columns(factor_data.columns)

    fr_cols = len(periods)
    vertical_sections = 2 + fr_cols * 3
    # The figure must be bound to `gf`, the name used for `next_row()` below;
    # it was previously bound to `gr`, which raised NameError at the first use.
    gf = GridFigure(rows=vertical_sections, cols=1)

    plot_quantile_statistics_table(factor_data)
    plot_quantile_returns_bar(mean_quant_rateret,
                              by_group=False,
                              ylim_percentiles=None,
                              ax=gf.next_row())
Exemple #7
0
    def create_summary_tear_sheet(
            self,
            by_datetime=True,
            by_group: bool = False,
            long_short: bool = True,
            group_neutral: bool = False,
    ):
        """
        Create a small summary tear sheet covering factor return analysis,
        IC values and turnover analysis.

        The factor data is taken from `self._clean_factor_data` and the
        periods from `self.periods`.

        Parameters
        ---
        :param by_datetime: accepted for interface compatibility; the
            per-datetime statistics below are always requested with
            `by_datetime=True`
        :param by_group: passed to the first `calc_mean_return_by_quantile`
            call
        :param long_short: passed as `demeaned` to the calculation helpers
            (per the original notes: analyse a long-short portfolio)
        :param group_neutral: passed as `group_adjust` to the calculation
            helpers (per the original notes: industry-neutral handling)
        """
        # Returns Analysis
        mean_quant_ret, std_quant = self.calc_mean_return_by_quantile(
            by_group=by_group, demeaned=long_short, group_adjust=group_neutral)
        mean_quant_rateret = mean_quant_ret.apply(
            utils.rate_of_return,
            axis=0,
            base_period=mean_quant_ret.columns[0]
        )

        mean_quant_ret_bydatetime, std_quant_bydatetime = self.calc_mean_return_by_quantile(
            by_datetime=True, demeaned=long_short, group_adjust=group_neutral)

        mean_quant_rateret_bydatetime = mean_quant_ret_bydatetime.apply(
            utils.rate_of_return,
            axis=0,
            base_period=mean_quant_ret_bydatetime.columns[0],
        )
        # Standard errors are rescaled with std_conversion, not
        # rate_of_return, which is meant for mean returns.
        std_quant_rate_bydatetime = std_quant_bydatetime.apply(
            utils.std_conversion,
            axis=0,
            base_period=std_quant_bydatetime.columns[0]
        )

        alpha_beta = self.calc_factor_alpha_beta(
            demeaned=long_short,
            group_adjust=group_neutral
        )

        mean_ret_spread_quant, std_spread_quant = self.calc_mean_returns_spread(
        )

        fr_cols = utils.get_forward_returns_columns(
            self._clean_factor_data.columns
        )

        vertical_sections = 2 + len(fr_cols) * 3
        gf = GridFigure(rows=vertical_sections, cols=1)

        plotting.plot_quantile_statistics_table(self._clean_factor_data)
        plotting.plot_returns_table(
            alpha_beta,
            mean_quant_rateret,
            mean_ret_spread_quant
        )
        plotting.plot_quantile_returns_bar(
            mean_quant_rateret,
            by_group=False,
            ylim_percentiles=None,
            ax=gf.next_row()
        )

        # Information Analysis
        ic = perf.factor_information_coefficient(self._clean_factor_data)
        plotting.plot_information_table(ic)

        # Turnover Analysis
        # FIXME: stocks settle T+1, so the frequency can only be daily or
        # coarser
        quantile_factor = self._clean_factor_data["factor_quantile"]
        # One frame per period, with one column per quantile 1..max.
        quantile_turnover = {
            p: pd.concat(
                [
                    perf.quantile_turnover(quantile_factor,
                                           q,
                                           p)
                    for q in range(1,
                                   int(quantile_factor.max()) + 1)
                ],
                axis=1,
            )
            for p in self.periods
        }
        autocorrelation = pd.concat(
            [
                perf.factor_rank_autocorrelation(
                    self._clean_factor_data,
                    period
                ) for period in self.periods
            ],
            axis=1,
        )

        plotting.plot_turnover_table(autocorrelation, quantile_turnover)

        plt.show()
        gf.close()
Exemple #8
0
def create_summary_tear_sheet(
        factor_data: pd.DataFrame,
        by_datetime: bool = True,  # compute per datetime
        by_group: bool = False,  # compute per group
        long_short: bool = True,  # long-short portfolio
        group_neutral: bool = False,  # group neutral
        periods: Union[int, Tuple[int], List[int]] = 1,
        frequence: str = '1d'):
    """
    Create a small summary tear sheet covering factor return analysis,
    IC values and turnover analysis.

    Parameters
    ---
    :param factor_data: factor data; must contain a "factor_quantile" column
        and the forward-return columns
    :param by_datetime: passed to the second `perf.mean_return_by_quantile`
        call
    :param by_group: passed to `perf.mean_return_by_quantile` and to the
        quantile returns bar plot
    :param long_short: passed as `demeaned` to the `perf` helpers (per the
        original notes: analyse a long-short portfolio)
    :param group_neutral: passed as `group_adjust` to the `perf` helpers (per
        the original notes: industry-neutral handling)
    :param periods: a single period or a sequence of periods used for the
        turnover and rank-autocorrelation analysis
    :param frequence: not read by this function
    """
    period_list = [periods] if isinstance(periods, int) else periods

    def _to_base_rate(frame):
        # Rescale every period column to the first period column.
        return frame.apply(utils.rate_of_return,
                           axis=0,
                           base_period=frame.columns[0])

    def _to_base_std(frame):
        # Same rescaling for standard errors.
        return frame.apply(utils.std_conversion,
                           axis=0,
                           base_period=frame.columns[0])

    # Returns analysis
    quantile_mean, quantile_std = perf.mean_return_by_quantile(
        factor_data,
        by_group=by_group,
        demeaned=long_short,
        group_adjust=group_neutral)
    quantile_mean_rate = _to_base_rate(quantile_mean)
    quantile_std_rate = _to_base_std(quantile_std)

    dated_mean, dated_std = perf.mean_return_by_quantile(
        factor_data,
        by_datetime=by_datetime,
        by_group=by_group,
        demeaned=long_short,
        group_adjust=group_neutral)
    # Evaluated in the same order as before; the results are not consumed
    # further down.
    _to_base_rate(dated_mean)
    _to_base_std(dated_std)

    alpha_beta = perf.factor_alpha_beta(factor_data=factor_data,
                                        demeaned=long_short,
                                        group_adjust=group_neutral)

    spread_mean, _spread_std = perf.mean_returns_spread(
        quantile_mean_rate,
        upper_quant=factor_data.factor_quantile.max(),
        lower_quant=factor_data.factor_quantile.min(),
        std_err=quantile_std_rate)

    return_cols = utils.get_forward_returns_columns(factor_data.columns)
    gf = GridFigure(rows=2 + len(return_cols) * 3, cols=1)

    plotting.plot_quantile_statistics_table(factor_data)
    plotting.plot_returns_table(alpha_beta, quantile_mean_rate, spread_mean)
    plotting.plot_quantile_returns_bar(quantile_mean_rate,
                                       by_group=by_group,
                                       ylim_percentiles=None,
                                       ax=gf.next_row())

    # Information analysis
    ic = perf.factor_information_coefficient(factor_data)
    plotting.plot_information_table(ic)

    # Turnover analysis
    # FIXME: stocks settle T+1, so the frequency can only be daily or coarser
    quantile_series = factor_data["factor_quantile"]
    turnover_by_period = {}
    for period in period_list:
        # One column per quantile 1..max for this period.
        per_quantile = [
            perf.quantile_turnover(quantile_series, quantile, period)
            for quantile in range(1, int(quantile_series.max()) + 1)
        ]
        turnover_by_period[period] = pd.concat(per_quantile, axis=1)

    rank_autocorr = []
    for period in period_list:
        rank_autocorr.append(
            perf.factor_rank_autocorrelation(factor_data, period))
    autocorrelation = pd.concat(rank_autocorr, axis=1)

    plotting.plot_turnover_table(autocorrelation, turnover_by_period)

    plt.show()
    gf.close()