Example #1
0
def cal_ic_by_alphalens(factor_data, prices=None, group_by=None, periods=(20,), **kwargs):
    """Compute the factor information coefficient (IC) through alphalens.

    Parameters
    ----------
    factor_data : pd.Series or pd.DataFrame
        Factor values on a two-level index. A DataFrame is reduced to its
        first column. The input is copied, and the index levels of the copy
        are renamed to ``date`` and ``asset``.
    prices : pd.DataFrame, optional
        Price table. A frame with a two-level index is reduced to its first
        column and unstacked. When omitted, ``adj_close`` is loaded through
        ``data_source.load_factor`` for a window whose bounds come from
        ``tc.tradeDayOffset`` (offset -5 on the first factor date and
        ``max(periods)`` on the last one).
    group_by : optional
        Forwarded as the third positional argument of
        ``get_clean_factor_and_forward_returns``. When it is not None the IC
        is computed per group.
    periods : sequence of int
        Forward-return periods.
    **kwargs
        Extra keyword arguments for ``get_clean_factor_and_forward_returns``.

    Returns
    -------
    The result of ``factor_information_coefficient`` on the merged data.

    Raises
    ------
    ValueError
        If ``prices`` is neither None nor a DataFrame.
    """
    factor = factor_data.copy()
    if isinstance(factor, pd.DataFrame):
        factor = factor.iloc[:, 0]
    factor.index.names = ['date', 'asset']

    if prices is None:
        # No prices supplied: load adjusted closes around the factor's date range.
        factor_dates = factor.index.get_level_values('date')
        first_day = tc.tradeDayOffset(factor_dates.min(), -5)
        last_day = tc.tradeDayOffset(factor_dates.max(), max(periods))
        loaded = data_source.load_factor(
            'adj_close', '/stocks/', start_date=first_day, end_date=last_day)
        prices = loaded['adj_close'].unstack()
    elif not isinstance(prices, pd.DataFrame):
        raise ValueError('prices 格式非法!')
    elif prices.index.nlevels == 2:
        # Long-format prices: pivot the first column into a date x asset table.
        prices = prices.iloc[:, 0].unstack()

    merged = get_clean_factor_and_forward_returns(
        factor, prices, group_by, periods=periods, **kwargs)
    return factor_information_coefficient(
        merged, group_adjust=False, by_group=group_by is not None)
Example #2
0
def test_performance(factor, prices):
    """Print and plot alphalens IC and quantile-return diagnostics.

    Builds the cleaned factor / forward-return table with 5 quantiles and
    holding periods of 1, 5 and 10, then shows, in order: the IC histogram,
    the IC time series, the monthly mean-IC heatmap, and the cumulative
    return by quantile for each holding period. Nothing is returned.

    Parameters
    ----------
    factor
        Factor values, passed straight to
        ``alphalens.utils.get_clean_factor_and_forward_returns``.
    prices
        Price data, passed straight to the same function.
    """
    import matplotlib.pyplot as plt
    from alphalens import utils, performance, plotting

    holding_periods = (1, 5, 10)

    # Forward (holding) returns per stock, joined with the factor quantile.
    factor_and_returns = utils.get_clean_factor_and_forward_returns(
        factor, prices, quantiles=5, periods=holding_periods)

    print("因子的IC值:")
    ic_frame = performance.factor_information_coefficient(factor_and_returns)
    print(ic_frame)
    for draw in (plotting.plot_ic_hist, plotting.plot_ic_ts):
        draw(ic_frame)
        plt.show()

    print("平均IC值-月:")
    monthly_ic = performance.mean_information_coefficient(
        factor_and_returns, by_time="M")
    plotting.plot_monthly_ic_heatmap(monthly_ic)
    plt.show()

    # Mean holding return per quantile and date, demeaned; element 0 of the
    # returned pair is the one that gets plotted.
    quantile_results = performance.mean_return_by_quantile(
        factor_and_returns, by_date=True, demeaned=True)
    mean_by_quantile = quantile_results[0]

    # Cumulative holding return per quantile, one figure per holding period.
    for horizon in holding_periods:
        plotting.plot_cumulative_returns_by_quantile(mean_by_quantile,
                                                     period=horizon)
        plt.show()
Example #3
0
def InformationTable(factor_data):
    """
    Plot the alphalens information table for ``factor_data``.

    The IC is computed with ``performance.factor_information_coefficient``
    and handed to ``plotting.plot_information_table``, whose result is
    returned unchanged.
    """
    return plotting.plot_information_table(
        performance.factor_information_coefficient(factor_data))
Example #4
0
 def create_ic_tear_sheet(self):
     """Draw the IC tear sheet for this object's factor.

     Computes the IC from the cleaned factor / forward-return data, then
     draws the IC histogram, the IC bar tear sheet and the monthly IC
     heatmap, and finally shows the figures. Returns None.
     """
     cleaned = self._get_clean_factor_and_fwd_return(
         self._factor, self._factor_freq)
     ic_values = factor_information_coefficient(cleaned)

     plot_ic_hist(ic_values)
     self.ic_bar_tear_sheet(ic_values)
     # NOTE(review): the same IC frame is passed to the monthly heatmap; if
     # this is alphalens' plot_monthly_ic_heatmap it presumably expects a
     # monthly mean IC instead - confirm against the imported function.
     plot_monthly_ic_heatmap(ic_values)
     plt.show()
Example #5
0
def mean_information_coefficient(factor_data,
                                 group_adjust=False,
                                 by_group=False,
                                 by_time=None):
    """
    Get the mean information coefficient of specified groups.
    Answers questions like:
    What is the mean IC for each month?
    What is the mean IC for each group for our whole timerange?
    What is the mean IC for each group, each week?

    Parameters
    ----------
    factor_data : pd.DataFrame - MultiIndex
        A MultiIndex DataFrame indexed by date (level 0) and asset (level 1),
        containing the values for a single alpha factor, forward returns for
        each period, the factor quantile/bin that factor value belongs to,
        and (optionally) the group the asset belongs to.
    group_adjust : bool
        Demean forward returns by group before computing IC.
    by_group : bool
        If True, take the mean IC for each group.
    by_time : str (pd time_rule), optional
        Time window to use when taking mean IC.
        See http://pandas.pydata.org/pandas-docs/stable/timeseries.html
        for available options.

    Returns
    -------
    ic : pd.DataFrame
        Mean Spearman Rank correlation between factor and provided
        forward price movement windows.
    """

    ic = factor_information_coefficient(factor_data, group_adjust, by_group)

    grouper = []
    if by_time is not None:
        # pd.Grouper(freq=...) is the supported spelling of the old
        # pd.TimeGrouper, which was removed in pandas 1.0. With no key it
        # groups on the frame's index, i.e. the 'date' index set below.
        grouper.append(pd.Grouper(freq=by_time))
    if by_group:
        grouper.append('group')

    if not grouper:
        # No grouping requested: a single column holding the overall mean.
        ic = pd.DataFrame(ic.mean())
    else:
        ic = ic.reset_index().set_index('date').groupby(grouper).mean()

    # Same int64 column labels as before; pd.Int64Index itself was removed
    # in pandas 2.0, so the dtype is requested through pd.Index instead.
    ic.columns = pd.Index(ic.columns, dtype='int64')

    return ic
Example #6
0
    def get_icir(self, by_group=False, by_date=True):
        """Return the factor IC, either per date or averaged over dates.

        NOTE(review): despite the method name, no IC / std(IC) ratio is
        computed here; only the IC or its mean is returned - confirm whether
        callers expect an information ratio.

        Parameters:
        -----------------------------
        by_group: bool
            Whether to compute the IC per group.
        by_date: bool
            Whether to keep the per-date values. When false, the mean is
            returned (per group if ``by_group`` is set).
        """
        from alphalens.performance import factor_information_coefficient

        ic_by_date = factor_information_coefficient(self._cf_and_fr,
                                                    by_group=by_group)
        if by_date:
            return ic_by_date
        if by_group:
            return ic_by_date.groupby('group').mean()
        return ic_by_date.mean()
Example #7
0
def get_IC_score(all_data, price):
    """Score every factor column of ``all_data`` by its IC statistics.

    For each column the cleaned factor / forward-return table is built with
    ``utils.get_clean_factor_and_forward_returns`` (``max_loss=0.99``) and
    the IC is computed. All statistics use the SECOND forward-return column
    (position 1) of the alphalens results.

    Parameters
    ----------
    all_data : pd.DataFrame
        One factor per column.
    price
        Price data forwarded to ``get_clean_factor_and_forward_returns``.

    Returns
    -------
    IC_score : pd.DataFrame
        One row per factor with ``IC_mean``, ``perc_above_0.02`` (share of
        IC observations above 0.02), ``average_return`` (mean factor
        return), ``IR`` (IC mean / IC std) and ``factor`` (the column name).
    IC_ : pd.DataFrame
        The second-period IC series of all factors, concatenated row-wise.
    """
    score_rows = []
    IC_ = pd.DataFrame()
    for factor in all_data.columns:
        factor_return = utils.get_clean_factor_and_forward_returns(
            all_data[factor], price, max_loss=0.99)
        ic = performance.factor_information_coefficient(factor_return)

        # Positional selection of the second period. The mean and std are
        # taken from this same series, because `ic.mean()[1]` is a label
        # lookup when the columns are integers and would then pick a
        # different period than `iloc[:, 1]`.
        period_ic = ic.iloc[:, 1]
        IC_ = pd.concat([IC_, period_ic])

        ic_mean = period_ic.mean()
        score_rows.append([
            ic_mean,
            len(period_ic[period_ic > 0.02]) / len(period_ic),
            performance.factor_returns(factor_return).iloc[:, 1].mean(),
            ic_mean / period_ic.std(),
        ])

    # Build the frame in one go; DataFrame.append was removed in pandas 2.0.
    IC_score = pd.DataFrame(score_rows)
    # NOTE(review): the nested list yields one-level MultiIndex columns; kept
    # as-is so the returned shape does not change - confirm whether a flat
    # list was intended.
    IC_score.columns = [['IC_mean', 'perc_above_0.02', 'average_return', 'IR']]
    IC_score['factor'] = all_data.columns

    return IC_score, IC_
Example #8
0
    def get_factors_ic_df(self,
                          factors_dict,
                          pool,
                          start,
                          end,
                          periods=(1, 5, 10),
                          quantiles=None,
                          bins=None,
                          price=None):
        """
        Build, for each holding period, a table of IC series for several factors.

        :param factors_dict: dict of factors, e.g.
                             {"factor_name_1": factor_1, "factor_name_2": factor_2}.
                             Each value is a MultiIndex Series indexed by date (level 0)
                             and asset (level 1) holding the factor values.
        :param pool: stock universe, e.g. ["000001.XSHE", "600300.XSHG", ...].
                     Either a plain list or an object with ``tolist()`` is accepted.
                     Only used when ``price`` is None.
        :param start: start time (datetime). Only used when ``price`` is None.
        :param end: end time (datetime). Only used when ``price`` is None.
        :param periods: holding periods (tuple of int).
        :param quantiles: number of quantiles used to split the pool by factor value (int).
        :param bins: forwarded to alphalens' get_clean_factor_and_forward_returns.
        :param price (optional): price time series for every stock in ``pool`` (pd.DataFrame),
                     indexed by datetime with one column per stock code. When None, daily
                     candles are fetched through fxdayu_data and the "close" field is used.
        :return: ic_df_dict, a dict keyed by period (int) whose values are IC tables (ic_df),
                 e.g. {1: ic_df_1, 5: ic_df_5, 10: ic_df_10}.
                 Each ic_df is a pd.DataFrame indexed by datetime with one column per
                 factor name from ``factors_dict``, for example:

                               BP        CFP       EP        ILLIQUIDITY  REVS20    SRMI      VOL20
                 date
                 2016-06-24    0.165260  0.002198  0.085632  -0.078074    0.173832  0.214377   0.068445
                 2016-06-27    0.165537  0.003583  0.063299  -0.048674    0.180890  0.202724   0.081748
                 2016-06-28    0.135215  0.010403  0.059038  -0.034879    0.111691  0.122554   0.042489
                 2016-06-29    0.068774  0.019848  0.058476  -0.049971    0.042805  0.053339   0.079592
                 2016-06-30    0.039431  0.012271  0.037432  -0.027272    0.010902  0.077293  -0.050667
        """

        from fxdayu_data import DataAPI
        import datetime
        import numpy as np
        from alphalens import utils, performance

        def get_price_data(pool, start, end, max_window=10):
            # Pad the requested range by max_window calendar days on both sides.
            # NOTE(review): the padding is in calendar days while periods are
            # presumably trading periods - confirm the window is wide enough.
            data = DataAPI.candle(tuple(pool), "D",
                                  start=start - datetime.timedelta(days=max_window),
                                  end=end + datetime.timedelta(days=max_window))
            # Zero values are replaced by NaN.
            data = data.replace(to_replace=0, value=np.NaN)
            return data

        if price is None:
            # The documented type of `pool` is a list, which has no tolist();
            # accept both plain sequences and array-like objects.
            codes = pool.tolist() if hasattr(pool, "tolist") else list(pool)
            price_data = get_price_data(codes, start, end, max_window=max(periods))
            price = price_data.minor_xs("close")

        ic_dict = {}
        for factor_name, factor_value in factors_dict.items():
            # Forward (holding) returns per stock for this factor.
            stocks_holding_return = utils.get_clean_factor_and_forward_returns(
                factor_value, price, quantiles=quantiles, bins=bins, periods=periods)
            ic_dict[factor_name] = performance.factor_information_coefficient(
                stocks_holding_return)

        # Union of the date levels of every factor; used as the index of each
        # resulting IC table.
        times = sorted(pd.concat(
            [pd.Series(factor_value.index.levels[0])
             for factor_value in factors_dict.values()]).unique())

        ic_df_dict = {}
        for period in periods:
            ic_table = [ic[period].to_frame(name=factor_name)
                        for factor_name, ic in ic_dict.items()]
            # Rows with any missing factor IC are dropped first, then the
            # table is aligned to the full date list.
            ic_df_dict[period] = pd.concat(ic_table, axis=1).dropna().reindex(times)

        return ic_df_dict
Example #9
0
# #### 1. Compute the Spearman-rank-correlation-based IC between factor values and forward returns:
# Computes the Spearman Rank Correlation based Information Coefficient (IC)
# between factor values and N period forward returns for each period in
# the factor index;
#
# factor_data: the DataFrame produced earlier by utils.get_clean_factor_and_forward_returns
#
# by_group: if True, the IC is computed separately for every group;
#
# group_adjust: whether forward returns are adjusted before the IC is computed
#
# Returns: Spearman Rank correlation between factor and provided forward returns.

IC = performance.factor_information_coefficient(facs_data_analysis,
                                                group_adjust=False,
                                                by_group=True)
IC.head()

# #### 2. Get the mean information coefficient of specified groups.
# #### I.e. the mean IC over some time span or for some grouping.
# NOTE(review): `magic()` is the notebook-export spelling; newer IPython
# presumably prefers `run_line_magic('pinfo', ...)` - confirm before upgrading.
get_ipython().magic('pinfo performance.mean_information_coefficient')

# factor_data: the DataFrame produced earlier by utils.get_clean_factor_and_forward_returns
#
# by_group: if True, the mean IC is computed for each group
#
# group_adjust: whether forward returns are adjusted before the IC is computed
#
# by_time: the time rule to aggregate by (1q = one quarter, 1w = one week)
Example #10
0
"""
import pandas as pd
import numpy as np
import scipy.stats as st
from alphalens import tears, performance, plotting, utils

df = pd.DataFrame([[1, 2], [4, 5]], columns=["A", "B"])

# Compute the Spearman rank correlation coefficient (Rank IC); its value lies in [-1, 1].
print(st.spearmanr(df["A"], df["B"]))

# The string below reads: "Use alphalens for simpler factor analysis".
"""使用alphalens更简易的做因子分析"""
# Feed the factor table and the close-price table in; forward returns are derived and
# merged with the factor table into one combined factor-data table.
# NOTE(review): "factor" and "price" are string placeholders here - presumably the real
# factor and price objects must be substituted before this runs.
factor_data = utils.get_clean_factor_and_forward_returns("factor", "price")
# Compute the factor IC.
IC = performance.factor_information_coefficient(factor_data)
# IC time series with moving average: shows whether the factor is positive or negative over time.
plotting.plot_ic_ts(IC)
# IC distribution histogram, with the IC mean and standard deviation.
plotting.plot_ic_hist(IC)
# Heatmap of the mean IC.
mean_monthly_ic = performance.mean_information_coefficient(factor_data, by_time="1m")
plotting.plot_monthly_ic_heatmap(mean_monthly_ic)
# Full IC analysis tear sheet.
tears.create_information_tear_sheet(factor_data)

# Returns analysis.
tears.create_returns_tear_sheet(factor_data)
# Per-period factor returns; the mean of the first column is taken.
performance.factor_returns(factor_data).iloc[:, 0].mean()