def __init__(self, input_position, portfolio_returns, *, benchmark_weight='default'):
        """Set up the state needed for a performance attribution run.

        Args:
            input_position: object exposing a ``holding_matrix`` attribute.
            portfolio_returns: stored unchanged as ``self.port_returns``.
            benchmark_weight: the string ``'default'`` to attribute the
                holdings as they are, or a non-string object (benchmark
                weights) whose re-normalised rows are subtracted from the
                holdings so that the active part is attributed instead.
        """
        self.pa_position = position(input_position.holding_matrix)

        if type(benchmark_weight) is not str:
            # Benchmark weights were supplied: attribute the active holdings.

            def _rescale_row(row):
                # Benchmark rows sometimes do not sum exactly to 1, so each
                # row is divided by its own sum; all-zero rows are kept as is.
                if (row == 0).all():
                    return row
                return row.div(row.sum())

            # Align the benchmark to the time index of the holdings first.
            aligned_weight = benchmark_weight.reindex(
                self.pa_position.holding_matrix.index)
            new_benchmark_weight = aligned_weight.apply(_rescale_row, axis=1)
            self.pa_position.holding_matrix = input_position.holding_matrix.sub(
                new_benchmark_weight, fill_value=0)
            # Tell the user that the attribution now covers the active part.
            print('Note that with benchmark_weight being passed, the performance attribution will be base on the '
                  'active part of the portfolio against the benchmark. Please make sure that the portfolio returns '
                  'you passed to the pa is the corresponding active return! \n')
        elif benchmark_weight == 'default':
            self.pa_position.holding_matrix = input_position.holding_matrix

        # The portfolio returns handed in by the caller are kept as given.
        self.port_returns = portfolio_returns

        # Empty placeholders for the attribution results.
        for frame_attr in ('pa_returns', 'port_expo', 'port_pa_returns'):
            setattr(self, frame_attr, pd.DataFrame())
        for series_attr in ('style_factor_returns', 'industry_factor_returns',
                            'country_factor_return', 'residual_returns'):
            setattr(self, series_attr, pd.Series())

        # The attribution is carried out against the barra factor base.
        self.bb = barra_base()

        for frame_attr in ('discarded_stocks_num', 'discarded_stocks_wgt'):
            setattr(self, frame_attr, pd.DataFrame())
def sf_test_multiple_pools(factor=None, sf_obj=None, *, direction='+', bb_obj=None,
                           discard_factor=(), folder_names=None, holding_freq='w', benchmarks=None,
                           stock_pools=('all', 'hs300', 'zz500', 'zz800'), bkt_start=None, bkt_end=None,
                           select_method=0, do_bb_pure_factor=False, do_pa=False, do_active_pa=False,
                           do_data_description=False, do_factor_corr_test=False, loc=-1):
    """Run one single-factor test per stock pool, sequentially.

    Args:
        factor, loc, direction, discard_factor, holding_freq, select_method,
        bkt_start, bkt_end, do_*: forwarded to ``sf_obj.single_factor_test``.
        sf_obj: strategy object whose ``single_factor_test`` is called. When
            None, a new ``single_factor_strategy()`` is created for this call
            (the old default instance was built once at definition time and
            shared by every call).
        bb_obj: barra base object; a new ``barra_base()`` is created when None.
            A deep copy is handed to every test.
        folder_names: per-pool folder names, indexed like ``stock_pools``.
            When None, ``folder_name`` is not passed and the callee's own
            default applies (previously this raised TypeError).
        benchmarks: per-pool benchmarks, indexed like ``stock_pools``. When
            None, ``benchmark`` is not passed (previously this raised
            TypeError).
        stock_pools: stock pools to iterate over.
    """
    if sf_obj is None:
        sf_obj = single_factor_strategy()

    # Print the name of the strategy under test.
    print('Name Of Strategy Under Test: {0}\n'.format(sf_obj.__class__.__name__))

    cp_adj = data.read_data('ClosePrice_adj')
    temp_position = position(cp_adj)
    # The backtest object has to be initialised first.
    bkt_obj = backtest(temp_position, bkt_start=bkt_start, bkt_end=bkt_end, buy_cost=0.0015,
                       sell_cost=0.0015, bkt_stock_data=['ClosePrice_adj', 'ClosePrice_adj'])
    # Build the base object once, otherwise every iteration would build a new one.
    if bb_obj is None:
        bb_obj = barra_base()
    # A base object passed in from outside should use the 'all' stock pool;
    # warn otherwise, since data may already have been dropped.
    # NOTE(review): the object is forwarded as `base_obj`, yet the attribute
    # read here is `bb_data` - confirm the attribute name is still current.
    elif bb_obj.bb_data.stock_pool != 'all':
        print('The stockpool of the barra_base obj from outside is NOT "all", be aware of possible '
              'data loss due to this situation!\n')

    # Loop over the stock pools.
    for cursor, stock_pool in enumerate(stock_pools):
        # Only forward the per-pool values that were actually supplied.
        per_pool_kwargs = {}
        if folder_names is not None:
            per_pool_kwargs['folder_name'] = folder_names[cursor]
        if benchmarks is not None:
            per_pool_kwargs['benchmark'] = benchmarks[cursor]

        # The base object is deep-copied because the performance attribution
        # drops data depending on the stock pool, so it cannot be shared by
        # reference. The backtest object is copied as well, although that is
        # not strictly necessary here.
        sf_obj.single_factor_test(factor=factor, loc=loc, direction=direction, bkt_obj=copy.deepcopy(bkt_obj),
            base_obj=copy.deepcopy(bb_obj), discard_factor=discard_factor,
            bkt_start=bkt_start, bkt_end=bkt_end,
            holding_freq=holding_freq, stock_pool=stock_pool,
            select_method=select_method, do_base_pure_factor=do_bb_pure_factor,
            do_pa=do_pa, do_active_pa=do_active_pa, do_data_description=do_data_description,
            do_factor_corr_test=do_factor_corr_test, **per_pool_kwargs)
    def prepare_base(self):
        """Load one barra base per stock pool and store them in ``self.bases``.

        For each of the pools 'all', 'sz50', 'hs300' and 'zz500' a
        ``barra_base`` is created, its factor exposure is read from the HDF
        file ``bb_factor_expo_<pool>`` (key '123'), its factor return is read
        through ``data.read_data``, and its base data is prepared. The result
        is a dict keyed by pool name.
        """

        def _load_base(pool):
            # Build and populate the base of a single stock pool.
            loaded = barra_base(stock_pool=pool)
            loaded.base_data.factor_expo = pd.read_hdf(
                'bb_factor_expo_' + pool, '123')
            factor_return = data.read_data(['bb_factor_return_' + pool])
            loaded.base_factor_return = factor_return.iloc[0]
            loaded.base_data.generate_if_tradable()
            loaded.base_data.handle_stock_pool()
            return loaded

        pool_names = ('all', 'sz50', 'hs300', 'zz500')
        self.bases = {pool: _load_base(pool) for pool in pool_names}
    def update_base_data(self, end_date=None):
        """Update the database data and rebuild the stored base data.

        Args:
            end_date: last date to update to. When None, today's date
                (midnight) is used, evaluated at call time. The previous
                default was evaluated once when the function was defined, so
                a long-running process kept using a stale date.
        """
        if end_date is None:
            end_date = pd.Timestamp(datetime.now().date().strftime('%Y-%m-%d'))

        # Update the database data first; holding data is not updated here.
        super(pa_report_db, self.db).update_data_from_db(end_date=end_date)
        # Update the data of every base.
        for pool in ['all', 'hs300', 'zz500', 'sz50']:
            base = barra_base(stock_pool=pool)
            base.update_factor_base_data()
            # Store the factor exposure data.
            # NOTE(review): this writes 'bb_factorexpo' + filename_appendix,
            # while prepare_base in this source reads 'bb_factor_expo_<pool>';
            # confirm both refer to the same files.
            base.base_data.factor_expo.to_hdf(
                'bb_factorexpo' + base.filename_appendix, '123')
            # Compute the factor returns by regression and store them.
            base.get_base_factor_return(if_save=True)

        print('base data has been updated!\n')
Exemple #5
0
 def get_expo_return_data(self, stock_pool):
     """Instantiate a ``barra_base`` for the given stock pool.

     The local ``base`` is neither used further nor returned by the code
     shown here.
     """
     base = barra_base(stock_pool=stock_pool)
Exemple #6
0
def sf_test_multiple_pools(factor='default',
                           *,
                           direction='+',
                           bb_obj='Empty',
                           discard_factor=None,
                           holding_freq='m',
                           stock_pools=None,
                           bkt_start='default',
                           bkt_end='default',
                           select_method=0,
                           do_bb_pure_factor=False,
                           do_active_bb_pure_factor=False,
                           do_pa=False,
                           do_active_pa=False,
                           do_data_description=False):
    """Run the ``analyst_coverage`` single-factor test once per stock pool.

    Args:
        factor: a string (handed to ``single_factor_test`` as is) or a
            non-string factor object.
        bb_obj: the string ``'Empty'`` to build a fresh ``barra_base``, or an
            existing barra base object. A deep copy is handed to every test.
        discard_factor: forwarded to ``single_factor_test``. Defaults to a
            new empty list per call (the old shared ``[]`` default could leak
            state between calls if downstream code mutated it).
        stock_pools: pools to iterate over. Defaults to a new list
            ``['all', 'hs300', 'zz500', 'zz800']`` per call.
        direction, holding_freq, bkt_start, bkt_end, select_method, do_*:
            forwarded to ``single_factor_test``.
    """
    from analyst_coverage import analyst_coverage

    # Fresh containers per call instead of shared mutable defaults.
    if discard_factor is None:
        discard_factor = []
    if stock_pools is None:
        stock_pools = ['all', 'hs300', 'zz500', 'zz800']

    # A string is handed to the single factor test to be resolved there; a
    # non-string factor is passed on directly. The factor data is not stored
    # in self.strategy_data.factor, because looping over the stock pools
    # would lose data.
    if not isinstance(factor, str):
        # NOTE(review): this branch runs for non-str input, yet `factor` is
        # passed to data.read_data as if it were a data name - confirm this
        # is intended.
        factor_data = data.read_data([factor], [factor], shift=True)
        factor = factor_data[factor]
        # Initialise a position object, indexed like the factor, that is used
        # to initialise the backtest object.
        temp_position = position(factor)
    else:
        # For a string, read ClosePrice_adj to initialise the backtest object.
        cp_adj = data.read_data(['ClosePrice_adj'])
        cp_adj = cp_adj['ClosePrice_adj']
        temp_position = position(cp_adj)

    # The backtest object has to be initialised first.
    bkt_obj = backtest(temp_position,
                       bkt_start=bkt_start,
                       bkt_end=bkt_end,
                       buy_cost=1.5 / 1000,
                       sell_cost=1.5 / 1000)
    # Build the base object once, otherwise every iteration would build a new one.
    if isinstance(bb_obj, str) and bb_obj == 'Empty':
        bb_obj = barra_base()
        bb_obj.just_get_sytle_factor()
    # A base object passed in from outside should use the 'all' stock pool;
    # warn otherwise, since data may already have been dropped.
    elif bb_obj.bb_data.stock_pool != 'all':
        print(
            'The stockpool of the barra_base obj from outside is NOT "all", be aware of possible '
            'data loss due to this situation!\n')

    # Loop over the stock pools.
    for stock_pool in stock_pools:
        # A new single-factor test object per pool.
        curr_sf = analyst_coverage()

        # The base object is deep-copied because the performance attribution
        # drops data depending on the stock pool, so it cannot be shared by
        # reference. The backtest object is copied as well, although that is
        # not strictly necessary here.
        curr_sf.single_factor_test(
            factor=factor,
            direction=direction,
            bkt_obj=copy.deepcopy(bkt_obj),
            bb_obj=copy.deepcopy(bb_obj),
            discard_factor=discard_factor,
            bkt_start=bkt_start,
            bkt_end=bkt_end,
            holding_freq=holding_freq,
            stock_pool=stock_pool,
            select_method=select_method,
            do_bb_pure_factor=do_bb_pure_factor,
            do_active_bb_pure_factor=do_active_bb_pure_factor,
            do_pa=do_pa,
            do_active_pa=do_active_pa,
            do_data_description=do_data_description)
def sf_test_multiple_pools_parallel(factor=None,
                                    *,
                                    direction='+',
                                    bb_obj=None,
                                    discard_factor=(),
                                    folder_name=None,
                                    stock_pools=('all', 'hs300', 'zz500',
                                                 'zz800'),
                                    bkt_start=None,
                                    bkt_end=None,
                                    select_method=0,
                                    do_bb_pure_factor=False,
                                    do_pa=False,
                                    do_factor_corr_test=False,
                                    do_active_pa=False,
                                    holding_freq='w',
                                    do_data_description=False,
                                    loc=-1):
    """Start one forked process per stock pool, each running a single-factor test.

    Every process creates an ``intangible_info_earnings`` object and calls its
    ``single_factor_test`` with deep copies of the shared backtest and base
    objects. The processes are started but not joined, so this function
    returns without waiting for them.

    Args:
        bb_obj: barra base object; a new ``barra_base()`` is created when None.
        stock_pools: pools to run, one process each.
        All other arguments are forwarded to ``single_factor_test``.
    """
    import multiprocessing as mp

    cp_adj = data.read_data(['ClosePrice_adj'])
    cp_adj = cp_adj['ClosePrice_adj']
    temp_position = position(cp_adj)

    # The backtest object has to be initialised first.
    bkt_obj = backtest(temp_position,
                       bkt_start=bkt_start,
                       bkt_end=bkt_end,
                       buy_cost=1.5 / 1000,
                       sell_cost=1.5 / 1000)
    # Build the base object once, otherwise every task would build a new one.
    if bb_obj is None:
        bb_obj = barra_base()
    # A base object passed in from outside should use the 'all' stock pool;
    # warn otherwise, since data may already have been dropped.
    # NOTE(review): the object is forwarded as `base_obj`, yet the attribute
    # read here is `bb_data` - confirm the attribute name is still current.
    elif bb_obj.bb_data.stock_pool != 'all':
        print(
            'The stockpool of the barra_base obj from outside is NOT "all", be aware of possible '
            'data loss due to this situation!\n')

    def single_task(stock_pool):
        # curr_sf = single_factor_strategy()
        from intangible_info import intangible_info_earnings
        curr_sf = intangible_info_earnings()

        # The base object is deep-copied because the performance attribution
        # drops data depending on the stock pool, so it cannot be shared by
        # reference. The backtest object is copied too: it is not modified,
        # but several processes working on it at once could cause trouble.
        curr_sf.single_factor_test(stock_pool=stock_pool,
                                   factor=factor,
                                   loc=loc,
                                   direction=direction,
                                   folder_name=folder_name,
                                   bkt_obj=copy.deepcopy(bkt_obj),
                                   base_obj=copy.deepcopy(bb_obj),
                                   discard_factor=discard_factor,
                                   bkt_start=bkt_start,
                                   bkt_end=bkt_end,
                                   select_method=select_method,
                                   do_base_pure_factor=do_bb_pure_factor,
                                   holding_freq=holding_freq,
                                   do_pa=do_pa,
                                   do_active_pa=do_active_pa,
                                   do_data_description=do_data_description,
                                   do_factor_corr_test=do_factor_corr_test)

    # A local 'fork' context is used instead of mp.set_start_method('fork'):
    # the global start method can only be set once per program, so a second
    # call of this function used to raise RuntimeError.
    fork_ctx = mp.get_context('fork')
    # Loop over the stock pools.
    for stock_pool in stock_pools:
        p = fork_ctx.Process(target=single_task, args=(stock_pool, ))
        p.start()
Exemple #8
0
    #
    # # 取股票的收益
    # stock_return = universe.pivot_table(index='Barrid', values='Signal').div(100)
    # # stock_return = stock_return[0:50]
    # # 取残余收益
    # spec_risk = AssetData.pivot_table(index='!Barrid', values='SpecRisk%').reindex(stock_return.index)
    # spec_var = (spec_risk/100)**2
    # # 取因子暴露
    # factor_expo = AssetExpo.pivot_table(index='!Barrid', columns='Factor', values='Exposure').\
    #     reindex(stock_return.index).fillna(0.0).T
    # # 取因子协方差矩阵
    # factor_cov1 = Covariance.pivot_table(index='!Factor1', columns='Factor2', values='VarCovar')
    # factor_cov2 = Covariance.pivot_table(index='Factor2', columns='!Factor1', values='VarCovar')
    # factor_cov = factor_cov1.where(factor_cov1.notnull(), factor_cov2).div(10000)

    bb = barra_base()
    # bb.base_data.stock_pool = 'hs300'
    # bb.construct_factor_base()
    # bb.base_data.factor_expo.to_hdf('bb_factorexpo_hs300', '123')

    factor_cov = pd.read_hdf('bb_factor_eigencovmat_hs300_sf3', '123')
    bb.base_data.factor_expo = pd.read_hdf('bb_factorexpo_hs300', '123')
    spec_vol = pd.read_hdf('bb_factor_vraspecvol_hs300', '123')
    spec_var = spec_vol**2
    stock_return = pd.read_hdf('stock_alpha_hs300', '123')

    bench = data.read_data(['Weight_hs300'])
    bench = bench['Weight_hs300']

    # factor_cov = pd.read_hdf('barra_fore_cov_mat', '123')
    # bb.base_data.factor_expo = pd.read_hdf('barra_factor_expo_new', '123')