def __init__(self, input_position, portfolio_returns, *, benchmark_weight='default'):
    """Set up a Barra-factor based performance attribution.

    Args:
        input_position: object exposing a ``holding_matrix`` attribute; a new
            ``position`` is built from it and stored as ``self.pa_position``.
        portfolio_returns: stored unchanged as ``self.port_returns``.
        benchmark_weight: the string ``'default'`` (no benchmark), or a
            non-string benchmark weight table.  When a table is passed, the
            attribution is run on the active holdings (portfolio minus the
            re-normalized benchmark), so ``portfolio_returns`` should be the
            matching active return.
    """
    self.pa_position = position(input_position.holding_matrix)

    if not isinstance(benchmark_weight, str):
        # Benchmark weights sometimes do not sum to exactly 1, so every row
        # is re-normalized; all-zero rows are returned as-is to avoid 0/0.
        # The reindex restricts the dates to those of the holding matrix.
        aligned_weight = benchmark_weight.reindex(self.pa_position.holding_matrix.index)
        new_benchmark_weight = aligned_weight.apply(
            lambda row: row if (row == 0).all() else row.div(row.sum()), axis=1)
        self.pa_position.holding_matrix = input_position.holding_matrix.sub(
            new_benchmark_weight, fill_value=0)
        # Tell the user that the attribution now targets the active part.
        print('Note that with benchmark_weight being passed, the performance attribution will be base on the '
              'active part of the portfolio against the benchmark. Please make sure that the portfolio returns '
              'you passed to the pa is the corresponding active return! \n')
    elif benchmark_weight == 'default':
        self.pa_position.holding_matrix = input_position.holding_matrix
    # Any other string leaves the holding matrix as built by position() above.

    self.port_returns = portfolio_returns

    # Result containers.  The empty Series get an explicit dtype because the
    # default dtype of an empty Series is deprecated and version-dependent.
    self.pa_returns = pd.DataFrame()
    self.port_expo = pd.DataFrame()
    self.port_pa_returns = pd.DataFrame()
    self.style_factor_returns = pd.Series(dtype='float64')
    self.industry_factor_returns = pd.Series(dtype='float64')
    self.country_factor_return = pd.Series(dtype='float64')
    self.residual_returns = pd.Series(dtype='float64')

    # The attribution is based on the Barra factor base.
    self.bb = barra_base()
    self.discarded_stocks_num = pd.DataFrame()
    self.discarded_stocks_wgt = pd.DataFrame()
def sf_test_multiple_pools(factor=None, sf_obj=single_factor_strategy(), *, direction='+', bb_obj=None,
                           discard_factor=(), folder_names=None, holding_freq='w', benchmarks=None,
                           stock_pools=('all', 'hs300', 'zz500', 'zz800'), bkt_start=None, bkt_end=None,
                           select_method=0, do_bb_pure_factor=False, do_pa=False, do_active_pa=False,
                           do_data_description=False, do_factor_corr_test=False, loc=-1):
    """Run ``sf_obj.single_factor_test`` once for every stock pool.

    Args:
        factor, loc, direction, discard_factor, holding_freq, bkt_start,
        bkt_end, select_method, do_*: forwarded unchanged to
            ``single_factor_test`` (``do_bb_pure_factor`` is forwarded as
            ``do_base_pure_factor``).
        sf_obj: strategy object whose ``single_factor_test`` is called.
            NOTE(review): the default instance is created once, when the
            function is defined, and is therefore shared by every call that
            omits ``sf_obj`` - confirm this is intended.
        bb_obj: factor base object; a fresh ``barra_base()`` is built when
            omitted.  A deep copy is handed to every pool.
        folder_names: optional sequence with one folder name per stock pool.
            When omitted, ``folder_name=None`` is forwarded for every pool.
        benchmarks: optional sequence with one benchmark per stock pool.
            When omitted, no ``benchmark`` argument is forwarded, so
            ``single_factor_test`` falls back to its own default.
        stock_pools: iterable of stock pool names to loop over.

    Raises:
        ValueError: if ``folder_names`` or ``benchmarks`` is given but has
            fewer entries than ``stock_pools``.
    """
    # Print the name of the strategy under test.
    print('Name Of Strategy Under Test: {0}\n'.format(sf_obj.__class__.__name__))

    # Validate the per-pool sequences before any expensive work starts;
    # previously a short or missing sequence only failed inside the loop.
    stock_pools = list(stock_pools)
    for arg_name, per_pool in (('folder_names', folder_names), ('benchmarks', benchmarks)):
        if per_pool is not None and len(per_pool) < len(stock_pools):
            raise ValueError('{0} has {1} entries but there are {2} stock pools'.format(
                arg_name, len(per_pool), len(stock_pools)))

    cp_adj = data.read_data('ClosePrice_adj')
    temp_position = position(cp_adj)
    # The backtest object is built once and copied for every pool.
    bkt_obj = backtest(temp_position, bkt_start=bkt_start, bkt_end=bkt_end, buy_cost=0.0015,
                       sell_cost=0.0015, bkt_stock_data=['ClosePrice_adj', 'ClosePrice_adj'])

    # Build the factor base once instead of once per loop iteration.
    if bb_obj is None:
        bb_obj = barra_base()
    # An externally supplied base should cover the 'all' pool, otherwise data
    # may already have been dropped.
    # NOTE(review): other code in this file reads ``base_data`` on barra_base
    # objects; confirm that ``bb_data`` still exists.
    elif bb_obj.bb_data.stock_pool != 'all':
        print('The stockpool of the barra_base obj from outside is NOT "all", be aware of possible '
              'data loss due to this situation!\n')

    for cursor, stock_pool in enumerate(stock_pools):
        # Only forward a benchmark when the caller supplied one.
        per_pool_kwargs = {}
        if benchmarks is not None:
            per_pool_kwargs['benchmark'] = benchmarks[cursor]
        folder_name = None if folder_names is None else folder_names[cursor]

        # The factor base is deep-copied because the performance attribution
        # drops data depending on the stock pool, so the shared object must
        # not be passed by reference.  The backtest object is copied as well,
        # although that is not strictly necessary.
        sf_obj.single_factor_test(
            factor=factor, loc=loc, direction=direction,
            bkt_obj=copy.deepcopy(bkt_obj), base_obj=copy.deepcopy(bb_obj),
            discard_factor=discard_factor, folder_name=folder_name,
            bkt_start=bkt_start, bkt_end=bkt_end, holding_freq=holding_freq,
            stock_pool=stock_pool, select_method=select_method,
            do_base_pure_factor=do_bb_pure_factor, do_pa=do_pa, do_active_pa=do_active_pa,
            do_data_description=do_data_description, do_factor_corr_test=do_factor_corr_test,
            **per_pool_kwargs)
def prepare_base(self):
    """Load the stored factor exposures and factor returns for every pool.

    For each of the pools ``'all'``, ``'sz50'``, ``'hs300'`` and ``'zz500'``
    a ``barra_base`` is created, its factor exposure is read from the HDF
    file ``bb_factor_expo_<pool>`` (key ``'123'``), its factor return is read
    through ``data.read_data``, and the tradability / stock-pool handling of
    its ``base_data`` is run.  The resulting objects are stored in
    ``self.bases``, keyed by pool name.

    NOTE(review): ``update_base_data`` writes exposures to
    ``'bb_factorexpo' + base.filename_appendix`` while this method reads
    ``'bb_factor_expo_<pool>'`` - confirm both refer to the same files.
    """
    bases = {}
    for pool in ('all', 'sz50', 'hs300', 'zz500'):
        base = barra_base(stock_pool=pool)
        base.base_data.factor_expo = pd.read_hdf('bb_factor_expo_' + pool, '123')
        base.base_factor_return = data.read_data(['bb_factor_return_' + pool]).iloc[0]
        base.base_data.generate_if_tradable()
        base.base_data.handle_stock_pool()
        bases[pool] = base
    # Assigned only after every pool loaded successfully.
    self.bases = bases
def update_base_data(self, end_date=None):
    """Refresh the database data and the stored factor base of every pool.

    Args:
        end_date: last date to update to.  Defaults to today's date (at
            midnight), resolved at call time; the previous default was
            computed once at definition time and went stale in a
            long-running process.
    """
    if end_date is None:
        end_date = pd.Timestamp(datetime.now().date())

    # Update the database data first; holdings are not updated here.  The
    # call resolves to the implementation that follows ``pa_report_db`` in
    # the MRO of ``self.db``.
    super(pa_report_db, self.db).update_data_from_db(end_date=end_date)

    # Refresh the factor base of every stock pool.
    for pool in ['all', 'hs300', 'zz500', 'sz50']:
        base = barra_base(stock_pool=pool)
        base.update_factor_base_data()
        # Store the factor exposures.
        base.base_data.factor_expo.to_hdf('bb_factorexpo' + base.filename_appendix, '123')
        # Compute the factor returns by regression and store them.
        base.get_base_factor_return(if_save=True)
    print('base data has been updated!\n')
# Builds a barra_base for the requested stock pool.
# NOTE(review): as visible here the function only binds the local ``base``
# and implicitly returns None; the body presumably continues outside this
# excerpt - confirm before relying on it.
def get_expo_return_data(self, stock_pool): base = barra_base(stock_pool=stock_pool)
def sf_test_multiple_pools(factor='default', *, direction='+', bb_obj='Empty', discard_factor=None,
                           holding_freq='m', stock_pools=None, bkt_start='default', bkt_end='default',
                           select_method=0, do_bb_pure_factor=False, do_active_bb_pure_factor=False,
                           do_pa=False, do_active_pa=False, do_data_description=False):
    """Run ``analyst_coverage().single_factor_test`` once per stock pool.

    Args:
        factor: a string is forwarded as-is and handled by
            ``single_factor_test``; anything else goes through the data
            loading branch below.
        bb_obj: factor base object, or the string ``'Empty'`` to build a
            fresh ``barra_base`` and load its style factors.
        discard_factor: forwarded to ``single_factor_test``; defaults to a
            new empty list on every call.
        stock_pools: pool names to loop over; defaults to
            ``['all', 'hs300', 'zz500', 'zz800']``.
        direction, holding_freq, bkt_start, bkt_end, select_method, do_*:
            forwarded unchanged to ``single_factor_test``.
    """
    # Fresh defaults per call instead of mutable default arguments.
    if discard_factor is None:
        discard_factor = []
    if stock_pools is None:
        stock_pools = ['all', 'hs300', 'zz500', 'zz800']

    # The factor data is deliberately not stored on a strategy object,
    # because looping over the stock pools would lose data.
    if not isinstance(factor, str):
        # NOTE(review): this branch uses the non-string ``factor`` as a data
        # name for read_data, whereas the original comment says a DataFrame
        # should be passed straight through - confirm the intended behavior.
        factor_data = data.read_data([factor], [factor], shift=True)
        factor = factor_data[factor]
        # The position used to initialise the backtest is indexed like the factor.
        temp_position = position(factor)
    else:
        # For a string, the adjusted close price initialises the backtest.
        cp_adj = data.read_data(['ClosePrice_adj'])
        cp_adj = cp_adj['ClosePrice_adj']
        temp_position = position(cp_adj)

    # The backtest object is built once and copied for every pool.
    bkt_obj = backtest(temp_position, bkt_start=bkt_start, bkt_end=bkt_end,
                       buy_cost=1.5 / 1000, sell_cost=1.5 / 1000)

    # Build the factor base once instead of once per loop iteration.
    if isinstance(bb_obj, str) and bb_obj == 'Empty':
        bb_obj = barra_base()
        bb_obj.just_get_sytle_factor()
    # An externally supplied base should cover the 'all' pool, otherwise data
    # may already have been dropped.
    elif bb_obj.bb_data.stock_pool != 'all':
        print('The stockpool of the barra_base obj from outside is NOT "all", be aware of possible '
              'data loss due to this situation!\n')

    # Loop-invariant import, kept function-local as in the original.
    from analyst_coverage import analyst_coverage

    for stock_pool in stock_pools:
        # A new single-factor test object per pool.
        curr_sf = analyst_coverage()
        # The factor base is deep-copied because the performance attribution
        # drops data depending on the stock pool; the backtest object is
        # copied too, although that is not strictly necessary.
        curr_sf.single_factor_test(
            factor=factor, direction=direction,
            bkt_obj=copy.deepcopy(bkt_obj), bb_obj=copy.deepcopy(bb_obj),
            discard_factor=discard_factor, bkt_start=bkt_start, bkt_end=bkt_end,
            holding_freq=holding_freq, stock_pool=stock_pool, select_method=select_method,
            do_bb_pure_factor=do_bb_pure_factor,
            do_active_bb_pure_factor=do_active_bb_pure_factor,
            do_pa=do_pa, do_active_pa=do_active_pa,
            do_data_description=do_data_description)
def sf_test_multiple_pools_parallel(factor=None, *, direction='+', bb_obj=None, discard_factor=(),
                                    folder_name=None, stock_pools=('all', 'hs300', 'zz500', 'zz800'),
                                    bkt_start=None, bkt_end=None, select_method=0,
                                    do_bb_pure_factor=False, do_pa=False, do_factor_corr_test=False,
                                    do_active_pa=False, holding_freq='w', do_data_description=False,
                                    loc=-1):
    """Run the single-factor test of every stock pool in its own process.

    One forked process per entry of ``stock_pools`` runs
    ``intangible_info_earnings().single_factor_test``; the function returns
    after all of them have finished.

    Args:
        bb_obj: factor base object; a fresh ``barra_base()`` is built when
            omitted.  Every process receives a deep copy.
        stock_pools: iterable of pool names, one process each.
        All other arguments are forwarded to ``single_factor_test``
        (``do_bb_pure_factor`` as ``do_base_pure_factor``).

    Raises:
        ValueError: if the ``'fork'`` start method is unavailable on this
            platform.
    """
    import multiprocessing as mp

    cp_adj = data.read_data(['ClosePrice_adj'])
    cp_adj = cp_adj['ClosePrice_adj']
    temp_position = position(cp_adj)
    # The backtest object is built once and copied for every pool.
    bkt_obj = backtest(temp_position, bkt_start=bkt_start, bkt_end=bkt_end,
                       buy_cost=1.5 / 1000, sell_cost=1.5 / 1000)

    # Build the factor base once instead of once per task.
    if bb_obj is None:
        bb_obj = barra_base()
    # An externally supplied base should cover the 'all' pool, otherwise data
    # may already have been dropped.
    elif bb_obj.bb_data.stock_pool != 'all':
        print('The stockpool of the barra_base obj from outside is NOT "all", be aware of possible '
              'data loss due to this situation!\n')

    def single_task(stock_pool):
        """Run the single-factor test for one stock pool."""
        from intangible_info import intangible_info_earnings
        curr_sf = intangible_info_earnings()
        # The factor base is deep-copied because the performance attribution
        # drops data depending on the stock pool.  The backtest object is
        # copied as well to rule out interference between processes.
        curr_sf.single_factor_test(
            stock_pool=stock_pool, factor=factor, loc=loc, direction=direction,
            folder_name=folder_name, bkt_obj=copy.deepcopy(bkt_obj),
            base_obj=copy.deepcopy(bb_obj), discard_factor=discard_factor,
            bkt_start=bkt_start, bkt_end=bkt_end, select_method=select_method,
            do_base_pure_factor=do_bb_pure_factor, holding_freq=holding_freq,
            do_pa=do_pa, do_active_pa=do_active_pa,
            do_data_description=do_data_description,
            do_factor_corr_test=do_factor_corr_test)

    # A private 'fork' context replaces set_start_method(), which raises
    # RuntimeError when called a second time and changes the start method for
    # the whole process.  Fork is needed because the nested task function
    # cannot be pickled for other start methods.
    fork_ctx = mp.get_context('fork')
    workers = []
    for stock_pool in stock_pools:
        worker = fork_ctx.Process(target=single_task, args=(stock_pool,))
        worker.start()
        workers.append(worker)

    # Wait for every pool so that no child process is left un-reaped.
    for worker in workers:
        worker.join()
# ---------------------------------------------------------------------------
# Disabled alternative: build the inputs from the raw vendor tables.
# Kept for reference only; the live code below reads stored HDF files.
# ---------------------------------------------------------------------------
# # stock returns
# stock_return = universe.pivot_table(index='Barrid', values='Signal').div(100)
# # stock_return = stock_return[0:50]
# # specific (residual) risk
# spec_risk = AssetData.pivot_table(index='!Barrid', values='SpecRisk%').reindex(stock_return.index)
# spec_var = (spec_risk/100)**2
# # factor exposures
# factor_expo = AssetExpo.pivot_table(index='!Barrid', columns='Factor', values='Exposure').\
#     reindex(stock_return.index).fillna(0.0).T
# # factor covariance matrix
# factor_cov1 = Covariance.pivot_table(index='!Factor1', columns='Factor2', values='VarCovar')
# factor_cov2 = Covariance.pivot_table(index='Factor2', columns='!Factor1', values='VarCovar')
# factor_cov = factor_cov1.where(factor_cov1.notnull(), factor_cov2).div(10000)

# Factor base object that receives the stored hs300 exposures below.
bb = barra_base()
# Disabled: rebuild the hs300 factor base and store its exposures.
# bb.base_data.stock_pool = 'hs300'
# bb.construct_factor_base()
# bb.base_data.factor_expo.to_hdf('bb_factorexpo_hs300', '123')

# Stored hs300 inputs; every HDF file is read with the key '123'.
factor_cov = pd.read_hdf('bb_factor_eigencovmat_hs300_sf3', key='123')
bb.base_data.factor_expo = pd.read_hdf('bb_factorexpo_hs300', key='123')
spec_vol = pd.read_hdf('bb_factor_vraspecvol_hs300', key='123')
# The variance is the square of the stored volatility.
spec_var = spec_vol.pow(2)
stock_return = pd.read_hdf('stock_alpha_hs300', key='123')
# hs300 weights, looked up by name in the object returned by read_data.
bench = data.read_data(['Weight_hs300'])['Weight_hs300']

# Disabled alternative inputs.
# factor_cov = pd.read_hdf('barra_fore_cov_mat', '123')
# bb.base_data.factor_expo = pd.read_hdf('barra_factor_expo_new', '123')