def update_holdng_tar_data(self, *, end_date=None):
    """Extend the stored target-holding data with a freshly computed period.

    The previously stored ``tar_holding_vol`` and ``tar_position`` are read,
    the update window is restarted from the last stored day, the new target
    holdings are computed, and the spliced result is written back.

    ``self.is_update`` is set to True for the duration of the call and is
    always reset to False afterwards, even when a step raises, so a failed
    update cannot leave the object stuck in update mode.

    Args:
        end_date: Optional end of the update window. It is only applied when
            it is a ``pd.Timestamp``; any other value leaves ``self.end_date``
            untouched.
    """
    self.is_update = True
    try:
        # Read the previously stored target holdings.
        old_tar_holding_vol = data.read_data('tar_holding_vol', folder_name='')
        old_tar_position = data.read_data('tar_position', folder_name='')

        # The update starts from the last stored day.
        last_day = old_tar_holding_vol.major_axis[-1]
        self.start_date = last_day
        if isinstance(end_date, pd.Timestamp):
            self.end_date = end_date

        # Initialize database engines and get trading days and labels: when
        # this function is called, update_data_from_db() usually was not.
        self.get_trading_days()
        self.get_labels()

        # Compute the target holdings for the update window.
        self.get_tar_holding_vol()
        self.get_tar_position()

        # Re-index the old data onto the stock universe of the new data.
        old_tar_holding_vol = old_tar_holding_vol.reindex(
            minor_axis=self.tar_holding_vol.minor_axis, fill_value=0)
        old_tar_position = old_tar_position.reindex(
            minor_axis=self.tar_position.minor_axis, fill_value=0)

        # Splice old and new data. The old entry for start_date is dropped
        # first (presumably because the new data begins on that day).
        new_tar_holding_vol = pd.concat(
            [old_tar_holding_vol.drop(self.start_date, axis=1).sort_index(),
             self.tar_holding_vol.sort_index()],
            axis=1)
        new_tar_position = pd.concat(
            [old_tar_position.drop(self.start_date, axis=1).sort_index(),
             self.tar_position.sort_index()],
            axis=1)

        # Persist the spliced data.
        data.write_data(new_tar_holding_vol, file_name='tar_holding_vol',
                        folder_name='')
        data.write_data(new_tar_position, file_name='tar_position',
                        folder_name='')
    finally:
        # Always reset the flag, including on failure.
        self.is_update = False
def get_discount_factor(self):
    """Derive a per-stock discount factor from industry-average beta.

    For every date the mean beta of each industry is computed and mapped back
    onto the stocks of that industry (only for dates on or after 2009-04-01).
    The mapped values are averaged over a rolling 252-row window (at least 63
    observations) and multiplied by a constant 0.06. The result is stored in
    ``self.discount_factor``.
    """
    beta = data.read_data(['beta'])['beta']
    indus = data.read_data(['Industry'])['Industry']

    # Industry-average beta per date; the columns are the industries present
    # on the last row of the industry table.
    industry_labels = indus.iloc[-1, :].unique()
    indus_mean_beta = pd.DataFrame(np.nan, index=beta.index,
                                   columns=industry_labels)
    for day in beta.index:
        grouped = beta.ix[day, :].groupby(indus.ix[day, :])
        indus_mean_beta.ix[day, :] = grouped.mean()

    # Assign each stock the average beta of its own industry.
    first_day = pd.Timestamp('2009-04-01')
    stocks_with_indus_mean_beta = beta * np.nan
    for day in beta.index:
        if day >= first_day:
            label_to_beta = indus_mean_beta.ix[day, :].to_dict()
            stocks_with_indus_mean_beta.ix[day, :] = \
                indus.ix[day, :].replace(label_to_beta)

    # Daily betas are used, so average the past 252 trading days to obtain
    # an annual beta.
    annual_beta = stocks_with_indus_mean_beta.rolling(
        252, min_periods=63).apply(lambda x: np.nanmean(x))

    # The risk-free rate is taken as 0 for now. Following the paper, the
    # market excess return is fixed at a constant 6%; whether that is
    # reasonable (versus a historical estimate) is still to be checked.
    self.discount_factor = annual_beta * 0.06
def construct_factor(self):
    """Build the (negated) exponentially weighted momentum factor.

    Log returns of the adjusted close are weighted with
    ``barra_base.construct_expo_weights(126, 504)`` over a rolling 504-row
    window, masked to the investable universe, negated and stored as the
    single item ``'mom'`` of ``self.strategy_data.factor``.
    """
    price_fields = ['ClosePrice_adj', 'OpenPrice_adj', 'vwap_adj']
    price_data = data.read_data(price_fields, list(price_fields), shift=True)

    close = price_data['ClosePrice_adj']
    prev_close = price_data['ClosePrice_adj'].shift(1)
    ret = np.log(close / prev_close).fillna(0)

    exp_w = barra_base.construct_expo_weights(126, 504)
    mom = ret.rolling(504).apply(lambda x: (x * exp_w).sum())

    # Auxiliary factors; they are loaded and filtered here although the
    # orthogonalization step that would consume them is currently disabled.
    bb = data.read_data(['rv', 'liquidity', 'lncap', 'runner_value_36'],
                        shift=True)

    # Filter everything down to the investable universe.
    self.strategy_data.handle_stock_pool()
    mom = mom.where(self.strategy_data.if_tradable.ix['if_inv'], np.nan)
    for item, df in bb.iteritems():
        bb[item] = df.where(self.strategy_data.if_tradable.ix['if_inv'],
                            np.nan)

    self.strategy_data.factor = pd.Panel({'mom': -mom})
def handle_stock_pool(self, *, shift=False):
    """Populate ``if_inpool`` and ``if_inv`` in ``self.if_tradable``.

    ``if_inpool`` marks membership of the configured stock pool; ``if_inv``
    is the element-wise AND of ``if_tradable`` and ``if_inpool``.

    Args:
        shift: Forwarded to ``data.read_data`` when the pool weights have to
            be loaded.

    Raises:
        AssertionError: If ``self.if_tradable`` has no ``'if_tradable'`` item
            yet. The check is an explicit raise (not an ``assert`` statement)
            so that it is still enforced when Python runs with ``-O``.
    """
    if self.stock_pool == 'all':
        # No stock pool configured: everything is in the pool.
        self.if_tradable['if_inpool'] = True
    elif 'Weight_' + self.stock_pool in self.benchmark_price.items:
        # The pool weights are already held with the benchmark data.
        self.if_tradable['if_inpool'] = self.benchmark_price.ix[
            'Weight_' + self.stock_pool] > 0
    else:
        # Otherwise load the weights; the file name is derived from the
        # stock pool name.
        temp_weights = data.read_data(['Weight_' + self.stock_pool],
                                      ['Weight_' + self.stock_pool],
                                      shift=shift)
        if self.benchmark_price.empty:
            self.benchmark_price = temp_weights
        else:
            self.benchmark_price['Weight_' + self.stock_pool] = temp_weights[
                'Weight_' + self.stock_pool]
        self.if_tradable['if_inpool'] = self.benchmark_price.ix[
            'Weight_' + self.stock_pool] > 0

    # if_tradable must have been generated beforehand.
    if 'if_tradable' not in self.if_tradable.items:
        raise AssertionError('Please generate if_tradable first!')

    # A stock is investable only when it is both tradable and in the pool.
    self.if_tradable['if_inv'] = np.logical_and(
        self.if_tradable.ix['if_tradable'], self.if_tradable.ix['if_inpool'])
def get_growth(self):
    """Store the composite ``growth`` factor in ``self.bb_data.factor``.

    When ``growth.csv`` exists and the object is not in update mode the
    factor is loaded through ``data.read_data``. Otherwise the four
    components are built, converted to exposures with
    ``strategy_data.get_cap_wgt_exposure`` (against ``FreeMarketValue``) and
    combined with fixed weights 0.18 / 0.11 / 0.24 / 0.47.
    """
    if os.path.isfile('growth.csv') and not self.is_update:
        stored = data.read_data(['growth'], ['growth'])
        self.bb_data.factor['growth'] = stored.ix['growth']
        return

    # Build the four raw components, then drop non-investable data.
    self.get_g_egrlf()
    self.get_g_egrsf()
    self.get_g_egro()
    self.get_g_sgro()
    self.bb_data.discard_uninv_data()

    component_weights = (('egrlf', 0.18), ('egrsf', 0.11),
                         ('egro', 0.24), ('sgro', 0.47))

    # Exposure of each component.
    for name, _ in component_weights:
        self.bb_data.raw_data[name + '_expo'] = \
            strategy_data.get_cap_wgt_exposure(
                self.bb_data.raw_data.ix[name],
                self.bb_data.stock_price.ix['FreeMarketValue'])

    # Weighted sum, accumulated left to right in component order.
    growth = None
    for name, weight in component_weights:
        term = weight * self.bb_data.raw_data.ix[name + '_expo']
        growth = term if growth is None else growth + term
    self.bb_data.factor['growth'] = growth
def handle_stock_pool(self, *, shift=False):
    """Populate ``if_inpool`` and ``if_inv`` in ``self.if_tradable``.

    Pool membership comes from positive index weights (or is True for every
    stock when the pool is ``'all'``). If ``if_tradable`` has not been
    generated yet it is generated here. ``if_inv`` is the element-wise AND of
    ``if_tradable`` and ``if_inpool``.

    Args:
        shift: Forwarded to ``data.read_data`` and ``generate_if_tradable``.
    """
    pool = self.stock_pool
    if pool == 'all':
        # No stock pool configured.
        self.if_tradable['if_inpool'] = True
    else:
        weight_key = 'Weight_' + pool
        if weight_key in self.benchmark_price.items:
            # Weights already held with the benchmark data: use them directly.
            self.if_tradable['if_inpool'] = \
                self.benchmark_price.ix[weight_key] > 0
        else:
            # Load the weights; the file name is derived from the pool name.
            loaded = data.read_data([weight_key], [weight_key],
                                    shift=shift).fillna(0.0)
            if self.benchmark_price.empty:
                self.benchmark_price = loaded
            else:
                self.benchmark_price[weight_key] = loaded[weight_key]
            # Index weights deviate slightly from summing to 1, so they are
            # normalized row by row.
            normalized = self.benchmark_price[weight_key].apply(
                position.to_percentage_func, axis=1)
            self.benchmark_price[weight_key] = normalized
            # Stocks with a positive index weight are in the pool.
            self.if_tradable['if_inpool'] = \
                self.benchmark_price.ix[weight_key] > 0

    # Generate if_tradable on demand.
    if 'if_tradable' not in self.if_tradable.items:
        self.generate_if_tradable(shift=shift)

    # A stock is investable only when it is both tradable and in the pool.
    tradable = self.if_tradable.ix['if_tradable']
    in_pool = self.if_tradable.ix['if_inpool']
    self.if_tradable['if_inv'] = np.logical_and(tradable, in_pool)
def get_short_reversal(self):
    """Store the short-term reversal factor in ``self.base_data.factor``.

    A cached copy is read when the cache file exists, the object is not in
    update mode and ``self.try_to_read`` is truthy. Otherwise the factor is
    the weighted sum of ``daily_excess_log_return`` over a 21-row window,
    using ``strategy_data.construct_expo_weights(10, 21)``; columns with
    fewer than 5 non-null observations in the window are set to NaN.
    """
    cache_path = (os.path.abspath('.') + '/ResearchData/short_rev'
                  + self.filename_appendix)
    if os.path.isfile(cache_path) and not self.is_update and self.try_to_read:
        self.base_data.factor['short_rev'] = data.read_data(
            ['short_rev' + self.filename_appendix], item_name=['short_rev'])
        return

    # No usable cache: compute. 21 trading days, half-life parameter 10.
    exponential_weights = strategy_data.construct_expo_weights(10, 21)

    def func_rev(df, *, weights):
        # Multiply the weights onto the raw returns and sum them up.
        indexed_weights = pd.Series(weights, index=df.index)
        weighted = strategy_data.multiply_weights(
            df, indexed_weights, multiply_power=1.0)
        summed = weighted.sum(0)
        # Require a minimum number of observations inside the window.
        enough_data = df.notnull().sum(0) >= 5
        return summed.where(enough_data, np.nan)

    reversal = self.base_data.stock_price.ix['daily_excess_log_return'] * np.nan
    n_rows = len(self.complete_base_data.stock_price.
                 ix['daily_excess_log_return'].index)
    # The first full 21-row window ends at position 20.
    for pos in range(20, n_rows):
        window = self.complete_base_data.stock_price.ix[
            'daily_excess_log_return', pos - 20:pos + 1, :]
        reversal.ix[pos, :] = func_rev(window, weights=exponential_weights)
    self.base_data.factor['short_rev'] = reversal
def get_earnings_yeild(self):
    """Store the composite earnings-yield factor as ``factor['ey']``.

    When ``ey.csv`` exists and the object is not in update mode the factor is
    loaded through ``data.read_data``. Otherwise the three components are
    built, converted to exposures with
    ``strategy_data.get_cap_wgt_exposure`` (against ``FreeMarketValue``) and
    combined with fixed weights 0.68 / 0.21 / 0.11.
    """
    if os.path.isfile('ey.csv') and not self.is_update:
        stored = data.read_data(['ey'], ['ey'])
        self.bb_data.factor['ey'] = stored.ix['ey']
        return

    # Build the three raw components, then drop non-investable data.
    self.get_ey_epfwd()
    self.get_ey_cetop()
    self.get_ey_etop()
    self.bb_data.discard_uninv_data()

    component_weights = (('epfwd', 0.68), ('cetop', 0.21), ('etop', 0.11))

    # Exposure of each component.
    for name, _ in component_weights:
        self.bb_data.raw_data[name + '_expo'] = \
            strategy_data.get_cap_wgt_exposure(
                self.bb_data.raw_data.ix[name],
                self.bb_data.stock_price.ix['FreeMarketValue'])

    # Weighted sum, accumulated left to right in component order.
    earnings_yield = None
    for name, weight in component_weights:
        term = weight * self.bb_data.raw_data.ix[name + '_expo']
        earnings_yield = term if earnings_yield is None \
            else earnings_yield + term
    self.bb_data.factor['ey'] = earnings_yield
def get_industry_factor(self):
    """Append industry dummy exposures to ``self.bb_data.factor_expo``.

    The industry table is read into ``self.industry``; for every row a dummy
    encoding (prefix ``'Industry'``) is produced, the result is aligned with
    the first item of the existing factor exposures, NaNs are filled with 0
    and the dummies are concatenated onto ``factor_expo``.
    """
    self.industry = data.read_data(['Industry'], ['Industry']).ix['Industry']

    # Use the first date on which every industry is present as the template
    # for the storage panel. Note: 28 industries plus NaN gives 29 distinct
    # values.
    distinct_per_day = self.industry.apply(lambda row: row.unique().size,
                                           axis=1)
    first_full_day = distinct_per_day[distinct_per_day == 29].index[0]
    template = pd.get_dummies(self.industry.ix[first_full_day],
                              prefix='Industry')
    dummies = pd.Panel(data=None, major_axis=template.index,
                       minor_axis=template.columns)

    for day, labels in self.industry.iterrows():
        dummies[day] = pd.get_dummies(labels, prefix='Industry')

    # Transpose, then align the industry exposures with the index of the
    # style factor exposures.
    dummies = dummies.transpose(2, 0, 1)
    dummies = data.align_index(self.bb_data.factor_expo.ix[0], dummies)

    # Fill NaN with 0: an industry with no member stock at some date would
    # otherwise have NaN exposure. Non-investable entries are filled too, but
    # become NaN again in a later filter.
    dummies = dummies.fillna(0)

    self.bb_data.factor_expo = pd.concat(
        [self.bb_data.factor_expo, dummies])
def prepare_benchmark(self):
    """Load the benchmark weight tables and normalize each row to sum to 1.

    The result is stored in ``self.benchmarks``. Rows whose weights sum to 0
    produce NaN after the division; those NaNs are replaced by 0.0.
    """
    # NOTE(review): 'Weihgt_sz50' looks like a misspelling of 'Weight_sz50'
    # (the other two keys use 'Weight_'); left unchanged because the stored
    # data name cannot be confirmed from here.
    self.benchmarks = data.read_data(
        ['Weihgt_sz50', 'Weight_hs300', 'Weight_zz500'])

    # Normalize every benchmark row by its row sum.
    for name, weights in self.benchmarks.iteritems():
        self.benchmarks[name] = weights.div(weights.sum(1), axis=0)

    # fillna returns a new object, so the result must be assigned back;
    # previously it was discarded and the NaNs were never filled.
    self.benchmarks = self.benchmarks.fillna(0.0)
def get_ey_epfwd(self):
    """Store the forward earnings-to-price component as ``raw_data['epfwd']``.

    When ``epfwd.csv`` exists and the object is not in update mode the value
    is loaded through ``data.read_data``. Otherwise forecast net income
    (FY1 and FY2) divided by ``FreeMarketValue`` is blended with
    month-dependent weights.
    """
    if os.path.isfile('epfwd.csv') and not self.is_update:
        epfwd = data.read_data(['epfwd'], ['epfwd']).ix['epfwd']
    else:
        # Forecast E/P: forecast net income divided by market value.
        ep_fy1 = self.bb_data.raw_data.ix[
            'NetIncome_fy1'] / self.bb_data.stock_price.ix['FreeMarketValue']
        ep_fy2 = self.bb_data.raw_data.ix[
            'NetIncome_fy2'] / self.bb_data.stock_price.ix['FreeMarketValue']

        # Per the original author's note the fiscal year is treated as ending
        # in April: in May FY1 gets weight 1, in June 11/12, ..., in April
        # 1/12; FY2 always gets the remainder.
        months_from_may = ep_fy1.index.month - 5
        fy1_share = np.where(months_from_may >= 0,
                             (12 - months_from_may) / 12,
                             -months_from_may / 12)
        # np.where yields an ndarray; wrap it so it aligns on the row index.
        fy1_weight = pd.Series(fy1_share, index=ep_fy1.index)
        fy2_weight = 1 - fy1_weight

        epfwd = (ep_fy1.mul(fy1_weight, axis=0)
                 + ep_fy2.mul(fy2_weight, axis=0))
    self.bb_data.raw_data['epfwd'] = epfwd
def get_pa_return(self, *, discard_factor=None, enable_reading_pa_return=True):
    """Populate ``self.pa_returns`` with the base-model factor returns.

    Stored factor returns are read when the CSV for the current stock pool
    exists, no factor is discarded and reading is enabled. Otherwise the
    factor returns are re-estimated via ``self.bb.get_bb_factor_return()``
    and written to that CSV. In both cases the result is finally re-indexed
    onto the index of ``self.pa_position.holding_matrix``.

    Args:
        discard_factor: Factors whose exposures are set to 0 before the
            regression. ``None`` (the default) means no factor is discarded;
            a ``None`` sentinel is used instead of a mutable ``[]`` default.
        enable_reading_pa_return: When False, stored factor returns are
            never read and the regression is always re-run.
    """
    if discard_factor is None:
        discard_factor = []

    # Read locally stored factor returns when they exist and no factor has
    # been discarded.
    if os.path.isfile('bb_factor_return_'+self.bb.bb_data.stock_pool+'.csv') and \
            len(discard_factor) == 0 and enable_reading_pa_return:
        bb_factor_return = data.read_data(
            ['bb_factor_return_'+self.bb.bb_data.stock_pool], ['pa_returns'])
        self.pa_returns = bb_factor_return['pa_returns']
        print('Barra base factor returns successfully read from local files! \n')
    else:
        # Set the exposures of every discarded style factor to 0.
        self.bb.bb_data.factor_expo.ix[discard_factor, :, :] = 0
        # Set non-investable values back to NaN.
        self.bb.bb_data.discard_uninv_data()
        # Container for the factor returns (overwritten below once the
        # regression has produced its result).
        self.pa_returns = pd.DataFrame(
            0, index=self.bb.bb_data.factor_expo.major_axis,
            columns=self.bb.bb_data.factor_expo.items)
        # Estimate the factor returns of the base model; these are the
        # attribution factor returns.
        self.bb.get_bb_factor_return()
        self.pa_returns = self.bb.bb_factor_return
        # Store the regression result locally; it has to be re-estimated and
        # stored again whenever new data arrive.
        self.pa_returns.to_csv(
            'bb_factor_return_'+self.bb.bb_data.stock_pool+'.csv',
            index_label='datetime', na_rep='NaN', encoding='GB18030')

    # Switch to the time axis of the performance attribution (not that of
    # the base model).
    self.pa_returns = self.pa_returns.reindex(
        self.pa_position.holding_matrix.index)
def get_rv_cmra(self):
    """Store the cumulative-range component as ``raw_data['cmra']``.

    When ``cmra.csv`` exists and the object is not in update mode the value
    is loaded through ``data.read_data``. Otherwise, for every row from the
    252nd onwards, the trailing 252 rows of ``daily_excess_return`` are
    cumulated and the range (max minus min) of the cumulative values sampled
    every 21 rows is stored.
    """
    if os.path.isfile('cmra.csv') and not self.is_update:
        cmra = data.read_data(['cmra'], ['cmra']).ix['cmra']
    else:
        def func_cmra(window):
            # Cumulative return inside the window, sampled once per "month"
            # (positions 20, 41, ..., 251).
            cumulative = window.cumsum(axis=0)
            month_ends = np.arange(20, 252, 21)
            sampled = cumulative.ix[month_ends]
            # The range z_max - z_min is used instead of
            # log(1 + z_max) - log(1 + z_min) to avoid non-positive log
            # arguments; per the original author this keeps the factor
            # ordering and only changes its scale.
            return sampled.max(axis=0) - sampled.min(axis=0)

        cmra = self.bb_data.stock_price.ix['daily_excess_return'] * np.nan
        n_rows = len(self.bb_data.stock_price.ix['daily_excess_return'].index)
        # The first full 252-row window ends at position 251.
        for pos in range(251, n_rows):
            window = self.bb_data.stock_price.ix[
                'daily_excess_return', pos - 251:pos + 1, :]
            cmra.ix[pos, :] = func_cmra(window)
    self.bb_data.raw_data['cmra'] = cmra
def get_rv_dastd(self):
    """Store the weighted-volatility component as ``raw_data['dastd']``.

    When ``dastd.csv`` exists and the object is not in update mode the value
    is loaded through ``data.read_data``. Otherwise, for every row from the
    252nd onwards, the trailing 252 rows of ``daily_excess_return`` are
    multiplied by ``barra_base.construct_expo_weights(42, 252)`` and the
    column-wise standard deviation of the weighted values is stored.
    """
    if os.path.isfile('dastd.csv') and not self.is_update:
        dastd = data.read_data(['dastd'], ['dastd']).ix['dastd']
    else:
        # 252 trading days with a half-life parameter of 42.
        exponential_weights = barra_base.construct_expo_weights(42, 252)

        def func_dastd(window, *, weights):
            row_weights = pd.Series(weights, index=window.index)
            return window.mul(row_weights, axis=0).std(0)

        dastd = self.bb_data.stock_price.ix['daily_excess_return'] * np.nan
        n_rows = len(self.bb_data.stock_price.ix['daily_excess_return'].index)
        # The first full 252-row window ends at position 251.
        for pos in range(251, n_rows):
            window = self.bb_data.stock_price.ix[
                'daily_excess_return', pos - 251:pos + 1, :]
            dastd.ix[pos, :] = func_dastd(window, weights=exponential_weights)
    self.bb_data.raw_data['dastd'] = dastd
def get_momentum(self):
    """Store the momentum factor as ``self.bb_data.factor['momentum']``.

    When ``momentum.csv`` exists and the object is not in update mode the
    factor is loaded through ``data.read_data``. Otherwise
    ``daily_excess_return`` is lagged by 21 rows and, for each row with a
    full history, the trailing 504 lagged rows are combined as a weighted sum
    using ``barra_base.construct_expo_weights(126, 504)``.
    """
    if os.path.isfile('momentum.csv') and not self.is_update:
        stored = data.read_data(['momentum'], ['momentum'])
        self.bb_data.factor['momentum'] = stored.ix['momentum']
        return

    # The data enter with a 21-day lag.
    lag_return = self.bb_data.stock_price.ix['daily_excess_return'].shift(21)
    # 504 trading days with a half-life parameter of 126.
    exponential_weights = barra_base.construct_expo_weights(126, 504)

    def func_mom(window, *, weights):
        row_weights = pd.Series(weights, index=window.index)
        return window.mul(row_weights, axis=0).sum(0)

    momentum = self.bb_data.stock_price.ix['daily_excess_return'] * np.nan
    # At least 504 + 21 rows are needed, so the first computed position is
    # 502 + 21 + 1.
    for pos in range(502 + 21 + 1, len(lag_return.index)):
        window = lag_return.ix[pos - 503:pos + 1, :]
        momentum.ix[pos, :] = func_mom(window, weights=exponential_weights)
    self.bb_data.factor['momentum'] = momentum
def sf_test_multiple_pools(factor=None, sf_obj=None, *, direction='+', bb_obj=None,
                           discard_factor=(), folder_names=None, holding_freq='w',
                           benchmarks=None,
                           stock_pools=('all', 'hs300', 'zz500', 'zz800'),
                           bkt_start=None, bkt_end=None, select_method=0,
                           do_bb_pure_factor=False, do_pa=False, do_active_pa=False,
                           do_data_description=False, do_factor_corr_test=False,
                           loc=-1):
    """Run a single-factor test once per stock pool.

    Args:
        factor: Forwarded to ``sf_obj.single_factor_test``.
        sf_obj: Strategy object whose ``single_factor_test`` is called. When
            None a fresh ``single_factor_strategy()`` is created for this
            call (previously one instance was built at definition time and
            shared by every call).
        direction, discard_factor, holding_freq, bkt_start, bkt_end,
        select_method, do_pa, do_active_pa, do_data_description,
        do_factor_corr_test, loc: Forwarded unchanged to
            ``single_factor_test``.
        bb_obj: Base-model object; a new ``barra_base()`` is built when None.
            A warning is printed when a supplied object's stock pool is not
            ``'all'``.
        folder_names: Indexable with one output folder per stock pool
            (indexed by position, so it must not be None when
            ``stock_pools`` is non-empty).
        benchmarks: Indexable with one benchmark per stock pool (same
            requirement as ``folder_names``).
        stock_pools: Stock pools to iterate over.
        do_bb_pure_factor: Forwarded as ``do_base_pure_factor``.
    """
    if sf_obj is None:
        sf_obj = single_factor_strategy()

    # Print the name of the strategy under test.
    print('Name Of Strategy Under Test: {0}\n'.format(sf_obj.__class__.__name__))

    cp_adj = data.read_data('ClosePrice_adj')
    temp_position = position(cp_adj)
    # The backtest object has to be initialized first.
    bkt_obj = backtest(temp_position, bkt_start=bkt_start, bkt_end=bkt_end,
                       buy_cost=0.0015, sell_cost=0.0015,
                       bkt_stock_data=['ClosePrice_adj', 'ClosePrice_adj'])

    # Build the base-model object once; otherwise every iteration would have
    # to build a new one.
    if bb_obj is None:
        bb_obj = barra_base()
    # An externally supplied object whose stock pool is not 'all' may already
    # have lost data, so warn about it.
    elif bb_obj.bb_data.stock_pool != 'all':
        print('The stockpool of the barra_base obj from outside is NOT "all", '
              'be aware of possible data loss due to this situation!\n')

    for cursor, stock_pool in enumerate(stock_pools):
        # The base-model object is deep-copied because the attribution step
        # discards data per stock pool, so it must not be shared between
        # iterations. The backtest object is copied as well, although that is
        # not strictly necessary.
        sf_obj.single_factor_test(
            factor=factor, loc=loc, direction=direction,
            bkt_obj=copy.deepcopy(bkt_obj), base_obj=copy.deepcopy(bb_obj),
            discard_factor=discard_factor, folder_name=folder_names[cursor],
            bkt_start=bkt_start, bkt_end=bkt_end, holding_freq=holding_freq,
            benchmark=benchmarks[cursor], stock_pool=stock_pool,
            select_method=select_method,
            do_base_pure_factor=do_bb_pure_factor, do_pa=do_pa,
            do_active_pa=do_active_pa,
            do_data_description=do_data_description,
            do_factor_corr_test=do_factor_corr_test)
def get_g_egro(self):
    """Store the earnings-growth component as ``raw_data['egro']``.

    When ``egro.csv`` exists and the object is not in update mode the value
    is loaded through ``data.read_data``; otherwise the
    ``NetIncome_ttm_growth_8q`` item is used.
    """
    use_stored = os.path.isfile('egro.csv') and not self.is_update
    if use_stored:
        egro = data.read_data(['egro'], ['egro']).ix['egro']
    else:
        # The two-year growth rate of TTM net income stands in for the
        # five-year growth rate.
        egro = self.bb_data.raw_data.ix['NetIncome_ttm_growth_8q']
    self.bb_data.raw_data['egro'] = egro
def get_ey_etop(self):
    """Store the trailing earnings-to-price component as ``raw_data['etop']``.

    When ``etop.csv`` exists and the object is not in update mode the value
    is loaded through ``data.read_data``; otherwise it is the reciprocal of
    the ``PE_ttm`` item.
    """
    use_stored = os.path.isfile('etop.csv') and not self.is_update
    if use_stored:
        etop = data.read_data(['etop'], ['etop']).ix['etop']
    else:
        # etop is the reciprocal of the trailing P/E.
        pe_ttm = self.bb_data.raw_data.ix['PE_ttm']
        etop = 1 / pe_ttm
    self.bb_data.raw_data['etop'] = etop
def get_g_sgro(self):
    """Store the sales-growth component as ``raw_data['sgro']``.

    When ``sgro.csv`` exists and the object is not in update mode the value
    is loaded through ``data.read_data``; otherwise the
    ``Revenue_ttm_growth_8q`` item is used.
    """
    use_stored = os.path.isfile('sgro.csv') and not self.is_update
    if use_stored:
        sgro = data.read_data(['sgro'], ['sgro']).ix['sgro']
    else:
        # Historical revenue stands in for historical sales per share.
        sgro = self.bb_data.raw_data.ix['Revenue_ttm_growth_8q']
    self.bb_data.raw_data['sgro'] = sgro
def prepare_data(self, *, price='ClosePrice'):
    """Prepare the raw data, adding net income, SUE and trailing E/P.

    ``intangible_info.prepare_data`` runs first; afterwards
    ``NetIncome_ttm`` and ``sue`` (read from ``runner_value_36``) are added
    to the raw data, and ``ep_ttm`` is computed as ``NetIncome_ttm`` divided
    by ``FreeMarketValue``.

    Args:
        price: Forwarded to ``intangible_info.prepare_data``.
    """
    intangible_info.prepare_data(self, price=price)

    extra = data.read_data(['NetIncome_ttm', 'runner_value_36'],
                           ['NetIncome_ttm', 'sue'], shift=True)
    for item in ('NetIncome_ttm', 'sue'):
        self.strategy_data.raw_data[item] = extra[item]

    net_income = self.strategy_data.raw_data['NetIncome_ttm']
    market_value = self.strategy_data.raw_data['FreeMarketValue']
    self.strategy_data.raw_data['ep_ttm'] = net_income / market_value
def get_tar_holding_position(self):
    """Load target holding volumes and derive normalized target positions.

    ``self.tar_holding_vol`` is read from the HDF store ``'tar_holding_vol'``
    (key ``'123'``). Holding values are the volumes multiplied by the first
    item returned for ``ClosePrice``; each item's rows are then divided by
    their row sum. Rows that sum to 0 produce NaN, which is replaced by 0.0.
    The result is stored in ``self.tar_position``.
    """
    self.tar_holding_vol = pd.read_hdf('tar_holding_vol', '123')
    cp = data.read_data(['ClosePrice'], shift=True).iloc[0]
    holding_value = self.tar_holding_vol.mul(cp, axis=0)

    # Normalize each strategy's holding values row by row.
    self.tar_position = self.tar_holding_vol * np.nan
    for strg, holding in holding_value.iteritems():
        self.tar_position[strg] = holding.div(holding.sum(1), axis=0)

    # fillna returns a new object, so the result must be assigned back;
    # previously it was discarded and the NaNs were never filled.
    self.tar_position = self.tar_position.fillna(0.0)
def get_g_egrlf(self):
    """Store the long-term forecast growth component as ``raw_data['egrlf']``.

    When ``egrlf.csv`` exists and the object is not in update mode the value
    is loaded through ``data.read_data``; otherwise it is
    ``(NetIncome_fy2 / NetIncome_ttm) ** 0.5 - 1``.
    """
    use_stored = os.path.isfile('egrlf.csv') and not self.is_update
    if use_stored:
        egrlf = data.read_data(['egrlf'], ['egrlf']).ix['egrlf']
    else:
        # FY2 net income stands in for the long-term forecast net income.
        income_ratio = (self.bb_data.raw_data.ix['NetIncome_fy2']
                        / self.bb_data.raw_data.ix['NetIncome_ttm'])
        egrlf = income_ratio ** 0.5 - 1
    self.bb_data.raw_data['egrlf'] = egrlf
def reset_bkt_benchmark(self, new_bkt_benchmark_data):
    """Replace the backtest benchmark and reset the backtest data.

    Args:
        new_bkt_benchmark_data: Passed to ``data.read_data`` as the data to
            load; it is read under the item name ``'ClosePrice_adj'``.
    """
    bkt_data = self.bkt_data
    bkt_data.benchmark_price = data.read_data(new_bkt_benchmark_data,
                                              ['ClosePrice_adj'])
    # Align the benchmark prices with the backtest period.
    bkt_data.benchmark_price = data.align_index(
        self.tar_pct_position.holding_matrix,
        bkt_data.benchmark_price,
        axis='major')
    # Reset the backtest data.
    self.reset_bkt_data()
def get_g_egrsf(self):
    """Store the short-term forecast growth component as ``raw_data['egrsf']``.

    When ``egrsf.csv`` exists and the object is not in update mode the value
    is loaded through ``data.read_data``; otherwise it is
    ``NetIncome_fy1 / NetIncome_ttm - 1``.
    """
    use_stored = os.path.isfile('egrsf.csv') and not self.is_update
    if use_stored:
        egrsf = data.read_data(['egrsf'], ['egrsf']).ix['egrsf']
    else:
        # FY1 net income stands in for the short-term forecast net income.
        income_ratio = (self.bb_data.raw_data.ix['NetIncome_fy1']
                        / self.bb_data.raw_data.ix['NetIncome_ttm'])
        egrsf = income_ratio - 1
    self.bb_data.raw_data['egrsf'] = egrsf
def get_leverage(self):
    """Store the leverage factor as ``self.bb_data.factor['leverage']``.

    When ``leverage.csv`` exists and the object is not in update mode the
    factor is loaded through ``data.read_data``; otherwise it is
    ``TotalLiability / TotalAssets``.
    """
    use_stored = os.path.isfile('leverage.csv') and not self.is_update
    if use_stored:
        leverage = data.read_data(['leverage'], ['leverage']).ix['leverage']
    else:
        # Leverage is the plain liabilities-to-assets ratio.
        liabilities = self.bb_data.raw_data.ix['TotalLiability']
        assets = self.bb_data.raw_data.ix['TotalAssets']
        leverage = liabilities / assets
    self.bb_data.factor['leverage'] = leverage
def __init__(self):
    """Initialize the strategy with tradability flags and market values."""
    strategy.__init__(self)

    # Every factor strategy needs the "is tradable" data.
    self.strategy_data.generate_if_tradable(shift=True)

    # Market values are needed for capitalization weighting.
    market_value = data.read_data(['FreeMarketValue'], ['FreeMarketValue'],
                                  shift=True)
    self.strategy_data.stock_price = market_value

    # Placeholder for the pdf object used for plotting.
    self.pdfs = 'default'
def get_ey_cetop(self):
    """Store the cash-earnings-to-price component as ``raw_data['cetop']``.

    When ``cetop.csv`` exists and the object is not in update mode the value
    is loaded through ``data.read_data``; otherwise it is
    ``CashEarnings_ttm / FreeMarketValue``.
    """
    use_stored = os.path.isfile('cetop.csv') and not self.is_update
    if use_stored:
        cetop = data.read_data(['cetop'], ['cetop']).ix['cetop']
    else:
        # Trailing cash earnings divided by market value.
        cash_earnings = self.bb_data.raw_data.ix['CashEarnings_ttm']
        market_value = self.bb_data.stock_price.ix['FreeMarketValue']
        cetop = cash_earnings / market_value
    self.bb_data.raw_data['cetop'] = cetop
def get_stock_alpha(self):
    """Compute per-stock alpha from stored runner values and save it.

    The runner values are read from the HDF store ``'runner_value'`` (key
    ``'123'``), restricted to the investable part of the ``'zz500'`` stock
    pool, winsorized and standardized within each industry for every date,
    summed over the item axis, masked to the investable universe again and
    written to the HDF store ``'stock_alpha_zz500'`` (key ``'123'``).
    """
    # Read the stored runner values.
    runner_value = pd.read_hdf('runner_value', '123')

    # The investment universe is the 'zz500' stock pool.
    self.strategy_data.stock_pool = 'zz500'
    self.strategy_data.handle_stock_pool()

    # Industry classification, used both for alignment and for grouping.
    industry = data.read_data(['Industry'])['Industry']
    runner_value = runner_value.reindex(major_axis=industry.index,
                                        minor_axis=industry.columns)

    # Blank out everything outside the investable universe.
    def mask_uninvestable(frame):
        masked = np.where(self.strategy_data.if_tradable['if_inv'],
                          frame, np.nan)
        return pd.DataFrame(masked, index=frame.index, columns=frame.columns)

    runner_value = runner_value.apply(mask_uninvestable, axis=(1, 2))
    industry = industry.where(self.strategy_data.if_tradable['if_inv'],
                              np.nan)

    # Winsorize and standardize within an industry. The input is a DataFrame
    # indexed by stock with one column per factor.
    def expo_within_indus_func(raw_data):
        # Winsorize at the 1% / 99% quantiles, keeping NaN as NaN.
        lower = raw_data.quantile(0.01)
        upper = raw_data.quantile(0.99)
        clipped = np.where(raw_data >= lower, raw_data, lower)
        clipped = np.where(raw_data <= upper, clipped, upper)
        clipped = np.where(raw_data.isnull(), np.nan, clipped)
        clipped = pd.DataFrame(clipped, index=raw_data.index,
                               columns=raw_data.columns)
        # Standardize.
        centered = clipped.sub(clipped.mean(), axis=1)
        return centered.div(clipped.std(), axis=1)

    # Factor exposures are computed within each industry so that the
    # algorithm stays consistent; their sum is used as the alpha.
    expo_within_indus = runner_value * np.nan
    for day in runner_value.major_axis:
        cross_section = runner_value.ix[:, day, :]
        by_industry = cross_section.groupby(industry.ix[day, :])
        expo_within_indus.ix[:, day, :] = \
            by_industry.apply(expo_within_indus_func)

    # Stock alpha is the sum of the factor exposures.
    stock_alpha = expo_within_indus.sum(axis=0)
    # Set the non-investable part back to NaN.
    stock_alpha = stock_alpha.where(
        self.strategy_data.if_tradable['if_inv'], np.nan)

    # Store the computed stock alpha.
    stock_alpha.to_hdf('stock_alpha_zz500', '123')
def get_rv_hsigma(self):
    """Store the ``hsigma`` component as ``raw_data['hsigma']``.

    Sources, in order of preference: the stored ``hsigma.csv`` (when it
    exists and the object is not in update mode), the ``temp_hsigma``
    attribute when present, or NaN together with a printed notice.
    """
    if os.path.isfile('hsigma.csv') and not self.is_update:
        hsigma = data.read_data(['hsigma'], ['hsigma']).ix['hsigma']
    elif not hasattr(self, 'temp_hsigma'):
        print(
            'hsigma has not been accquired, if you have rv file stored instead, ingored this message.\n'
        )
        hsigma = np.nan
    else:
        hsigma = self.temp_hsigma
    self.bb_data.raw_data['hsigma'] = hsigma
def get_liq_stom(self):
    """Store the monthly turnover component as ``raw_data['stom']``.

    When ``stom.csv`` exists and the object is not in update mode the value
    is loaded through ``data.read_data``. Otherwise it is the log of the sum
    of ``Volume / FreeShares`` over a rolling 21-row window (at least 5
    observations). Non-investable data are discarded afterwards.
    """
    use_stored = os.path.isfile('stom.csv') and not self.is_update
    if use_stored:
        stom = data.read_data(['stom'], ['stom']).ix['stom']
    else:
        def log_of_sum(window):
            return np.log(np.sum(window))

        volume = self.bb_data.stock_price.ix['Volume']
        turnover = volume.div(self.bb_data.stock_price.ix['FreeShares'])
        stom = turnover.rolling(21, min_periods=5).apply(log_of_sum)
    self.bb_data.raw_data['stom'] = stom
    # Filter the data now, because stom feeds the later stoq / stoa
    # computations.
    self.bb_data.discard_uninv_data()