def get_growth(self):
    """Populate ``self.bb_data.factor['growth']``.

    If ``growth.csv`` exists in the working directory and ``self.is_update``
    is falsy, the factor is loaded through ``data.read_data``.  Otherwise the
    four component descriptors (egrlf, egrsf, egro, sgro) are built, each is
    converted to a cap-weighted exposure against ``FreeMarketValue`` and the
    exposures are blended with fixed weights 0.18 / 0.11 / 0.24 / 0.47.
    """
    cache_usable = os.path.isfile('growth.csv') and not self.is_update
    if cache_usable:
        cached = data.read_data(['growth'], ['growth'])
        self.bb_data.factor['growth'] = cached.ix['growth']
        return

    # Build the raw descriptors, then filter the data set.
    self.get_g_egrlf()
    self.get_g_egrsf()
    self.get_g_egro()
    self.get_g_sgro()
    self.bb_data.discard_uninv_data()

    # Exposure of each of the four component descriptors.
    bb = self.bb_data
    for descriptor in ('egrlf', 'egrsf', 'egro', 'sgro'):
        bb.raw_data[descriptor + '_expo'] = strategy_data.get_cap_wgt_exposure(
            bb.raw_data.ix[descriptor],
            bb.stock_price.ix['FreeMarketValue'])

    # Fixed-weight blend of the component exposures.
    blended = (0.18 * bb.raw_data.ix['egrlf_expo']
               + 0.11 * bb.raw_data.ix['egrsf_expo']
               + 0.24 * bb.raw_data.ix['egro_expo']
               + 0.47 * bb.raw_data.ix['sgro_expo'])
    bb.factor['growth'] = blended
def get_earnings_yeild(self):
    """Populate ``self.bb_data.factor['ey']``.

    If ``ey.csv`` exists in the working directory and ``self.is_update`` is
    falsy, the factor is loaded through ``data.read_data``.  Otherwise the
    three component descriptors (epfwd, cetop, etop) are built, each is
    converted to a cap-weighted exposure against ``FreeMarketValue`` and the
    exposures are blended with fixed weights 0.68 / 0.21 / 0.11.
    """
    cache_usable = os.path.isfile('ey.csv') and not self.is_update
    if cache_usable:
        cached = data.read_data(['ey'], ['ey'])
        self.bb_data.factor['ey'] = cached.ix['ey']
        return

    # Build the raw descriptors, then filter the data set.
    self.get_ey_epfwd()
    self.get_ey_cetop()
    self.get_ey_etop()
    self.bb_data.discard_uninv_data()

    # Exposure of each of the three component descriptors.
    bb = self.bb_data
    for descriptor in ('epfwd', 'cetop', 'etop'):
        bb.raw_data[descriptor + '_expo'] = strategy_data.get_cap_wgt_exposure(
            bb.raw_data.ix[descriptor],
            bb.stock_price.ix['FreeMarketValue'])

    # Fixed-weight blend of the component exposures.
    blended = (0.68 * bb.raw_data.ix['epfwd_expo']
               + 0.21 * bb.raw_data.ix['cetop_expo']
               + 0.11 * bb.raw_data.ix['etop_expo'])
    bb.factor['ey'] = blended
def get_liquidity(self):
    """Populate ``self.bb_data.factor['liquidity']``.

    If ``liquidity.csv`` exists in the working directory and
    ``self.is_update`` is falsy, the factor is loaded through
    ``data.read_data``.  Otherwise the three descriptors (stom, stoq, stoa)
    are built and turned into cap-weighted exposures, blended with weights
    0.35 / 0.35 / 0.3, and the blend is orthogonalised against the ``lncap``
    exposure using sqrt(FreeMarketValue) as regression weights.
    """
    cache_usable = os.path.isfile('liquidity.csv') and not self.is_update
    if cache_usable:
        cached = data.read_data(['liquidity'], ['liquidity'])
        self.bb_data.factor['liquidity'] = cached.ix['liquidity']
        return

    self.get_liq_stom()
    self.get_liq_stoq()
    self.get_liq_stoa()
    # Filter the data set.
    self.bb_data.discard_uninv_data()

    # Exposure of each of the three component descriptors.
    bb = self.bb_data
    for descriptor in ('stom', 'stoq', 'stoa'):
        bb.raw_data[descriptor + '_expo'] = strategy_data.get_cap_wgt_exposure(
            bb.raw_data.ix[descriptor],
            bb.stock_price.ix['FreeMarketValue'])

    blended = (0.35 * bb.raw_data.ix['stom_expo']
               + 0.35 * bb.raw_data.ix['stoq_expo']
               + 0.3 * bb.raw_data.ix['stoa_expo'])

    # Exposure of the blend itself; extremes are not removed again.
    y = strategy_data.get_cap_wgt_exposure(
        blended, bb.stock_price.ix['FreeMarketValue'], percentile=0)
    # Exposure of the size factor (the regressor has to be a Panel).
    size_expo = strategy_data.get_cap_wgt_exposure(
        bb.factor.ix['lncap'], bb.stock_price.ix['FreeMarketValue'])
    x = pd.Panel({'lncap_expo': size_expo})

    # Orthogonalise against size.
    reg_weights = np.sqrt(bb.stock_price.ix['FreeMarketValue'])
    orth_outcome = strategy_data.simple_orth_gs(y, x, weights=reg_weights)
    bb.factor['liquidity'] = orth_outcome[0]
def get_style_factor_exposure(self):
    """Fill ``self.bb_data.factor_expo`` with one exposure per style factor.

    An empty Panel sharing the major/minor axes of ``self.bb_data.factor`` is
    created first; every item of ``factor`` is then converted with
    ``strategy_data.get_cap_wgt_exposure`` against ``FreeMarketValue``.
    """
    bb = self.bb_data
    # Index the exposure panel like the factor panel.
    bb.factor_expo = pd.Panel(
        data=None,
        major_axis=bb.factor.major_axis,
        minor_axis=bb.factor.minor_axis)

    # Factors that are sums of inner descriptors, or that already went through
    # an exposure computation (e.g. after orthogonalisation), must not have
    # their extremes removed a second time.
    already_processed = ('rv', 'nls', 'liquidity', 'ey', 'growth')

    for item, df in bb.factor.iteritems():
        extra_kwargs = {'percentile': 0} if item in already_processed else {}
        bb.factor_expo[item] = strategy_data.get_cap_wgt_exposure(
            df, bb.stock_price.ix['FreeMarketValue'], **extra_kwargs)
def select_stocks_pure_factor_bb(self, *, bb_expo, cov_matrix='Empty', reg_weight='Empty',
                                 direction='+', regulation_lambda=1):
    """Build pure-factor holdings from the closed-form solution.

    The weights follow the analytical solution referenced in the original
    code as Barra, "Efficient Replication of Factor Returns", equation (6).

    Args:
        bb_expo: Panel of base factor exposures, sliced per holding day.
        cov_matrix: per-date covariance container; any non-``str`` value
            takes priority over ``reg_weight``.
        reg_weight: per-date regression weights, used (normalised, on the
            diagonal) when no covariance matrix is supplied.
        direction: ``'-'`` flips the sign of the factor exposure.
        regulation_lambda: divisor applied to the solution.

    Raises:
        AssertionError: if neither ``cov_matrix`` nor ``reg_weight`` is given.
    """
    sdata = self.strategy_data

    # Exposure of the factor being tested.
    factor_expo = strategy_data.get_cap_wgt_exposure(
        sdata.factor.iloc[0], sdata.stock_price.ix['FreeMarketValue'])
    if direction == '-':
        factor_expo = -factor_expo
    sdata.factor_expo = pd.Panel(
        {'factor_expo': factor_expo},
        major_axis=sdata.factor.major_axis,
        minor_axis=sdata.factor.minor_axis)

    # Loop over the rebalancing days.
    for _, time in self.holding_days.iteritems():
        # Exposure vector of the current factor (n x 1).
        x_alpha = sdata.factor_expo.ix['factor_expo', time, :].fillna(0)
        # Exposure matrix of the other (base) factors (n x (k-1)).
        x_sigma = bb_expo.ix[:, time, :].fillna(0)

        if type(cov_matrix) != str:
            # A covariance matrix has priority when supplied.
            inv_v = np.linalg.pinv(cov_matrix.ix[time].fillna(0))
        else:
            assert type(reg_weight) != str, 'The construction of pure factor portfolio require one of following:\n' \
                                            'Covariance matrix of factor returns (priority), OR \n' \
                                            'Regression weight when getting factor return using linear regression.\n'
            # Current regression weights, normalised, one per stock on the diagonal.
            day_weight = reg_weight.ix[time]
            day_weight = (day_weight / day_weight.sum()).fillna(0)
            inv_v = np.diag(day_weight)

        # Closed-form solution.
        sigma_t_inv_v = np.dot(x_sigma.T, inv_v)
        gram_pinv = np.linalg.pinv(np.dot(sigma_t_inv_v, x_sigma))
        projected = np.dot(sigma_t_inv_v, x_alpha)
        residual = x_alpha - np.dot(np.dot(x_sigma, gram_pinv), projected)
        h_star = 1 / regulation_lambda * np.dot(inv_v, residual)

        # This is the only weighting scheme; it is normalised afterwards.
        self.position.holding_matrix.ix[time] = h_star

    # NOTE(review): the original indentation was lost; normalisation is
    # assumed to run once after the loop - confirm against the source file.
    self.position.to_percentage()
def get_nonlinear_size(self):
    """Populate ``self.bb_data.factor['nls']``.

    If ``nls.csv`` exists in the working directory and ``self.is_update`` is
    falsy, the factor is loaded through ``data.read_data``.  Otherwise the
    cube of ``lncap`` is converted to a cap-weighted exposure and
    orthogonalised against the ``lncap`` exposure, using
    sqrt(FreeMarketValue) as regression weights.
    """
    cache_usable = os.path.isfile('nls.csv') and not self.is_update
    if cache_usable:
        cached = data.read_data(['nls'], ['nls'])
        self.bb_data.factor['nls'] = cached.ix['nls']
        return

    bb = self.bb_data
    size_cube = bb.factor.ix['lncap'] ** 3

    # Exposure of the raw (cubed) size.
    y = strategy_data.get_cap_wgt_exposure(
        size_cube, bb.stock_price.ix['FreeMarketValue'])
    # Exposure of the size factor; the regressor has to be a Panel.
    size_expo = strategy_data.get_cap_wgt_exposure(
        bb.factor.ix['lncap'], bb.stock_price.ix['FreeMarketValue'])
    x = pd.Panel({'lncap_expo': size_expo})

    # Orthogonalise against size.
    reg_weights = np.sqrt(bb.stock_price.ix['FreeMarketValue'])
    orth_outcome = strategy_data.simple_orth_gs(y, x, weights=reg_weights)
    bb.factor['nls'] = orth_outcome[0]
def construct_factor(self):
    """Build the ``new_reversal`` factor and store it in ``strategy_data.factor``.

    ``runner_value_8`` / ``runner_value_36`` are read as ``rv8`` / ``rv36``,
    both are standardised with cap weights, ``rv8`` is orthogonalised against
    ``rv36`` with an equal-weight regression, and the result is stored
    shifted by one period.
    """
    sdata = self.strategy_data

    # Use rv8 and rv36 from the runner-value data directly.
    sdata.raw_data = data.read_data(
        ['runner_value_8', 'runner_value_36'], item_name=['rv8', 'rv36'])
    sdata.stock_price = data.read_data(['FreeMarketValue'])

    # The runner values are not standardised yet: apply cap-weighted
    # standardisation here (the investable-universe filter stays disabled).
    for name in ('rv8', 'rv36'):
        sdata.raw_data[name] = strategy_data.get_cap_wgt_exposure(
            sdata.raw_data[name], sdata.stock_price['FreeMarketValue'])

    # Equal-weight regression (``weights=None``); sqrt(FreeMarketValue) was
    # the alternative considered in the original code.
    outcome = strategy_data.simple_orth_gs(
        sdata.raw_data['rv8'], sdata.raw_data[['rv36']], weights=None)
    # Only the first element is stored; the other two are read but unused.
    new_factor, _pvalues, _rsquared = outcome[0], outcome[1], outcome[2]

    # Store the factor.
    sdata.factor = pd.Panel({'new_reversal': new_factor.shift(1)})
def get_residual_volatility(self):
    """Populate ``self.bb_data.factor['rv']``.

    If ``rv.csv`` exists in the working directory and ``self.is_update`` is
    falsy, the factor is loaded through ``data.read_data``.  Otherwise the
    three descriptors (dastd, cmra, hsigma) are built and turned into
    cap-weighted exposures, blended with weights 0.74 / 0.16 / 0.1, and the
    blend is orthogonalised against the ``lncap`` and ``beta`` exposures with
    sqrt(FreeMarketValue) as regression weights.
    """
    cache_usable = os.path.isfile('rv.csv') and not self.is_update
    if cache_usable:
        cached = data.read_data(['rv'], ['rv'])
        self.bb_data.factor['rv'] = cached.ix['rv']
        return

    self.get_rv_dastd()
    self.get_rv_cmra()
    self.get_rv_hsigma()
    # Filter the data: the factors are orthogonalised later, so unfiltered
    # rows would affect the computation.  Per the original note this is the
    # first point in the base computation where uninvestable data is
    # dropped; nothing computed afterwards may be cached because it depends
    # on the stock pool.
    self.bb_data.discard_uninv_data()

    # Exposure of each of the three component descriptors.
    bb = self.bb_data
    for descriptor in ('dastd', 'cmra', 'hsigma'):
        bb.raw_data[descriptor + '_expo'] = strategy_data.get_cap_wgt_exposure(
            bb.raw_data.ix[descriptor],
            bb.stock_price.ix['FreeMarketValue'])

    blended = (0.74 * bb.raw_data.ix['dastd_expo']
               + 0.16 * bb.raw_data.ix['cmra_expo']
               + 0.1 * bb.raw_data.ix['hsigma_expo'])

    # Exposure of the blend itself; extremes are not removed again.
    y = strategy_data.get_cap_wgt_exposure(
        blended, bb.stock_price.ix['FreeMarketValue'], percentile=0)

    # Exposures of the size and beta factors.
    size_expo = strategy_data.get_cap_wgt_exposure(
        bb.factor.ix['lncap'], bb.stock_price.ix['FreeMarketValue'])
    beta_expo = strategy_data.get_cap_wgt_exposure(
        bb.factor.ix['beta'], bb.stock_price.ix['FreeMarketValue'])
    x = pd.Panel({'lncap_expo': size_expo, 'beta_expo': beta_expo})

    # Orthogonalise.  The exposure is recomputed later; per the original
    # note the result stays orthogonal to x after that recomputation.
    reg_weights = np.sqrt(bb.stock_price.ix['FreeMarketValue'])
    orth_outcome = strategy_data.simple_orth_gs(y, x, weights=reg_weights)
    bb.factor['rv'] = orth_outcome[0]
def get_nonlinear_size(self):
    """Populate ``self.base_data.factor['nls']``.

    The cached value is read through ``data.read_data`` when the cache file
    exists, ``self.is_update`` is falsy and ``self.try_to_read`` is truthy.
    Otherwise the ``lncap`` cap-weighted exposure is cubed and orthogonalised
    against itself (uncubed) with sqrt(FreeMarketValue) as regression weights.
    """
    # NOTE(review): the existence check looks for 'nls_total<appendix>' while
    # the read below asks for 'nls<appendix>' - confirm this is intentional.
    cache_file = (os.path.abspath('.') + '/ResearchData/nls_total'
                  + self.filename_appendix)
    if os.path.isfile(cache_file) and not self.is_update and self.try_to_read:
        self.base_data.factor['nls'] = data.read_data(
            'nls' + self.filename_appendix)
        return

    base = self.base_data
    # Exposure of the size factor; the regressor has to be a Panel.
    size_expo = strategy_data.get_cap_wgt_exposure(
        base.factor.ix['lncap'], base.stock_price.ix['FreeMarketValue'])
    x = pd.Panel({'lncap_expo': size_expo})

    # Cube the size exposure to obtain the "size cube".
    y = x['lncap_expo'] ** 3

    # Orthogonalise the size cube against the size exposure.
    reg_weights = np.sqrt(base.stock_price.ix['FreeMarketValue'])
    orth_outcome = strategy_data.simple_orth_gs(y, x, weights=reg_weights)
    base.factor['nls'] = orth_outcome[0]
def get_pure_factor_gs_orth(self, base_expo, *, do_active_bb_pure_factor=False, reg_weight=1,
                            add_constant=False, use_factor_expo=True, expo_weight=1):
    """Purify the first strategy factor by regressing it on ``base_expo``.

    The residual returned by ``strategy_data.simple_orth_gs`` replaces
    ``self.strategy_data.factor.iloc[0]``.

    Args:
        base_expo: Panel of base factor exposures used as regressors.
        do_active_bb_pure_factor: when true, benchmark exposures are
            subtracted from both the base exposures and the factor exposure
            before the regression.
        reg_weight: ``1`` regresses with sqrt(FreeMarketValue) weights,
            ``0`` regresses without weights.
        add_constant: forwarded to ``simple_orth_gs``.
        use_factor_expo: when true the factor is first converted to an
            exposure; when false the raw factor values are used as they are.
        expo_weight: ``1`` for the cap-weighted exposure, ``0`` for
            ``strategy_data.get_exposure``; only read when
            ``use_factor_expo`` is true.

    Raises:
        ValueError: if ``reg_weight`` or (when used) ``expo_weight`` is not
            0 or 1.  Previously these cases ended in an UnboundLocalError.
    """
    if reg_weight not in (0, 1):
        raise ValueError('reg_weight must be 0 or 1, got {!r}'.format(reg_weight))
    if use_factor_expo and expo_weight not in (0, 1):
        raise ValueError('expo_weight must be 0 or 1, got {!r}'.format(expo_weight))

    # Exposure of the current factor; strategy data is already lagged.
    if not use_factor_expo:
        # Previously ``factor_expo`` stayed unbound on this path; fall back
        # to the raw factor values.
        factor_expo = self.strategy_data.factor.iloc[0]
    elif expo_weight == 1:
        factor_expo = strategy_data.get_cap_wgt_exposure(
            self.strategy_data.factor.iloc[0],
            self.strategy_data.stock_price.ix['FreeMarketValue'])
    else:
        factor_expo = strategy_data.get_exposure(
            self.strategy_data.factor.iloc[0])

    # Pure factor return relative to a benchmark.
    if do_active_bb_pure_factor:
        if self.strategy_data.stock_pool == 'all':
            # NOTE(review): elsewhere read_data with list arguments is indexed
            # by item afterwards; here the result is used directly - confirm
            # that this yields the 2-D weights the einsum below expects.
            benchmark_weight = data.read_data(['Weight_zz500'], ['Weight_zz500'], shift=True)
        else:
            benchmark_weight = self.strategy_data.benchmark_price.ix[
                'Weight_' + self.strategy_data.stock_pool]

        tradable = self.strategy_data.if_tradable.ix['if_tradable']

        # Adjusted base exposures and the benchmark's exposure to them.
        adjusted_bb_expo = strategy_data.adjust_benchmark_related_expo(
            base_expo, benchmark_weight, tradable)
        benchmark_bb_expo = np.einsum('ijk,jk->ji', adjusted_bb_expo.fillna(0),
                                      benchmark_weight.fillna(0))
        benchmark_bb_expo = pd.DataFrame(benchmark_bb_expo,
                                         index=base_expo.major_axis,
                                         columns=base_expo.items)

        # Adjusted exposure of the current factor and the benchmark's
        # exposure to it.
        adjusted_factor_expo = strategy_data.adjust_benchmark_related_expo(
            self.strategy_data.factor, benchmark_weight, tradable)
        adjusted_factor_expo = adjusted_factor_expo.iloc[0]
        benchmark_factor_expo = (adjusted_factor_expo * benchmark_weight).sum(1)

        # Absolute exposure minus benchmark exposure = active exposure.
        base_expo = base_expo.sub(benchmark_bb_expo, axis=0)
        factor_expo = factor_expo.sub(benchmark_factor_expo, axis=0)

    # Drop the country factor so the regression has a unique solution; per
    # the original note it is a linear combination of the industry exposures,
    # so the residual is unchanged.
    if 'country_factor' in base_expo.items:
        base_expo_no_cf = base_expo.drop('country_factor', axis=0)
    else:
        base_expo_no_cf = base_expo

    # Purify through a multivariate linear regression.
    if reg_weight == 1:
        pure_factor_expo = strategy_data.simple_orth_gs(
            factor_expo, base_expo_no_cf,
            weights=np.sqrt(self.strategy_data.stock_price.ix['FreeMarketValue']),
            add_constant=add_constant)[0]
    else:
        pure_factor_expo = strategy_data.simple_orth_gs(
            factor_expo, base_expo_no_cf, add_constant=add_constant)[0]

    # Store the purified factor in place of the original values.
    self.strategy_data.factor.iloc[0] = pure_factor_expo
def select_stocks(self, *, select_ratio=(0.8, 1), direction='+', weight=0,
                  use_factor_expo=True, expo_weight=1):
    """Select stocks by factor rank on every holding day and weight them.

    Args:
        select_ratio: two-element sequence ``(low, high)``.  With ``n``
            non-missing factor values on a day, stocks whose rank lies in
            ``[floor(n * low), floor(n * high)]`` are selected.
        direction: ``'+'`` ranks ascending, ``'-'`` ranks descending.
        weight: ``0`` keeps the equal weights produced by
            ``to_percentage``; ``1`` re-weights by ``FreeMarketValue``;
            ``2`` re-weights by factor values.
        use_factor_expo: with ``weight == 2``, weight by the factor exposure
            instead of the raw factor values.
        expo_weight: with ``use_factor_expo``, ``1`` uses the cap-weighted
            exposure and ``0`` uses ``strategy_data.get_exposure``.

    Raises:
        ValueError: if ``direction`` is not ``'+'`` or ``'-'``, or if factor
            weighting by exposure is requested with an unsupported
            ``expo_weight``.
    """
    # Validate first: previously an invalid direction only printed a message
    # and then failed on an unbound ``factor_score``.
    if direction not in ('+', '-'):
        raise ValueError(
            "Please enter '+' or '-' for direction argument, got {!r}".format(direction))
    rank_ascending = direction == '+'

    # Loop over the rebalancing days.
    for _, time in self.holding_days.iteritems():
        curr_factor_data = self.strategy_data.factor.ix[0, time, :]
        # Rank the factor values; the rank acts as a score.
        factor_score = curr_factor_data.rank(ascending=rank_ascending)

        # Number of stocks with a usable factor value.
        effective_num = curr_factor_data.dropna().size
        if effective_num == 0:
            # Nothing to choose from on this day.
            continue

        # Score range of the stocks to pick.
        lower_bound = np.floor(effective_num * select_ratio[0])
        upper_bound = np.floor(effective_num * select_ratio[1])
        in_range = np.logical_and(factor_score >= lower_bound,
                                  factor_score <= upper_bound)
        selected_stocks = curr_factor_data.ix[in_range].index

        # Every selected stock gets a holding of 1.
        self.position.holding_matrix.ix[time, selected_stocks] = 1

    if self.strategy_data.stock_pool == 'all':
        # Remove stocks that cannot be traded.
        self.filter_untradable()
    else:
        # With a stock pool, remove stocks that are not investable.
        self.filter_uninv()

    # Equal weights.
    self.position.to_percentage()

    if weight == 1:
        # Market-cap weighting.
        self.position.weighted_holding(self.strategy_data.stock_price.ix[
            'FreeMarketValue', self.position.holding_matrix.index, :])
    elif weight == 2:
        # Factor-value weighting, optionally on the factor exposure.
        if use_factor_expo:
            if expo_weight == 1:
                factor_weight = strategy_data.get_cap_wgt_exposure(
                    self.strategy_data.factor.iloc[0],
                    self.strategy_data.stock_price.ix['FreeMarketValue'])
            elif expo_weight == 0:
                factor_weight = strategy_data.get_exposure(
                    self.strategy_data.factor.iloc[0])
            else:
                raise ValueError(
                    'expo_weight must be 0 or 1, got {!r}'.format(expo_weight))
        else:
            factor_weight = self.strategy_data.factor.iloc[0]
        self.position.weighted_holding(
            factor_weight.ix[self.position.holding_matrix.index, :])
def get_factor_return(self, *, holding_freq='m', weights='default', direction='+', plot_cum=True,
                      start='default', end='default'):
    """Estimate per-period factor returns by cross-sectional regression.

    For every holding day the log return of the period is regressed (with a
    constant) on the previous period's cap-weighted factor exposure.  The
    slope and its t-value are stored in ``self.factor_return_series`` and
    ``self.t_stats_series``; a summary is printed and appended to
    ``<cwd>/<stock_pool>/performance.txt`` and two charts are saved.

    Args:
        holding_freq: frequency passed to ``strategy.resample_tradingdays``.
        weights: ``'default'`` fits without explicit weights; otherwise an
            object whose ``.ix[time]`` gives the WLS weights for that day.
        direction: ``'-'`` flips the sign of both result series.
        plot_cum: plot cumulative (True) or per-period (False) returns.
        start, end: optional slice bounds for the holding days;
            ``'default'`` means unbounded.
    """
    # Load the price data if missing.  It is read shifted: the realised
    # return should use the open of the rebalancing day, but the previous
    # trading day's close is used instead.
    if 'ClosePrice_adj' not in self.strategy_data.stock_price.items:
        temp_panel = data.read_data(['ClosePrice_adj'], ['ClosePrice_adj'], shift=True)
        self.strategy_data.stock_price['ClosePrice_adj'] = temp_panel.ix[
            'ClosePrice_adj']

    # Days on which the factor return is measured.
    holding_days = strategy.resample_tradingdays(
        self.strategy_data.stock_price.ix['FreeMarketValue', :, 0], freq=holding_freq)
    # Restrict to [start, end] when requested.
    if start != 'default':
        holding_days = holding_days[start:]
    if end != 'default':
        holding_days = holding_days[:end]

    # Log returns and factor exposures on the holding days.
    holding_day_price = self.strategy_data.stock_price.ix['ClosePrice_adj', holding_days, :]
    holding_day_return = np.log(
        holding_day_price.div(holding_day_price.shift(1)))
    holding_day_factor = self.strategy_data.factor.ix[0, holding_days, :]
    holding_day_factor_expo = strategy_data.get_cap_wgt_exposure(
        holding_day_factor,
        self.strategy_data.stock_price.ix['FreeMarketValue', holding_days, :])
    # The exposure must come from the previous period.
    holding_day_factor_expo = holding_day_factor_expo.shift(1)

    # Result series, initialised to NaN.
    self.factor_return_series = pd.Series(np.full(holding_days.size, np.nan),
                                          index=holding_days)
    self.t_stats_series = pd.Series(np.full(holding_days.size, np.nan),
                                    index=holding_days)

    # ``weights is 'default'`` relied on string interning; compare by value,
    # guarded so that a DataFrame of weights is never compared to a string.
    use_default_weights = isinstance(weights, str) and weights == 'default'

    # One regression per holding day.
    for _, time in holding_days.iteritems():
        y = holding_day_return.ix[time, :]
        x = holding_day_factor_expo.ix[time, :]
        if y.isnull().all() or x.isnull().all():
            continue
        x = sm.add_constant(x)
        if use_default_weights:
            results = sm.WLS(y, x, missing='drop').fit()
        else:
            results = sm.WLS(y, x, weights=weights.ix[time], missing='drop').fit()
        self.factor_return_series.ix[time] = results.params[1]
        self.t_stats_series.ix[time] = results.tvalues[1]

    # A negative direction flips the sign of returns and t statistics.
    if direction == '-':
        self.factor_return_series = -self.factor_return_series
        self.t_stats_series = -self.t_stats_series

    # Text summary.
    tstats_sig_ratio = self.t_stats_series[
        np.abs(self.t_stats_series) >= 2].size / self.t_stats_series.size
    target_str = (
        'The average return of this factor: {0:.4f}%\n'
        'Note that the return of factor is not annualized but corresponding to the holding days interval\n'
        'The average t-statistics value: {1:.4f}\n'
        'Ratio of t_stats whose absolute value >= 2: {2:.2f}%\n'
    ).format(self.factor_return_series.mean() * 100,
             self.t_stats_series.mean(),
             tstats_sig_ratio * 100)

    out_prefix = str(os.path.abspath('.')) + '/' + self.strategy_data.stock_pool + '/'
    save_to_pdf = not isinstance(self.pdfs, str)

    print(target_str)
    with open(out_prefix + 'performance.txt', 'a', encoding='GB18030') as text_file:
        text_file.write(target_str)

    # Chart 1: factor return, cumulative by default.
    fx = plt.figure()
    ax = fx.add_subplot(1, 1, 1)
    zero_series = pd.Series(np.zeros(self.factor_return_series.shape),
                            index=self.factor_return_series.index)
    if plot_cum:
        plt.plot(self.factor_return_series.cumsum() * 100, 'b-')
    else:
        plt.plot(self.factor_return_series * 100, 'b-')
    plt.plot(zero_series, 'r-')
    ax.set_xlabel('Time')
    ax.set_ylabel('Return of The Factor (%)')
    ax.set_title('The Return Series of The Factor')
    plt.savefig(out_prefix + 'FactorReturn.png', dpi=1200)
    if save_to_pdf:
        plt.savefig(self.pdfs, format='pdf')

    # Chart 2: t statistics of the factor return.
    fx = plt.figure()
    ax = fx.add_subplot(1, 1, 1)
    plt.plot(self.t_stats_series, 'b-')
    plt.plot(zero_series, 'r-')
    ax.set_xlabel('Time')
    ax.set_ylabel('T-Stats of The Factor Return')
    ax.set_title('The T-Stats Series of The Factor Return')
    plt.savefig(out_prefix + 'FactorReturnTStats.png', dpi=1200)
    if save_to_pdf:
        plt.savefig(self.pdfs, format='pdf')
def select_stocks_pure_factor(self, *, base_expo, cov_matrix='Empty', reg_weight='Empty',
                              direction='+', benchmark_weight='Empty', is_long_only=True):
    """Build pure-factor holdings by solving a quadratic program per day.

    For every holding day the problem ``min 1/2 x'Vx - f'x`` is solved with
    ``solvers.qp`` subject to the base-factor exposures of ``x`` matching a
    target (zero, or the benchmark's exposures) and, optionally, ``x >= 0``.

    Args:
        base_expo: Panel of base factor exposures used in the constraints.
        cov_matrix: per-date covariance container; any non-``str`` value
            takes priority over ``reg_weight``.
        reg_weight: per-date regression weights; the pseudo-inverse of their
            normalised diagonal matrix is used as ``V`` when no covariance
            matrix is supplied.
        direction: ``'-'`` flips the sign of the factor exposure.
        benchmark_weight: benchmark weights; when given, exposures are taken
            relative to the benchmark.
        is_long_only: add the non-negativity constraint.

    Raises:
        AssertionError: if neither ``cov_matrix`` nor ``reg_weight`` is given.
    """
    # Exposure of the factor being tested.
    factor_expo = strategy_data.get_cap_wgt_exposure(
        self.strategy_data.factor.iloc[0],
        self.strategy_data.stock_price.ix['FreeMarketValue'])
    if direction == '-':
        factor_expo = -factor_expo
    self.strategy_data.factor_expo = pd.Panel(
        {'factor_expo': factor_expo},
        major_axis=self.strategy_data.factor.major_axis,
        minor_axis=self.strategy_data.factor.minor_axis)

    has_benchmark = not isinstance(benchmark_weight, str)
    use_cov = not isinstance(cov_matrix, str)

    # With a benchmark, compute its exposures and make the factor exposure active.
    if has_benchmark:
        benchmark_weight = (benchmark_weight.div(benchmark_weight.sum(1), axis=0)).fillna(0)
        tradable = self.strategy_data.if_tradable.ix['if_tradable']
        adjusted_base_expo = strategy_data.adjust_benchmark_related_expo(
            base_expo, benchmark_weight, tradable)
        benchmark_base_expo = np.einsum('ijk,jk->ji', adjusted_base_expo.fillna(0),
                                        benchmark_weight.fillna(0))
        benchmark_base_expo = pd.DataFrame(benchmark_base_expo,
                                           index=base_expo.major_axis,
                                           columns=base_expo.items)
        adjusted_factor_expo = strategy_data.adjust_benchmark_related_expo(
            pd.Panel({'factor_expo': factor_expo}), benchmark_weight, tradable)
        adjusted_factor_expo = adjusted_factor_expo.ix['factor_expo']
        benchmark_curr_factor_expo = (adjusted_factor_expo * benchmark_weight).sum(1)
        self.strategy_data.factor_expo.ix['factor_expo'] = factor_expo.sub(
            benchmark_curr_factor_expo, axis=0)

    # Loop over the rebalancing days.
    for _, time in self.holding_days.iteritems():
        curr_factor_expo = self.strategy_data.factor_expo.ix['factor_expo', time, :]
        curr_base_expo = base_expo.ix[:, time, :]

        if use_cov:
            # A covariance matrix has priority when supplied.  Its diagonal
            # is only used to drop stocks with missing data; it is built with
            # numpy because the matrix is used as a DataFrame below.
            curr_cov = cov_matrix.ix[time]
            curr_v_diag = pd.Series(np.diag(curr_cov.values), index=curr_cov.index)
        else:
            assert type(reg_weight) != str, 'The construction of pure factor portfolio require one of following:\n' \
                                            'Covariance matrix of factor returns (priority), OR \n' \
                                            'Regression weight when getting factor return using linear regression.\n'
            # Current regression weights, one per stock.
            curr_v_diag = reg_weight.ix[time]

        # Drop stocks with any missing value.
        all_data = pd.concat(
            [curr_v_diag, curr_factor_expo, curr_base_expo], axis=1).dropna()
        # With at most one usable stock nothing is selected for this period.
        if all_data.shape[0] <= 1:
            continue
        # Inside an index an industry exposure can be all zero, which makes
        # its constraint redundant, so such columns are removed.
        all_data = all_data.replace(0, np.nan).dropna(
            axis=1, how='all').fillna(0.0)
        # Column order follows the concat above.  The covariance branch used
        # to read columns 0 and 1 swapped, feeding the covariance diagonal to
        # the objective instead of the factor exposure.
        curr_v_diag = all_data.ix[:, 0]
        curr_factor_expo = all_data.ix[:, 1]
        curr_base_expo = all_data.ix[:, 2:]

        if use_cov:
            curr_v = curr_cov.reindex(index=curr_v_diag.index,
                                      columns=curr_v_diag.index)
        else:
            # Normalise the regression weights.
            curr_v_diag = curr_v_diag / curr_v_diag.sum()
            curr_v = np.linalg.pinv(np.diag(curr_v_diag))
            curr_v = pd.DataFrame(curr_v, index=curr_factor_expo.index,
                                  columns=curr_factor_expo.index)

        # Constraint target: zero exposure to the other factors, or the
        # benchmark's exposure when a benchmark is given.
        if has_benchmark:
            expo_target = benchmark_base_expo.ix[time].reindex(
                index=curr_base_expo.columns)
        else:
            expo_target = pd.Series(0.0, index=curr_base_expo.columns)

        # Set up the optimisation: P = V, q = -(factor_expo.T),
        # A x = b pins the exposures to the other factors.
        P = matrix(curr_v.as_matrix())
        q = matrix(-curr_factor_expo.as_matrix().transpose())
        A = matrix(curr_base_expo.as_matrix().transpose())
        b = matrix(expo_target.as_matrix())

        solvers.options['show_progress'] = False

        if is_long_only:
            # Long only: every weight must be >= 0, i.e. -I x <= 0.
            long_only_constraint = pd.DataFrame(
                -1.0 * np.eye(curr_factor_expo.size),
                index=curr_factor_expo.index,
                columns=curr_factor_expo.index)
            long_only_target = pd.Series(0.0, index=curr_factor_expo.index)
            G = matrix(long_only_constraint.as_matrix())
            h = matrix(long_only_target.as_matrix())
            results = solvers.qp(P=P, q=q, A=A, b=b, G=G, h=h)
        else:
            results = solvers.qp(P=P, q=q, A=A, b=b)

        results_np = np.array(results['x']).squeeze()
        results_s = pd.Series(results_np, index=curr_factor_expo.index)
        # Re-index to all stock codes; stocks left out hold nothing.
        results_s = results_s.reindex(
            self.strategy_data.stock_price.minor_axis, fill_value=0)

        # Holdings for this day.
        self.position.holding_matrix.ix[time] = results_s

    # After the loop, normalise the weights.
    self.position.to_percentage()
def get_table3(self, *, freq='w', foldername=None, startdate=None, enddate=None):
    """Run the "table 3" Fama-MacBeth regressions of future returns.

    Next-period returns are regressed on ``lagged_ep`` (standardised rv3),
    ``sue`` (rv36) and ``reversal`` (rv8) in five specifications using
    ``strategy_data.fama_macbeth``; coefficients and t statistics go into a
    local ``table3`` Panel.  The regressions are then repeated with
    ``FamaMacBeth`` and an explicit constant.  CSV export is currently
    disabled, so ``foldername`` is not used and nothing is returned.

    Args:
        freq: holding / resampling frequency.
        foldername: unused while the export is disabled.
        startdate, enddate: bounds for the holding days and the return slice.
    """
    # Holding days at the requested frequency come first.
    self.generate_holding_days(holding_freq=freq, loc=-1,
                               start_date=startdate, end_date=enddate)
    sdata = self.strategy_data

    # Read rv3 and standardise it.
    sdata.raw_data['rv3'] = data.read_data('runner_value_3')
    sdata.raw_data['rv3'] = strategy_data.get_cap_wgt_exposure(
        sdata.raw_data['rv3'], sdata.stock_price['FreeMarketValue'])

    sdata.stock_price['ClosePrice_adj'] = data.read_data('ClosePrice_adj')
    sdata.stock_price['daily_return'] = \
        sdata.stock_price['ClosePrice_adj'].pct_change()

    # Returns at the holding frequency (the paper uses months, this code
    # usually weeks).
    r = sdata.stock_price['daily_return', startdate:enddate, :].\
        resample(freq).sum()
    # The left-hand side is the NEXT period's return, hence shift(-1):
    # this deliberately uses future data.
    r = r.shift(-1).dropna(how='all')
    # Resampled labels are period ends, which need not be trading days, so
    # the index is replaced by the holding days.
    r = r.set_index(self.holding_days)

    # Right-hand side of the regressions.
    reg_panel = pd.Panel({
        'lagged_ep': sdata.raw_data['rv3'].shift(0),
        'sue': sdata.raw_data['rv36'],
        'reversal': sdata.raw_data['rv8']
    })

    # Container for the table-3 results.
    table3 = pd.Panel(
        items=['coef', 't_stats'],
        major_axis=np.arange(5),
        minor_axis=['intercept', 'lagged_ep', 'sue', 'reversal'])

    # Five specifications, regressed on the holding days.
    # NOTE(review): the original comment for the fourth specification names
    # a different pair of regressors than the code uses - confirm intent.
    specifications = (
        (['lagged_ep'], {}),
        (['sue'], {}),
        (['reversal'], {}),
        (['lagged_ep', 'reversal'], {}),
        (['lagged_ep', 'sue', 'reversal'], {'nw_lags': 0}),
    )
    for row, (regressors, extra_kwargs) in enumerate(specifications):
        outcome = strategy_data.fama_macbeth(
            r, reg_panel.ix[regressors, self.holding_days, :], **extra_kwargs)
        table3.ix['coef', row, :] = outcome[0]
        table3.ix['t_stats', row, :] = outcome[1]

    # Writing table3 to 'Table3_coef.csv' / 'Table3_t_stats.csv' is disabled.

    # Same exercise with the FamaMacBeth estimator and an explicit constant;
    # the fitted results are not stored.
    reg_panel['const'] = 1.0
    linearmodels_specs = (
        ['lagged_ep', 'const'],
        ['sue', 'const'],
        ['reversal', 'const'],
        ['reversal', 'lagged_ep', 'const'],
        ['reversal', 'sue', 'const'],
    )
    for regressors in linearmodels_specs:
        FamaMacBeth(r, reg_panel.ix[regressors, self.holding_days, :]).fit()
    FamaMacBeth(r, reg_panel.ix[:, self.holding_days, :]).fit()