Esempio n. 1
0
    def get_growth(self):
        """Populate ``self.bb_data.factor['growth']``.

        If ``growth.csv`` exists in the working directory and
        ``self.is_update`` is falsy, the factor is loaded through
        ``data.read_data``.  Otherwise the four descriptors (egrlf, egrsf,
        egro, sgro) are recomputed, each is passed through
        ``strategy_data.get_cap_wgt_exposure`` and the results are blended
        with the fixed weights 0.18 / 0.11 / 0.24 / 0.47.
        """
        if os.path.isfile('growth.csv') and not self.is_update:
            cached = data.read_data(['growth'], ['growth'])
            self.bb_data.factor['growth'] = cached.ix['growth']
            return

        # Recompute the raw descriptors, then drop uninvestable data.
        self.get_g_egrlf()
        self.get_g_egrsf()
        self.get_g_egro()
        self.get_g_sgro()
        self.bb_data.discard_uninv_data()

        # (blend weight, raw descriptor key, exposure key)
        components = (
            (0.18, 'egrlf', 'egrlf_expo'),
            (0.11, 'egrsf', 'egrsf_expo'),
            (0.24, 'egro', 'egro_expo'),
            (0.47, 'sgro', 'sgro_expo'),
        )

        # Compute the exposure of each of the four component descriptors.
        for _, raw_key, expo_key in components:
            self.bb_data.raw_data[
                expo_key] = strategy_data.get_cap_wgt_exposure(
                    self.bb_data.raw_data.ix[raw_key],
                    self.bb_data.stock_price.ix['FreeMarketValue'])

        # Weighted sum of the exposures, accumulated left to right.
        blended = None
        for coef, _, expo_key in components:
            term = coef * self.bb_data.raw_data.ix[expo_key]
            blended = term if blended is None else blended + term
        self.bb_data.factor['growth'] = blended
Esempio n. 2
0
    def get_earnings_yeild(self):
        """Populate ``self.bb_data.factor['ey']`` (earnings yield).

        If ``ey.csv`` exists in the working directory and ``self.is_update``
        is falsy, the factor is loaded through ``data.read_data``.  Otherwise
        the three descriptors (epfwd, cetop, etop) are recomputed, each is
        passed through ``strategy_data.get_cap_wgt_exposure`` and the results
        are blended with the fixed weights 0.68 / 0.21 / 0.11.
        """
        if os.path.isfile('ey.csv') and not self.is_update:
            cached = data.read_data(['ey'], ['ey'])
            self.bb_data.factor['ey'] = cached.ix['ey']
            return

        # Recompute the raw descriptors, then drop uninvestable data.
        self.get_ey_epfwd()
        self.get_ey_cetop()
        self.get_ey_etop()
        self.bb_data.discard_uninv_data()

        # (blend weight, raw descriptor key, exposure key)
        components = (
            (0.68, 'epfwd', 'epfwd_expo'),
            (0.21, 'cetop', 'cetop_expo'),
            (0.11, 'etop', 'etop_expo'),
        )

        # Compute the exposure of each of the three component descriptors.
        for _, raw_key, expo_key in components:
            self.bb_data.raw_data[
                expo_key] = strategy_data.get_cap_wgt_exposure(
                    self.bb_data.raw_data.ix[raw_key],
                    self.bb_data.stock_price.ix['FreeMarketValue'])

        # Weighted sum of the exposures, accumulated left to right.
        blended = None
        for coef, _, expo_key in components:
            term = coef * self.bb_data.raw_data.ix[expo_key]
            blended = term if blended is None else blended + term
        self.bb_data.factor['ey'] = blended
Esempio n. 3
0
    def get_liquidity(self):
        """Populate ``self.bb_data.factor['liquidity']``.

        If ``liquidity.csv`` exists in the working directory and
        ``self.is_update`` is falsy, the factor is loaded through
        ``data.read_data``.  Otherwise the three turnover descriptors (stom,
        stoq, stoa) are recomputed, turned into exposures, blended with
        weights 0.35 / 0.35 / 0.3 and finally orthogonalised against the
        'lncap' exposure via ``strategy_data.simple_orth_gs``.
        """
        if os.path.isfile('liquidity.csv') and not self.is_update:
            cached = data.read_data(['liquidity'], ['liquidity'])
            self.bb_data.factor['liquidity'] = cached.ix['liquidity']
            return

        self.get_liq_stom()
        self.get_liq_stoq()
        self.get_liq_stoa()
        # Filter the data.
        self.bb_data.discard_uninv_data()

        # (blend weight, raw descriptor key, exposure key)
        components = (
            (0.35, 'stom', 'stom_expo'),
            (0.35, 'stoq', 'stoq_expo'),
            (0.3, 'stoa', 'stoa_expo'),
        )

        # Compute the exposure of each of the three component descriptors.
        for _, raw_key, expo_key in components:
            self.bb_data.raw_data[
                expo_key] = strategy_data.get_cap_wgt_exposure(
                    self.bb_data.raw_data.ix[raw_key],
                    self.bb_data.stock_price.ix['FreeMarketValue'])

        # Weighted sum of the exposures, accumulated left to right.
        blended = None
        for coef, _, expo_key in components:
            term = coef * self.bb_data.raw_data.ix[expo_key]
            blended = term if blended is None else blended + term

        # Exposure of the blended liquidity; percentile=0 means outliers are
        # not trimmed again (per the original author's note).
        dependent = strategy_data.get_cap_wgt_exposure(
            blended,
            self.bb_data.stock_price.ix['FreeMarketValue'],
            percentile=0)
        # Exposure of the market-cap factor, wrapped in a Panel as regressor.
        lncap_expo = strategy_data.get_cap_wgt_exposure(
            self.bb_data.factor.ix['lncap'],
            self.bb_data.stock_price.ix['FreeMarketValue'])
        regressors = pd.Panel({'lncap_expo': lncap_expo})
        # Orthogonalise, weighting by the square root of free market value.
        sqrt_cap = np.sqrt(self.bb_data.stock_price.ix['FreeMarketValue'])
        orth_result = strategy_data.simple_orth_gs(
            dependent, regressors, weights=sqrt_cap)
        self.bb_data.factor['liquidity'] = orth_result[0]
Esempio n. 4
0
 def get_style_factor_exposure(self):
     """Fill ``self.bb_data.factor_expo`` with one exposure per factor.

     A fresh, empty Panel sharing the major/minor axes of
     ``self.bb_data.factor`` is created, then every item of the factor
     panel is passed through ``strategy_data.get_cap_wgt_exposure``.
     """
     factor_panel = self.bb_data.factor
     # Give the exposure panel the same indexes as the factor panel.
     self.bb_data.factor_expo = pd.Panel(
         data=None,
         major_axis=factor_panel.major_axis,
         minor_axis=factor_panel.minor_axis)

     # Factors built by summing component exposures, or whose exposure was
     # already computed once (e.g. orthogonalised ones), are not trimmed
     # for outliers again: they are computed with percentile=0.
     already_trimmed = ('rv', 'nls', 'liquidity', 'ey', 'growth')

     for item, df in self.bb_data.factor.iteritems():
         extra_kwargs = {'percentile': 0} if item in already_trimmed else {}
         self.bb_data.factor_expo[
             item] = strategy_data.get_cap_wgt_exposure(
                 df,
                 self.bb_data.stock_price.ix['FreeMarketValue'],
                 **extra_kwargs)
Esempio n. 5
0
    def select_stocks_pure_factor_bb(self,
                                     *,
                                     bb_expo,
                                     cov_matrix='Empty',
                                     reg_weight='Empty',
                                     direction='+',
                                     regulation_lambda=1):
        """Compute pure-factor holdings from a closed-form solution.

        For every holding day the holdings are
        ``1/lambda * V^-1 (x_a - X (X' V^-1 X)^+ X' V^-1 x_a)``, where
        ``x_a`` is the current factor exposure and ``X`` the exposures in
        ``bb_expo``.  The original author cites Barra, "Efficient Replication
        of Factor Returns", equation (6).

        Args:
            bb_expo: panel of the other (base) factor exposures, indexed as
                ``bb_expo.ix[:, time, :]``.
            cov_matrix: per-date covariance data (takes priority); the string
                default means "not supplied".
            reg_weight: per-date regression weights used when no covariance
                matrix is given; the string default means "not supplied".
            direction: ``'-'`` flips the sign of the factor exposure.
            regulation_lambda: divisor applied to the solution.

        Raises:
            AssertionError: when neither ``cov_matrix`` nor ``reg_weight`` is
                supplied.
        """
        # Exposure of the strategy factor, sign-flipped when requested.
        signed_expo = strategy_data.get_cap_wgt_exposure(
            self.strategy_data.factor.iloc[0],
            self.strategy_data.stock_price.ix['FreeMarketValue'])
        if direction == '-':
            signed_expo = -signed_expo
        self.strategy_data.factor_expo = pd.Panel(
            {'factor_expo': signed_expo},
            major_axis=self.strategy_data.factor.major_axis,
            minor_axis=self.strategy_data.factor.minor_axis)

        # Loop over the rebalancing days.
        for _, time in self.holding_days.iteritems():
            # Current factor exposure vector (n x 1 per the original note).
            x_alpha = self.strategy_data.factor_expo.ix['factor_expo',
                                                        time, :].fillna(0)
            # Exposures to the other factors (n x (k-1) per the original
            # note), i.e. the barra base factor exposures.
            x_sigma = bb_expo.ix[:, time, :].fillna(0)

            if type(cov_matrix) == str:
                # No covariance matrix: fall back to regression weights.
                assert type(reg_weight) != str, 'The construction of pure factor portfolio require one of following:\n' \
                                                'Covariance matrix of factor returns (priority), OR \n' \
                                                'Regression weight when getting factor return using linear regression.\n'
                # Normalised regression weights go on the diagonal.
                day_weight = reg_weight.ix[time]
                day_weight = (day_weight / day_weight.sum()).fillna(0)
                inv_v = np.diag(day_weight)
            else:
                # A covariance matrix is available and takes priority.
                inv_v = np.linalg.pinv(cov_matrix.ix[time].fillna(0))

            # Closed-form optimum.
            xt_inv_v = np.dot(x_sigma.T, inv_v)
            gram_pinv = np.linalg.pinv(np.dot(xt_inv_v, x_sigma))
            projected = np.dot(xt_inv_v, x_alpha)
            residual = x_alpha - np.dot(np.dot(x_sigma, gram_pinv), projected)
            h_star = 1 / regulation_lambda * np.dot(inv_v, residual)

            # This is the only weighting scheme; it is normalised below.
            self.position.holding_matrix.ix[time] = h_star

        self.position.to_percentage()
Esempio n. 6
0
 def get_nonlinear_size(self):
     """Populate ``self.bb_data.factor['nls']`` (non-linear size).

     If ``nls.csv`` exists in the working directory and ``self.is_update``
     is falsy, the factor is loaded through ``data.read_data``.  Otherwise
     the cube of the 'lncap' factor is turned into an exposure and
     orthogonalised against the 'lncap' exposure with
     ``strategy_data.simple_orth_gs``.
     """
     if os.path.isfile('nls.csv') and not self.is_update:
         cached = data.read_data(['nls'], ['nls'])
         self.bb_data.factor['nls'] = cached.ix['nls']
         return

     cubed_size = self.bb_data.factor.ix['lncap']**3
     # Exposure of the raw (cubed) non-linear size.
     dependent = strategy_data.get_cap_wgt_exposure(
         cubed_size, self.bb_data.stock_price.ix['FreeMarketValue'])
     # Exposure of the market-cap factor; the regressor must be a Panel.
     lncap_expo = strategy_data.get_cap_wgt_exposure(
         self.bb_data.factor.ix['lncap'],
         self.bb_data.stock_price.ix['FreeMarketValue'])
     regressors = pd.Panel({'lncap_expo': lncap_expo})
     # Orthogonalise against the market-cap factor, weighting by the
     # square root of free market value.
     sqrt_cap = np.sqrt(self.bb_data.stock_price.ix['FreeMarketValue'])
     orth_result = strategy_data.simple_orth_gs(
         dependent, regressors, weights=sqrt_cap)
     self.bb_data.factor['nls'] = orth_result[0]
Esempio n. 7
0
    def construct_factor(self):
        """Build the 'new_reversal' factor from the rv8 and rv36 series.

        Reads 'runner_value_8' / 'runner_value_36' (as 'rv8' / 'rv36') and
        'FreeMarketValue', standardises both rv series with
        ``strategy_data.get_cap_wgt_exposure``, regresses rv8 on rv36 through
        ``strategy_data.simple_orth_gs`` with ``weights=None`` and stores the
        first element of the result, shifted by one row, as
        ``self.strategy_data.factor``.
        """
        sdata = self.strategy_data

        # Use rv8 and rv36 from the runner-value files directly.
        sdata.raw_data = data.read_data(
            ['runner_value_8', 'runner_value_36'], item_name=['rv8', 'rv36'])
        sdata.stock_price = data.read_data(['FreeMarketValue'])

        # Disabled alternative: standardise and regress inside the
        # investable universe only.
        # self.strategy_data.discard_uninv_data()

        # The rv series are not standardised yet, so apply the
        # cap-weighted standardisation here.
        for key in ('rv8', 'rv36'):
            sdata.raw_data[key] = strategy_data.get_cap_wgt_exposure(
                sdata.raw_data[key], sdata.stock_price['FreeMarketValue'])

        # Equal-weighted regression.
        reg_weight = None
        # Disabled alternative: weight by the square root of market value.
        # reg_weight = np.sqrt(self.strategy_data.stock_price['FreeMarketValue'])
        outcome = strategy_data.simple_orth_gs(
            sdata.raw_data['rv8'],
            sdata.raw_data[['rv36']],
            weights=reg_weight)

        # Elements 1 and 2 (named p-values and R-squared by the original
        # author) are unpacked but not used further.
        new_factor, _pvalues, _rsquared = outcome[0], outcome[1], outcome[2]

        # Store the factor.
        sdata.factor = pd.Panel({'new_reversal': new_factor.shift(1)})
Esempio n. 8
0
    def get_residual_volatility(self):
        """Populate ``self.bb_data.factor['rv']`` (residual volatility).

        If ``rv.csv`` exists in the working directory and ``self.is_update``
        is falsy, the factor is loaded through ``data.read_data``.  Otherwise
        the three descriptors (dastd, cmra, hsigma) are recomputed, turned
        into exposures, blended with weights 0.74 / 0.16 / 0.1 and
        orthogonalised against the 'lncap' and 'beta' exposures via
        ``strategy_data.simple_orth_gs``.
        """
        if os.path.isfile('rv.csv') and not self.is_update:
            cached = data.read_data(['rv'], ['rv'])
            self.bb_data.factor['rv'] = cached.ix['rv']
            return

        self.get_rv_dastd()
        self.get_rv_cmra()
        self.get_rv_hsigma()
        # Filter the data: the factor data is orthogonalised afterwards and
        # unfiltered data would affect that computation.
        # Per the original author, this is the first place in the barra base
        # computation where uninvestable data is dropped; nothing computed
        # after this point should be stored, as it depends on the stock pool.
        self.bb_data.discard_uninv_data()

        # (blend weight, raw descriptor key, exposure key)
        components = (
            (0.74, 'dastd', 'dastd_expo'),
            (0.16, 'cmra', 'cmra_expo'),
            (0.1, 'hsigma', 'hsigma_expo'),
        )

        # Compute the exposure of each of the three component descriptors.
        for _, raw_key, expo_key in components:
            self.bb_data.raw_data[
                expo_key] = strategy_data.get_cap_wgt_exposure(
                    self.bb_data.raw_data.ix[raw_key],
                    self.bb_data.stock_price.ix['FreeMarketValue'])

        # Weighted sum of the exposures, accumulated left to right.
        blended = None
        for coef, _, expo_key in components:
            term = coef * self.bb_data.raw_data.ix[expo_key]
            blended = term if blended is None else blended + term

        # Exposure of the blended rv; percentile=0 means outliers are not
        # trimmed again (per the original author's note).
        dependent = strategy_data.get_cap_wgt_exposure(
            blended,
            self.bb_data.stock_price.ix['FreeMarketValue'],
            percentile=0)
        # Exposures of the market-cap and beta factors, used as regressors.
        lncap_expo = strategy_data.get_cap_wgt_exposure(
            self.bb_data.factor.ix['lncap'],
            self.bb_data.stock_price.ix['FreeMarketValue'])
        beta_expo = strategy_data.get_cap_wgt_exposure(
            self.bb_data.factor.ix['beta'],
            self.bb_data.stock_price.ix['FreeMarketValue'])
        regressors = pd.Panel({
            'lncap_expo': lncap_expo,
            'beta_expo': beta_expo
        })
        # Orthogonalise, weighting by the square root of free market value.
        sqrt_cap = np.sqrt(self.bb_data.stock_price.ix['FreeMarketValue'])
        orth_result = strategy_data.simple_orth_gs(
            dependent, regressors, weights=sqrt_cap)
        # The exposure is computed once more later on; the original author
        # notes that the result stays orthogonal to the regressors.
        self.bb_data.factor['rv'] = orth_result[0]
Esempio n. 9
0
 def get_nonlinear_size(self):
     """Populate ``self.base_data.factor['nls']`` (non-linear size).

     The stored factor is read with ``data.read_data`` when the cache file
     exists, ``self.is_update`` is falsy and ``self.try_to_read`` is truthy.
     Otherwise the 'lncap' exposure is cubed and orthogonalised against
     itself via ``strategy_data.simple_orth_gs``.
     """
     # NOTE(review): the file checked is 'nls_total<appendix>' while the
     # name read is 'nls<appendix>' -- confirm that this is intentional.
     cache_file = os.path.abspath('.')+'/ResearchData/nls_total'+self.filename_appendix
     if os.path.isfile(cache_file) \
             and not self.is_update and self.try_to_read:
         self.base_data.factor['nls'] = data.read_data(
             'nls' + self.filename_appendix)
         return

     # Exposure of the market-cap factor; the regressor must be a Panel.
     lncap_expo = strategy_data.get_cap_wgt_exposure(
         self.base_data.factor.ix['lncap'],
         self.base_data.stock_price.ix['FreeMarketValue'])
     regressors = pd.Panel({'lncap_expo': lncap_expo})
     # Cube the market-cap exposure to obtain the "size cube".
     size_cube = regressors['lncap_expo']**3
     # Orthogonalise the size cube against the market-cap exposure,
     # weighting by the square root of free market value.
     sqrt_cap = np.sqrt(self.base_data.stock_price.ix['FreeMarketValue'])
     orth_result = strategy_data.simple_orth_gs(
         size_cube, regressors, weights=sqrt_cap)
     self.base_data.factor['nls'] = orth_result[0]
Esempio n. 10
0
    def get_pure_factor_gs_orth(self,
                                base_expo,
                                *,
                                do_active_bb_pure_factor=False,
                                reg_weight=1,
                                add_constant=False,
                                use_factor_expo=True,
                                expo_weight=1):
        """Purify the strategy factor by regressing it on ``base_expo``.

        The first item of ``self.strategy_data.factor`` is replaced in place
        by the first element returned by ``strategy_data.simple_orth_gs``.

        Args:
            base_expo: panel of base factor exposures used as regressors.
            do_active_bb_pure_factor: when true, benchmark exposures are
                subtracted from both ``base_expo`` and the factor exposure
                before the regression.
            reg_weight: 1 weights the regression by the square root of
                'FreeMarketValue'; 0 runs it without weights.
            add_constant: forwarded to ``strategy_data.simple_orth_gs``.
            use_factor_expo: when true the factor is first converted to an
                exposure; when false the raw factor values are used (the
                same fallback ``select_stocks`` applies).
            expo_weight: 1 uses ``get_cap_wgt_exposure``; 0 uses
                ``get_exposure``.  Only consulted if ``use_factor_expo``.

        Raises:
            ValueError: if ``reg_weight`` or (when used) ``expo_weight`` is
                neither 0 nor 1.  Previously such values surfaced as an
                ``UnboundLocalError`` after the work had been done.
        """
        # Validate the selector arguments up front so a bad value fails
        # with a clear message before any data is touched.
        if reg_weight not in (0, 1):
            raise ValueError(
                'reg_weight must be 0 or 1, got {!r}'.format(reg_weight))
        if use_factor_expo and expo_weight not in (0, 1):
            raise ValueError(
                'expo_weight must be 0 or 1, got {!r}'.format(expo_weight))

        # Exposure of the current factor; per the original author's note
        # the data held by the strategy has already been lagged.
        if not use_factor_expo:
            factor_expo = self.strategy_data.factor.iloc[0]
        elif expo_weight == 1:
            factor_expo = strategy_data.get_cap_wgt_exposure(
                self.strategy_data.factor.iloc[0],
                self.strategy_data.stock_price.ix['FreeMarketValue'])
        else:
            factor_expo = strategy_data.get_exposure(
                self.strategy_data.factor.iloc[0])

        # Pure factor measured relative to a benchmark.
        if do_active_bb_pure_factor:
            if self.strategy_data.stock_pool == 'all':
                # NOTE(review): this branch keeps whatever read_data returns,
                # while the other branch selects a single item with .ix --
                # confirm both yield the same kind of object.
                benchmark_weight = data.read_data(['Weight_zz500'],
                                                  ['Weight_zz500'],
                                                  shift=True)
            else:
                benchmark_weight = self.strategy_data.benchmark_price.ix[
                    'Weight_' + self.strategy_data.stock_pool]
            # Adjusted base exposures and the benchmark's exposure to them.
            adjusted_bb_expo = strategy_data.adjust_benchmark_related_expo(
                base_expo, benchmark_weight,
                self.strategy_data.if_tradable.ix['if_tradable'])
            benchmark_bb_expo = np.einsum('ijk,jk->ji',
                                          adjusted_bb_expo.fillna(0),
                                          benchmark_weight.fillna(0))
            benchmark_bb_expo = pd.DataFrame(benchmark_bb_expo,
                                             index=base_expo.major_axis,
                                             columns=base_expo.items)
            # Adjusted exposure of the current factor and the benchmark's
            # exposure to it.
            adjusted_factor_expo = strategy_data.adjust_benchmark_related_expo(
                self.strategy_data.factor, benchmark_weight,
                self.strategy_data.if_tradable.ix['if_tradable'])
            adjusted_factor_expo = adjusted_factor_expo.iloc[0]
            benchmark_factor_expo = (adjusted_factor_expo *
                                     benchmark_weight).sum(1)
            # Absolute exposure minus benchmark exposure gives the active
            # exposure relative to the benchmark.
            base_expo = base_expo.sub(benchmark_bb_expo, axis=0)
            factor_expo = factor_expo.sub(benchmark_factor_expo, axis=0)

        # Drop the country factor from the regressors.  Per the original
        # author this guarantees a unique solution without changing the
        # residuals, because the country factor is already a linear
        # combination of the industry exposures.
        if 'country_factor' in base_expo.items:
            base_expo_no_cf = base_expo.drop('country_factor', axis=0)
        else:
            base_expo_no_cf = base_expo

        # Purify with a multiple linear regression.
        if reg_weight == 1:
            pure_factor_expo = strategy_data.simple_orth_gs(
                factor_expo,
                base_expo_no_cf,
                weights=np.sqrt(
                    self.strategy_data.stock_price.ix['FreeMarketValue']),
                add_constant=add_constant)[0]
        else:
            pure_factor_expo = strategy_data.simple_orth_gs(
                factor_expo, base_expo_no_cf, add_constant=add_constant)[0]

        # Store the purified factor back into the factor panel.
        self.strategy_data.factor.iloc[0] = pure_factor_expo
Esempio n. 11
0
    def select_stocks(self,
                      *,
                      select_ratio=[0.8, 1],
                      direction='+',
                      weight=0,
                      use_factor_expo=True,
                      expo_weight=1):
        """Pick stocks by factor rank on every holding day and weight them.

        Args:
            select_ratio: two-element sequence ``[low, high]``; the rank
                window kept is ``floor(n * low) .. floor(n * high)`` where
                ``n`` is the number of non-NaN factor values.  The default
                list is only read, never mutated.
            direction: ``'+'`` ranks ascending, ``'-'`` ranks descending.
            weight: 0 keeps equal weights, 1 weights by 'FreeMarketValue',
                2 weights by the factor (or its exposure).
            use_factor_expo: with ``weight == 2``, weight by a factor
                exposure instead of the raw factor values.
            expo_weight: with ``weight == 2`` and ``use_factor_expo``,
                1 uses ``get_cap_wgt_exposure`` and 0 uses ``get_exposure``.

        Raises:
            ValueError: if ``direction`` is not ``'+'`` or ``'-'``, or if
                ``expo_weight`` is needed and is neither 0 nor 1.
                Previously these cases printed a hint and/or failed later
                on an unbound local variable.
        """
        # Strings are compared by value, not identity, and invalid input
        # is rejected before any holding is modified.
        if direction not in ('+', '-'):
            raise ValueError('Please enter + or - for direction argument')
        if weight == 2 and use_factor_expo and expo_weight not in (0, 1):
            raise ValueError(
                'expo_weight must be 0 or 1, got {!r}'.format(expo_weight))

        # The ranking order does not depend on the date.
        rank_ascending = direction == '+'

        # Loop over the rebalancing dates.
        for _, time in self.holding_days.iteritems():
            curr_factor_data = self.strategy_data.factor.ix[0, time, :]

            # Number of stocks with a usable factor value.
            effective_num = curr_factor_data.dropna().size
            # Nothing to choose from: move on to the next date.
            if effective_num == 0:
                continue

            # Rank the factor values; the rank acts as a score.
            factor_score = curr_factor_data.rank(ascending=rank_ascending)

            # Score window of the stocks to keep.
            lower_bound = np.floor(effective_num * select_ratio[0])
            upper_bound = np.floor(effective_num * select_ratio[1])
            # Select the stocks whose score falls inside the window.
            selected_stocks = curr_factor_data.ix[np.logical_and(
                factor_score >= lower_bound,
                factor_score <= upper_bound)].index
            # Every selected stock gets a holding of 1.
            self.position.holding_matrix.ix[time, selected_stocks] = 1

        if self.strategy_data.stock_pool == 'all':
            # Remove stocks that cannot be traded.
            self.filter_untradable()
        else:
            # With a stock pool, remove stocks that are not investable.
            self.filter_uninv()
        # Start from equal weights.
        self.position.to_percentage()

        if weight == 1:
            # Market-value weighting.
            self.position.weighted_holding(self.strategy_data.stock_price.ix[
                'FreeMarketValue', self.position.holding_matrix.index, :])
        elif weight == 2:
            # Factor weighting, optionally through the factor exposure.
            if not use_factor_expo:
                factor_weight = self.strategy_data.factor.iloc[0]
            elif expo_weight == 1:
                factor_weight = strategy_data.get_cap_wgt_exposure(
                    self.strategy_data.factor.iloc[0],
                    self.strategy_data.stock_price.ix['FreeMarketValue'])
            else:
                factor_weight = strategy_data.get_exposure(
                    self.strategy_data.factor.iloc[0])
            # Apply the factor-based weights.
            self.position.weighted_holding(
                factor_weight.ix[self.position.holding_matrix.index, :])
Esempio n. 12
0
    def get_factor_return(self,
                          *,
                          holding_freq='m',
                          weights='default',
                          direction='+',
                          plot_cum=True,
                          start='default',
                          end='default'):
        """Estimate per-period factor returns by cross-sectional regression.

        For every holding day the log return of each stock is regressed
        (``sm.WLS`` with an added constant) on the previous period's factor
        exposure.  The slope and its t-value are stored in
        ``self.factor_return_series`` and ``self.t_stats_series``.  A summary
        is printed and appended to ``<cwd>/<stock_pool>/performance.txt`` and
        two figures are saved as PNG (and to ``self.pdfs`` when that is not
        a string).

        Args:
            holding_freq: frequency passed to ``strategy.resample_tradingdays``.
            weights: ``'default'`` for an unweighted fit, otherwise an object
                whose ``.ix[time]`` gives the regression weights of a day.
            direction: ``'-'`` flips the sign of the returns and t-values.
            plot_cum: plot cumulative returns when true, per-period returns
                (plus a zero line) otherwise.
            start, end: optional bounds used to slice the holding days;
                ``'default'`` means unbounded.
        """
        # Load the adjusted close price if it is missing.  It is read
        # shifted: per the original author the realised return should use
        # the open of the rebalancing day, which is approximated here by
        # the close of the previous trading day.
        if 'ClosePrice_adj' not in self.strategy_data.stock_price.items:
            temp_panel = data.read_data(['ClosePrice_adj'], ['ClosePrice_adj'],
                                        shift=True)
            self.strategy_data.stock_price['ClosePrice_adj'] = temp_panel.ix[
                'ClosePrice_adj']

        # Dates at which the factor return is measured.
        holding_days = strategy.resample_tradingdays(
            self.strategy_data.stock_price.ix['FreeMarketValue', :, 0],
            freq=holding_freq)
        # Restrict to [start, end] when given.
        if start != 'default':
            holding_days = holding_days[start:]
        if end != 'default':
            holding_days = holding_days[:end]

        # Log returns of the stocks and the factor exposures.
        holding_day_price = self.strategy_data.stock_price.ix['ClosePrice_adj',
                                                              holding_days, :]
        holding_day_return = np.log(
            holding_day_price.div(holding_day_price.shift(1)))
        holding_day_factor = self.strategy_data.factor.ix[0, holding_days, :]
        holding_day_factor_expo = strategy_data.get_cap_wgt_exposure(
            holding_day_factor,
            self.strategy_data.stock_price.ix['FreeMarketValue',
                                              holding_days, :])
        # The exposure must come from the previous period.
        holding_day_factor_expo = holding_day_factor_expo.shift(1)

        # Result series, initialised to NaN.
        self.factor_return_series = pd.Series(
            np.full(holding_days.size, np.nan), index=holding_days)
        self.t_stats_series = pd.Series(
            np.full(holding_days.size, np.nan), index=holding_days)

        # `weights` is either the sentinel string or a pandas object.
        # Identity (`is`) against a literal is unreliable, and a bare `==`
        # on a pandas object is element-wise, so check the type first.
        use_default_weights = isinstance(weights, str) and weights == 'default'

        # One regression per holding day.
        for _, time in holding_days.iteritems():
            y = holding_day_return.ix[time, :]
            x = holding_day_factor_expo.ix[time, :]
            if y.isnull().all() or x.isnull().all():
                continue
            x = sm.add_constant(x)
            if use_default_weights:
                results = sm.WLS(y, x, missing='drop').fit()
            else:
                results = sm.WLS(y,
                                 x,
                                 weights=weights.ix[time],
                                 missing='drop').fit()
            self.factor_return_series.ix[time] = results.params[1]
            self.t_stats_series.ix[time] = results.tvalues[1]

        # A negative direction flips the sign of both series.
        if direction == '-':
            self.factor_return_series = -self.factor_return_series
            self.t_stats_series = -self.t_stats_series

        # Summary text.
        tstats_sig_ratio = self.t_stats_series[
            np.abs(self.t_stats_series) >= 2].size / self.t_stats_series.size
        target_str = 'The average return of this factor: {0:.4f}%\n' \
                     'Note that the return of factor is not annualized but corresponding to the holding days interval\n' \
                     'The average t-statistics value: {1:.4f}\n' \
                     'Ratio of t_stats whose absolute value >= 2: {2:.2f}%\n'.format(
            self.factor_return_series.mean()*100, self.t_stats_series.mean(), tstats_sig_ratio*100
        )

        # Print the summary and append it to the performance file.
        print(target_str)
        output_dir = str(os.path.abspath('.')) + '/' + \
            self.strategy_data.stock_pool
        with open(output_dir + '/performance.txt',
                  'a',
                  encoding='GB18030') as text_file:
            text_file.write(target_str)

        # Plot the factor return; cumulative by default.
        fx = plt.figure()
        ax = fx.add_subplot(1, 1, 1)
        zero_series = pd.Series(np.zeros(self.factor_return_series.shape),
                                index=self.factor_return_series.index)
        if plot_cum:
            plt.plot(self.factor_return_series.cumsum() * 100, 'b-')
        else:
            plt.plot(self.factor_return_series * 100, 'b-')
            plt.plot(zero_series, 'r-')
        ax.set_xlabel('Time')
        ax.set_ylabel('Return of The Factor (%)')
        ax.set_title('The Return Series of The Factor')
        plt.savefig(output_dir + '/' + 'FactorReturn.png', dpi=1200)
        if type(self.pdfs) != str:
            plt.savefig(self.pdfs, format='pdf')

        # Plot the t-statistics with a zero reference line.
        fx = plt.figure()
        ax = fx.add_subplot(1, 1, 1)
        plt.plot(self.t_stats_series, 'b-')
        plt.plot(zero_series, 'r-')
        ax.set_xlabel('Time')
        ax.set_ylabel('T-Stats of The Factor Return')
        ax.set_title('The T-Stats Series of The Factor Return')
        plt.savefig(output_dir + '/' + 'FactorReturnTStats.png', dpi=1200)
        if type(self.pdfs) != str:
            plt.savefig(self.pdfs, format='pdf')
Esempio n. 13
0
    def select_stocks_pure_factor(self,
                                  *,
                                  base_expo,
                                  cov_matrix='Empty',
                                  reg_weight='Empty',
                                  direction='+',
                                  benchmark_weight='Empty',
                                  is_long_only=True):
        """Build pure-factor holdings by solving a QP on every holding day.

        Each day ``solvers.qp`` is called with ``P = V``, ``q = -factor
        exposure`` and the equality constraint that the portfolio's exposure
        to every base factor equals zero, or the benchmark's exposure when
        ``benchmark_weight`` is supplied.  With ``is_long_only`` every weight
        is additionally constrained to be non-negative.  The solution is
        written into ``self.position.holding_matrix`` and the holdings are
        normalised with ``self.position.to_percentage()`` at the end.

        Args:
            base_expo: panel of base factor exposures.
            cov_matrix: per-date covariance data (takes priority); the string
                default means "not supplied".
            reg_weight: per-date regression weights used when no covariance
                matrix is given; the string default means "not supplied".
            direction: ``'-'`` flips the sign of the factor exposure.
            benchmark_weight: benchmark weights; the string default means
                "no benchmark".
            is_long_only: forbid negative weights when true.

        Raises:
            AssertionError: when neither ``cov_matrix`` nor ``reg_weight`` is
                supplied.
        """
        use_cov = not isinstance(cov_matrix, str)
        has_benchmark = not isinstance(benchmark_weight, str)

        # Exposure of the strategy factor, sign-flipped when requested.
        factor_expo = strategy_data.get_cap_wgt_exposure(
            self.strategy_data.factor.iloc[0],
            self.strategy_data.stock_price.ix['FreeMarketValue'])
        if direction == '-':
            factor_expo = -factor_expo
        self.strategy_data.factor_expo = pd.Panel(
            {'factor_expo': factor_expo},
            major_axis=self.strategy_data.factor.major_axis,
            minor_axis=self.strategy_data.factor.minor_axis)

        # With a benchmark, compute the benchmark's exposures.
        if has_benchmark:
            benchmark_weight = (benchmark_weight.div(benchmark_weight.sum(1),
                                                     axis=0)).fillna(0)
            adjusted_base_expo = strategy_data.adjust_benchmark_related_expo(
                base_expo, benchmark_weight,
                self.strategy_data.if_tradable.ix['if_tradable'])
            benchmark_base_expo = np.einsum('ijk,jk->ji',
                                            adjusted_base_expo.fillna(0),
                                            benchmark_weight.fillna(0))
            benchmark_base_expo = pd.DataFrame(benchmark_base_expo,
                                               index=base_expo.major_axis,
                                               columns=base_expo.items)

            adjusted_factor_expo = strategy_data.adjust_benchmark_related_expo(
                pd.Panel({'factor_expo': factor_expo}), benchmark_weight,
                self.strategy_data.if_tradable.ix['if_tradable'])
            adjusted_factor_expo = adjusted_factor_expo.ix['factor_expo']
            benchmark_curr_factor_expo = (adjusted_factor_expo *
                                          benchmark_weight).sum(1)
            self.strategy_data.factor_expo.ix['factor_expo'] = factor_expo.sub(
                benchmark_curr_factor_expo, axis=0)

        # cvxopt option; it does not depend on the date.
        solvers.options['show_progress'] = False

        # Loop over the rebalancing days.
        for _, time in self.holding_days.iteritems():
            curr_factor_expo = self.strategy_data.factor_expo.ix['factor_expo',
                                                                 time, :]
            curr_base_expo = base_expo.ix[:, time, :]

            if use_cov:
                # A covariance matrix is available and takes priority.
                # NOTE(review): this relies on cov_matrix.ix[time] exposing
                # .diagonal() and on the result being accepted by
                # pd.concat -- confirm the expected type of cov_matrix.
                curr_v = cov_matrix.ix[time]
                curr_v_diag = curr_v.diagonal()
            else:
                assert type(reg_weight) != str, 'The construction of pure factor portfolio require one of following:\n' \
                                                'Covariance matrix of factor returns (priority), OR \n' \
                                                'Regression weight when getting factor return using linear regression.\n'
                # Regression weights of this period, one per stock.
                curr_v_diag = reg_weight.ix[time]

            # Drop every stock that has a NaN in any input.
            all_data = pd.concat(
                [curr_v_diag, curr_factor_expo, curr_base_expo], axis=1)
            all_data = all_data.dropna()
            # With one or zero valid stocks nothing is selected this period.
            if all_data.shape[0] <= 1:
                continue
            # When selecting inside an index an industry exposure can be
            # zero for every stock; its constraint is redundant, so such
            # all-zero columns are removed.
            all_data = all_data.replace(0, np.nan).dropna(
                axis=1, how='all').fillna(0.0)

            # Columns follow the concat order above: diagonal, factor
            # exposure, then base exposures.  (The covariance branch used
            # to read columns 0 and 1 swapped, feeding the variance
            # diagonal to the optimiser as the factor exposure.)
            curr_v_diag = all_data.ix[:, 0]
            curr_factor_expo = all_data.ix[:, 1]
            curr_base_expo = all_data.ix[:, 2:]

            if use_cov:
                curr_v = curr_v.reindex(index=curr_v_diag.index,
                                        columns=curr_v_diag.index)
            else:
                # Normalise the regression weights, then invert the
                # diagonal matrix built from them.
                curr_v_diag = curr_v_diag / curr_v_diag.sum()
                curr_v = np.linalg.pinv(np.diag(curr_v_diag))
                curr_v = pd.DataFrame(curr_v,
                                      index=curr_factor_expo.index,
                                      columns=curr_factor_expo.index)

            # Target exposure to the other factors: zero, or the
            # benchmark's exposure when a benchmark is given.
            if has_benchmark:
                expo_target = benchmark_base_expo.ix[time].reindex(
                    index=curr_base_expo.columns)
            else:
                expo_target = pd.Series(0.0, index=curr_base_expo.columns)

            # Set up the optimisation: P = V, q = -(factor_expo.T).
            P = matrix(curr_v.as_matrix())
            q = matrix(-curr_factor_expo.as_matrix().transpose())

            # Equality constraints on the other factor exposures.
            A = matrix(curr_base_expo.as_matrix().transpose())
            b = matrix(expo_target.as_matrix())

            if is_long_only:
                # Long only: every stock weight must be >= 0.
                long_only_constraint = pd.DataFrame(
                    -1.0 * np.eye(curr_factor_expo.size),
                    index=curr_factor_expo.index,
                    columns=curr_factor_expo.index)
                long_only_target = pd.Series(0.0, index=curr_factor_expo.index)

                G = matrix(long_only_constraint.as_matrix())
                h = matrix(long_only_target.as_matrix())

                # Solve the optimisation problem.
                results = solvers.qp(P=P, q=q, A=A, b=b, G=G, h=h)
            else:
                results = solvers.qp(P=P, q=q, A=A, b=b)

            results_np = np.array(results['x']).squeeze()
            results_s = pd.Series(results_np, index=curr_factor_expo.index)
            # Re-index onto the full stock universe, filling with 0.
            results_s = results_s.reindex(
                self.strategy_data.stock_price.minor_axis, fill_value=0)

            # Holdings for this date.
            self.position.holding_matrix.ix[time] = results_s

        # Normalise the weights once the loop has finished.
        self.position.to_percentage()
Esempio n. 14
0
    def get_table3(self,
                   *,
                   freq='w',
                   foldername=None,
                   startdate=None,
                   enddate=None):
        """Run the "table 3" Fama-MacBeth regressions on next-period returns.

        The method regresses next-period stock returns on combinations of
        three regressors ('lagged_ep', 'sue', 'reversal'), first with the
        in-house ``strategy_data.fama_macbeth`` helper and then with
        ``FamaMacBeth(...).fit()``.

        Side effects: regenerates the holding days and overwrites
        ``raw_data['rv3']``, ``stock_price['ClosePrice_adj']`` and
        ``stock_price['daily_return']`` on ``self.strategy_data``.  It also
        reads ``raw_data['rv36']`` and ``raw_data['rv8']``, which are NOT
        loaded here and must already be present.

        Args:
            freq: Forwarded as ``holding_freq`` to ``generate_holding_days``
                and used as the resampling rule for the returns.
            foldername: Currently unused; it is only referenced by the
                disabled CSV export kept as a comment below.
            startdate: Forwarded to ``generate_holding_days`` and used as the
                lower bound of the daily-return date slice.
            enddate: Forwarded to ``generate_holding_days`` and used as the
                upper bound of the daily-return date slice.

        Returns:
            None.  The coefficient / t-stat panel and the fitted models are
            computed but neither returned nor persisted.
        """
        # Holding days must exist first: they index both sides of the
        # regressions below.
        self.generate_holding_days(holding_freq=freq,
                                   loc=-1,
                                   start_date=startdate,
                                   end_date=enddate)

        sdata = self.strategy_data

        # Load the raw signal, store it, then replace it by the exposure
        # computed from the stored copy and the free-float market value.
        sdata.raw_data['rv3'] = data.read_data('runner_value_3')
        rv3_expo = strategy_data.get_cap_wgt_exposure(
            sdata.raw_data['rv3'], sdata.stock_price['FreeMarketValue'])
        sdata.raw_data['rv3'] = rv3_expo

        # Daily simple returns from the adjusted close.
        sdata.stock_price['ClosePrice_adj'] = data.read_data('ClosePrice_adj')
        adj_close = sdata.stock_price['ClosePrice_adj']
        sdata.stock_price['daily_return'] = adj_close.pct_change()

        # Aggregate daily returns to the holding frequency (the original
        # author notes the paper uses monthly data, this project usually
        # weekly).
        fwd_ret = sdata.stock_price['daily_return', startdate:enddate, :]
        fwd_ret = fwd_ret.resample(freq).sum()
        # The dependent variable is the NEXT period's return, hence the
        # shift(-1): this deliberately uses future data.
        fwd_ret = fwd_ret.shift(-1).dropna(how='all')
        # Resampled period-end labels need not be trading days, so the index
        # is replaced by the holding days.
        fwd_ret = fwd_ret.set_index(self.holding_days)

        # Right-hand side of the regressions.
        factor_panel = pd.Panel({
            'lagged_ep': sdata.raw_data['rv3'].shift(0),
            'sue': sdata.raw_data['rv36'],
            'reversal': sdata.raw_data['rv8'],
        })

        # Container for the results: one row per regression specification.
        table3 = pd.Panel(
            items=['coef', 't_stats'],
            major_axis=np.arange(5),
            minor_axis=['intercept', 'lagged_ep', 'sue', 'reversal'])

        # (regressors, extra keyword arguments) for each table row, all
        # evaluated on the holding-day dates.  Only the last specification
        # passes nw_lags=0; the others use fama_macbeth's default.
        in_house_specs = (
            (['lagged_ep'], {}),
            (['sue'], {}),
            (['reversal'], {}),
            (['lagged_ep', 'reversal'], {}),
            (['lagged_ep', 'sue', 'reversal'], {'nw_lags': 0}),
        )
        for row, (regressors, extra_kwargs) in enumerate(in_house_specs):
            outcome = strategy_data.fama_macbeth(
                fwd_ret,
                factor_panel.ix[regressors, self.holding_days, :],
                **extra_kwargs)
            table3.ix['coef', row, :] = outcome[0]
            table3.ix['t_stats', row, :] = outcome[1]

        # Disabled export of the table (kept from the original author):
        # if foldername is None:
        #     table3.ix['coef'].to_csv(str(os.path.abspath('.')) + '/' + self.strategy_data.stock_pool +
        #                                  '/' + 'Table3_coef.csv', na_rep='N/A', encoding='GB18030')
        #     table3.ix['t_stats'].to_csv(str(os.path.abspath('.')) + '/' + self.strategy_data.stock_pool +
        #                                     '/' + 'Table3_t_stats.csv', na_rep='N/A', encoding='GB18030')
        # else:
        #     table3.ix['coef'].to_csv(foldername +
        #                              '/' + 'Table3_coef.csv', na_rep='N/A', encoding='GB18030')
        #     table3.ix['t_stats'].to_csv(foldername +
        #                                 '/' + 'Table3_t_stats.csv', na_rep='N/A', encoding='GB18030')

        # Experimental: repeat the exercise with the linearmodels package
        # (per the original author's note), which needs an explicit constant.
        factor_panel['const'] = 1.0
        linearmodels_specs = (
            ['lagged_ep', 'const'],
            ['sue', 'const'],
            ['reversal', 'const'],
            ['reversal', 'lagged_ep', 'const'],
            ['reversal', 'sue', 'const'],
            slice(None),  # every item of the panel
        )
        # The fits are computed for inspection only; nothing consumes them.
        fm_fits = [
            FamaMacBeth(
                fwd_ret,
                factor_panel.ix[items, self.holding_days, :]).fit()
            for items in linearmodels_specs
        ]