Python read_data 예제들, data.data.read_data Python 예제들

예제 #1

0

파일 보기

파일: pa_report_db.py 프로젝트: wukan1986/mfm-platform

    def update_holdng_tar_data(self, *, end_date=None):
        """Extend the stored target-holding data with a freshly computed tail.

        Reads the saved ``tar_holding_vol`` and ``tar_position`` objects,
        recomputes both starting from the last stored date, splices old and
        new together along axis 1 and writes the results back under the same
        names.  ``self.is_update`` is True while the method runs and is reset
        to False at the end.

        Args:
            end_date: Optional ``pd.Timestamp``.  When it is a Timestamp it
                replaces ``self.end_date``; any other value is ignored.
        """
        self.is_update = True

        # Previously stored target holdings.
        stored_vol = data.read_data('tar_holding_vol', folder_name='')
        stored_position = data.read_data('tar_position', folder_name='')

        # The update window starts at the last date already stored.
        self.start_date = stored_vol.major_axis[-1]
        if isinstance(end_date, pd.Timestamp):
            self.end_date = end_date

        # Trading days and labels are prepared here because this method is
        # usually called without a preceding update_data_from_db().
        self.get_trading_days()
        self.get_labels()

        # Compute the target holdings for the update window.
        self.get_tar_holding_vol()
        self.get_tar_position()

        # Re-index the old data onto the minor axis of the new data,
        # filling entries that did not exist before with 0.
        stored_vol = stored_vol.reindex(
            minor_axis=self.tar_holding_vol.minor_axis, fill_value=0)
        stored_position = stored_position.reindex(
            minor_axis=self.tar_position.minor_axis, fill_value=0)

        # Drop the overlapping start date from the old data, then append the
        # new data along axis 1.
        vol_parts = [stored_vol.drop(self.start_date, axis=1).sort_index(),
                     self.tar_holding_vol.sort_index()]
        merged_vol = pd.concat(vol_parts, axis=1)
        position_parts = [stored_position.drop(self.start_date, axis=1).sort_index(),
                          self.tar_position.sort_index()]
        merged_position = pd.concat(position_parts, axis=1)

        # Persist the merged data.
        data.write_data(merged_vol, file_name='tar_holding_vol', folder_name='')
        data.write_data(merged_position, file_name='tar_position', folder_name='')

        # Reset the update flag.
        self.is_update = False

예제 #2

0

파일 보기

파일: residual_income.py 프로젝트: wukan1986/mfm-platform

    def get_discount_factor(self):
        """Derive a discount factor from industry-average betas.

        For each date the mean beta of every industry is computed and mapped
        back onto the stocks of that industry (only for dates on or after
        2009-04-01).  The mapped betas are averaged over a rolling window of
        252 rows (at least 63 observations) and multiplied by 0.06.  The
        result is stored in ``self.discount_factor``.
        """
        beta = data.read_data(['beta'])['beta']
        indus = data.read_data(['Industry'])['Industry']

        # Mean beta per industry for every date; the columns are the
        # industries present on the last row of the industry data.
        industry_beta = pd.DataFrame(np.nan, index=beta.index,
                                     columns=indus.iloc[-1, :].unique())
        for day in beta.index:
            day_beta = beta.ix[day, :]
            day_industry = indus.ix[day, :]
            industry_beta.ix[day, :] = day_beta.groupby(day_industry).mean()

        # Assign each stock the mean beta of its industry on that date.
        stock_beta = beta * np.nan
        first_day = pd.Timestamp('2009-04-01')
        for day in beta.index:
            if not day >= first_day:
                continue
            day_industry = indus.ix[day, :]
            day_means = industry_beta.ix[day, :]
            stock_beta.ix[day, :] = day_industry.replace(day_means.to_dict())

        # The betas are daily, so average the past 252 trading days to get
        # an annual beta.
        annual_beta = stock_beta.rolling(252, min_periods=63).apply(np.nanmean)

        # The risk-free rate is taken as 0 for now and the market excess
        # return is fixed at the constant 6%.
        self.discount_factor = annual_beta * 0.06

예제 #3

0

파일 보기

파일: intangible_info.py 프로젝트: quanttrade/mfm-platform

    def construct_factor(self):
        """Build the ``mom`` factor and store it on ``self.strategy_data``.

        Log returns of the adjusted close are weighted with the weights from
        ``barra_base.construct_expo_weights(126, 504)`` and summed over a
        504-row rolling window.  The result is masked with ``if_inv`` and
        stored with a flipped sign as the single item of
        ``self.strategy_data.factor``.
        """
        prices = data.read_data(['ClosePrice_adj', 'OpenPrice_adj', 'vwap_adj'],
                                ['ClosePrice_adj', 'OpenPrice_adj', 'vwap_adj'],
                                shift=True)
        close = prices['ClosePrice_adj']
        log_ret = np.log(close / prices['ClosePrice_adj'].shift(1)).fillna(0)

        decay = barra_base.construct_expo_weights(126, 504)
        momentum = log_ret.rolling(504).apply(
            lambda window: (window * decay).sum())

        # Style data is loaded and masked below, but it is not used further
        # in this method.
        style = data.read_data(['rv', 'liquidity', 'lncap', 'runner_value_36'],
                               shift=True)

        # Filter the data down to the investable universe.
        self.strategy_data.handle_stock_pool()
        momentum = momentum.where(
            self.strategy_data.if_tradable.ix['if_inv'], np.nan)
        for name, frame in style.iteritems():
            style[name] = frame.where(
                self.strategy_data.if_tradable.ix['if_inv'], np.nan)

        self.strategy_data.factor = pd.Panel({'mom': -momentum})

예제 #4

0

파일 보기

파일: strategy_data.py 프로젝트: rlcjj/mfm-platform

    def handle_stock_pool(self, *, shift=False):
        """Fill the ``if_inpool`` and ``if_inv`` items of ``self.if_tradable``.

        Args:
            shift: Forwarded to ``data.read_data`` when the pool weights
                have to be loaded.

        Raises:
            AssertionError: If ``self.if_tradable`` has no ``if_tradable``
                item yet.
        """
        pool = self.stock_pool
        if pool == 'all':
            # No stock pool configured: everything is in the pool.
            self.if_tradable['if_inpool'] = True
        else:
            weight_key = 'Weight_' + pool
            if weight_key not in self.benchmark_price.items:
                # Weights are not cached in benchmark_price yet; read them.
                loaded = data.read_data([weight_key], [weight_key], shift=shift)
                if self.benchmark_price.empty:
                    self.benchmark_price = loaded
                else:
                    self.benchmark_price[weight_key] = loaded[weight_key]
            # Entries with a positive weight count as in the pool.
            self.if_tradable['if_inpool'] = \
                self.benchmark_price.ix[weight_key] > 0

        # if_tradable must have been generated beforehand.
        assert 'if_tradable' in self.if_tradable.items, 'Please generate if_tradable first!'

        # Investable means tradable AND inside the pool.
        tradable = self.if_tradable.ix['if_tradable']
        in_pool = self.if_tradable.ix['if_inpool']
        self.if_tradable['if_inv'] = np.logical_and(tradable, in_pool)

예제 #5

0

파일 보기

파일: barra_base.py 프로젝트: rlcjj/mfm-platform

    def get_growth(self):
        """Store the ``growth`` factor in ``self.bb_data.factor``.

        When ``growth.csv`` exists in the working directory and
        ``self.is_update`` is false the factor is read via
        ``data.read_data``.  Otherwise the four components are computed,
        converted with ``strategy_data.get_cap_wgt_exposure`` and combined
        with the weights 0.18 / 0.11 / 0.24 / 0.47.
        """
        if os.path.isfile('growth.csv') and not self.is_update:
            stored = data.read_data(['growth'], ['growth'])
            self.bb_data.factor['growth'] = stored.ix['growth']
            return

        self.get_g_egrlf()
        self.get_g_egrsf()
        self.get_g_egro()
        self.get_g_sgro()
        self.bb_data.discard_uninv_data()

        # Exposure of each of the four component factors.
        for component in ('egrlf', 'egrsf', 'egro', 'sgro'):
            self.bb_data.raw_data[component + '_expo'] = \
                strategy_data.get_cap_wgt_exposure(
                    self.bb_data.raw_data.ix[component],
                    self.bb_data.stock_price.ix['FreeMarketValue'])

        raw = self.bb_data.raw_data
        growth = (0.18 * raw.ix['egrlf_expo'] + 0.11 * raw.ix['egrsf_expo'] +
                  0.24 * raw.ix['egro_expo'] + 0.47 * raw.ix['sgro_expo'])
        self.bb_data.factor['growth'] = growth

예제 #6

0

파일 보기

    def handle_stock_pool(self, *, shift=False):
        """Fill the ``if_inpool`` and ``if_inv`` items of ``self.if_tradable``.

        Weights that have to be loaded are NaN-filled with 0.0 and passed
        row by row through ``position.to_percentage_func``.  If
        ``if_tradable`` is missing it is generated first.

        Args:
            shift: Forwarded to ``data.read_data`` and to
                ``self.generate_if_tradable``.
        """
        pool = self.stock_pool
        if pool == 'all':
            # No stock pool configured: everything is in the pool.
            self.if_tradable['if_inpool'] = True
        else:
            weight_key = 'Weight_' + pool
            if weight_key not in self.benchmark_price.items:
                # Weights are not cached in benchmark_price yet; read them.
                loaded = data.read_data([weight_key], [weight_key],
                                        shift=shift).fillna(0.0)
                if self.benchmark_price.empty:
                    self.benchmark_price = loaded
                else:
                    self.benchmark_price[weight_key] = loaded[weight_key]
                # Index weights deviate slightly from summing to 1, so they
                # are normalised.
                normalised = self.benchmark_price[weight_key].apply(
                    position.to_percentage_func, axis=1)
                self.benchmark_price[weight_key] = normalised
            # Entries with a positive index weight count as in the pool.
            self.if_tradable['if_inpool'] = \
                self.benchmark_price.ix[weight_key] > 0

        # Generate if_tradable when it does not exist yet.
        if 'if_tradable' not in self.if_tradable.items:
            self.generate_if_tradable(shift=shift)

        # Investable means tradable AND inside the pool.
        tradable = self.if_tradable.ix['if_tradable']
        in_pool = self.if_tradable.ix['if_inpool']
        self.if_tradable['if_inv'] = np.logical_and(tradable, in_pool)

예제 #7

0

파일 보기

파일: xy_base.py 프로젝트: wukan1986/mfm-platform

    def get_short_reversal(self):
        """Store the ``short_rev`` factor in ``self.base_data.factor``.

        The stored file is used when it exists, ``self.is_update`` is false
        and ``self.try_to_read`` is true.  Otherwise the factor is a weighted
        sum of ``daily_excess_log_return`` over 21-row windows, using the
        weights from ``strategy_data.construct_expo_weights(10, 21)``.
        """
        stored_path = os.path.abspath('.')+'/ResearchData/short_rev'+self.filename_appendix
        if os.path.isfile(stored_path) and not self.is_update and self.try_to_read:
            self.base_data.factor['short_rev'] = data.read_data(
                ['short_rev' + self.filename_appendix],
                item_name=['short_rev'])
            return

        # Not stored, so compute it: rolling sum over 21 trading days with a
        # half-life of 10 days.
        decay = strategy_data.construct_expo_weights(10, 21)

        def weighted_reversal(window, *, weights):
            """Weighted column sums of ``window``; NaN below 5 observations."""
            row_weights = pd.Series(weights, index=window.index)
            weighted = strategy_data.multiply_weights(
                window, row_weights, multiply_power=1.0)
            total = weighted.sum(0)
            # A column needs at least 5 non-null values in the window.
            enough_data = window.notnull().sum(0) >= 5
            return total.where(enough_data, np.nan)

        # NOTE(review): the output frame is shaped from base_data while the
        # windows are taken from complete_base_data - confirm both share the
        # same row positions.
        reversal = self.base_data.stock_price.ix[
            'daily_excess_log_return'] * np.nan
        n_days = len(self.complete_base_data.stock_price.
                     ix['daily_excess_log_return'].index)
        # The first value is produced once 21 rows are available.
        for cursor in range(20, n_days):
            window = self.complete_base_data.stock_price.ix[
                'daily_excess_log_return', cursor - 20:cursor + 1, :]
            reversal.ix[cursor, :] = weighted_reversal(window, weights=decay)
        self.base_data.factor['short_rev'] = reversal

예제 #8

0

파일 보기

파일: barra_base.py 프로젝트: rlcjj/mfm-platform

    def get_earnings_yeild(self):
        """Store the ``ey`` factor in ``self.bb_data.factor``.

        When ``ey.csv`` exists in the working directory and
        ``self.is_update`` is false the factor is read via
        ``data.read_data``.  Otherwise the three components are computed,
        converted with ``strategy_data.get_cap_wgt_exposure`` and combined
        with the weights 0.68 / 0.21 / 0.11.
        """
        if os.path.isfile('ey.csv') and not self.is_update:
            stored = data.read_data(['ey'], ['ey'])
            self.bb_data.factor['ey'] = stored.ix['ey']
            return

        self.get_ey_epfwd()
        self.get_ey_cetop()
        self.get_ey_etop()
        self.bb_data.discard_uninv_data()

        # Exposure of each of the three component factors.
        for component in ('epfwd', 'cetop', 'etop'):
            self.bb_data.raw_data[component + '_expo'] = \
                strategy_data.get_cap_wgt_exposure(
                    self.bb_data.raw_data.ix[component],
                    self.bb_data.stock_price.ix['FreeMarketValue'])

        raw = self.bb_data.raw_data
        earnings_yield = (0.68 * raw.ix['epfwd_expo'] +
                          0.21 * raw.ix['cetop_expo'] +
                          0.11 * raw.ix['etop_expo'])
        self.bb_data.factor['ey'] = earnings_yield

예제 #9

0

파일 보기

파일: barra_base.py 프로젝트: rlcjj/mfm-platform

 def get_industry_factor(self):
     """Append industry dummy exposures to ``self.bb_data.factor_expo``.

     Industry labels are read into ``self.industry``, turned into dummy
     variables row by row, aligned with the first item of the existing
     factor exposures, NaN-filled with 0 and concatenated onto
     ``self.bb_data.factor_expo``.
     """
     # Read the industry classification data.
     self.industry = data.read_data(['Industry'], ['Industry']).ix['Industry']

     # Use the first row that contains every industry as the template for
     # the panel that stores the dummies.  There are 28 industries, which
     # makes 29 distinct values once NaN is counted.
     distinct_per_row = self.industry.apply(
         lambda row: row.unique().size, axis=1)
     template_row = distinct_per_row[distinct_per_row == 29].index[0]
     template = pd.get_dummies(self.industry.ix[template_row],
                               prefix='Industry')
     dummies = pd.Panel(data=None,
                        major_axis=template.index,
                        minor_axis=template.columns)

     # One dummy frame per row of the industry data.
     for row_label, row_industry in self.industry.iterrows():
         dummies[row_label] = pd.get_dummies(row_industry,
                                             prefix='Industry')

     # Transpose, then align the industry exposures with the index of the
     # style exposures.
     dummies = dummies.transpose(2, 0, 1)
     dummies = data.align_index(self.bb_data.factor_expo.ix[0], dummies)

     # Fill NaN with 0: an industry without any stock at some point would
     # otherwise have NaN exposure.  Uninvestable entries get filled as
     # well; a later filter turns them back into NaN.
     dummies = dummies.fillna(0)

     # Attach the industry exposures to the style exposures.
     self.bb_data.factor_expo = pd.concat(
         [self.bb_data.factor_expo, dummies])

예제 #10

0

파일 보기

파일: pa_report_generator.py 프로젝트: quanttrade/mfm-platform

 def prepare_benchmark(self):
     """Load the benchmark weights and normalise every row to sum to 1.

     The result is stored in ``self.benchmarks``.  Entries that are NaN
     after the normalisation are replaced by 0.0.
     """
     # NOTE(review): 'Weihgt_sz50' looks like a misspelling of
     # 'Weight_sz50'; it is kept because the stored data name is not
     # visible from here - confirm against the data folder.
     self.benchmarks = data.read_data(
         ['Weihgt_sz50', 'Weight_hs300', 'Weight_zz500'])
     # Normalise each row by its sum.
     for name, weights in self.benchmarks.iteritems():
         self.benchmarks[name] = weights.div(weights.sum(1), axis=0)
     # fillna returns a new object, so the result has to be assigned back;
     # the original call discarded it and left the NaNs in place.
     self.benchmarks = self.benchmarks.fillna(0.0)

예제 #11

0

파일 보기

파일: barra_base.py 프로젝트: rlcjj/mfm-platform

    def get_ey_epfwd(self):
        """Store the ``epfwd`` item in ``self.bb_data.raw_data``.

        When ``epfwd.csv`` exists in the working directory and
        ``self.is_update`` is false the value is read via
        ``data.read_data``.  Otherwise it is a month-dependent blend of the
        FY1 and FY2 forecast net income, each divided by free market value.
        """
        if os.path.isfile('epfwd.csv') and not self.is_update:
            epfwd = data.read_data(['epfwd'], ['epfwd']).ix['epfwd']
        else:
            def blend_forecasts(fy1_data, fy2_data):
                """Blend FY1/FY2 rows with weights driven by the row month."""
                # The fiscal year is treated as ending in April: in May the
                # FY1 weight is 1, in June 11/12, ... and in April 1/12.
                months_past_may = fy1_data.index.month - 5
                fy1_share = np.where(months_past_may >= 0,
                                     (12 - months_past_may) / 12,
                                     -months_past_may / 12)
                # np.where yields an ndarray; wrap it so mul aligns on rows.
                fy1_share = pd.Series(fy1_share, index=fy1_data.index)
                fy2_share = 1 - fy1_share
                return (fy1_data.mul(fy1_share, axis=0) +
                        fy2_data.mul(fy2_share, axis=0))

            # Forecast net income divided by market value gives forecast EP.
            fy1_ep = (self.bb_data.raw_data.ix['NetIncome_fy1'] /
                      self.bb_data.stock_price.ix['FreeMarketValue'])
            fy2_ep = (self.bb_data.raw_data.ix['NetIncome_fy2'] /
                      self.bb_data.stock_price.ix['FreeMarketValue'])
            epfwd = blend_forecasts(fy1_ep, fy2_ep)
        self.bb_data.raw_data['epfwd'] = epfwd

예제 #12

0

파일 보기

파일: performance_attribution.py 프로젝트: rlcjj/mfm-platform

    def get_pa_return(self, *, discard_factor=None, enable_reading_pa_return=True):
        """Load or compute the factor returns used for attribution.

        The result is stored in ``self.pa_returns`` and re-indexed onto the
        index of ``self.pa_position.holding_matrix``.

        Args:
            discard_factor: Factors whose exposures are set to 0 before the
                regression.  ``None`` (the default) means no factor is
                discarded.
            enable_reading_pa_return: When true, previously stored factor
                returns may be read instead of being recomputed.
        """
        # A fresh list per call replaces the former shared mutable default.
        if discard_factor is None:
            discard_factor = []

        stored_name = 'bb_factor_return_'+self.bb.bb_data.stock_pool
        # Stored returns are only used when no factor is discarded.
        if os.path.isfile(stored_name+'.csv') and \
                len(discard_factor) == 0 and enable_reading_pa_return:
            bb_factor_return = data.read_data([stored_name], ['pa_returns'])
            self.pa_returns = bb_factor_return['pa_returns']
            print('Barra base factor returns successfully read from local files! \n')
        else:
            # Zero out the exposures of the discarded factors.
            self.bb.bb_data.factor_expo.ix[discard_factor, :, :] = 0
            # Set the non-investable values back to NaN.
            self.bb.bb_data.discard_uninv_data()
            # Frame of zeros; it is replaced below once the factor returns
            # have been computed.
            self.pa_returns = pd.DataFrame(0, index=self.bb.bb_data.factor_expo.major_axis,
                                           columns=self.bb.bb_data.factor_expo.items)
            # The barra base factor returns are the attribution returns.
            self.bb.get_bb_factor_return()
            self.pa_returns = self.bb.bb_factor_return

            # Store the regression result; it has to be rewritten whenever
            # the underlying data has been updated.
            self.pa_returns.to_csv('bb_factor_return_'+self.bb.bb_data.stock_pool+'.csv',
                                   index_label='datetime', na_rep='NaN', encoding='GB18030')

        # Use the attribution time axis instead of the barra base one.
        self.pa_returns = self.pa_returns.reindex(self.pa_position.holding_matrix.index)

예제 #13

0

파일 보기

파일: barra_base.py 프로젝트: rlcjj/mfm-platform

    def get_rv_cmra(self):
        """Store the ``cmra`` item in ``self.bb_data.raw_data``.

        When ``cmra.csv`` exists in the working directory and
        ``self.is_update`` is false the value is read via
        ``data.read_data``.  Otherwise, for every row from the 252nd on, it
        is the range (max minus min) of the cumulative
        ``daily_excess_return`` sampled every 21 rows within the trailing
        252-row window.
        """
        if os.path.isfile('cmra.csv') and not self.is_update:
            cmra = data.read_data(['cmra'], ['cmra']).ix['cmra']
        else:
            def cumulative_range(window):
                """Max minus min of the monthly cumulative sums of ``window``."""
                cumulative = window.cumsum(axis=0)
                # Sample the cumulative return at every 21st row.
                month_ends = np.arange(20, 252, 21)
                monthly = cumulative.ix[month_ends]
                # The log form log(1+max) - log(1+min) was dropped to avoid
                # non-positive log arguments; per the original note this
                # keeps the factor ordering and only changes its scale.
                return monthly.max(axis=0) - monthly.min(axis=0)

            cmra = self.bb_data.stock_price.ix['daily_excess_return'] * np.nan
            n_days = len(
                self.bb_data.stock_price.ix['daily_excess_return'].index)
            # At least 252 rows are needed before the first value.
            for cursor in range(251, n_days):
                window = self.bb_data.stock_price.ix[
                    'daily_excess_return', cursor - 251:cursor + 1, :]
                cmra.ix[cursor, :] = cumulative_range(window)
        self.bb_data.raw_data['cmra'] = cmra

예제 #14

0

파일 보기

파일: barra_base.py 프로젝트: rlcjj/mfm-platform

    def get_rv_dastd(self):
        """Store the ``dastd`` item in ``self.bb_data.raw_data``.

        When ``dastd.csv`` exists in the working directory and
        ``self.is_update`` is false the value is read via
        ``data.read_data``.  Otherwise, for every row from the 252nd on, it
        is the standard deviation of the weighted ``daily_excess_return``
        over the trailing 252-row window, using the weights from
        ``barra_base.construct_expo_weights(42, 252)``.
        """
        if os.path.isfile('dastd.csv') and not self.is_update:
            dastd = data.read_data(['dastd'], ['dastd']).ix['dastd']
        else:
            # 252 trading days with a half-life of 42.
            decay = barra_base.construct_expo_weights(42, 252)

            def weighted_std(window, *, weights):
                """Column-wise std of ``window`` after row weighting."""
                row_weights = pd.Series(weights, index=window.index)
                return window.mul(row_weights, axis=0).std(0)

            dastd = self.bb_data.stock_price.ix['daily_excess_return'] * np.nan
            n_days = len(
                self.bb_data.stock_price.ix['daily_excess_return'].index)
            # At least 252 rows are needed before the first value.
            for cursor in range(251, n_days):
                window = self.bb_data.stock_price.ix[
                    'daily_excess_return', cursor - 251:cursor + 1, :]
                dastd.ix[cursor, :] = weighted_std(window, weights=decay)

        self.bb_data.raw_data['dastd'] = dastd

예제 #15

0

파일 보기

파일: barra_base.py 프로젝트: rlcjj/mfm-platform

    def get_momentum(self):
        """Store the ``momentum`` factor in ``self.bb_data.factor``.

        When ``momentum.csv`` exists in the working directory and
        ``self.is_update`` is false the factor is read via
        ``data.read_data``.  Otherwise it is the weighted sum of
        ``daily_excess_return`` lagged by 21 rows over a trailing 504-row
        window, using the weights from
        ``barra_base.construct_expo_weights(126, 504)``.
        """
        if os.path.isfile('momentum.csv') and not self.is_update:
            stored = data.read_data(['momentum'], ['momentum'])
            self.bb_data.factor['momentum'] = stored.ix['momentum']
            return

        # The return data enters with a lag of 21 days.
        lagged = self.bb_data.stock_price.ix['daily_excess_return'].shift(21)
        # 504 trading days with a half-life of 126.
        decay = barra_base.construct_expo_weights(126, 504)

        def weighted_sum(window, *, weights):
            """Column-wise sum of ``window`` after row weighting."""
            row_weights = pd.Series(weights, index=window.index)
            return window.mul(row_weights, axis=0).sum(0)

        momentum = self.bb_data.stock_price.ix['daily_excess_return'] * np.nan
        # At least 504 + 21 rows are needed before the first value.
        for cursor in range(502 + 21 + 1, len(lagged.index)):
            window = lagged.ix[cursor - 503:cursor + 1, :]
            momentum.ix[cursor, :] = weighted_sum(window, weights=decay)
        self.bb_data.factor['momentum'] = momentum

예제 #16

0

파일 보기

파일: sf_strategy_test.py 프로젝트: wukan1986/mfm-platform

def sf_test_multiple_pools(factor=None, sf_obj=None, *, direction='+', bb_obj=None,
                           discard_factor=(), folder_names=None, holding_freq='w', benchmarks=None,
                           stock_pools=('all', 'hs300', 'zz500', 'zz800'), bkt_start=None, bkt_end=None,
                           select_method=0, do_bb_pure_factor=False, do_pa=False, do_active_pa=False,
                           do_data_description=False, do_factor_corr_test=False, loc=-1):
    """Run ``sf_obj.single_factor_test`` once for every stock pool.

    Args:
        factor: Forwarded to ``single_factor_test``.
        sf_obj: Strategy object under test.  ``None`` (the default) creates
            a new ``single_factor_strategy()`` for this call; previously a
            single default instance was built at definition time and shared
            by every call.
        bb_obj: Base object; a new ``barra_base()`` is created when ``None``.
            A deep copy is handed to each test.
        folder_names: Indexed by position for every pool, so despite the
            ``None`` default it must be a sequence at least as long as
            ``stock_pools``.
        benchmarks: Indexed by position like ``folder_names``.
        stock_pools: The stock pools to iterate over.

    All remaining keyword arguments are forwarded to ``single_factor_test``
    (``do_bb_pure_factor`` is passed as ``do_base_pure_factor``).
    """
    if sf_obj is None:
        sf_obj = single_factor_strategy()

    # Print the name of the strategy under test.
    print('Name Of Strategy Under Test: {0}\n'.format(sf_obj.__class__.__name__))

    cp_adj = data.read_data('ClosePrice_adj')
    temp_position = position(cp_adj)
    # The backtest object has to be initialised first.
    bkt_obj = backtest(temp_position, bkt_start=bkt_start, bkt_end=bkt_end, buy_cost=0.0015,
                       sell_cost=0.0015, bkt_stock_data=['ClosePrice_adj', 'ClosePrice_adj'])
    # Build the base object once instead of once per loop iteration.
    if bb_obj is None:
        bb_obj = barra_base()
    # A base object passed in from outside should use the 'all' pool,
    # otherwise data may already have been discarded.
    elif bb_obj.bb_data.stock_pool != 'all':
        # The trailing space keeps the two literals from running together.
        print('The stockpool of the barra_base obj from outside is NOT "all", be aware of possibile '
              'data loss due to this situation!\n')

    for cursor, stock_pool in enumerate(stock_pools):
        # The base object is deep-copied because the attribution step
        # discards data per stock pool; the backtest object is copied too,
        # although that is not strictly necessary.
        sf_obj.single_factor_test(factor=factor, loc=loc, direction=direction, bkt_obj=copy.deepcopy(bkt_obj),
            base_obj=copy.deepcopy(bb_obj), discard_factor=discard_factor,
            folder_name=folder_names[cursor], bkt_start=bkt_start, bkt_end=bkt_end,
            holding_freq=holding_freq, benchmark=benchmarks[cursor], stock_pool=stock_pool,
            select_method=select_method, do_base_pure_factor=do_bb_pure_factor,
            do_pa=do_pa, do_active_pa=do_active_pa, do_data_description=do_data_description,
            do_factor_corr_test=do_factor_corr_test)

예제 #17

0

파일 보기

파일: barra_base.py 프로젝트: rlcjj/mfm-platform

 def get_g_egro(self):
     """Store the ``egro`` item in ``self.bb_data.raw_data``.

     Read via ``data.read_data`` when ``egro.csv`` exists and
     ``self.is_update`` is false; otherwise taken from the
     ``NetIncome_ttm_growth_8q`` raw item.
     """
     use_stored = os.path.isfile('egro.csv') and not self.is_update
     if use_stored:
         growth = data.read_data(['egro'], ['egro']).ix['egro']
     else:
         # The two-year growth of TTM net income stands in for the
         # five-year growth.
         growth = self.bb_data.raw_data.ix['NetIncome_ttm_growth_8q']
     self.bb_data.raw_data['egro'] = growth

예제 #18

0

파일 보기

파일: barra_base.py 프로젝트: rlcjj/mfm-platform

 def get_ey_etop(self):
     """Store the ``etop`` item in ``self.bb_data.raw_data``.

     Read via ``data.read_data`` when ``etop.csv`` exists and
     ``self.is_update`` is false; otherwise computed as the reciprocal of
     the ``PE_ttm`` raw item.
     """
     use_stored = os.path.isfile('etop.csv') and not self.is_update
     if use_stored:
         earnings_to_price = data.read_data(['etop'], ['etop']).ix['etop']
     else:
         # etop is the inverse of the trailing P/E.
         earnings_to_price = 1 / self.bb_data.raw_data.ix['PE_ttm']
     self.bb_data.raw_data['etop'] = earnings_to_price

예제 #19

0

파일 보기

파일: barra_base.py 프로젝트: rlcjj/mfm-platform

 def get_g_sgro(self):
     """Store the ``sgro`` item in ``self.bb_data.raw_data``.

     Read via ``data.read_data`` when ``sgro.csv`` exists and
     ``self.is_update`` is false; otherwise taken from the
     ``Revenue_ttm_growth_8q`` raw item.
     """
     use_stored = os.path.isfile('sgro.csv') and not self.is_update
     if use_stored:
         growth = data.read_data(['sgro'], ['sgro']).ix['sgro']
     else:
         # Historical revenue stands in for historical sales per share.
         growth = self.bb_data.raw_data.ix['Revenue_ttm_growth_8q']
     self.bb_data.raw_data['sgro'] = growth

예제 #20

0

파일 보기

파일: intangible_info.py 프로젝트: quanttrade/mfm-platform

 def prepare_data(self, *, price='ClosePrice'):
     """Run the ``intangible_info`` preparation, then add three raw items.

     Adds ``NetIncome_ttm``, ``sue`` (read from ``runner_value_36``) and
     ``ep_ttm`` (``NetIncome_ttm`` divided by ``FreeMarketValue``) to
     ``self.strategy_data.raw_data``.

     Args:
         price: Forwarded to ``intangible_info.prepare_data``.
     """
     intangible_info.prepare_data(self, price=price)
     extra = data.read_data(['NetIncome_ttm', 'runner_value_36'],
                            ['NetIncome_ttm', 'sue'],
                            shift=True)
     raw = self.strategy_data.raw_data
     raw['NetIncome_ttm'] = extra['NetIncome_ttm']
     raw['sue'] = extra['sue']
     raw['ep_ttm'] = raw['NetIncome_ttm'] / raw['FreeMarketValue']

예제 #21

0

파일 보기

파일: pa_report_generator.py 프로젝트: quanttrade/mfm-platform

 def get_tar_holding_position(self):
     """Load the target holding volumes and derive normalised positions.

     ``self.tar_holding_vol`` is read from the HDF store
     ``tar_holding_vol`` (key ``'123'``).  Holding values are the volumes
     multiplied by the first item of the shifted ``ClosePrice`` data; each
     row is then divided by its sum.  Entries that are NaN after the
     normalisation are replaced by 0.0 in ``self.tar_position``.
     """
     self.tar_holding_vol = pd.read_hdf('tar_holding_vol', '123')
     cp = data.read_data(['ClosePrice'], shift=True).iloc[0]
     holding_value = self.tar_holding_vol.mul(cp, axis=0)
     # Normalise each row by its sum.
     self.tar_position = self.tar_holding_vol * np.nan
     for strg, holding in holding_value.iteritems():
         self.tar_position[strg] = holding.div(holding.sum(1), axis=0)
     # fillna returns a new object, so the result has to be assigned back;
     # the original call discarded it and left the NaNs in place.
     self.tar_position = self.tar_position.fillna(0.0)

예제 #22

0

파일 보기

파일: barra_base.py 프로젝트: rlcjj/mfm-platform

 def get_g_egrlf(self):
     """Store the ``egrlf`` item in ``self.bb_data.raw_data``.

     Read via ``data.read_data`` when ``egrlf.csv`` exists and
     ``self.is_update`` is false; otherwise computed as
     ``sqrt(NetIncome_fy2 / NetIncome_ttm) - 1``.
     """
     use_stored = os.path.isfile('egrlf.csv') and not self.is_update
     if use_stored:
         growth = data.read_data(['egrlf'], ['egrlf']).ix['egrlf']
     else:
         # FY2 net income stands in for the long-term forecast.
         raw = self.bb_data.raw_data
         ratio = raw.ix['NetIncome_fy2'] / raw.ix['NetIncome_ttm']
         growth = ratio**(1 / 2) - 1
     self.bb_data.raw_data['egrlf'] = growth

예제 #23

0

파일 보기

파일: backtest.py 프로젝트: rlcjj/mfm-platform

    def reset_bkt_benchmark(self, new_bkt_benchmark_data):
        """Replace the backtest benchmark price and reset the backtest data.

        Args:
            new_bkt_benchmark_data: Passed to ``data.read_data`` as the data
                to read; the result is read under the item name
                ``ClosePrice_adj``.
        """
        bkt_data = self.bkt_data
        bkt_data.benchmark_price = data.read_data(
            new_bkt_benchmark_data, ['ClosePrice_adj'])

        # Align the benchmark price with the holding matrix along the major
        # axis, i.e. with the backtest period.
        bkt_data.benchmark_price = data.align_index(
            self.tar_pct_position.holding_matrix,
            bkt_data.benchmark_price,
            axis='major')

        # Reset the backtest data.
        self.reset_bkt_data()

예제 #24

0

파일 보기

파일: barra_base.py 프로젝트: rlcjj/mfm-platform

 def get_g_egrsf(self):
     """Store the ``egrsf`` item in ``self.bb_data.raw_data``.

     Read via ``data.read_data`` when ``egrsf.csv`` exists and
     ``self.is_update`` is false; otherwise computed as
     ``NetIncome_fy1 / NetIncome_ttm - 1``.
     """
     use_stored = os.path.isfile('egrsf.csv') and not self.is_update
     if use_stored:
         growth = data.read_data(['egrsf'], ['egrsf']).ix['egrsf']
     else:
         # FY1 net income stands in for the short-term forecast.
         raw = self.bb_data.raw_data
         growth = raw.ix['NetIncome_fy1'] / raw.ix['NetIncome_ttm'] - 1
     self.bb_data.raw_data['egrsf'] = growth

예제 #25

0

파일 보기

파일: barra_base.py 프로젝트: rlcjj/mfm-platform

 def get_leverage(self):
     """Store the ``leverage`` factor in ``self.bb_data.factor``.

     Read via ``data.read_data`` when ``leverage.csv`` exists and
     ``self.is_update`` is false; otherwise computed as
     ``TotalLiability / TotalAssets``.
     """
     use_stored = os.path.isfile('leverage.csv') and not self.is_update
     if use_stored:
         ratio = data.read_data(['leverage'], ['leverage']).ix['leverage']
     else:
         # Plain liabilities-to-assets ratio.
         raw = self.bb_data.raw_data
         ratio = raw.ix['TotalLiability'] / raw.ix['TotalAssets']
     self.bb_data.factor['leverage'] = ratio

예제 #26

0

파일 보기

 def __init__(self):
     """Initialise the strategy and load the data every factor needs."""
     strategy.__init__(self)
     prepared = self.strategy_data
     # Every factor strategy needs the tradability data.
     prepared.generate_if_tradable(shift=True)
     # Market value data, read for market-value weighting.
     prepared.stock_price = data.read_data(
         ['FreeMarketValue'], ['FreeMarketValue'], shift=True)
     # PDF object used for plotting; starts as the string 'default'.
     self.pdfs = 'default'

예제 #27

0

파일 보기

파일: barra_base.py 프로젝트: rlcjj/mfm-platform

 def get_ey_cetop(self):
     """Store the ``cetop`` item in ``self.bb_data.raw_data``.

     Read via ``data.read_data`` when ``cetop.csv`` exists and
     ``self.is_update`` is false; otherwise computed as
     ``CashEarnings_ttm / FreeMarketValue``.
     """
     use_stored = os.path.isfile('cetop.csv') and not self.is_update
     if use_stored:
         cash_yield = data.read_data(['cetop'], ['cetop']).ix['cetop']
     else:
         # TTM cash earnings divided by market value.
         cash_earnings = self.bb_data.raw_data.ix['CashEarnings_ttm']
         market_value = self.bb_data.stock_price.ix['FreeMarketValue']
         cash_yield = cash_earnings / market_value
     self.bb_data.raw_data['cetop'] = cash_yield

예제 #28

0

파일 보기

파일: multi_factor_strategy.py 프로젝트: quanttrade/mfm-platform

    def get_stock_alpha(self):
        """Compute a per-stock alpha from stored runner values and save it.

        Runner values are read from the HDF store ``runner_value``, masked
        to the investable universe, winsorised and standardised within each
        industry for every date, and summed over the items.  The result is
        written to the HDF store ``stock_alpha_zz500`` (key ``'123'``).
        """
        # Stored runner-value data.
        runner_value = pd.read_hdf('runner_value', '123')

        # NOTE(review): the original comments call the universe HS300, but
        # the pool actually set here is 'zz500'.
        self.strategy_data.stock_pool = 'zz500'
        self.strategy_data.handle_stock_pool()

        # Industry data; the runner values are re-indexed onto its axes.
        industry = data.read_data(['Industry'])['Industry']
        runner_value = runner_value.reindex(major_axis=industry.index,
                                            minor_axis=industry.columns)

        def keep_investable(frame):
            """Return ``frame`` with non-investable entries set to NaN."""
            masked = np.where(self.strategy_data.if_tradable['if_inv'],
                              frame, np.nan)
            return pd.DataFrame(masked, index=frame.index,
                                columns=frame.columns)

        # Drop everything outside the investable universe.
        runner_value = runner_value.apply(keep_investable, axis=(1, 2))
        industry = industry.where(self.strategy_data.if_tradable['if_inv'],
                                  np.nan)

        def standardise_within_industry(raw_data):
            """Winsorise at the 1%/99% quantiles, then standardise.

            ``raw_data`` is a DataFrame indexed by stock with one column per
            factor.
            """
            lower = raw_data.quantile(0.01)
            upper = raw_data.quantile(0.99)
            clipped = np.where(raw_data >= lower, raw_data, lower)
            clipped = np.where(raw_data <= upper, clipped, upper)
            # Restore the NaNs that the comparisons above replaced.
            clipped = np.where(raw_data.isnull(), np.nan, clipped)
            clipped = pd.DataFrame(clipped,
                                   index=raw_data.index,
                                   columns=raw_data.columns)
            centred = clipped.sub(clipped.mean(), axis=1)
            return centred.div(clipped.std(), axis=1)

        # Exposures are computed within each industry, date by date.
        expo_within_indus = runner_value * np.nan
        for day in runner_value.major_axis:
            day_values = runner_value.ix[:, day, :]
            day_expo = day_values.groupby(
                industry.ix[day, :]).apply(standardise_within_industry)
            expo_within_indus.ix[:, day, :] = day_expo

        # The alpha of a stock is the sum of its factor exposures; entries
        # that are not investable become NaN.
        stock_alpha = expo_within_indus.sum(axis=0)
        stock_alpha = stock_alpha.where(
            self.strategy_data.if_tradable['if_inv'], np.nan)

        # Store the computed stock alpha.
        stock_alpha.to_hdf('stock_alpha_zz500', '123')

예제 #29

0

파일 보기

파일: barra_base.py 프로젝트: rlcjj/mfm-platform

 def get_rv_hsigma(self):
     """Store the ``hsigma`` item in ``self.bb_data.raw_data``.

     Read via ``data.read_data`` when ``hsigma.csv`` exists and
     ``self.is_update`` is false.  Otherwise ``self.temp_hsigma`` is used
     when that attribute exists; failing that a message is printed and NaN
     is stored.
     """
     if os.path.isfile('hsigma.csv') and not self.is_update:
         hsigma = data.read_data(['hsigma'], ['hsigma']).ix['hsigma']
     else:
         try:
             hsigma = self.temp_hsigma
         except AttributeError:
             # Nothing stored and nothing computed yet.
             print(
                 'hsigma has not been accquired, if you have rv file stored instead, ingored this message.\n'
             )
             hsigma = np.nan
     self.bb_data.raw_data['hsigma'] = hsigma

예제 #30

0

파일 보기

파일: barra_base.py 프로젝트: rlcjj/mfm-platform

 def get_liq_stom(self):
     """Store the ``stom`` item in ``self.bb_data.raw_data``.

     Read via ``data.read_data`` when ``stom.csv`` exists and
     ``self.is_update`` is false; otherwise computed as the log of the
     21-row rolling sum (at least 5 observations) of ``Volume`` divided by
     ``FreeShares``.  ``self.bb_data.discard_uninv_data()`` is called
     afterwards in both cases.
     """
     use_stored = os.path.isfile('stom.csv') and not self.is_update
     if use_stored:
         stom = data.read_data(['stom'], ['stom']).ix['stom']
     else:
         volume = self.bb_data.stock_price.ix['Volume']
         turnover = volume.div(self.bb_data.stock_price.ix['FreeShares'])
         stom = turnover.rolling(21, min_periods=5).apply(
             lambda window: np.log(np.sum(window)))
     self.bb_data.raw_data['stom'] = stom
     # Filter the data here because stom feeds the later stoq and stoa
     # calculations.
     self.bb_data.discard_uninv_data()