Esempio n. 1
0
 def _calc_factor_loading_proc(cls, code, calc_date, q):
     """
     Worker entry point used to compute one stock's BLEV loading in parallel.

     Something is always pushed onto ``q``: the value returned by
     ``cls._calc_factor_loading`` or, when that call raises or returns None,
     a placeholder row whose 'blev' entry is NaN.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. SH600000 or 600000
     :param calc_date: datetime-like, str
         Calculation date, format: YYYY-MM-DD
     :param q: queue used for inter-process communication
     :return: None; the factor loading is put on the queue
     """
     date_label = Utils.datetimelike_to_str(calc_date)
     logging.debug('[{}] Calc BLEV factor of {}.'.format(date_label, code))
     try:
         loading = cls._calc_factor_loading(code, calc_date)
     except Exception as err:
         # Best effort: report the failure and fall back to the placeholder.
         print(err)
         loading = None
     if loading is None:
         placeholder = [Utils.code_to_symbol(code), np.nan]
         loading = pd.Series(placeholder, index=['code', 'blev'])
     q.put(loading)
Esempio n. 2
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the EPFWD factor loading of one stock on one date.

        Parameters:
        --------
        :param code: str
            Stock code, e.g. SH600000 or 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            EPFWD loading of the stock
            0. code
            1. epfwd
            None if the calculation fails (no earnings data, NaN earnings,
            or no LNCAP loading for the date)
        """
        symbol = Utils.code_to_symbol(code)
        # Prefer the consensus forecast earnings; when none is available,
        # substitute the TTM net profit.
        consensus = Utils.get_consensus_data(calc_date, symbol, ConsensusType.PredictedEarings)
        if consensus is not None:
            earnings = consensus['predicted_earnings']
        else:
            ttm_data = Utils.get_ttm_fin_basic_data(symbol, calc_date)
            if ttm_data is None:
                return None
            earnings = ttm_data['NetProfit']
        if np.isnan(earnings):
            return None
        # Market cap is recovered from the stored LNCAP loading via exp().
        lncap_file = os.path.join(factor_ct.FACTOR_DB.db_path, risk_ct.LNCAP_CT.db_file)
        date_key = Utils.datetimelike_to_str(calc_date, dash=False)
        lncap = Utils.read_factor_loading(lncap_file, date_key, symbol)
        if lncap.empty:
            return None
        # epfwd = forecast earnings / market cap
        # (the 10000.0 factor is presumably a unit conversion -- TODO confirm)
        market_cap = np.exp(lncap['factorvalue'])
        loading = earnings * 10000.0 / market_cap

        return pd.Series([symbol, loading], index=['code', 'epfwd'])
Esempio n. 3
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the BLEV factor loading of one stock on one date.

        Parameters:
        --------
        :param code: str
            Stock code, e.g. SH600000 or 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            BLEV loading of the stock
            0. code
            1. blev
            None if the calculation fails (no financial summary, NaN or
            near-zero shareholder equity, or no usable liabilities figure)
        """
        symbol = Utils.code_to_symbol(code)
        # Latest financial-report summary as of the calculation date.
        fin_summary = Utils.get_fin_summary_data(symbol, Utils.get_fin_report_date(calc_date))
        if fin_summary is None:
            return None
        book_equity = fin_summary['TotalShareholderEquity']
        # Equity is the denominator, so it must be a number and not (nearly) zero.
        if np.isnan(book_equity) or abs(book_equity) < utils_con.TINY_ABS_VALUE:
            return None
        # Use non-current liabilities; fall back to total liabilities if missing.
        long_term_debt = fin_summary['TotalNonCurrentLiabilities']
        if np.isnan(long_term_debt):
            long_term_debt = fin_summary['TotalLiabilities']
        if np.isnan(long_term_debt):
            return None
        # The pe term is fixed at zero in this implementation.
        preferred_equity = 0
        # blev = (be + pe + ld) / be
        leverage = (book_equity + preferred_equity + long_term_debt) / book_equity

        return pd.Series([symbol, leverage], index=['code', 'blev'])
Esempio n. 4
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the DASTD factor loading of one stock on one date.

     DASTD is the square root of the exponentially weighted sum of squared
     deviations of the daily log returns from their mean, over the trailing
     window configured in ``risk_ct.DASTD_CT``.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. SH600000 or 600000
     :param calc_date: datetime-like, str
         Calculation date, format: YYYY-MM-DD
     :return: pd.Series
     --------
         DASTD loading of the stock
         0. code
         1. dastd
         None if the calculation fails (no quotes, or fewer than two quote
         rows so that no daily return can be formed)
     """
     # Adjusted daily quotes; one extra day is needed to form the first return.
     df_secu_quote = Utils.get_secu_daily_mkt(code, end=calc_date, ndays=risk_ct.DASTD_CT.trailing+1, fq=True)
     if df_secu_quote is None:
         return None
     # With fewer than two closes there is no return series; without this
     # guard the weighted sum over an empty array would yield a bogus 0.0.
     if len(df_secu_quote) < 2:
         return None
     df_secu_quote.reset_index(drop=True, inplace=True)
     # Daily log returns and their mean.
     closes = np.array(df_secu_quote['close'])
     daily_ret = np.log(closes[1:] / closes[:-1])
     avg_daily_ret = np.mean(daily_ret)
     # Exponential weights: the most recent return has exponent 0, the
     # oldest has exponent T-1; every weight except the oldest is scaled
     # by alpha.
     T = len(daily_ret)
     alpha = 1 - np.exp(np.log(0.5)/risk_ct.DASTD_CT.half_life)
     decay = np.float_power(1 - alpha, np.arange(T - 1, -1, -1))
     scale = np.full(T, alpha)
     scale[0] = 1
     weights = decay * scale
     # DASTD value.
     dastd = np.sqrt(np.sum((daily_ret - avg_daily_ret) ** 2 * weights))
     return pd.Series([Utils.code_to_symbol(code), dastd], index=['code', 'dastd'])
Esempio n. 5
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the ETOP factor loading of one stock on one date.

        Parameters:
        --------
        :param code: str
            Stock code, e.g. SH600000 or 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            ETOP loading of the stock
            0. code
            1. etop
            None if the calculation fails (no TTM data, NaN net profit,
            or no LNCAP loading for the date)
        """
        symbol = Utils.code_to_symbol(code)
        # TTM net profit of the stock.
        ttm_data = Utils.get_ttm_fin_basic_data(symbol, calc_date)
        if ttm_data is None:
            return None
        net_profit = ttm_data['NetProfit']
        if np.isnan(net_profit):
            return None
        # Market cap is recovered from the stored LNCAP loading via exp().
        lncap_file = os.path.join(factor_ct.FACTOR_DB.db_path, risk_ct.LNCAP_CT.db_file)
        date_key = Utils.datetimelike_to_str(calc_date, dash=False)
        lncap = Utils.read_factor_loading(lncap_file, date_key, symbol)
        if lncap.empty:
            return None
        market_cap = np.exp(lncap['factorvalue'])
        # etop = TTM net profit / market cap
        # (the 10000 factor is presumably a unit conversion -- TODO confirm)
        loading = net_profit * 10000 / market_cap

        return pd.Series([symbol, loading], index=['code', 'etop'])
Esempio n. 6
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the value factors (ep_ttm, bp_lr, ocf_ttm) of one stock on one date.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like or str
         Calculation date, format YYYY-MM-DD or YYYYMMDD
     :return: pd.Series
     --------
         Value factors, each rounded to 6 decimals
         0. ep_ttm: TTM net profit / total market cap
         1. bp_lr: net assets (latest report) / total market cap
         2. ocf_ttm: TTM operating cash flow / total market cap
         None if any required input is unavailable
     """
     symbol = Utils.code_to_symbol(code)
     as_of = Utils.to_date(calc_date)
     # TTM financial data.
     ttm = Utils.get_ttm_fin_basic_data(symbol, as_of)
     if ttm is None:
         return None
     # Latest financial report.
     latest_report = Utils.get_fin_basic_data(symbol, Utils.get_fin_report_date(as_of))
     if latest_report is None:
         return None
     # Unadjusted quote and share structure, used for the total market cap.
     quote = Utils.get_secu_daily_mkt(symbol, as_of, fq=False, range_lookup=True)
     if quote.shape[0] == 0:
         return None
     shares = Utils.get_cap_struct(symbol, as_of)
     if shares is None:
         return None
     # B and H shares are excluded from the share count.
     share_count = shares.total - shares.liquid_b - shares.liquid_h
     market_cap = share_count * quote.close
     # Value ratios; financial amounts are scaled by the configured unit.
     unit = util_ct.FIN_DATA_AMOUNT_UNIT
     ep = ttm['NetProfit'] * unit / market_cap
     ocf = ttm['NetOperateCashFlow'] * unit / market_cap
     bp = latest_report['ShareHolderEquity'] * unit / market_cap
     rounded = [round(ep, 6), round(bp, 6), round(ocf, 6)]
     return Series(rounded, index=['ep_ttm', 'bp_lr', 'ocf_ttm'])
Esempio n. 7
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the scale (size) factor values of one stock on one date.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like, str
         Calculation date, format YYYY-MM-DD or YYYYMMDD
     :return: pd.Series
     --------
         Scale factor values, indexed as follows:
         0. LnTotalMktCap: log of total market cap
         1. LnLiquidMktCap: log of free-float (A-share) market cap
         None if no quote or no share-structure data is available
     """
     # Latest unadjusted quote up to the given date.
     code = Utils.code_to_symbol(code)
     calc_date = Utils.to_date(calc_date)
     mkt_daily = Utils.get_secu_daily_mkt(code,
                                          calc_date,
                                          fq=False,
                                          range_lookup=True)
     if mkt_daily.shape[0] == 0:
         return None
     # Latest share structure up to the given date.
     cap_struct = Utils.get_cap_struct(code, calc_date)
     if cap_struct is None:
         return None
     # B and H shares are excluded from the total share count.
     total_cap = cap_struct.total - cap_struct.liquid_b - cap_struct.liquid_h
     ln_total = math.log(total_cap * mkt_daily.close)
     ln_liquid = math.log(cap_struct.liquid_a * mkt_daily.close)
     # Build the Series in one step rather than growing an empty Series(),
     # which emits pandas' empty-Series dtype warning.
     return Series([ln_total, ln_liquid],
                   index=['LnTotalMktCap', 'LnLiquidMktCap'])
Esempio n. 8
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the short-term and long-term momentum factors of one stock.

     The look-back lengths come from ``factor_ct.MOMENTUM_CT`` as
     '|'-separated lists of trading-day counts.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. SH600000 or 600000
     :param calc_date: datetime-like or str
         Calculation date, format YYYY-MM-DD or YYYYMMDD
     :return: pd.Series
     --------
         Momentum values rounded to 6 decimals, labelled
         'short_term_<days>' and 'long_term_<days>' (short-term first).
         None if the first (shortest) momentum cannot be computed; a later
         failure reuses the previous horizon's value.
     """
     short_days = [
         int(token)
         for token in factor_ct.MOMENTUM_CT.short_term_days.split('|')
     ]
     long_days = [
         int(token)
         for token in factor_ct.MOMENTUM_CT.long_term_days.split('|')
     ]
     # Labels follow the same order as the values: short-term, then long-term.
     labels = ['short_term_%d' % n for n in short_days]
     labels += ['long_term_%d' % n for n in long_days]
     values = []
     for n in short_days + long_days:
         interval_ret = Utils.calc_interval_ret(code, end=calc_date, ndays=n)
         if interval_ret is None:
             if not values:
                 # Nothing to fall back on when the very first horizon fails.
                 return None
             interval_ret = values[-1]
         values.append(round(interval_ret, 6))
     return Series(values, index=labels)
def _check_dlisted_indclassify():
    """
    Check that every delisted stock has an industry classification entry.

    Reads 'delisted_classify_sw.csv' from the location configured in
    'config.ini' (sections 'factor_db' and 'industry_classify'), compares
    its 'id' column with the symbols of stocks whose 'status' is 3 in the
    stock basics, and prints a warning listing any delisted symbols missing
    from the classification file. Prints nothing when none are missing.
    """
    # Delisted-stock industry classification data.
    cfg = ConfigParser()
    cfg.read('config.ini')
    delisted_data_path = os.path.join(
        cfg.get('factor_db', 'db_path'),
        cfg.get('industry_classify', 'classify_data_path'),
        'delisted_classify_sw.csv')
    df_delisted_indclassify = pd.read_csv(delisted_data_path, header=0)
    # Basic info of stocks that are already delisted (status == 3).
    df_stock_basics = Utils.get_stock_basics(all=True)
    df_delisted_basics = df_stock_basics[df_stock_basics['status'] == 3]
    # Keep only the delisted stocks absent from the classification data.
    classified_ids = df_delisted_indclassify['id'].tolist()
    df_missing = df_delisted_basics[
        ~df_delisted_basics['symbol'].isin(classified_ids)]
    # `not`, not `~`: bitwise NOT on a Python bool is always truthy
    # (~True == -2, ~False == -1), which made the warning print every time.
    if not df_missing.empty:
        print('\033[1;31;40m个股{}已退市, 需加入退市股票行业分类数据中.\033[0m'.format(
            str(df_missing['symbol'].tolist())))
Esempio n. 10
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the BTOP factor loading of one stock on one date.

     The LNCAP loadings of all stocks for the date are read once and kept
     in ``cls._LNCAP_Cache`` keyed by the date string.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. SH600000 or 600000
     :param calc_date: datetime-like, str
         Calculation date, format: YYYY-MM-DD
     :return: pd.Series
     --------
         BTOP loading of the stock
         0. code
         1. btop
         None if the calculation fails (no financial data, or no LNCAP
         loading for the stock on that date)
     """
     # Financial data from the latest report as of the calculation date.
     report_date = Utils.get_fin_report_date(calc_date)
     fin_data = Utils.get_fin_basic_data(code, report_date)
     if fin_data is None:
         return None
     # LNCAP loadings for the date: cache first, file on a miss.
     date_key = Utils.datetimelike_to_str(calc_date, dash=False)
     lncap_all = cls._LNCAP_Cache.get(date_key)
     if lncap_all is None:
         lncap_file = os.path.join(factor_ct.FACTOR_DB.db_path,
                                   risk_ct.LNCAP_CT.db_file)
         lncap_all = Utils.read_factor_loading(lncap_file, date_key)
         cls._LNCAP_Cache.set(date_key, lncap_all)
     symbol = Utils.code_to_symbol(code)
     own_lncap = lncap_all[lncap_all['id'] == symbol]
     if own_lncap.empty:
         return None
     ln_cap = own_lncap.iloc[0]['factorvalue']
     # Book-to-price = net assets / market cap
     # (the 10000 factor is presumably a unit conversion -- TODO confirm).
     net_assets = fin_data['TotalAsset'] - fin_data['TotalLiability']
     loading = net_assets * 10000 / np.exp(ln_cap)
     return pd.Series([symbol, loading], index=['code', 'btop'])
Esempio n. 11
0
 def _calc_factor_loading_proc(cls, code, calc_date, q):
     """
     Worker entry point used to compute one stock's BTOP loading in parallel.

     Nothing is put on the queue when the calculation raises or returns None.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. SH600000 or 600000
     :param calc_date: datetime-like, str
         Calculation date, format: YYYY-MM-DD
     :param q: queue used for inter-process communication
     :return: None; a successful factor loading is put on the queue
     """
     date_label = Utils.datetimelike_to_str(calc_date)
     logging.info('[{}] Calc BTOP factor of {}.'.format(date_label, code))
     try:
         loading = cls._calc_factor_loading(code, calc_date)
     except Exception as err:
         # Best effort: report the failure and skip this stock.
         print(err)
         return
     if loading is None:
         return
     q.put(loading)
Esempio n. 12
0
 def _calc_factor_loading_proc(cls, code, calc_date, q):
     """
     Worker entry point used to compute one stock's Growth factor in parallel.

     Nothing is put on the queue when the calculation raises or returns None.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like, str
         Calculation date, format: YYYY-MM-DD or YYYYMMDD
     :param q: queue used for inter-process communication
     :return: None; a successful factor loading is put on the queue
     """
     date_label = Utils.datetimelike_to_str(calc_date)
     logging.info('[%s] Calc Growth factor of %s.' % (date_label, code))
     result = None
     try:
         result = cls._calc_factor_loading(code, calc_date)
     except Exception as err:
         # Best effort: report the failure; `result` stays None.
         print(err)
     if result is None:
         return
     q.put(result)
Esempio n. 13
0
    def _get_factor_weight(cls, date=None):
        """
        Return the weights of the intraday momentum factors at each time slot.

        --------
        :param date: datetime-like or str
            Date, default None.
            If None, the whole weight table is returned.
        :return: pd.Series, pd.DataFrame
            With a date: the row of the latest date on or before ``date``;
            if there is none, the row of the earliest date on or after it.
        --------
            0. date: date
            1. w0: weight of the first slot's momentum factor
            2. w1: weight of the second slot's momentum factor
            3. w2: weight of the third slot's momentum factor
            4. w3: weight of the fourth slot's momentum factor
            5. w4: weight of the fifth slot's momentum factor
            None if the weight file is missing or holds no usable row
        """

        weight_file = os.path.join(SETTINGS.FACTOR_DB_PATH, alphafactor_ct.INTRADAYMOMENTUM_CT.optimal_weight_file)
        if not os.path.isfile(weight_file):
            return None
        # First CSV column is parsed as dates; rows are put in date order.
        weights = pd.read_csv(weight_file, parse_dates=[0], header=0)
        weights = weights.sort_values(by='date')

        if date is None:
            return None if weights.empty else weights

        target = Utils.to_date(date)
        on_or_before = weights[weights.date <= target]
        if on_or_before.shape[0] > 0:
            return on_or_before.iloc[-1]
        # No earlier row: fall back to the first row on or after the date.
        on_or_after = weights[weights.date >= target]
        if on_or_after.shape[0] > 0:
            return on_or_after.iloc[0]
        return None
Esempio n. 14
0
 def _calc_factor_loading_proc(cls, code, calc_date, q):
     """
     Worker entry point used to compute one stock's SmartQ loading in parallel.

     Nothing is put on the queue when the calculation raises or returns None.

     Parameters
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like or str
         Calculation date
     :param q: queue used for inter-process communication
     :return: None; on success the tuple (symbol, smart_q) is put on ``q``
     """
     # Format through Utils rather than calc_date.strftime(): calc_date may
     # be a str, which has no strftime and would crash the worker before
     # the calculation starts.
     logging.info('[%s] Calc SmartQ of %s.' %
                  (Utils.datetimelike_to_str(calc_date, dash=True), code))
     smart_q = None
     try:
         smart_q = cls._calc_factor_loading(code, calc_date)
     except Exception as e:
         # Best effort: report the failure and skip this stock.
         print(e)
     if smart_q is not None:
         q.put((Utils.code_to_symbol(code), smart_q))
Esempio n. 15
0
 def _calc_factor_loading_proc(cls, code, calc_date, q):
     """
     Worker entry point used to compute one stock's RSTR loading in parallel.

     Nothing is put on the queue when the calculation raises or returns None.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. SH600000 or 600000
     :param calc_date: datetime-like, str
         Calculation date, format: YYYY-MM-DD
     :param q: queue used for inter-process communication
     :return: None; a successful factor loading is put on the queue
     """
     date_label = Utils.datetimelike_to_str(calc_date)
     logging.info('[%s] Calc RSTR factor of %s.' % (date_label, code))
     try:
         loading = cls._calc_factor_loading(code, calc_date)
     except Exception as err:
         # Best effort: report the failure and skip this stock.
         print(err)
         loading = None
     if loading is not None:
         q.put(loading)
Esempio n. 16
0
def load_st_info():
    """
    Import the ST (special treatment) start/end periods of each stock.

    Reads '<raw_data_path>/st_info.csv' (paths and the lists of start/end
    event types come from 'config.ini', sections 'factor_db' and 'st_info').
    Each row's 'st_info' field is a comma-separated list of 'type:date'
    events. Events are paired into (code, st_start, st_end) periods and the
    result is written to '<db_path>/<st_info_path>/st_info.csv'. A period
    that is still open gets the end date '20301231'.

    Prints a message and returns without writing anything if the raw file
    does not exist.
    """
    cfg = ConfigParser()
    cfg.read('config.ini')
    factor_db_path = cfg.get('factor_db', 'db_path')
    raw_data_path = cfg.get('st_info', 'raw_data_path')
    st_info_path = cfg.get('st_info', 'st_info_path')
    st_start_types = cfg.get('st_info', 'st_start_types').split(',')
    st_end_types = cfg.get('st_info', 'st_end_types').split(',')

    raw_file = os.path.join(raw_data_path, 'st_info.csv')
    if not os.path.isfile(raw_file):
        print('\033[1;31;40mst_info.csv原始文件不存在.\033[0m')
        return
    df_st_rawinfo = pd.read_csv(raw_file, header=0)
    # Drop stocks without any ST record ('0' or missing).
    df_st_rawinfo = df_st_rawinfo[(df_st_rawinfo['st_info'] != '0') & (~df_st_rawinfo['st_info'].isna())]

    # Collect rows in a list and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    columns = ['code', 'st_start', 'st_end']
    periods = []
    for _, st_data in df_st_rawinfo.iterrows():
        st_start_date = None
        code = Utils.code_to_symbol(st_data['code'])
        # Events are processed in reverse of the stored order
        # (presumably stored newest-first -- TODO confirm).
        for st_info in reversed(st_data['st_info'].split(',')):
            if ':' not in st_info:
                continue
            st_type, st_date = st_info.split(':')[:2]
            if not (st_type in st_start_types or st_type in st_end_types):
                print('st type: {} is not counted.'.format(st_type))
                continue
            if st_type in st_start_types and st_start_date is None:
                st_start_date = st_date
            elif st_type in st_end_types and st_start_date is not None:
                periods.append([code, st_start_date, st_date])
                st_start_date = None
        if st_start_date is not None:
            # Still under ST: close the period with a far-future sentinel.
            periods.append([code, st_start_date, '20301231'])
    df_st_info = pd.DataFrame(periods, columns=columns)
    df_st_info.to_csv(os.path.join(factor_db_path, st_info_path, 'st_info.csv'), index=False)
Esempio n. 17
0
    def _calc_periodmomentum_ic(cls, calc_date, date_interval_type='month'):
        """
        Compute the Rank IC vector of the intraday period momentum factors.

        The raw 'periodmomentum' loadings for ``calc_date`` are correlated
        (Spearman) with each stock's subsequent return, and the result is
        appended to the IC file configured in
        ``alphafactor_ct.INTRADAYMOMENTUM_CT``.

        Parameters:
        --------
        :param calc_date: datetime-like, str
            Calculation date, e.g: YYYY-MM-DD, YYYYMMDD
        :param date_interval_type: str
            Horizon of the forward return: 'month' = next month's return,
            'day' = next trading day's return
        :return: pd.Series
        --------
            Rank IC vector
            0. m0 .. m4: Rank IC of each period's momentum factor
            1. date: the calculation date
            None if no factor loadings are available for the date
        :raises ValueError: if ``date_interval_type`` is neither 'month'
            nor 'day'
        """
        # Raw loadings of the intraday period momentum factors.
        df_period_mom = cls._get_factor_loading(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False),
                                                factor_name='periodmomentum', factor_type='raw', drop_na=True)
        if df_period_mom.empty:
            return None

        if date_interval_type == 'month':
            # Forward return window: the following month.
            ret_start, ret_end = Utils.next_month(calc_date)
        elif date_interval_type == 'day':
            # Forward return window: the next trading day.
            ret_start = ret_end = Utils.get_trading_days(start=calc_date, ndays=2)[1]
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on ret_start.
            raise ValueError(
                "date_interval_type must be 'month' or 'day', got {!r}".format(date_interval_type))

        df_period_mom['ret'] = np.nan
        for idx, factorloading_data in df_period_mom.iterrows():
            fret = Utils.calc_interval_ret(factorloading_data['id'], start=ret_start, end=ret_end)
            if fret is not None:
                df_period_mom.loc[idx, 'ret'] = fret
        # Stocks without a forward return are excluded from the correlation.
        df_period_mom.dropna(inplace=True)
        # Rank IC = Spearman correlation between each factor and the return.
        df_period_mom.drop(columns=['date', 'id', 'm_normal'], inplace=True)
        df_spearman_corr = df_period_mom.corr(method='spearman')
        rank_IC = df_spearman_corr.loc['ret', ['m0', 'm1', 'm2', 'm3', 'm4']]
        rank_IC['date'] = calc_date
        # Append the Rank IC values to the IC time-series file.
        ic_filepath = os.path.join(SETTINGS.FACTOR_DB_PATH, alphafactor_ct.INTRADAYMOMENTUM_CT['factor_ic_file'])
        Utils.save_timeseries_data(rank_IC, ic_filepath, save_type='a', columns=['date', 'm0', 'm1', 'm2', 'm3', 'm4'])

        return rank_IC
Esempio n. 18
0
 def _get_factor_loading(cls,
                         db_file,
                         str_key,
                         factor_name=None,
                         factor_type=None,
                         **kwargs):
     """
     Read factor loading data through ``Utils.read_factor_loading``.

     Parameters:
     --------
     :param db_file: str
         Path of the factor loading data (absolute path)
     :param str_key: str
         Key, usually a date, e.g: YYYY-MM-DD, YYYYMMDD
     :param factor_name: str, default None
         Factor name; joined onto the path only when factor_type is given
     :param factor_type: str, default None
         Factor type, e.g: 'raw', 'standardized', 'orthogonalized'.
         When given, the path read is db_file/factor_type/factor_name.
     :param kwargs:
         kwargs['code']: str, default None; stock code, e.g: SH600000, 600000
         kwargs['na_value']: object, default None; passed through as the
             replacement for missing values
         kwargs['drop_na']: bool, default False; whether rows containing
             NaN are dropped
     :return: whatever ``Utils.read_factor_loading`` returns
     --------
         pd.DataFrame (code is None) or pd.Series (code given), according
         to the original documentation
         0. date
         1. id
         2. factorvalue
     """
     if factor_type is None:
         target = db_file
     else:
         target = os.path.join(db_file, factor_type, factor_name)
     # Optional settings fall back to their defaults when not supplied.
     code = kwargs.get('code')
     na_value = kwargs.get('na_value')
     drop_na = kwargs.get('drop_na', False)
     return Utils.read_factor_loading(target, str_key, code, na_value, drop_na)
Esempio n. 19
0
def load_cap_struct(date):
    """
    Import the share capital structure data of all stocks for ``date``.

    Reads '<raw_data_path>/<YYYYMMDD>.csv' (paths come from the
    'cap_struct' section of 'config.ini'), prefixes each code with its
    market, then writes the full table to '<db_path>/cap_struct.csv' and
    one '<db_path>/<code>.csv' file per stock.

    Prints a message and returns without writing anything if the raw file
    does not exist.

    :param date: datetime-like or str
        Date identifying the raw file to import
    """
    date_key = Utils.datetimelike_to_str(date, dash=False)
    cfg = ConfigParser()
    cfg.read('config.ini')
    raw_dir = cfg.get('cap_struct', 'raw_data_path')
    out_dir = cfg.get('cap_struct', 'db_path')
    raw_file = os.path.join(raw_dir, '{}.csv'.format(date_key))
    if not os.path.isfile(raw_file):
        print('\033[1;31;40mCap struct file of %s does not exits.\033[0m' %
              date_key)
        return
    raw_columns = [
        'mkt', 'code', 'date', 'reason', 'total',
        'liquid_a', 'liquid_b', 'liquid_h'
    ]
    # The file's own header row is replaced by the names above; codes are
    # read as strings.
    cap = pd.read_csv(raw_file,
                      names=raw_columns,
                      header=0,
                      encoding='GB18030',
                      dtype={'code': str})
    # Full code = market prefix + numeric code; the market column is then dropped.
    cap.code = cap.apply(lambda row: row.mkt + row.code, axis=1)
    cap = cap.drop(columns=['mkt'])
    out_header = ['代码', '变更日期', '变更原因', '总股本', '流通A股', '流通B股', '流通H股']
    # First the complete table in a single file ...
    cap.to_csv(os.path.join(out_dir, 'cap_struct.csv'),
               index=False,
               header=out_header)
    # ... then one file per stock.
    for code in cap.code.unique():
        per_stock = cap[cap.code == code]
        per_stock.to_csv(os.path.join(out_dir, code + '.csv'),
                         index=False,
                         header=out_header)
Esempio n. 20
0
def smartq_backtest(start, end):
    """
    Historical backtest of the SmartQ factor.

    Walks the trading days between ``start`` and ``end``. On a month-start
    day the portfolio is rebalanced from the SmartQ loadings of the previous
    trading day; on any other day the current portfolio is valued. The NAV
    series is written to 'port_nav.csv' and each rebalance's holdings to
    'port_data_<YYYYMMDD>.csv' under the configured backtest path.

    Parameters:
    --------
    :param start: datetime-like, str
        Backtest start date, format: YYYY-MM-DD; should be a month start
    :param end: datetime-like, str
        Backtest end date, format: YYYY-MM-DD
    :return: None
    """
    # Trading days between the start and end dates.
    trading_days = Utils.get_trading_days(start, end)
    # Load the latest saved backtest data as of the start date.
    prev_trading_day = Utils.get_prev_n_day(trading_days.iloc[0], 1)
    backtest_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                 alphafactor_ct.SMARTMONEY_CT.backtest_path)
    factor_data, port_nav = Utils.get_backtest_data(backtest_path,
                                                    trading_days.iloc[0])
    # factor_data holds the SmartQ info of the stocks selected at the latest
    # rebalance (columns used below: date, factorvalue, id, buyprice).
    if port_nav is None:
        # No saved NAV: start at 1.0 on the day before the first trading day.
        port_nav = DataFrame({
            'date': [prev_trading_day.strftime('%Y-%m-%d')],
            'nav': [1.0]
        })
    # Walk the trading days: on a month start read the SmartQ loadings and
    # rebalance; otherwise value the portfolio.
    t = 0  # number of rebalances performed
    for trading_day in trading_days:
        # Base NAV: the NAV on the last rebalance date if a portfolio is
        # held, otherwise the NAV of the previous trading day.
        if factor_data is None:
            nav = port_nav[port_nav.date == prev_trading_day.strftime(
                '%Y-%m-%d')].iloc[0].nav
        else:
            nav = port_nav[port_nav.date ==
                           factor_data.iloc[0].date].iloc[0].nav
        interval_ret = 0.0
        # Rebalance at month start.
        if Utils.is_month_start(trading_day):
            logging.info('[%s] 月初调仓.' %
                         Utils.datetimelike_to_str(trading_day, True))
            # Before rebalancing, value the old holdings as if sold today at
            # the volume-weighted average price.
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                         trading_day,
                                                         fq=True,
                                                         range_lookup=True)
                    # If the quote found is not today's, the close of that
                    # quote is used instead of a VWAP.
                    if daily_mkt.date == trading_day.strftime('%Y-%m-%d'):
                        vwap_price = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                    else:
                        vwap_price = daily_mkt.close
                    interval_ret += vwap_price / factor_info.buyprice - 1.0
                # Equal-weighted average return of the old holdings.
                interval_ret /= float(len(factor_data))
                nav *= (1.0 + interval_ret)
            # Read the factor loadings of the previous trading day.
            factor_data = Utils.read_factor_loading(
                SmartMoney.get_db_file(),
                Utils.datetimelike_to_str(prev_trading_day, False))
            # For each stock compute the return over the past 20 days, and
            # drop stocks that are suspended or limit-up on the rebalance day
            # or whose 20-day return cannot be computed.
            ind_to_be_deleted = []
            factor_data['ret20'] = np.zeros(len(factor_data))
            for ind, factor_info in factor_data.iterrows():
                trading_status = Utils.trading_status(factor_info.id,
                                                      trading_day)
                if trading_status == SecuTradingStatus.Suspend or trading_status == SecuTradingStatus.LimitUp:
                    ind_to_be_deleted.append(ind)
                fret20 = Utils.calc_interval_ret(factor_info.id,
                                                 end=prev_trading_day,
                                                 ndays=20)
                if fret20 is None:
                    if ind not in ind_to_be_deleted:
                        ind_to_be_deleted.append(ind)
                else:
                    factor_data.loc[ind, 'ret20'] = fret20
            factor_data = factor_data.drop(ind_to_be_deleted, axis=0)
            # Sort by 20-day return descending and drop the top 20%.
            k = int(factor_data.shape[0] * 0.2)
            factor_data = factor_data.sort_values(by='ret20',
                                                  ascending=False).iloc[k:]
            del factor_data['ret20']  # the helper column is no longer needed
            # Sort by factor value ascending and keep the first 10%.
            factor_data = factor_data.sort_values(by='factorvalue',
                                                  ascending=True)
            k = int(factor_data.shape[0] * 0.1)
            factor_data = factor_data.iloc[:k]
            # Record each stock's buy price (today's VWAP) and compute the
            # portfolio return from buy price to today's close.
            # NOTE(review): if the selection is empty the division below
            # raises ZeroDivisionError -- confirm whether that can happen.
            factor_data['buyprice'] = 0.0
            interval_ret = 0.0
            for ind, factor_info in factor_data.iterrows():
                daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                     trading_day,
                                                     fq=True,
                                                     range_lookup=False)
                assert len(daily_mkt) > 0
                factor_data.loc[
                    ind,
                    'buyprice'] = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                interval_ret += daily_mkt.close / factor_data.loc[
                    ind, 'buyprice'] - 1.0
            interval_ret /= float(factor_data.shape[0])
            nav *= (1.0 + interval_ret)
            # Save the new holdings.
            port_data_path = os.path.join(
                SETTINGS.FACTOR_DB_PATH,
                alphafactor_ct.SMARTMONEY_CT.backtest_path,
                'port_data_%s.csv' %
                Utils.datetimelike_to_str(trading_day, False))
            factor_data.to_csv(port_data_path, index=False)
            t += 1
            # Pause for 300 seconds after every 6th rebalance
            # (presumably to throttle data access -- TODO confirm).
            if t % 6 == 0:
                logging.info('Suspended for 300s.')
                time.sleep(300)
        else:
            # Not a rebalance day: value the portfolio at today's close
            # relative to the buy prices.
            logging.info('[%s] 月中估值.' %
                         Utils.datetimelike_to_str(trading_day, True))
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                         trading_day,
                                                         fq=True,
                                                         range_lookup=True)
                    interval_ret += daily_mkt.close / factor_info.buyprice - 1.0
                interval_ret /= float(factor_data.shape[0])
                nav *= (1.0 + interval_ret)
        # Append today's NAV.
        # NOTE(review): DataFrame.append was removed in pandas 2.0 -- this
        # call needs replacing when the pandas version is upgraded.
        port_nav = port_nav.append(Series({
            'date':
            Utils.datetimelike_to_str(trading_day, True),
            'nav':
            nav
        }),
                                   ignore_index=True)
        # Advance prev_trading_day.
        prev_trading_day = trading_day
    # Save the NAV series.
    port_nav_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                 alphafactor_ct.SMARTMONEY_CT.backtest_path,
                                 'port_nav.csv')
    port_nav.to_csv(port_nav_path, index=False)
Esempio n. 21
0
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Compute the SmartMoney factor loading of the sample stocks on the
        requested date(s), optionally saving it to the factor database.

        Parameters
        --------
        :param start_date: datetime-like, str
            Start date.
        :param end_date: datetime-like, str, default None
            End date. If None, only start_date is processed.
        :param month_end: bool, default True
            If True, only month-end trading days are processed.
        :param save: bool, default False
            Whether to save the loadings to the factor database.
        :param kwargs:
            'multi_proc': bool, True = compute with a process pool,
            False = compute in the current process. Default False.
        :return: dict
        --------
            Raw (non-standardized) loadings of the last processed date,
            as a dict of equally long lists:
            0. date: second entry of the 2-day trading calendar starting at
               the calculation date (i.e. the next trading day)
            1. id: security code
            2. factorvalue: factor loading
            None if no date was processed.
        """
        # 0. Build the list of trading days to process.
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        # Read the option without mutating the caller-supplied kwargs.
        multi_proc = kwargs.get('multi_proc', False)
        # Stays None until a date is actually processed. It must NOT be reset
        # before the month-end check below, otherwise a trailing
        # non-month-end day would wipe the last computed result.
        dict_factor = None
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            # 1. Sample stocks, selected with a date 90 calendar days
            #    before the calculation date.
            s = (calc_date - datetime.timedelta(days=90)).strftime('%Y%m%d')
            stock_basics = Utils.get_stock_basics(s)
            # 2. Compute the loading of every sample stock.
            dict_factor = {'date': None, 'id': [], 'factorvalue': []}

            if not multi_proc:
                # Single-process computation.
                for _, stock_info in stock_basics.iterrows():
                    factor_loading = cls._calc_factor_loading(
                        stock_info.symbol, calc_date)
                    print(
                        "[%s]Calculating %s's SmartMoney factor loading = %.4f."
                        % (calc_date.strftime('%Y-%m-%d'), stock_info.symbol,
                           -1.0 if factor_loading is None else factor_loading))
                    if factor_loading is not None:
                        dict_factor['id'].append(
                            Utils.code_to_symbol(stock_info.symbol))
                        dict_factor['factorvalue'].append(factor_loading)
            else:
                # Multi-process computation: workers push their results to a
                # managed queue that is drained once the pool has finished.
                q = Manager().Queue()
                p = Pool(4)  # at most 4 worker processes
                for _, stock_info in stock_basics.iterrows():
                    p.apply_async(cls._calc_factor_loading_proc,
                                  args=(
                                      stock_info.symbol,
                                      calc_date,
                                      q,
                                  ))
                p.close()
                p.join()
                while not q.empty():
                    smart_q = q.get(True)
                    dict_factor['id'].append(smart_q[0])
                    dict_factor['factorvalue'].append(smart_q[1])

            date_label = Utils.get_trading_days(calc_date, ndays=2)[1]
            dict_factor['date'] = [date_label] * len(dict_factor['id'])
            # 3. Standardized loadings with outlier treatment.
            df_std_factor = Utils.normalize_data(pd.DataFrame(dict_factor),
                                                 columns='factorvalue',
                                                 treat_outlier=True,
                                                 weight='eq')
            # 4. Persist both the raw and the standardized loadings.
            if save:
                cls._save_factor_loading(cls._db_file,
                                         Utils.datetimelike_to_str(calc_date,
                                                                   dash=False),
                                         dict_factor,
                                         'SmartMoney',
                                         factor_type='raw',
                                         columns=['date', 'id', 'factorvalue'])
                cls._save_factor_loading(cls._db_file,
                                         Utils.datetimelike_to_str(calc_date,
                                                                   dash=False),
                                         df_std_factor,
                                         'SmartMoney',
                                         factor_type='standardized',
                                         columns=['date', 'id', 'factorvalue'])
            # Pause for 360 seconds before processing the next date.
            logging.info('Suspending for 360s.')
            time.sleep(360)
        return dict_factor
Esempio n. 22
0
    def calc_factor_loading(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
        """
        Compute the DASTD factor loading of the sample stocks on the requested
        date(s), optionally saving it to the factor database.

        Parameters:
        --------
        :param start_date: datetime-like, str
            Start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            End date, format: YYYY-MM-DD or YYYYMMDD. If None, only
            start_date is processed.
        :param month_end: bool, default True
            If True, only month-end trading days are processed.
        :param save: bool, default False
            Whether to save the loadings to the factor database.
        :param kwargs:
            'multi_proc': bool, True = multi-process, False = single process,
            default False
        :return: dict
            Loadings of the last processed date with keys 'date', 'id' and
            'factorvalue'; None if no date was processed.
        """
        # Trading days to process and the full stock basics table.
        first_day = Utils.to_date(start_date)
        if end_date is None:
            calc_days = Utils.get_trading_days(end=first_day, ndays=1)
        else:
            calc_days = Utils.get_trading_days(start=first_day, end=Utils.to_date(end_date))
        secu_basics = CDataHandler.DataApi.get_secu_basics()
        use_pool = kwargs.get('multi_proc', False)

        result = None
        for calc_date in calc_days:
            if month_end and not Utils.is_month_end(calc_date):
                continue
            logging.info('[%s] Calc DASTD factor loading.' % Utils.datetimelike_to_str(calc_date))
            # Keep only stocks whose list_date is earlier than the cut-off.
            cutoff = (calc_date - datetime.timedelta(days=risk_ct.DASTD_CT.listed_days)).strftime('%Y%m%d')
            candidates = secu_basics[secu_basics.list_date < cutoff]
            codes = []      # stock codes
            values = []     # DASTD values, aligned with codes

            if use_pool:
                # Workers report through a managed queue, drained after join().
                queue = Manager().Queue()
                pool = Pool(4)
                for _, info in candidates.iterrows():
                    pool.apply_async(cls._calc_factor_loading_proc, args=(info.symbol, calc_date, queue,))
                pool.close()
                pool.join()
                while not queue.empty():
                    item = queue.get(True)
                    codes.append(item['code'])
                    values.append(item['dastd'])
            else:
                for _, info in candidates.iterrows():
                    logging.info("[%s] Calc %s's DASTD factor loading." % (calc_date.strftime('%Y-%m-%d'), info.symbol))
                    item = cls._calc_factor_loading(info.symbol, calc_date)
                    if item is not None:
                        codes.append(item['code'])
                        values.append(item['dastd'])
                    else:
                        # A failed calculation is recorded as NaN, not dropped.
                        codes.append(Utils.code_to_symbol(info.symbol))
                        values.append(np.nan)

            next_day = Utils.get_trading_days(start=calc_date, ndays=2)[1]
            result = {'date': [next_day]*len(codes), 'id': codes, 'factorvalue': values}
            if save:
                Utils.factor_loading_persistent(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False), result, ['date', 'id', 'factorvalue'])
            # Only the message is emitted; no sleep is performed here.
            logging.info('Suspending for 180s.')
        return result
Esempio n. 23
0
 def calc_factor_loading_(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
     """
     Compute the component factors of ResVolatility on the requested date(s),
     combine them into the composite ResVolatility loading and optionally
     save the composite to the factor database.

     Parameters:
     --------
     :param start_date: datetime-like, str
         Start date, format: YYYY-MM-DD or YYYYMMDD
     :param end_date: datetime-like, str
         End date, format: YYYY-MM-DD or YYYYMMDD. If None, only start_date
         is processed.
     :param month_end: bool, default True
         If True, only month-end trading days are processed.
     :param save: bool, default False
         Whether to save the loadings to the factor database.
     :param kwargs:
         'multi_proc': bool, True = multi-process, False = single process,
         default False
     :return: None
         Nothing is returned; the composite loading is only written to the
         factor database when save is True.
     """
     # Trading days to process.
     first_day = Utils.to_date(start_date)
     if end_date is None:
         calc_days = Utils.get_trading_days(end=first_day, ndays=1)
     else:
         calc_days = Utils.get_trading_days(start=first_day, end=Utils.to_date(end_date))
     use_pool = kwargs.get('multi_proc', False)

     for calc_date in calc_days:
         if month_end and not Utils.is_month_end(calc_date):
             continue
         # Step 1: let every component factor compute its own loading.
         # NOTE(review): eval() instantiates the class named by the config
         # entry; the component names must come from trusted configuration.
         for name in risk_ct.RESVOLATILITY_CT.component:
             component_factor = eval(name + '()')
             component_factor.calc_factor_loading(start_date=calc_date, end_date=None, month_end=month_end, save=save, multi_proc=use_pool)
         # Step 2: read the component loadings back and merge them on 'id'.
         merged = pd.DataFrame()
         for name in risk_ct.RESVOLATILITY_CT.component:
             component_ct = eval('risk_ct.' + name + '_CT')
             loading_path = os.path.join(factor_ct.FACTOR_DB.db_path, component_ct['db_file'])
             loading = Utils.read_factor_loading(loading_path, Utils.datetimelike_to_str(calc_date, dash=False))
             loading.drop(columns='date', inplace=True)
             # Extreme-value cleaning and normalization run on a column vector.
             raw_values = np.array(loading['factorvalue']).reshape((len(loading), 1))
             loading[name] = Utils.normalize_data(Utils.clean_extreme_value(raw_values))
             loading.drop(columns='factorvalue', inplace=True)
             if merged.empty:
                 merged = loading
             else:
                 merged = pd.merge(left=merged, right=loading, how='inner', on='id')
         # Step 3: weighted sum of the component columns per stock.
         merged.set_index('id', inplace=True)
         weights = pd.Series(risk_ct.RESVOLATILITY_CT.weight)
         composite = (merged * weights).sum(axis=1)
         composite.name = 'factorvalue'
         composite.index.name = 'id'
         composite_df = pd.DataFrame(composite).reset_index()
         composite_df['date'] = Utils.get_trading_days(start=calc_date, ndays=2)[1]
         # Step 4: persist the composite loading.
         if save:
             Utils.factor_loading_persistent(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False), composite_df.to_dict('list'), ['date', 'id', 'factorvalue'])
Esempio n. 24
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the CMRA factor loading of a single stock on a given date.

        The loading is log(max(z)) - log(min(z)), where each z is the ratio
        of the adjusted close on a sampled trading day to the adjusted close
        on the first available day of the look-back window. Sampled days are
        spaced risk_ct.CMRA_CT.days_scale trading days apart, for
        risk_ct.CMRA_CT.trailing periods.

        Parameter:
        --------
        :param code: str
            Stock code, e.g. SH600000, 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            CMRA factor loading of the stock
            0. code
            1. cmra
            None if the calculation fails: no quote data, fewer quotes than
            half of the look-back window, or no usable sample point.
        """
        trailing = risk_ct.CMRA_CT.trailing
        days_scale = risk_ct.CMRA_CT.days_scale
        window = trailing * days_scale

        # Trading calendar of the look-back window, as 'YYYY-MM-DD' strings.
        trading_days = Utils.get_trading_days(end=calc_date, ndays=window + 1)
        trading_days = [day.strftime('%Y-%m-%d') for day in trading_days]
        # Adjusted daily quotes of the stock up to the calculation date.
        df_secu_quote = Utils.get_secu_daily_mkt(code, end=calc_date, fq=True)
        if df_secu_quote is None:
            return None
        # Keep only the quotes that fall inside the look-back window.
        df_secu_quote = df_secu_quote[df_secu_quote['date'].isin(trading_days)]
        df_secu_quote = df_secu_quote.reset_index(drop=True)
        # Require at least half of the window to be covered by quotes.
        if df_secu_quote.empty or len(df_secu_quote) < int(window / 2):
            return None

        first_date = df_secu_quote.iloc[0]['date']
        first_close = df_secu_quote.iloc[0]['close']
        z = []
        prev_trading_day = first_date
        for t in range(1, trailing + 1):
            k = t * days_scale
            if k >= len(trading_days):
                # The calendar is shorter than the requested window.
                break
            trading_day = trading_days[k]
            if trading_day < first_date:
                # The stock has no quote yet at this sample point.
                continue
            # Latest quoted day not after the sample point (covers suspensions).
            secu_trading_day = df_secu_quote[df_secu_quote['date'] <= trading_day].iloc[-1]['date']
            if secu_trading_day <= prev_trading_day:
                # No new quote since the previous sample point.
                continue
            close = df_secu_quote[df_secu_quote['date'] == secu_trading_day].iloc[0]['close']
            z.append(close / first_close)
            prev_trading_day = secu_trading_day

        if not z:
            # max()/min() would raise on an empty sequence.
            return None
        cmra = math.log(max(z)) - math.log(min(z))
        return pd.Series([Utils.code_to_symbol(code), cmra], index=['code', 'cmra'])
Esempio n. 25
0
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Compute the LIQUIDITY factor loading of the sample stocks on the
        requested date(s), optionally saving it to the factor database.

        Besides the raw liquidity value, the stom/stoq/stoa components are
        collected, and the raw liquidity is regressed on the Size factor so
        that the regression residual becomes the final loading.

        Parameters:
        --------
        :param start_date: datetime-like, str
            Start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            End date, format: YYYY-MM-DD or YYYYMMDD. If None, only
            start_date is processed.
        :param month_end: bool, default True
            If True, only month-end trading days are processed.
        :param save: bool, default False
            Whether to save the loadings to the factor database.
        :param kwargs:
            'multi_proc': bool, True = multi-process, False = single process,
            default False
        :return: dict
            Raw (non-orthogonalized) liquidity loadings of the last processed
            date with keys 'date', 'id' and 'factorvalue'; None if no date
            was processed.
        """
        # Trading days to process and the full stock basics table.
        first_day = Utils.to_date(start_date)
        if end_date is None:
            calc_days = Utils.get_trading_days(end=first_day, ndays=1)
        else:
            calc_days = Utils.get_trading_days(start=first_day,
                                               end=Utils.to_date(end_date))
        secu_basics = CDataHandler.DataApi.get_secu_basics()
        use_pool = kwargs.get('multi_proc', False)
        # Fields read from every per-stock result.
        fields = ('code', 'stom', 'stoq', 'stoa', 'liquidity')

        raw_result = None
        for calc_date in calc_days:
            if month_end and not Utils.is_month_end(calc_date):
                continue
            logging.info('[%s] Calc LIQUIDITY factor loading.' %
                         Utils.datetimelike_to_str(calc_date))
            # Keep only stocks whose list_date is earlier than the cut-off.
            cutoff = (calc_date - datetime.timedelta(
                days=risk_ct.LIQUID_CT.listed_days)).strftime('%Y%m%d')
            candidates = secu_basics[secu_basics.list_date < cutoff]
            collected = {name: [] for name in fields}

            if use_pool:
                # Workers report through a managed queue, drained after join().
                queue = Manager().Queue()
                pool = Pool(4)
                for _, info in candidates.iterrows():
                    pool.apply_async(cls._calc_factor_loading_proc,
                                     args=(
                                         info.symbol,
                                         calc_date,
                                         queue,
                                     ))
                pool.close()
                pool.join()
                while not queue.empty():
                    item = queue.get(True)
                    for name in fields:
                        collected[name].append(item[name])
            else:
                for _, info in candidates.iterrows():
                    logging.info("[%s] Calc %s's LIQUIDITY factor loading." %
                                 (Utils.datetimelike_to_str(
                                     calc_date, dash=True), info.symbol))
                    item = cls._calc_factor_loading(info.symbol, calc_date)
                    if item is None:
                        # Failed calculations are dropped in this mode.
                        continue
                    for name in fields:
                        collected[name].append(item[name])

            ids = collected['code']
            next_day = Utils.get_trading_days(start=calc_date, ndays=2)[1]
            # One loading dict per component, all sharing the same id list.
            loadings = {
                name: {
                    'date': [next_day] * len(ids),
                    'id': ids,
                    'factorvalue': collected[name]
                }
                for name in ('stom', 'stoq', 'stoa', 'liquidity')
            }
            raw_result = loadings['liquidity']

            # Read the Size factor and orthogonalize liquidity against it.
            size_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                     risk_ct.SIZE_CT.db_file)
            df_size = Utils.read_factor_loading(
                size_path, Utils.datetimelike_to_str(calc_date, dash=False))
            df_size.drop(columns='date', inplace=True)
            df_size.rename(columns={'factorvalue': 'size'}, inplace=True)
            df_liquidity = pd.merge(left=pd.DataFrame({
                'id': ids,
                'liquidity': collected['liquidity']
            }),
                                    right=df_size,
                                    how='inner',
                                    on='id')
            n_rows = len(df_liquidity)
            liquidity_vec = Utils.normalize_data(
                Utils.clean_extreme_value(
                    np.array(df_liquidity['liquidity']).reshape((n_rows, 1))))
            size_vec = Utils.normalize_data(
                Utils.clean_extreme_value(
                    np.array(df_liquidity['size']).reshape((n_rows, 1))))
            # The OLS residual is the part of liquidity not explained by size.
            fit = sm.OLS(liquidity_vec, size_vec).fit()
            df_liquidity['liquidity'] = fit.resid
            df_liquidity = df_liquidity.drop(columns='size').rename(
                columns={'liquidity': 'factorvalue'})
            df_liquidity['date'] = next_day

            # Persist the components, the raw value and the final loading.
            if save:
                date_key = Utils.datetimelike_to_str(calc_date, dash=False)
                header = ['date', 'id', 'factorvalue']
                outputs = (
                    ('stom_{}'.format(date_key), loadings['stom']),
                    ('stoq_{}'.format(date_key), loadings['stoq']),
                    ('stoa_{}'.format(date_key), loadings['stoa']),
                    ('rawliquidity_{}'.format(date_key),
                     loadings['liquidity']),
                    (date_key, df_liquidity.to_dict('list')),
                )
                for key, payload in outputs:
                    Utils.factor_loading_persistent(cls._db_file, key,
                                                    payload, header)

            # Pause for 180 seconds before processing the next date.
            logging.info('Suspending for 180s.')
            time.sleep(180)
        return raw_result
Esempio n. 26
0
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Compute the scale (market-cap) factor loading of the sample stocks on
        the requested date(s), optionally saving it to the factor database.

        Parameters:
        --------
        :param start_date: datetime-like or str
            Start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like or str
            End date, format: YYYY-MM-DD or YYYYMMDD. If None, only
            start_date is processed.
        :param month_end: bool, default True
            If True, only month-end trading days are processed; otherwise
            every trading day is processed.
        :param save: bool, default False
            Whether to save the loadings to the factor database.
        :return: dict
        --------
            Loadings of the last processed date as a dict of lists:
            0. date: date label
            1. id: security symbol
            2. LnTotalMktCap: log of total market cap
            3. LnLiquidMktCap: log of liquid market cap
            None if no date was processed.
        """
        # Trading days to process and the full stock basics table.
        first_day = Utils.to_date(start_date)
        if end_date is None:
            calc_days = Utils.get_trading_days(end=first_day, ndays=1)
        else:
            calc_days = Utils.get_trading_days(start=first_day,
                                               end=Utils.to_date(end_date))
        secu_basics = CDataHandler.DataApi.get_secu_basics()

        result = None
        for calc_date in calc_days:
            if month_end and not Utils.is_month_end(calc_date):
                continue
            # Keep only stocks whose list_date is more than 90 calendar days
            # before the calculation date.
            cutoff = (calc_date -
                      datetime.timedelta(days=90)).strftime('%Y%m%d')
            candidates = secu_basics[secu_basics.list_date < cutoff]

            # The factor is always computed with a process pool; workers
            # report through a managed queue that is drained after join().
            queue = Manager().Queue()
            pool = Pool(4)  # at most 4 worker processes
            for _, info in candidates.iterrows():
                pool.apply_async(cls._calc_factor_loading_proc,
                                 args=(
                                     info.symbol,
                                     calc_date,
                                     queue,
                                 ))
            pool.close()
            pool.join()

            ids = []
            total_caps = []
            liquid_caps = []
            while not queue.empty():
                item = queue.get(True)
                ids.append(item[0])
                total_caps.append(round(item[1], 4))
                liquid_caps.append(round(item[2], 4))

            next_day = Utils.get_trading_days(start=calc_date, ndays=2)[1]
            result = {
                'date': [next_day] * len(ids),
                'id': ids,
                'LnTotalMktCap': total_caps,
                'LnLiquidMktCap': liquid_caps
            }
            # Persist the loadings of this date.
            if save:
                Utils.factor_loading_persistent(cls._db_file,
                                                calc_date.strftime('%Y%m%d'),
                                                result)
            # Pause for 60 seconds before processing the next date.
            logging.info('Suspending for 60s.')
            time.sleep(60)
        return result
Esempio n. 27
0
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Compute the SmartQ factor loading of the sample stocks on the
        requested date(s), optionally saving it to the factor database.

        Parameters
        --------
        :param start_date: datetime-like, str
            Start date.
        :param end_date: datetime-like, str, default None
            End date. If None, only start_date is processed.
        :param month_end: bool, default True
            If True, only month-end trading days are processed.
        :param save: bool, default False
            Whether to save the loadings to the factor database.
        :return: dict
        --------
            Loadings of the last processed date as a dict of equally long
            lists:
            0. id: security code
            1. factorvalue: factor loading
            2. date: second entry of the 2-day trading calendar starting at
               the calculation date (i.e. the next trading day)
            None if no date was processed.
        """
        # 0. Build the list of trading days to process.
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        # Full stock basics table, filtered per calculation date below.
        all_stock_basics = CDataHandler.DataApi.get_secu_basics()
        # Stays None until a date is actually processed. It must NOT be reset
        # before the month-end check below, otherwise a trailing
        # non-month-end day would wipe the last computed result.
        dict_factor = None
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            # 1. Keep only stocks whose list_date is more than 90 calendar
            #    days before the calculation date.
            s = (calc_date - datetime.timedelta(days=90)).strftime('%Y%m%d')
            stock_basics = all_stock_basics[all_stock_basics.list_date < s]
            # 2. Compute the loading of every sample stock.
            dict_factor = {'id': [], 'factorvalue': []}

            # The factor is always computed with a process pool; workers
            # report through a managed queue that is drained after join().
            q = Manager().Queue()
            p = Pool(4)  # at most 4 worker processes
            for _, stock_info in stock_basics.iterrows():
                p.apply_async(cls._calc_factor_loading_proc,
                              args=(
                                  stock_info.symbol,
                                  calc_date,
                                  q,
                              ))
            p.close()
            p.join()
            while not q.empty():
                smart_q = q.get(True)
                dict_factor['id'].append(smart_q[0])
                dict_factor['factorvalue'].append(smart_q[1])

            date_label = Utils.get_trading_days(calc_date, ndays=2)[1]
            dict_factor['date'] = [date_label] * len(dict_factor['id'])
            # 3. Persist the loadings of this date.
            if save:
                Utils.factor_loading_persistent(cls._db_file,
                                                calc_date.strftime('%Y%m%d'),
                                                dict_factor)
            # Pause for 360 seconds before processing the next date.
            logging.info('Suspending for 360s.')
            time.sleep(360)
        return dict_factor
Esempio n. 28
0
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Compute the momentum factor loadings of the sample stocks on the
        requested date(s), optionally saving them to the factor database.

        Parameters:
        --------
        :param start_date: datetime-like, str
            Start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            End date, format: YYYY-MM-DD or YYYYMMDD.
            If None, only start_date is processed.
        :param month_end: bool, default True
            If True, only month-end trading days are processed.
        :param save: bool, default False
            Whether to save the loadings to the factor database.
        :return: dict
        --------
            Loadings of the last processed date as a dict of lists:
            0. date: date label
            1. id: security symbol
            followed by one list per label returned by cls.momentum_label()
            (the short-term and long-term momentum factors).
            None if no date was processed.
        """
        # Build the list of trading days to process. When end_date is None
        # only start_date is processed, as documented and as done by the
        # other factor classes, instead of passing end=None to the calendar.
        if end_date is not None:
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        all_stock_basics = CDataHandler.DataApi.get_secu_basics()
        # Compute the momentum factors for every selected trading day.
        dict_momentum = None
        momentum_label = cls.momentum_label()
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            dict_momentum = {'date': [], 'id': []}
            for label in momentum_label:
                dict_momentum[label] = []
            # Keep only stocks whose list_date is more than 90 calendar days
            # before the calculation date.
            s = (calc_date - datetime.timedelta(days=90)).strftime('%Y%m%d')
            stock_basics = all_stock_basics[all_stock_basics.list_date < s]

            # The factors are always computed with a process pool; workers
            # report through a managed queue that is drained after join().
            q = Manager().Queue()
            p = Pool(4)  # at most 4 worker processes
            for _, stock_info in stock_basics.iterrows():
                p.apply_async(cls._calc_factor_loading_proc,
                              args=(
                                  stock_info.symbol,
                                  calc_date,
                                  q,
                              ))
            p.close()
            p.join()
            while not q.empty():
                momentum_data = q.get(True)
                dict_momentum['id'].append(momentum_data['id'])
                for label in momentum_label:
                    dict_momentum[label].append(momentum_data[label])

            date_label = Utils.get_trading_days(start=calc_date, ndays=2)[1]
            dict_momentum['date'] = [date_label] * len(dict_momentum['id'])
            # Persist the loadings of this date.
            if save:
                Utils.factor_loading_persistent(cls._db_file,
                                                calc_date.strftime('%Y%m%d'),
                                                dict_momentum)
            # Pause for 60 seconds before processing the next date.
            logging.info('Suspending for 60s.')
            time.sleep(60)
        return dict_momentum
Esempio n. 29
0
def _get_prevN_years_finbasicdata(date, code, years):
    """
    Collect the main financial indicators of the past ``years`` reporting
    periods; per-share figures are rescaled with the price adjustment factor.

    :param date: datetime-like
        Reference date; its ``year`` and ``month`` decide which year-end
        reports are loaded and whether a TTM record is appended.
    :param code: str
        Stock code, format: SH600000
    :param years: int
        Number of reporting periods to return
    :return: list of dict, or None
        One dict per year-end report in chronological order (each record is
        converted with ``to_dict()`` and extended with ``*_adj`` keys),
        followed by the TTM record when one is required. None is returned as
        soon as any required record cannot be loaded.
    """
    base_year = date.year
    month = date.month

    # Which year-end reports are used depends on the calendar month:
    #   Jan-Apr: year-years .. year-2, plus a TTM record
    #   May-Aug: year-years .. year-1, no TTM record
    #   Sep-Dec: year-(years-1) .. year-1, plus a TTM record
    if month in (1, 2, 3, 4):
        year_offsets, is_ttm = range(years, 1, -1), True
    elif month in (5, 6, 7, 8):
        year_offsets, is_ttm = range(years, 0, -1), False
    else:
        year_offsets, is_ttm = range(years - 1, 0, -1), True
    report_dates = [
        datetime.datetime(base_year - back, 12, 31) for back in year_offsets
    ]

    # Adjusted daily quotes of the stock, used to rescale per-share data
    df_mkt_data = Utils.get_secu_daily_mkt(code, end=date, fq=True)

    # (source key, adjusted key) pairs, in the order the keys are written
    annual_per_share = (
        ('BasicEPS', 'BasicEPS_adj'),
        ('UnitNetAsset', 'UnitNetAsset_adj'),
        ('UnitNetOperateCashFlow', 'UnitNetOperateCashFlow_adj'),
    )
    ttm_per_share = (('BasicEPS', 'BasicEPS_adj'), )

    def _with_adjusted_fields(record, cutoff, per_share_pairs):
        # Use the factor of the last quote dated on or before the cutoff;
        # when no such quote exists the raw values are copied unchanged.
        quotes = df_mkt_data[df_mkt_data.date <= cutoff.strftime('%Y-%m-%d')]
        if quotes.empty:
            for raw_key, adj_key in per_share_pairs:
                record[adj_key] = record[raw_key]
            record['MainOperateRevenue_adj'] = record['MainOperateRevenue']
        else:
            fq_factor = quotes.iloc[-1]['factor']
            # Per-share data are multiplied by the factor ...
            for raw_key, adj_key in per_share_pairs:
                record[adj_key] = record[raw_key] * fq_factor
            # ... while main operating revenue is divided by it
            record['MainOperateRevenue_adj'] = (
                record['MainOperateRevenue'] / fq_factor)
        return record

    collected = []
    for report_date in report_dates:
        annual = Utils.get_fin_basic_data(code,
                                          report_date,
                                          date_type='report_date')
        if annual is None:
            return None
        collected.append(
            _with_adjusted_fields(annual.to_dict(), report_date,
                                  annual_per_share))

    if not is_ttm:
        return collected

    ttm_record = Utils.get_ttm_fin_basic_data(code, date)
    if ttm_record is None:
        return None
    ttm_record = ttm_record.to_dict()
    collected.append(
        _with_adjusted_fields(ttm_record, ttm_record['ReportDate'],
                              ttm_per_share))
    return collected
Esempio n. 30
0
    def calc_factor_loading_(cls,
                             start_date,
                             end_date=None,
                             month_end=True,
                             save=False,
                             **kwargs):
        """
        Compute the Growth factor loading of the sample stocks for the given
        date(s) and optionally save it to the factor database.

        For every processed trading day the component factors listed in
        ``risk_ct.GROWTH_CT.component`` are calculated first; their loadings
        are then read back, normalized, filled with industry means where
        missing, and combined with the weights in ``risk_ct.GROWTH_CT.weight``.

        Parameters:
        --------
        :param start_date: datetime-like, str
            Start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            End date, format: YYYY-MM-DD or YYYYMMDD. If None, only the
            loading for start_date is calculated.
        :param month_end: bool, defaults to True
            If True, only month-end dates are processed.
        :param save: bool, defaults to False
            Whether to save the loadings to the factor database.
        :param kwargs:
            'multi_proc': bool, True=use multiple processes,
            False=single process, defaults to False
        :return: dict
            Factor loading data.
            NOTE(review): no return statement is visible in this part of the
            method -- confirm what is actually returned.
        """
        # Build the series of trading days to process
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        # Iterate over the trading days and compute the loadings of every
        # component factor of the growth factor
        if 'multi_proc' not in kwargs:
            kwargs['multi_proc'] = False
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            # Compute the loading of each component factor.
            # NOTE(review): the component class is instantiated through eval()
            # on a name taken from risk_ct; this is only safe while that
            # configuration is trusted.
            for com_factor in risk_ct.GROWTH_CT.component:
                factor = eval(com_factor + '()')
                factor.calc_factor_loading(start_date=calc_date,
                                           end_date=None,
                                           month_end=month_end,
                                           save=save,
                                           multi_proc=kwargs['multi_proc'])
            # Combine the component loadings into the Growth factor loading.
            # NOTE(review): the component loadings are read back from files
            # below; with save=False they are presumably not persisted by the
            # calls above -- confirm the expected workflow.
            growth_factor = pd.DataFrame()
            df_industry_classify = Utils.get_industry_classify()  # industry classification of the stocks
            for com_factor in risk_ct.GROWTH_CT.component:
                factor_path = os.path.join(
                    factor_ct.FACTOR_DB.db_path,
                    eval('risk_ct.' + com_factor + '_CT')['db_file'])
                factor_loading = Utils.read_factor_loading(
                    factor_path,
                    Utils.datetimelike_to_str(calc_date, dash=False))
                factor_loading.drop(columns='date', inplace=True)
                # factor_loading[com_factor] = Utils.normalize_data(Utils.clean_extreme_value(np.array(factor_loading['factorvalue']).reshape((len(factor_loading), 1))))
                # factor_loading.drop(columns='factorvalue', inplace=True)
                # Name the value column after the component so that the
                # per-component frames can be merged side by side later
                factor_loading.rename(columns={'factorvalue': com_factor},
                                      inplace=True)
                # Attach the industry classification
                factor_loading = pd.merge(
                    left=factor_loading,
                    right=df_industry_classify[['id', 'ind_code']],
                    how='inner',
                    on='id')
                # Set aside the rows whose factor value is missing
                missingdata_factor = factor_loading[
                    factor_loading[com_factor].isna()]
                # Drop rows containing missing values from factor_loading
                factor_loading.dropna(axis='index', how='any', inplace=True)
                # Treat outliers and standardize factor_loading
                factor_loading = Utils.normalize_data(factor_loading,
                                                      id='id',
                                                      columns=com_factor,
                                                      treat_outlier=True,
                                                      weight='cap',
                                                      calc_date=calc_date)
                # Replace the missing values in missingdata_factor with the
                # industry mean of the normalized loadings
                ind_codes = set(missingdata_factor['ind_code'])
                ind_mean_factor = {}
                for ind_code in ind_codes:
                    ind_mean_factor[ind_code] = factor_loading[
                        factor_loading['ind_code'] ==
                        ind_code][com_factor].mean()
                for idx, missingdata in missingdata_factor.iterrows():
                    missingdata_factor.loc[idx, com_factor] = ind_mean_factor[
                        missingdata['ind_code']]
                # Concatenate factor_loading and missingdata_factor
                factor_loading = pd.concat(
                    [factor_loading, missingdata_factor])
                # Remove the ind_code column
                factor_loading.drop(columns='ind_code', inplace=True)

                # The first component initializes growth_factor; later ones
                # are inner-joined on 'id'
                if growth_factor.empty:
                    growth_factor = factor_loading
                else:
                    growth_factor = pd.merge(left=growth_factor,
                                             right=factor_loading,
                                             how='inner',
                                             on='id')

            # # Read the industry classification and add it to growth_factor
            # df_industry_classify = Utils.get_industry_classify()
            # growth_factor = pd.merge(left=growth_factor, right=df_industry_classify[['id', 'ind_code']])
            # # Take the factor loading rows that contain missing values
            # missingdata_factor = growth_factor.loc[[ind for ind, data in growth_factor.iterrows() if data.hasnans]]
            # # Drop the missing values from growth_factor
            # growth_factor.dropna(axis='index', how='any', inplace=True)
            # # Treat outliers and standardize growth_factor
            # growth_factor = Utils.normalize_data(growth_factor, id='id', columns=risk_ct.GROWTH_CT.component, treat_outlier=True, weight='cap', calc_date=calc_date)
            # # Replace the missing values in missingdata_factor with the industry mean
            # ind_codes = set(missingdata_factor['ind_code'])
            # ind_mean_factor = {}
            # for ind_code in ind_codes:
            #     ind_mean_factor[ind_code] = growth_factor[growth_factor['ind_code'] == ind_code].mean()
            # missingdata_label = {ind: missingdata_factor.columns[missingdata.isna()].tolist() for ind, missingdata in missingdata_factor.iterrows()}
            # for ind, cols in missingdata_label.items():
            #     missingdata_factor.loc[ind, cols] = ind_mean_factor[missingdata_factor.loc[ind, 'ind_code']][cols]
            # # Concatenate missingdata_factor and growth_factor
            # growth_factor = pd.concat([growth_factor, missingdata_factor])
            # # Remove the ind_code column
            # growth_factor.drop(columns='ind_code', inplace=True)

            # Build the Growth factor as the weighted sum of its components
            growth_factor.set_index('id', inplace=True)
            weight = pd.Series(risk_ct.GROWTH_CT.weight)
            growth_factor = (growth_factor * weight).sum(axis=1)
            growth_factor.name = 'factorvalue'
            growth_factor.index.name = 'id'
            growth_factor = pd.DataFrame(growth_factor)
            growth_factor.reset_index(inplace=True)
            # Label the loading with the second of the two trading days
            # starting at calc_date (presumably the next trading day -- confirm)
            growth_factor['date'] = Utils.get_trading_days(start=calc_date,
                                                           ndays=2)[1]
            # Save the growth factor loading
            if save:
                Utils.factor_loading_persistent(
                    cls._db_file,
                    Utils.datetimelike_to_str(calc_date, dash=False),
                    growth_factor.to_dict('list'),
                    ['date', 'id', 'factorvalue'])