Esempio n. 1
0
 def _calc_factor_loading_proc(cls, code, calc_date, q):
     """
     Worker routine: compute one stock's APM statistic and trailing 20-day
     return, and push them onto a queue.

     Parameters:
     --------
     :param code: str
         Stock code, forwarded to the calculation helpers
     :param calc_date: datetime-like
         Calculation date (must provide ``strftime``)
     :param q: queue-like
         Receives a ``(symbol, stat, ret20)`` tuple, but only when both the
         statistic and the 20-day return are available
     :return: None
     """
     date_text = calc_date.strftime('%Y-%m-%d')
     logging.info('[%s] Calc APM of %s.' % (date_text, code))
     apm_stat = cls._calc_factor_loading(code, calc_date)
     trailing_ret = Utils.calc_interval_ret(code, end=calc_date, ndays=20)
     # Nothing is queued when either value could not be computed
     if apm_stat is None or trailing_ret is None:
         return
     q.put((Utils.code_to_symbol(code), apm_stat, trailing_ret))
Esempio n. 2
0
def calc_future_ret(date, ndays):
    """
    Compute, for every stock, the interval returns over the 1..ndays trading
    days that end at `date`, and save them as a CSV file.

    The file is named after the first day of the (ndays+1)-long trading-day
    window, i.e. the day from whose point of view these are "future" returns.
    Stocks with any missing interval return are left out of the file.

    Parameters:
    --------
    :param date: datetime-like, str
        End date, e.g: YYYY-MM-DD, YYYYMMDD
    :param ndays: int
        Number of trading days to look ahead
    :return: None
    """
    # Trading-day window of ndays+1 days ending at `date`
    trading_days_series = Utils.get_trading_days(end=date, ndays=ndays+1)
    # Stock universe as of the first day of the window
    stock_basics = Utils.get_stock_basics(trading_days_series[0])
    day_labels = ['day' + str(k) for k in range(1, ndays+1)]
    headers = ['code'] + day_labels
    # Collect one plain dict per stock and build the frame once at the end:
    # DataFrame.append copied the whole frame on every call and no longer
    # exists in pandas >= 2.0.
    rows = []
    for _, stock_info in stock_basics.iterrows():
        row = {'code': stock_info.symbol}
        for k, label in enumerate(day_labels, start=1):
            # Return from the 2nd day of the window up to the (k+1)-th day
            ret = Utils.calc_interval_ret(stock_info.symbol, start=trading_days_series[1], end=trading_days_series[k])
            row[label] = np.nan if ret is None else round(ret, 6)
        rows.append(row)
    df_future_ret = pd.DataFrame(rows, columns=headers)
    # Drop stocks for which any interval return is missing
    df_future_ret.dropna(axis=0, how='any', inplace=True)

    # Persist the result
    cfg = ConfigParser()
    cfg.read('config.ini')
    future_ret_path = os.path.join(SETTINGS.FACTOR_DB_PATH, cfg.get('future_ret', 'ret_path'), '{}.csv'.format(Utils.datetimelike_to_str(trading_days_series[0], dash=False)))
    df_future_ret.to_csv(future_ret_path, index=False, encoding='utf-8')
Esempio n. 3
0
    def _calc_periodmomentum_ic(cls, calc_date, date_interval_type='month'):
        """
        Compute the Rank IC vector of the intraday period-momentum factors
        and append it to the factor IC file.

        Parameters:
        --------
        :param calc_date: datetime-like, str
            Calculation date, e.g: YYYY-MM-DD, YYYYMMDD
        :param date_interval_type: str
            Horizon of the stock return used for the IC:
            'month' = next month's return, 'day' = next trading day's return
        :return: pd.Series
        --------
            Spearman correlation between each factor column and the stock
            return, plus the calculation date:
            0. m0, m1, m2, m3, m4: Rank IC of each period-momentum column
            1. date: calc_date
            Returns None when no factor loading is available for calc_date.
        :raises ValueError: if date_interval_type is neither 'month' nor 'day'
        """
        # Load the period-momentum factor loadings for the calculation date
        df_period_mom = cls._get_factor_loading(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False),
                                                factor_name='periodmomentum', factor_type='raw', drop_na=True)
        if df_period_mom.empty:
            return None

        if date_interval_type == 'month':
            # Return window: the month following calc_date
            ret_start, ret_end = Utils.next_month(calc_date)
        elif date_interval_type == 'day':
            # Return window: the single trading day following calc_date
            ret_start = ret_end = Utils.get_trading_days(start=calc_date, ndays=2)[1]
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on ret_start; fail early and explicitly.
            raise ValueError("date_interval_type must be 'month' or 'day', got %r" % (date_interval_type,))

        df_period_mom['ret'] = np.nan
        for idx, factorloading_data in df_period_mom.iterrows():
            fret = Utils.calc_interval_ret(factorloading_data['id'], start=ret_start, end=ret_end)
            if fret is not None:
                df_period_mom.loc[idx, 'ret'] = fret
        # Stocks without a return (still NaN) are excluded from the IC
        df_period_mom.dropna(inplace=True)
        # Rank IC = Spearman correlation of each factor column with 'ret'
        df_period_mom.drop(columns=['date', 'id', 'm_normal'], inplace=True)
        df_spearman_corr = df_period_mom.corr(method='spearman')
        rank_IC = df_spearman_corr.loc['ret', ['m0', 'm1', 'm2', 'm3', 'm4']]
        rank_IC['date'] = calc_date
        # Append the Rank IC vector to the IC time-series file
        ic_filepath = os.path.join(SETTINGS.FACTOR_DB_PATH, alphafactor_ct.INTRADAYMOMENTUM_CT['factor_ic_file'])
        Utils.save_timeseries_data(rank_IC, ic_filepath, save_type='a', columns=['date', 'm0', 'm1', 'm2', 'm3', 'm4'])

        return rank_IC
Esempio n. 4
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the momentum factors (short-term and long-term) of one stock on
     one date.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. SH600000 or 600000
     :param calc_date: datetime-like or str
         Calculation date, format YYYY-MM-DD or YYYYMMDD
     :return: pd.Series
     --------
         One interval return (rounded to 6 decimals) per configured window,
         indexed by 'short_term_<days>' followed by 'long_term_<days>'.
         If the return of a window cannot be computed, the value of the
         previous window is reused; if that happens for the very first
         window, None is returned.
     """
     momentum_cfg = factor_ct.MOMENTUM_CT
     # Window lengths (in trading days) are configured as '|'-separated ints
     short_windows = [int(tok) for tok in momentum_cfg.short_term_days.split('|')]
     long_windows = [int(tok) for tok in momentum_cfg.long_term_days.split('|')]

     labels = ['short_term_%d' % n for n in short_windows]
     labels.extend('long_term_%d' % n for n in long_windows)

     values = []
     for window in short_windows + long_windows:
         window_ret = Utils.calc_interval_ret(code, end=calc_date, ndays=window)
         if window_ret is None:
             if not values:
                 # Even the first window failed: nothing to fall back on
                 return None
             # Fall back on the value of the previous window
             window_ret = values[-1]
         values.append(round(window_ret, 6))
     return Series(values, index=labels)
Esempio n. 5
0
def smartq_backtest(start, end):
    """
    Run the historical backtest of the SmartQ factor.

    Walks through every trading day between `start` and `end`. On a
    month-start day the current holdings (if any) are valued, a new portfolio
    is selected from the factor loadings of the previous trading day and
    written to 'port_data_<date>.csv'; on any other day the current holdings
    are simply valued. One NAV row is added per trading day and the full NAV
    table is written to 'port_nav.csv' at the end.

    Parameters:
    --------
    :param start: datetime-like, str
        Backtest start date, format YYYY-MM-DD; should be the start of a month
    :param end: datetime-like, str
        Backtest end date, format YYYY-MM-DD
    :return: None
    """
    # Trading days between start and end
    trading_days = Utils.get_trading_days(start, end)
    # Load the latest portfolio backtest data available at the first trading day
    prev_trading_day = Utils.get_prev_n_day(trading_days.iloc[0], 1)
    backtest_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                 alphafactor_ct.SMARTMONEY_CT.backtest_path)
    factor_data, port_nav = Utils.get_backtest_data(backtest_path,
                                                    trading_days.iloc[0])
    # factor_data = None  # SmartQ factor info of the stocks picked at the latest rebalance, pd.DataFrame<date,factorvalue,id,buyprice>
    if port_nav is None:
        # No NAV history yet: seed it with 1.0 on the day before the first trading day
        port_nav = DataFrame({
            'date': [prev_trading_day.strftime('%Y-%m-%d')],
            'nav': [1.0]
        })
    # Walk through the trading days: rebalance on month-start days using the
    # SmartQ factor loadings, otherwise just value the portfolio
    t = 0  # number of rebalances done so far
    for trading_day in trading_days:
        # Base NAV: the previous day's NAV when nothing is held, otherwise the
        # NAV on the date stored in the first row of factor_data
        if factor_data is None:
            nav = port_nav[port_nav.date == prev_trading_day.strftime(
                '%Y-%m-%d')].iloc[0].nav
        else:
            nav = port_nav[port_nav.date ==
                           factor_data.iloc[0].date].iloc[0].nav
        interval_ret = 0.0
        # Rebalance on month-start days
        if Utils.is_month_start(trading_day):
            logging.info('[%s] 月初调仓.' %
                         Utils.datetimelike_to_str(trading_day, True))
            # Before rebalancing, value the old holdings as if sold at today's average price
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                         trading_day,
                                                         fq=True,
                                                         range_lookup=True)
                    # Use amount/vol*factor when the quote is dated today,
                    # otherwise fall back to the close of the quote returned
                    if daily_mkt.date == trading_day.strftime('%Y-%m-%d'):
                        vwap_price = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                    else:
                        vwap_price = daily_mkt.close
                    interval_ret += vwap_price / factor_info.buyprice - 1.0
                # NOTE(review): divides by zero if factor_data has no rows — confirm this cannot happen
                interval_ret /= float(len(factor_data))
                nav *= (1.0 + interval_ret)
            # Read the factor loadings of the previous trading day
            factor_data = Utils.read_factor_loading(
                SmartMoney.get_db_file(),
                Utils.datetimelike_to_str(prev_trading_day, False))
            # Compute each stock's past-20-day return and mark for removal the
            # stocks that are suspended or limit-up on the rebalance day, or
            # whose 20-day return cannot be computed
            ind_to_be_deleted = []
            factor_data['ret20'] = np.zeros(len(factor_data))
            for ind, factor_info in factor_data.iterrows():
                trading_status = Utils.trading_status(factor_info.id,
                                                      trading_day)
                if trading_status == SecuTradingStatus.Suspend or trading_status == SecuTradingStatus.LimitUp:
                    ind_to_be_deleted.append(ind)
                fret20 = Utils.calc_interval_ret(factor_info.id,
                                                 end=prev_trading_day,
                                                 ndays=20)
                if fret20 is None:
                    if ind not in ind_to_be_deleted:
                        ind_to_be_deleted.append(ind)
                else:
                    factor_data.loc[ind, 'ret20'] = fret20
            factor_data = factor_data.drop(ind_to_be_deleted, axis=0)
            # Sort by 20-day return, descending, and drop the top 20% gainers
            k = int(factor_data.shape[0] * 0.2)
            factor_data = factor_data.sort_values(by='ret20',
                                                  ascending=False).iloc[k:]
            del factor_data['ret20']  # the ret20 column is no longer needed
            # Sort by factor value, ascending, and keep the first 10%
            factor_data = factor_data.sort_values(by='factorvalue',
                                                  ascending=True)
            k = int(factor_data.shape[0] * 0.1)
            factor_data = factor_data.iloc[:k]
            # Record the buy price of every selected stock and compute the
            # portfolio return from buy price to today's close
            factor_data['buyprice'] = 0.0
            interval_ret = 0.0
            for ind, factor_info in factor_data.iterrows():
                daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                     trading_day,
                                                     fq=True,
                                                     range_lookup=False)
                assert len(daily_mkt) > 0
                factor_data.loc[
                    ind,
                    'buyprice'] = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                interval_ret += daily_mkt.close / factor_data.loc[
                    ind, 'buyprice'] - 1.0
            # NOTE(review): divides by zero if no stock survives the filters above — confirm
            interval_ret /= float(factor_data.shape[0])
            nav *= (1.0 + interval_ret)
            # Save the new holdings
            port_data_path = os.path.join(
                SETTINGS.FACTOR_DB_PATH,
                alphafactor_ct.SMARTMONEY_CT.backtest_path,
                'port_data_%s.csv' %
                Utils.datetimelike_to_str(trading_day, False))
            factor_data.to_csv(port_data_path, index=False)
            t += 1
            # Pause for 300 seconds after every 6th rebalance
            if t % 6 == 0:
                logging.info('Suspended for 300s.')
                time.sleep(300)
        else:
            # Not a rebalance day: value the portfolio at today's close
            logging.info('[%s] 月中估值.' %
                         Utils.datetimelike_to_str(trading_day, True))
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                         trading_day,
                                                         fq=True,
                                                         range_lookup=True)
                    interval_ret += daily_mkt.close / factor_info.buyprice - 1.0
                interval_ret /= float(factor_data.shape[0])
                nav *= (1.0 + interval_ret)
        # Record today's NAV
        port_nav = port_nav.append(Series({
            'date':
            Utils.datetimelike_to_str(trading_day, True),
            'nav':
            nav
        }),
                                   ignore_index=True)
        # Advance prev_trading_day
        prev_trading_day = trading_day
    # Save the NAV table
    port_nav_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                 alphafactor_ct.SMARTMONEY_CT.backtest_path,
                                 'port_nav.csv')
    port_nav.to_csv(port_nav_path, index=False)
Esempio n. 6
0
 def _calc_factor_loading1(cls, code, calc_date):
     """
     Compute the four chip-distribution proxy variables of one stock on one
     date, together with its return over the next period.

     Parameters
     -------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like, str
         Calculation date, format YYYY-MM-DD
     :return: pd.Series
     --------
         0. arc: turnover-weighted mean of the relative capital gain
         1. vrc: turnover-weighted variance
         2. src: turnover-weighted skewness
         3. krc: turnover-weighted kurtosis
         4. next_ret: return from the next trading day to the end of that
            day's calendar month
         None is returned when fewer than 20 quotes are available, when any
         of the four statistics is NaN, or when next_ret cannot be computed.
     """
     # Adjusted daily quotes over the look-back window
     df_mkt = Utils.get_secu_daily_mkt(code,
                                       end=calc_date,
                                       ndays=cls.__days,
                                       fq=True,
                                       range_lookup=True)
     if df_mkt is None or len(df_mkt) < 20:
         return None
     # Most recent quote first
     df_mkt.sort_values(by='date', ascending=False, inplace=True)

     n = len(df_mkt)
     arr_rc = np.zeros(n)   # relative capital gain per day
     arr_atr = np.zeros(n)  # adjusted turnover per day (used as weights)
     p_c = df_mkt.iloc[0]['close']  # close on the most recent day
     pre_tr = None
     for j in range(n):
         bar = df_mkt.iloc[j]
         p_avg = bar['amount'] / bar['vol'] * bar['factor']
         arr_rc[j] = (p_c - p_avg) / p_c
         tr_j = bar['turnover1']
         if j == 0:
             arr_atr[j] = tr_j
         else:
             # Recursive adjustment based on the previous (more recent) day
             arr_atr[j] = arr_atr[j - 1] / pre_tr * tr_j * (1. - pre_tr)
         pre_tr = tr_j

     arc = np.average(arr_rc, weights=arr_atr)
     if np.isnan(arc):
         return None

     rc_dev = arr_rc - arc
     scale = n / (n - 1.)
     weight_sum = np.sum(arr_atr)

     vrc = scale * np.sum(arr_atr * rc_dev * rc_dev) / weight_sum
     if np.isnan(vrc):
         return None
     src = scale * np.sum(arr_atr * np.float_power(
         rc_dev, 3)) / weight_sum / np.float_power(vrc, 1.5)
     if np.isnan(src):
         return None
     krc = scale * np.sum(arr_atr * np.float_power(
         rc_dev, 4)) / weight_sum / np.float_power(vrc, 2)
     if np.isnan(krc):
         return None

     # Next-period return: from the next trading day to the last calendar
     # day of that day's month
     next_date = Utils.get_trading_days(start=calc_date, ndays=2)[1]
     last_day_of_month = calendar.monthrange(next_date.year,
                                             next_date.month)[1]
     date_end = datetime.datetime(next_date.year, next_date.month,
                                  last_day_of_month)
     next_ret = Utils.calc_interval_ret(code, start=next_date, end=date_end)
     if next_ret is None:
         return None
     return pd.Series([arc, vrc, src, krc, next_ret],
                      index=['arc', 'vrc', 'src', 'krc', 'next_ret'])
Esempio n. 7
0
def _calc_mvpfp_performance(factor_name, start_date, end_date):
    """
    Compute the performance of a factor's minimum-volatility pure factor
    portfolio (mvpfp) and save the daily and monthly results.

    Parameters:
    --------
    :param factor_name: str
        Factor name, e.g: SmartMoney
    :param start_date: datetime-like, str
        Start date, e.g: YYYY-MM-DD, YYYYMMDD
    :param end_date: datetime-like, str
        End date, e.g: YYYY-MM-DD, YYYYMMDD
    :return: None
    :raises NotADirectoryError: if the factor's mvpfp folder does not exist
    :raises IndexError: if no holding file falls inside [start_date, end_date]
    """
    start_date = Utils.to_date(start_date)
    end_date = Utils.to_date(end_date)
    # Resolve the factor's constant table without eval(). The original lookup
    # (alphafactor_ct.<NAME>.CT) is tried first so existing setups keep
    # working, then the '<NAME>_CT' naming is tried as a fallback.
    ct_key = factor_name.upper()
    try:
        factor_ct_cfg = getattr(alphafactor_ct, ct_key).CT
    except AttributeError:
        factor_ct_cfg = getattr(alphafactor_ct, ct_key + '_CT')
    # Load the mvpfp holding files and build the portfolio
    mvpfp_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                              factor_ct_cfg['db_file'],
                              'mvpfp')
    if not os.path.isdir(mvpfp_path):
        raise NotADirectoryError("%s因子的mvpfp组合文件夹不存在." % factor_name)
    mvpfp_port = CPortfolio('weight_holding')
    # os.listdir() returns names in arbitrary order; sort them so holdings
    # are always loaded in the same (file-name) order
    for mvpfp_filename in sorted(os.listdir(mvpfp_path)):
        if os.path.splitext(mvpfp_filename)[1] != '.csv':
            continue
        mvpfp_date = Utils.to_date(mvpfp_filename.split('.')[0])
        if mvpfp_date < start_date or mvpfp_date > end_date:
            continue
        mvpfp_filepath = os.path.join(mvpfp_path, mvpfp_filename)
        mvpfp_port.load_holdings_fromfile(mvpfp_filepath)

    daily_header = alphamodel_ct.FACTOR_PERFORMANCE_HEADER['daily_performance']
    monthly_header = alphamodel_ct.FACTOR_PERFORMANCE_HEADER[
        'monthly_performance']

    mvpfp_holdings = mvpfp_port.holdings
    holding_dates = list(mvpfp_holdings.keys())
    # Rows are collected as plain dicts and turned into DataFrames once at
    # the end: DataFrame.append copied the whole frame on every call and no
    # longer exists in pandas >= 2.0.
    nav = 1.0
    daily_rows = [{
        'date': holding_dates[0],
        'daily_ret': 0.0,
        'nav': nav,
        'accu_ret': 0.0
    }]
    monthly_rows = []
    # Make sure the last holding is valued up to end_date
    if end_date > holding_dates[-1]:
        holding_dates += [end_date]
    # Each holding is valued from the day after its own date up to the next
    # holding date
    for prev_holding_date, curr_holding_date in zip(holding_dates,
                                                    holding_dates[1:]):
        period_start_nav = nav
        holding_data = mvpfp_holdings[prev_holding_date]
        trading_days_series = Utils.get_trading_days(
            start=prev_holding_date + datetime.timedelta(days=1),
            end=curr_holding_date)
        for calc_date in trading_days_series:
            daily_ret = 0
            # TODO: compute the per-stock returns in parallel
            # NOTE(review): each stock's return is measured from the first
            # trading day of the period to calc_date and then compounded
            # onto the previous day's nav — confirm that this is intended.
            for _, holding in holding_data.holding.iterrows():
                ret = Utils.calc_interval_ret(holding['code'],
                                              start=trading_days_series[0],
                                              end=calc_date)
                if ret is not None:
                    daily_ret += ret * holding['weight']
            nav = nav * (1 + daily_ret)
            daily_rows.append({
                'date': calc_date,
                'daily_ret': daily_ret,
                'nav': nav,
                'accu_ret': nav - 1
            })

        monthly_rows.append({
            'date': curr_holding_date,
            'monthly_ret': nav / period_start_nav - 1.0
        })

    df_daily_performance = pd.DataFrame(daily_rows, columns=daily_header)
    df_monthly_performance = pd.DataFrame(monthly_rows,
                                          columns=monthly_header)

    # Persist the results
    _save_mvpfp_performance(df_daily_performance, factor_name, 'daily', 'a')
    _save_mvpfp_performance(df_monthly_performance, factor_name, 'monthly',
                            'a')
Esempio n. 8
0
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Compute the APM factor loadings of the sample stocks for the given
        date(s) and optionally save them to the factor database.

        Parameters
        --------
        :param start_date: datetime-like, str
            Start date
        :param end_date: datetime-like, str, default None
            End date; if None, only the trading day obtained for start_date
            is processed
        :param month_end: bool, default True
            When True, every calculation date that is not a month end is
            skipped (the check is applied in the single-date case as well)
        :param save: bool, default False
            Whether to save the raw and standardized loadings to the factor
            database
        :param kwargs:
            'multi_proc': bool, True = compute with a pool of worker
            processes, False = compute in a single process; default False
        :return: dict
        --------
            Factor loadings of the last trading day in the sequence:
            0: date, list of date labels (the trading day after calc_date)
            1: id, list of security symbols
            2: factorvalue, list of APM factor loadings
            If the last trading day was skipped by the month-end check, the
            three lists are empty; if the trading-day sequence itself is
            empty, None is returned.
        """
        # 1. Get the trading-day sequence
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        # all_stock_basics = CDataHandler.DataApi.get_secu_basics()
        # 2. Walk through the trading days and compute the APM factor loadings
        dict_apm = None
        for calc_date in trading_days_series:
            # NOTE(review): the result is reset before the month-end check, so
            # a skipped day discards the loadings of an earlier day — confirm
            # this is intended.
            dict_apm = {'date': [], 'id': [], 'factorvalue': []}
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            # 2.1. For every stock, compute the APM stat and the past-20-day
            #      return, collected in stat_lst and ret20_lst.
            #      The stock list is read as of 90 calendar days before calc_date.
            s = (calc_date - datetime.timedelta(days=90)).strftime('%Y%m%d')
            stock_basics = Utils.get_stock_basics(s)
            stat_lst = []
            ret20_lst = []
            symbol_lst = []

            if 'multi_proc' not in kwargs:
                kwargs['multi_proc'] = False
            if not kwargs['multi_proc']:
                # Single-process computation
                for _, stock_info in stock_basics.iterrows():
                    stat_i = cls._calc_factor_loading(stock_info.symbol,
                                                      calc_date)
                    ret20_i = Utils.calc_interval_ret(stock_info.symbol,
                                                      end=calc_date,
                                                      ndays=20)
                    # Keep a stock only when both values are available
                    if stat_i is not None and ret20_i is not None:
                        stat_lst.append(stat_i)
                        ret20_lst.append(ret20_i)
                        symbol_lst.append(
                            Utils.code_to_symbol(stock_info.symbol))
                        logging.info('APM of %s = %f' %
                                     (stock_info.symbol, stat_i))
            else:
                # Multi-process computation: workers push their results onto a shared queue
                q = Manager().Queue()
                p = Pool(4)  # at most 4 worker processes
                for _, stock_info in stock_basics.iterrows():
                    # NOTE(review): the AsyncResult is never inspected, so an
                    # exception raised inside a worker goes unnoticed — confirm
                    p.apply_async(cls._calc_factor_loading_proc,
                                  args=(
                                      stock_info.symbol,
                                      calc_date,
                                      q,
                                  ))
                p.close()
                p.join()
                # All workers have finished; drain the queue
                while not q.empty():
                    apm_value = q.get(True)
                    symbol_lst.append(apm_value[0])
                    stat_lst.append(apm_value[1])
                    ret20_lst.append(apm_value[2])

            assert len(stat_lst) == len(ret20_lst)
            assert len(stat_lst) == len(symbol_lst)

            # 2.2. Build the APM factor
            # 2.2.1. Cross-sectional regression of stat on the momentum factor ret20:
            #        stat_j = \beta * Ret20_j + \epsilon_j
            #        The residual vector is the APM factor of the stocks.
            # Before the regression, extreme values are cleaned and both
            # variables are normalized.
            stat_arr = np.array(stat_lst).reshape((len(stat_lst), 1))
            ret20_arr = np.array(ret20_lst).reshape((len(ret20_lst), 1))
            stat_arr = Utils.clean_extreme_value(stat_arr)
            stat_arr = Utils.normalize_data(stat_arr)
            ret20_arr = Utils.clean_extreme_value(ret20_arr)
            ret20_arr = Utils.normalize_data(ret20_arr)
            # Regression (no constant term is added)
            # ret20_arr = sm.add_constant(ret20_arr)
            apm_model = sm.OLS(stat_arr, ret20_arr)
            apm_result = apm_model.fit()
            apm_lst = list(np.around(apm_result.resid, 6))  # APM factor loadings rounded to 6 decimals
            assert len(apm_lst) == len(symbol_lst)
            # 2.2.2. Build the APM factor dict and persist it.
            #        The rows are labelled with the trading day after calc_date.
            date_label = Utils.get_trading_days(calc_date, ndays=2)[1]
            dict_apm = {
                'date': [date_label] * len(symbol_lst),
                'id': symbol_lst,
                'factorvalue': apm_lst
            }
            df_std_apm = Utils.normalize_data(pd.DataFrame(dict_apm),
                                              columns='factorvalue',
                                              treat_outlier=True,
                                              weight='eq')
            if save:
                # Utils.factor_loading_persistent(cls._db_file, calc_date.strftime('%Y%m%d'), dict_apm)
                # Save the raw loadings, then the standardized loadings
                cls._save_factor_loading(cls._db_file,
                                         Utils.datetimelike_to_str(calc_date,
                                                                   dash=False),
                                         dict_apm,
                                         'APM',
                                         factor_type='raw',
                                         columns=['date', 'id', 'factorvalue'])
                cls._save_factor_loading(cls._db_file,
                                         Utils.datetimelike_to_str(calc_date,
                                                                   dash=False),
                                         df_std_apm,
                                         'APM',
                                         factor_type='standardized',
                                         columns=['date', 'id', 'factorvalue'])

            # # 2.3. Build the PureAPM factor
            # # Convert stat_arr to a DataFrame; stat_arr has already been cleaned and normalized
            # df_stat = DataFrame(stat_arr, index=symbol_lst, columns=['stat'])
            # # Get the dependent factors used for purification
            # df_dependent_factor = cls.get_dependent_factors(calc_date)
            # # Join df_stat with the dependent factors
            # df_data = pd.concat([df_stat, df_dependent_factor], axis=1, join='inner')
            # # OLS regression to purify the APM factor
            # arr_data = np.array(df_data)
            # pure_apm_model = sm.OLS(arr_data[:, 0], arr_data[:, 1:])
            # pure_apm_result = pure_apm_model.fit()
            # pure_apm_lst = list(np.around(pure_apm_result.resid, 6))
            # pure_symbol_lst = list(df_data.index)
            # assert len(pure_apm_lst) == len(pure_symbol_lst)
            # # Build the pure_apm factor dict and persist it
            # dict_pure_apm = {'date': [date_label]*len(pure_symbol_lst), 'id': pure_symbol_lst, 'factorvalue': pure_apm_lst}
            # pure_apm_db_file = os.path.join(factor_ct.FACTOR_DB.db_path, factor_ct.APM_CT.pure_apm_db_file)
            # if save:
            #     Utils.factor_loading_persistent(pure_apm_db_file, calc_date.strftime('%Y%m%d'), dict_pure_apm)
            # # Pause for 360 seconds
            # logging.info('Suspended for 360s.')
            # time.sleep(360)
        return dict_apm