Exemple #1
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the BETA and HSIGMA factor loadings of one stock on one date.

     The stock's daily returns are regressed on the benchmark's daily returns
     with weighted least squares, using exponentially decaying weights.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like, str
         Calculation date, format YYYY-MM-DD
     :return: pd.Series
     --------
         BETA and HSIGMA factor loadings of the stock
         0. code: stock symbol
         1. beta: slope of the weighted regression
         2. hsigma: square root of the regression's residual mean square
         Returns None if the calculation fails (missing quotes, dates that
         cannot be aligned, or too few observations for the regression).
     """
     # Adjusted daily quotes of the stock; one extra row is requested so that
     # `trailing` daily returns can be formed.
     df_secu_quote = Utils.get_secu_daily_mkt(
         code, end=calc_date, ndays=risk_ct.BETA_CT.trailing + 1, fq=True)
     if df_secu_quote is None:
         return None
     # Adjusted daily quotes of the benchmark.
     benchmark_code = risk_ct.BETA_CT.benchmark
     df_benchmark_quote = Utils.get_secu_daily_mkt(benchmark_code,
                                                   end=calc_date,
                                                   fq=True)
     if df_benchmark_quote is None:
         return None
     # Keep only the dates present in BOTH frames. Filtering just the
     # benchmark leaves arrays of different length whenever the benchmark
     # lacks one of the stock's dates, which makes the regression raise.
     df_benchmark_quote = df_benchmark_quote[
         df_benchmark_quote['date'].isin(list(df_secu_quote['date']))]
     df_secu_quote = df_secu_quote[
         df_secu_quote['date'].isin(list(df_benchmark_quote['date']))]
     if len(df_secu_quote) != len(df_benchmark_quote):
         # Duplicate dates in one frame only: the rows cannot be paired.
         return None
     # Simple daily returns of the stock and the benchmark.
     arr_secu_close = np.asarray(df_secu_quote['close'], dtype=float)
     arr_benchmark_close = np.asarray(df_benchmark_quote['close'],
                                      dtype=float)
     arr_secu_daily_ret = arr_secu_close[1:] / arr_secu_close[:-1] - 1.
     arr_benchmark_daily_ret = (
         arr_benchmark_close[1:] / arr_benchmark_close[:-1] - 1.)
     # Two parameters are estimated, so at least three observations are
     # needed for a residual mean square with non-zero degrees of freedom.
     T = len(arr_benchmark_daily_ret)
     if T < 3:
         return None
     # Exponentially decaying weights: observation i (0 = oldest) gets
     # (1 - alpha) ** (T - 1 - i), multiplied by alpha except for the oldest
     # observation, whose multiplier is 1.
     alpha = 1 - np.exp(np.log(0.5) / risk_ct.BETA_CT.half_life)
     decay = np.float_power(1 - alpha, np.arange(T - 1, -1, -1))
     scale = np.full(T, alpha)
     scale[0] = 1.
     weights = decay * scale
     # Weighted least squares of stock returns on benchmark returns.
     exog = sm.add_constant(arr_benchmark_daily_ret)
     cap_model = sm.WLS(arr_secu_daily_ret, exog, weights=weights)
     result = cap_model.fit()
     beta = result.params[1]
     hsigma = np.sqrt(result.mse_resid)
     return pd.Series([Utils.code_to_symbol(code), beta, hsigma],
                      index=['code', 'beta', 'hsigma'])
Exemple #2
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the STOA factor loading of one stock on one date.

        STOA is the natural log of the summed daily turnover over the most
        recent `month_days * months` rows, divided by `months`.

        Parameters:
        --------
        :param code: str
            Stock code, e.g. SH600000 or 600000
        :param calc_date: datetime-like or str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            STOA factor loading of the stock
            0. code
            1. stoa
            Returns None if the calculation fails (no quotes, or the
            turnover in the window is not positive so the log is undefined).
        """
        # Unadjusted daily quotes for the trailing 252 rows.
        # NOTE(review): 252 is hard-coded while the window below comes from
        # risk_ct.STOA_CT; confirm the two are meant to agree.
        df_mkt_data = Utils.get_secu_daily_mkt(code,
                                               end=calc_date,
                                               ndays=252,
                                               fq=False)
        if df_mkt_data is None or df_mkt_data.empty:
            return None
        months = risk_ct.STOA_CT.months
        days = risk_ct.STOA_CT.month_days * months
        # A negative-start slice already yields every row when fewer than
        # `days` rows exist, so no length check is needed.
        avg_turnover = df_mkt_data['turnover1'].iloc[-days:].sum() / months
        # math.log raises ValueError for non-positive input; report failure
        # as None instead, as the contract above promises.
        if not avg_turnover > 0:
            return None
        stoa = math.log(avg_turnover)

        return pd.Series([Utils.code_to_symbol(code), stoa],
                         index=['code', 'stoa'])
Exemple #3
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the LNCAP factor loading of one stock on one date.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. SH600000 or 600000
     :param calc_date: datetime-like, str
         Calculation date, format: YYYY-MM-DD
     :return: pd.Series
     --------
         LNCAP factor loading of the stock
         0. code
         1. lncap: natural log of close * (total - liquid_b - liquid_h)
         2. liquid_cap: close * liquid_a
         Returns None if the quote or the capital structure is unavailable.
     """
     # Unadjusted quote looked up from calc_date.
     quote = Utils.get_secu_daily_mkt(code,
                                      start=calc_date,
                                      fq=False,
                                      range_lookup=True)
     if quote is None:
         return None
     close_price = quote['close']

     # Capital structure as of calc_date.
     shares = Utils.get_cap_struct(code, calc_date)
     if shares is None:
         return None

     # Total shares net of the B-share and H-share floats.
     a_share_total = shares.total - shares.liquid_b - shares.liquid_h
     lncap = np.log(close_price * a_share_total)
     liquid_cap = close_price * shares.liquid_a

     loading = [Utils.code_to_symbol(code), lncap, liquid_cap]
     return pd.Series(loading, index=['code', 'lncap', 'liquid_cap'])
Exemple #4
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the LIQUIDITY factor loading of one stock on one date.

        Parameters:
        --------
        :param code: str
            Stock code, e.g. SH600000 or 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            LIQUIDITY factor loading of the stock
            0. code
            1. stom: log of the summed turnover over the last stom_days rows
            2. stoq: log of the summed turnover over the last
               stom_days * stoq_months rows, divided by stoq_months
            3. stoa: log of the summed turnover over all fetched rows,
               divided by stoa_months
            4. liquidity: weighted sum of stom, stoq and stoa
            Returns None if the calculation fails (no quotes, or a window
            whose turnover is not positive so the log is undefined).
        """
        stom_days = risk_ct.LIQUID_CT.stom_days
        stoq_months = risk_ct.LIQUID_CT.stoq_months
        stoa_months = risk_ct.LIQUID_CT.stoa_months
        # Unadjusted daily quotes covering the longest (stoa) window.
        df_mkt_data = Utils.get_secu_daily_mkt(code,
                                               end=calc_date,
                                               ndays=stoa_months * stom_days,
                                               fq=False)
        if df_mkt_data is None or df_mkt_data.empty:
            return None
        turnover = df_mkt_data['turnover1']
        # (name, trailing row count or None for every fetched row, divisor).
        # A negative-start slice already yields every row when the frame is
        # shorter than the window, so no length check is needed.
        windows = (
            ('stom', stom_days, 1),
            ('stoq', stom_days * stoq_months, stoq_months),
            ('stoa', None, stoa_months),
        )
        loadings = {}
        for name, ndays, months in windows:
            window = turnover if ndays is None else turnover.iloc[-ndays:]
            avg_turnover = window.sum() / months
            # math.log raises ValueError for non-positive input; report
            # failure as None instead, as the contract above promises.
            if not avg_turnover > 0:
                return None
            loadings[name] = math.log(avg_turnover)
        stom, stoq, stoa = loadings['stom'], loadings['stoq'], loadings['stoa']
        # liquidity = stom_weight*stom + stoq_weight*stoq + stoa_weight*stoa
        stom_weight = risk_ct.LIQUID_CT.stom_weight
        stoq_weight = risk_ct.LIQUID_CT.stoq_weight
        stoa_weight = risk_ct.LIQUID_CT.stoa_weight
        liquidity = stom_weight * stom + stoq_weight * stoq + stoa_weight * stoa

        return pd.Series(
            [Utils.code_to_symbol(code), stom, stoq, stoa, liquidity],
            index=['code', 'stom', 'stoq', 'stoa', 'liquidity'])
Exemple #5
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the chip (cost) distribution of one stock as of one date.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like, str
         Calculation date, format YYYY-MM-DD
     :return: tuple(code, close, cyq_data)
     --------
         1. code
         2. close: close of the last quote row fetched up to calc_date
         3. cyq_data: pd.Series
            Index is the chip price (daily average price rounded to 2
            decimals, plus the IPO price); values are the holding
            proportion attributed to each price. Entries not above 0.00001
            are dropped.
         Returns None if the calculation fails (no IPO info, no issue
         price, no quotes, or fewer than 30 distinct chip prices).
     """
     # IPO information of the stock.
     ipo_data = Utils.get_ipo_info(code)
     if ipo_data is None:
         return None
     # The last character of the issue-price text is stripped before
     # parsing (presumably a unit suffix; confirm against the data source);
     # '--' marks a missing price.
     ipo_price_text = ipo_data['发行价格'][:-1]
     if ipo_price_text == '--':
         return None
     ipo_price = float(ipo_price_text)
     # Adjusted daily quotes up to calc_date.
     mkt_data = Utils.get_secu_daily_mkt(code, end=calc_date, fq=True)
     # Without this guard a missing/empty frame crashed on .iloc[-1].
     if mkt_data is None or mkt_data.empty:
         return None
     secu_close = mkt_data.iloc[-1]['close']
     # Daily average price, scaled by the adjustment factor.
     mkt_data['vwap'] = np.around(
         mkt_data['amount'] / mkt_data['vol'] * mkt_data['factor'], 2)
     mkt_data.dropna(axis=0, how='any', inplace=True)
     # Newest row first, so the cumulative product below runs backwards in
     # time from calc_date.
     mkt_data.sort_values(by='date', ascending=False, inplace=True)
     mkt_data.reset_index(drop=True, inplace=True)
     # Chip distribution. The IPO price is added as a final row with zero
     # turnover; .loc enlargement replaces DataFrame.append, which was
     # removed in pandas 2.0.
     cyq_data = mkt_data[['vwap', 'turnover1']].copy()
     cyq_data.loc[len(cyq_data)] = [ipo_price, 0]
     cyq_data['minusTR'] = 1 - cyq_data['turnover1']
     # Product of (1 - turnover) over all newer rows; 1 for the newest row.
     cyq_data['cumprod_TR'] = cyq_data['minusTR'].cumprod().shift(1)
     cyq_data.loc[0, 'cumprod_TR'] = 1.
     cyq_data['cyq'] = cyq_data['turnover1'] * cyq_data['cumprod_TR']
     secu_cyq = cyq_data['cyq'].groupby(cyq_data['vwap']).sum()
     # Fewer than 30 distinct chip prices is treated as a failure.
     if len(secu_cyq) < 30:
         return None
     secu_cyq = secu_cyq[secu_cyq.values > 0.00001]
     return (Utils.code_to_symbol(code), secu_close, secu_cyq)
Exemple #6
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the value factors ep_ttm, bp_lr and ocf_ttm of one stock.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like or str
         Calculation date, format YYYY-MM-DD or YYYYMMDD
     :return: pd.Series
     --------
         Value factors, each rounded to 6 decimals
         0. ep_ttm: TTM net profit / total market cap
         1. bp_lr: shareholder equity (latest report) / total market cap
         2. ocf_ttm: TTM net operating cash flow / total market cap
         Returns None if the calculation fails (missing financial data,
         quote or capital structure, or a zero market cap).
     """
     code = Utils.code_to_symbol(code)
     calc_date = Utils.to_date(calc_date)
     # TTM financial data.
     ttm_fin_data = Utils.get_ttm_fin_basic_data(code, calc_date)
     if ttm_fin_data is None:
         return None
     # Latest financial report data.
     report_date = Utils.get_fin_report_date(calc_date)
     fin_basic_data = Utils.get_fin_basic_data(code, report_date)
     if fin_basic_data is None:
         return None
     # Unadjusted quote used to price the market cap. Other callers of this
     # lookup treat None as "no data", so guard against it here too.
     mkt_daily = Utils.get_secu_daily_mkt(code,
                                          calc_date,
                                          fq=False,
                                          range_lookup=True)
     if mkt_daily is None or mkt_daily.shape[0] == 0:
         return None
     cap_struct = Utils.get_cap_struct(code, calc_date)
     if cap_struct is None:
         return None
     # Total shares net of the B-share and H-share floats, times the close.
     total_cap = cap_struct.total - cap_struct.liquid_b - cap_struct.liquid_h
     total_mkt_cap = total_cap * mkt_daily.close
     # The ratios below are undefined for a zero denominator.
     if total_mkt_cap == 0:
         return None
     # Financial amounts are scaled by FIN_DATA_AMOUNT_UNIT before dividing.
     unit = util_ct.FIN_DATA_AMOUNT_UNIT
     ep_ttm = ttm_fin_data['NetProfit'] * unit / total_mkt_cap
     ocf_ttm = ttm_fin_data['NetOperateCashFlow'] * unit / total_mkt_cap
     bp_lr = fin_basic_data['ShareHolderEquity'] * unit / total_mkt_cap
     return Series([round(ep_ttm, 6),
                    round(bp_lr, 6),
                    round(ocf_ttm, 6)],
                   index=['ep_ttm', 'bp_lr', 'ocf_ttm'])
Exemple #7
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the RSTR factor loading of one stock on one date.

     RSTR is the exponentially weighted sum of daily log returns, after the
     most recent `trailing_end` quote rows have been discarded.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. SH600000 or 600000
     :param calc_date: datetime-like, str
         Calculation date, format: YYYY-MM-DD
     :return: pd.Series
     --------
         RSTR factor loading of the stock
         0. code
         1. rstr
         Returns None if no quotes are available or fewer than
         2 * half_life rows were fetched.
     """
     rstr_ct = risk_ct.RSTR_CT
     # Adjusted daily quotes; one extra row so returns can be formed.
     quotes = Utils.get_secu_daily_mkt(
         code,
         end=calc_date,
         ndays=rstr_ct.trailing_start + 1,
         fq=True)
     if quotes is None or len(quotes) < rstr_ct.half_life * 2:
         return None
     # Drop the most recent `trailing_end` rows.
     quotes = quotes.head(len(quotes) - rstr_ct.trailing_end)
     quotes = quotes.reset_index(drop=True)

     # Daily log returns: log(close[t] / close[t-1]).
     closes = np.array(quotes['close'])
     log_returns = np.log(closes[1:] / closes[:-1])

     # Exponentially decaying weights: observation i (0 = oldest) gets
     # (1 - alpha) ** (n_obs - 1 - i), multiplied by alpha except for the
     # oldest observation, whose multiplier is 1.
     n_obs = len(log_returns)
     alpha = 1 - np.exp(np.log(0.5) / rstr_ct.half_life)
     exponents = np.arange(n_obs - 1, -1, -1)
     bases = np.full(n_obs, 1 - alpha)
     multipliers = [1] + [alpha] * (n_obs - 1)
     weights = np.float_power(bases, exponents) * multipliers

     rstr = np.sum(log_returns * weights)
     return pd.Series([Utils.code_to_symbol(code), rstr],
                      index=['code', 'rstr'])
Exemple #8
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the DASTD factor loading of one stock on one date.

     DASTD is the square root of the exponentially weighted sum of squared
     deviations of daily log returns from their plain mean.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. SH600000 or 600000
     :param calc_date: datetime-like, str
         Calculation date, format: YYYY-MM-DD
     :return: pd.Series
     --------
         DASTD factor loading of the stock
         0. code
         1. dastd
         Returns None if no quotes are available or fewer than
         int(trailing / 2) rows were fetched.
     """
     dastd_ct = risk_ct.DASTD_CT
     # Adjusted daily quotes; one extra row so returns can be formed.
     df_secu_quote = Utils.get_secu_daily_mkt(
         code, end=calc_date, ndays=dastd_ct.trailing + 1, fq=True)
     # Require at least half of the trailing window.
     if df_secu_quote is None or len(df_secu_quote) < int(
             dastd_ct.trailing / 2):
         return None
     df_secu_quote.reset_index(drop=True, inplace=True)

     # Daily log returns and their unweighted mean.
     closes = np.array(df_secu_quote['close'])
     log_returns = np.log(closes[1:] / closes[:-1])
     deviation = log_returns - np.mean(log_returns)

     # Exponentially decaying weights: observation i (0 = oldest) gets
     # (1 - alpha) ** (n_obs - 1 - i), multiplied by alpha except for the
     # oldest observation, whose multiplier is 1.
     n_obs = len(log_returns)
     alpha = 1 - np.exp(np.log(0.5) / dastd_ct.half_life)
     exponents = np.arange(n_obs - 1, -1, -1)
     bases = np.full(n_obs, 1 - alpha)
     multipliers = [1] + [alpha] * (n_obs - 1)
     weights = np.float_power(bases, exponents) * multipliers

     dastd = np.sqrt(np.sum(deviation**2 * weights))
     return pd.Series([Utils.code_to_symbol(code), dastd],
                      index=['code', 'dastd'])
Exemple #9
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the size factors of one stock on one date.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like, str
         Calculation date, format YYYY-MM-DD or YYYYMMDD
     :return: pd.Series
     --------
         Size factors of the stock, indexed as follows:
         0. LnTotalMktCap: log of (total - liquid_b - liquid_h) * close
         1. LnLiquidMktCap: log of liquid_a * close
         Returns None if the calculation fails (missing quote or capital
         structure, or a non-positive market cap).
     """
     code = Utils.code_to_symbol(code)
     calc_date = Utils.to_date(calc_date)
     # Unadjusted quote looked up for calc_date. Other callers of this
     # lookup treat None as "no data", so guard against it here too.
     mkt_daily = Utils.get_secu_daily_mkt(code,
                                          calc_date,
                                          fq=False,
                                          range_lookup=True)
     if mkt_daily is None or mkt_daily.shape[0] == 0:
         return None
     # Capital structure as of calc_date.
     cap_struct = Utils.get_cap_struct(code, calc_date)
     if cap_struct is None:
         return None
     total_cap = cap_struct.total - cap_struct.liquid_b - cap_struct.liquid_h
     total_mkt_cap = total_cap * mkt_daily.close
     liquid_mkt_cap = cap_struct.liquid_a * mkt_daily.close
     # math.log raises ValueError for non-positive input; report failure as
     # None instead, as the contract above promises.
     if not (total_mkt_cap > 0 and liquid_mkt_cap > 0):
         return None
     # Build the Series in one step; a dtype-less empty Series() that is
     # filled afterwards is deprecated in pandas.
     return Series([math.log(total_mkt_cap),
                    math.log(liquid_mkt_cap)],
                   index=['LnTotalMktCap', 'LnLiquidMktCap'])
Exemple #10
0
def smartq_backtest(start, end):
    """
    Run the historical backtest of the SmartQ factor.

    Walks every trading day between start and end. On a month-start day the
    portfolio is rebalanced from the SmartQ factor loadings and the new
    holdings are written to 'port_data_<date>.csv'; on every other day the
    portfolio is only valued. One NAV row is appended per trading day and
    the NAV table is finally written to 'port_nav.csv'.

    Parameters:
    --------
    :param start: datetime-like, str
        Backtest start date, format: YYYY-MM-DD; it should be a month start
    :param end: datetime-like, str
        Backtest end date, format: YYYY-MM-DD
    :return: None; results are written to CSV files under the backtest path
    """
    # Trading days between start and end
    trading_days = Utils.get_trading_days(start, end)
    # Load the latest stored backtest data as of the first trading day
    prev_trading_day = Utils.get_prev_n_day(trading_days.iloc[0], 1)
    backtest_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                 alphafactor_ct.SMARTMONEY_CT.backtest_path)
    factor_data, port_nav = Utils.get_backtest_data(backtest_path,
                                                    trading_days.iloc[0])
    # factor_data holds the SmartQ factor info of the stocks selected at the
    # latest rebalance: pd.DataFrame<date, factorvalue, id, buyprice>
    if port_nav is None:
        # No stored NAV: start at 1.0 on the day before the first trading day
        port_nav = DataFrame({
            'date': [prev_trading_day.strftime('%Y-%m-%d')],
            'nav': [1.0]
        })
    # Walk the trading days: on a month start, read the SmartQ loadings and
    # rebalance; otherwise only value the portfolio
    t = 0  # number of rebalances done so far
    for trading_day in trading_days:
        # Base NAV for today: the NAV on the previous trading day when there
        # is no portfolio, else the NAV on the date stored in the first row
        # of factor_data (returns below are measured against buyprice)
        if factor_data is None:
            nav = port_nav[port_nav.date == prev_trading_day.strftime(
                '%Y-%m-%d')].iloc[0].nav
        else:
            nav = port_nav[port_nav.date ==
                           factor_data.iloc[0].date].iloc[0].nav
        interval_ret = 0.0
        # Rebalance on month start
        if Utils.is_month_start(trading_day):
            logging.info('[%s] 月初调仓.' %
                         Utils.datetimelike_to_str(trading_day, True))
            # Before rebalancing, value the old holdings as if sold today at
            # the day's average price
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                         trading_day,
                                                         fq=True,
                                                         range_lookup=True)
                    # Use the average price only when the returned quote is
                    # dated today; otherwise fall back to its close
                    if daily_mkt.date == trading_day.strftime('%Y-%m-%d'):
                        vwap_price = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                    else:
                        vwap_price = daily_mkt.close
                    interval_ret += vwap_price / factor_info.buyprice - 1.0
                # Equal-weighted mean return of the old holdings
                interval_ret /= float(len(factor_data))
                nav *= (1.0 + interval_ret)
            # Read the factor loadings stored for the previous trading day
            factor_data = Utils.read_factor_loading(
                SmartMoney.get_db_file(),
                Utils.datetimelike_to_str(prev_trading_day, False))
            # For each stock compute the past-20-day return, and drop stocks
            # that are suspended or limit-up on the rebalance day
            ind_to_be_deleted = []
            factor_data['ret20'] = np.zeros(len(factor_data))
            for ind, factor_info in factor_data.iterrows():
                trading_status = Utils.trading_status(factor_info.id,
                                                      trading_day)
                if trading_status == SecuTradingStatus.Suspend or trading_status == SecuTradingStatus.LimitUp:
                    ind_to_be_deleted.append(ind)
                fret20 = Utils.calc_interval_ret(factor_info.id,
                                                 end=prev_trading_day,
                                                 ndays=20)
                # Stocks without a 20-day return are dropped as well
                if fret20 is None:
                    if ind not in ind_to_be_deleted:
                        ind_to_be_deleted.append(ind)
                else:
                    factor_data.loc[ind, 'ret20'] = fret20
            factor_data = factor_data.drop(ind_to_be_deleted, axis=0)
            # Sort by 20-day return descending and drop the top 20%
            k = int(factor_data.shape[0] * 0.2)
            factor_data = factor_data.sort_values(by='ret20',
                                                  ascending=False).iloc[k:]
            del factor_data['ret20']  # the helper column is no longer needed
            # Sort by factor value ascending and keep the first 10%
            factor_data = factor_data.sort_values(by='factorvalue',
                                                  ascending=True)
            k = int(factor_data.shape[0] * 0.1)
            factor_data = factor_data.iloc[:k]
            # Record each stock's buy price (today's average price) and
            # value the new portfolio at today's close
            # NOTE(review): if the selection above is empty, the division by
            # factor_data.shape[0] below raises ZeroDivisionError
            factor_data['buyprice'] = 0.0
            interval_ret = 0.0
            for ind, factor_info in factor_data.iterrows():
                daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                     trading_day,
                                                     fq=True,
                                                     range_lookup=False)
                # NOTE(review): this check disappears under `python -O`
                assert len(daily_mkt) > 0
                factor_data.loc[
                    ind,
                    'buyprice'] = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                interval_ret += daily_mkt.close / factor_data.loc[
                    ind, 'buyprice'] - 1.0
            interval_ret /= float(factor_data.shape[0])
            nav *= (1.0 + interval_ret)
            # Save the new holdings
            port_data_path = os.path.join(
                SETTINGS.FACTOR_DB_PATH,
                alphafactor_ct.SMARTMONEY_CT.backtest_path,
                'port_data_%s.csv' %
                Utils.datetimelike_to_str(trading_day, False))
            factor_data.to_csv(port_data_path, index=False)
            t += 1
            # Pause for 300 seconds after every 6th rebalance
            # (presumably to throttle a data source; confirm)
            if t % 6 == 0:
                logging.info('Suspended for 300s.')
                time.sleep(300)
        else:
            # Not a rebalance day: value the portfolio at today's close
            logging.info('[%s] 月中估值.' %
                         Utils.datetimelike_to_str(trading_day, True))
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                         trading_day,
                                                         fq=True,
                                                         range_lookup=True)
                    interval_ret += daily_mkt.close / factor_info.buyprice - 1.0
                interval_ret /= float(factor_data.shape[0])
                nav *= (1.0 + interval_ret)
        # Append today's NAV
        # NOTE(review): DataFrame.append was removed in pandas 2.0
        port_nav = port_nav.append(Series({
            'date':
            Utils.datetimelike_to_str(trading_day, True),
            'nav':
            nav
        }),
                                   ignore_index=True)
        # Remember today as the previous trading day for the next iteration
        prev_trading_day = trading_day
    # Save the NAV table
    port_nav_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                 alphafactor_ct.SMARTMONEY_CT.backtest_path,
                                 'port_nav.csv')
    port_nav.to_csv(port_nav_path, index=False)
Exemple #11
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the CMRA factor loading of one stock on one date.

        The trailing window of `trailing * days_scale` trading days is
        sampled every `days_scale` days. At each sample the ratio of the
        stock's latest available adjusted close to its first close in the
        window is taken, and CMRA = log(max ratio) - log(min ratio).

        Parameter:
        --------
        :param code: str
            Stock code, e.g. SH600000 or 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            CMRA factor loading of the stock
            0. code
            1. cmra
            Returns None if the calculation fails (no quotes, fewer quote
            rows than half of the window, or no usable sample point).
        """
        n_periods = risk_ct.CMRA_CT.trailing
        days_scale = risk_ct.CMRA_CT.days_scale
        # Trading-day calendar of the window, as YYYY-MM-DD strings.
        trading_days = Utils.get_trading_days(
            end=calc_date, ndays=n_periods * days_scale + 1)
        trading_days = [day.strftime('%Y-%m-%d') for day in trading_days]
        # Adjusted daily quotes of the stock, restricted to the window.
        df_secu_quote = Utils.get_secu_daily_mkt(code, end=calc_date, fq=True)
        if df_secu_quote is None:
            return None
        df_secu_quote = df_secu_quote[df_secu_quote['date'].isin(trading_days)]
        df_secu_quote = df_secu_quote.reset_index(drop=True)
        # Require quotes on at least half of the window's trading days.
        if len(df_secu_quote) < int(n_periods * days_scale / 2):
            return None
        if df_secu_quote.empty:
            # Only reachable when the threshold above is 0.
            return None
        first_date = df_secu_quote.iloc[0]['date']
        base_close = df_secu_quote.iloc[0]['close']
        prev_trading_day = first_date
        z = []
        for t in range(1, n_periods + 1):
            k = t * days_scale
            if k >= len(trading_days):
                # The calendar returned fewer days than requested.
                break
            trading_day = trading_days[k]
            if trading_day < first_date:
                # The stock has no quote yet at this sample point.
                continue
            # Latest quote on or before the sample day (quotes are assumed
            # to hold one row per date, in ascending date order).
            latest = df_secu_quote[df_secu_quote['date'] <= trading_day].iloc[-1]
            if latest['date'] <= prev_trading_day:
                # No new quote since the previous sample: skip it.
                continue
            z.append(latest['close'] / base_close)
            prev_trading_day = latest['date']
        if not z:
            # max()/min() would raise on an empty list.
            return None
        cmra = math.log(max(z)) - math.log(min(z))
        return pd.Series([Utils.code_to_symbol(code), cmra], index=['code', 'cmra'])
Exemple #12
0
def _add_fq_adjusted_fields(fin_data, df_mkt_data, report_date_str,
                            per_share_fields):
    """Add '<field>_adj' entries to fin_data (a dict) in place.

    Uses the adjustment factor of the last quote dated on or before
    report_date_str: per-share fields are multiplied by it and
    'MainOperateRevenue' is divided by it. When no such quote exists the
    values are copied unchanged.
    """
    df_extract_mkt = df_mkt_data[df_mkt_data.date <= report_date_str]
    fq_factor = None
    if not df_extract_mkt.empty:
        fq_factor = df_extract_mkt.iloc[-1]['factor']
    for field in per_share_fields:
        value = fin_data[field]
        fin_data[field + '_adj'] = (value if fq_factor is None else
                                    value * fq_factor)
    revenue = fin_data['MainOperateRevenue']
    fin_data['MainOperateRevenue_adj'] = (revenue if fq_factor is None else
                                          revenue / fq_factor)


def _get_prevN_years_finbasicdata(date, code, years):
    """
    Read the main financial indicators of the past n years; per-share
    figures are adjusted by the price adjustment factor.

    The result holds `years` entries: the annual reports (dated 12-31) that
    apply for the month of `date`, followed by the TTM data except when the
    month is May-August.

    :param date: datetime-like
        Reference date
    :param code: str
        Stock code, format: SH600000
    :param years: int
        Number of reporting periods to return
    :return: list of dict, or None
        One dict per period with the original fields plus '*_adj' fields.
        None if the quotes, any annual report or the TTM data is missing.
    """
    year = date.year
    month = date.month
    # Annual reports run from (year - oldest) to (year - newest).
    if month in (1, 2, 3, 4):
        oldest, newest, is_ttm = years, 2, True
    elif month in (5, 6, 7, 8):
        oldest, newest, is_ttm = years, 1, False
    else:
        oldest, newest, is_ttm = years - 1, 1, True
    report_dates = [
        datetime.datetime(year - n, 12, 31)
        for n in range(oldest, newest - 1, -1)
    ]

    # Adjusted quotes of the stock, used to adjust the per-share figures.
    df_mkt_data = Utils.get_secu_daily_mkt(code, end=date, fq=True)
    if df_mkt_data is None:
        # Previously this crashed with a TypeError on the first lookup.
        return None

    prevN_years_finbasicdata = []
    for report_date in report_dates:
        fin_basic_data = Utils.get_fin_basic_data(code,
                                                  report_date,
                                                  date_type='report_date')
        if fin_basic_data is None:
            return None
        fin_basic_data = fin_basic_data.to_dict()
        _add_fq_adjusted_fields(
            fin_basic_data, df_mkt_data, report_date.strftime('%Y-%m-%d'),
            ('BasicEPS', 'UnitNetAsset', 'UnitNetOperateCashFlow'))
        prevN_years_finbasicdata.append(fin_basic_data)
    if is_ttm:
        ttm_fin_basic_data = Utils.get_ttm_fin_basic_data(code, date)
        if ttm_fin_basic_data is None:
            return None
        ttm_fin_basic_data = ttm_fin_basic_data.to_dict()
        # Only BasicEPS and the revenue are adjusted for the TTM entry.
        _add_fq_adjusted_fields(
            ttm_fin_basic_data, df_mkt_data,
            ttm_fin_basic_data['ReportDate'].strftime('%Y-%m-%d'),
            ('BasicEPS',))
        prevN_years_finbasicdata.append(ttm_fin_basic_data)
    return prevN_years_finbasicdata
Exemple #13
0
 def _calc_factor_loading1(cls, code, calc_date):
     """
     Compute four proxy statistics of one stock's chip distribution plus
     the stock's return over the following period.

     Parameters
     -------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like, str
         Calculation date, format YYYY-MM-DD
     :return: pd.Series
     --------
         0. arc: turnover-weighted mean of the relative capital gain
         1. vrc: turnover-weighted variance
         2. src: turnover-weighted skewness
         3. krc: turnover-weighted kurtosis
         4. next_ret: return from the next trading day to the end of that
            day's calendar month
         Returns None if quotes are missing, fewer than 20 rows are
         available, any statistic is NaN, or the next return is unavailable.
     """
     # Adjusted daily quotes over the trailing __days window.
     quotes = Utils.get_secu_daily_mkt(code,
                                       end=calc_date,
                                       ndays=cls.__days,
                                       fq=True,
                                       range_lookup=True)
     if quotes is None or len(quotes) < 20:
         return None
     # Newest row first.
     quotes.sort_values(by='date', ascending=False, inplace=True)

     # Build the RC (relative capital gain) and ATR (adjusted turnover)
     # vectors row by row.
     n_rows = len(quotes)
     rel_gain = np.zeros(n_rows)
     adj_turnover = np.zeros(n_rows)
     last_close = quotes.iloc[0]['close']  # close on the newest row
     prev_turnover = None
     for pos in range(n_rows):
         row = quotes.iloc[pos]
         avg_price = row['amount'] / row['vol'] * row['factor']
         rel_gain[pos] = (last_close - avg_price) / last_close
         turnover = row['turnover1']
         if pos == 0:
             adj_turnover[pos] = turnover
         else:
             # Recurrence on the previous entry: swap the previous turnover
             # for the current one and discount by (1 - previous turnover).
             adj_turnover[pos] = adj_turnover[
                 pos - 1] / prev_turnover * turnover * (1. - prev_turnover)
         prev_turnover = turnover

     arc = np.average(rel_gain, weights=adj_turnover)
     if np.isnan(arc):
         return None
     deviation = rel_gain - arc
     correction = n_rows / (n_rows - 1.)  # n / (n - 1) scaling
     vrc = correction * np.sum(
         adj_turnover * deviation * deviation) / np.sum(adj_turnover)
     if np.isnan(vrc):
         return None
     src = correction * np.sum(adj_turnover * np.float_power(
         deviation, 3)) / np.sum(adj_turnover) / np.float_power(vrc, 1.5)
     if np.isnan(src):
         return None
     krc = correction * np.sum(adj_turnover * np.float_power(
         deviation, 4)) / np.sum(adj_turnover) / np.float_power(vrc, 2)
     if np.isnan(krc):
         return None

     # Return over the next period: from the trading day after calc_date
     # to the last calendar day of that day's month.
     next_date = Utils.get_trading_days(start=calc_date, ndays=2)[1]
     days_in_month = calendar.monthrange(next_date.year, next_date.month)[1]
     month_end = datetime.datetime(next_date.year, next_date.month,
                                   days_in_month)
     next_ret = Utils.calc_interval_ret(code, start=next_date, end=month_end)
     if next_ret is None:
         return None
     return pd.Series([arc, vrc, src, krc, next_ret],
                      index=['arc', 'vrc', 'src', 'krc', 'next_ret'])
Exemple #14
0
def apm_backtest(start, end, pure_factor=False):
    """
    Historical back-test of the APM factor.

    Every trading day between ``start`` and ``end`` is visited in order. On a
    month-start day the current holdings (if any) are valued at that day's
    volume-weighted average price, a new portfolio is selected from the factor
    loadings dated the previous trading day, and the new holdings are written
    to ``port_data_<date>.csv``. On any other day the current holdings are
    valued at the close. One NAV row is appended per trading day and the whole
    NAV table is finally written to ``port_nav.csv``.

    Parameters:
    --------
    :param start: datetime-like, str
        Back-test start date, format YYYY-MM-DD. It should be the trading day
        right before a month start, i.e. a month-end trading day.
    :param end: datetime-like, str
        Back-test end date, format YYYY-MM-DD.
    :param pure_factor: bool, default False
        Whether the back-test runs on the pure factor; selects the ``pure_*``
        paths of ``factor_ct.APM_CT`` instead of the plain ones.
    :return: None. Results are persisted as CSV files.
    """
    # Trading days covered by the back-test.
    trading_days = Utils.get_trading_days(start, end)
    first_day = trading_days.iloc[0]
    prev_trading_day = Utils.get_prev_n_day(first_day, 1)

    # Directory that holds earlier back-test output and receives this run's output.
    backtest_subdir = (factor_ct.APM_CT.pure_backtest_path
                       if pure_factor else factor_ct.APM_CT.backtest_path)
    backtest_dir = os.path.join(factor_ct.FACTOR_DB.db_path, backtest_subdir)

    # Latest stored portfolio (pd.DataFrame<date,factorvalue,id,buyprice>) and NAV table.
    factor_data, port_nav = Utils.get_backtest_data(backtest_dir, first_day)
    if port_nav is None:
        # No history yet: start from a NAV of 1.0 on the day before the first trading day.
        port_nav = DataFrame({'date': [prev_trading_day.strftime('%Y-%m-%d')],
                              'nav': [1.0]})

    for trading_day in trading_days:
        # Base NAV: the NAV recorded on the holdings' own date, or on the
        # previous trading day while nothing is held yet.
        if factor_data is None:
            base_date = prev_trading_day.strftime('%Y-%m-%d')
        else:
            base_date = factor_data.iloc[0].date
        nav = port_nav[port_nav.date == base_date].iloc[0].nav
        interval_ret = 0.0

        if not Utils.is_month_start(trading_day):
            # Ordinary day: mark the holdings to the close.
            logging.info('[%s] 月中估值.' %
                         Utils.datetimelike_to_str(trading_day, True))
            if factor_data is not None:
                for _, holding in factor_data.iterrows():
                    quote = Utils.get_secu_daily_mkt(holding.id,
                                                     trading_day,
                                                     fq=True,
                                                     range_lookup=True)
                    interval_ret += quote.close / holding.buyprice - 1.0
                interval_ret = interval_ret / float(len(factor_data))
                nav = nav * (1.0 + interval_ret)
        else:
            # Month start: rebalance.
            logging.info('[%s] 月初调仓.' %
                         Utils.datetimelike_to_str(trading_day, True))

            # Step 1: value the outgoing holdings as if sold at today's average
            # price; when the returned quote is not dated today, its close is used.
            if factor_data is not None:
                for _, holding in factor_data.iterrows():
                    quote = Utils.get_secu_daily_mkt(holding.id,
                                                     trading_day,
                                                     fq=True,
                                                     range_lookup=True)
                    if quote.date == trading_day.strftime('%Y-%m-%d'):
                        sell_price = quote.amount / quote.vol * quote.factor
                    else:
                        sell_price = quote.close
                    interval_ret += sell_price / holding.buyprice - 1.0
                interval_ret = interval_ret / float(len(factor_data))
                nav = nav * (1.0 + interval_ret)

            # Step 2: load the factor loadings dated the previous trading day.
            loading_file = (factor_ct.APM_CT.pure_apm_db_file
                            if pure_factor else factor_ct.APM_CT.apm_db_file)
            factor_data = Utils.read_factor_loading(
                os.path.join(factor_ct.FACTOR_DB.db_path, loading_file),
                Utils.datetimelike_to_str(prev_trading_day, False))

            # Step 3: drop stocks that are suspended or limit-up today.
            untradable = []
            for row_label, candidate in factor_data.iterrows():
                status = Utils.trading_status(candidate.id, trading_day)
                if status == SecuTradingStatus.Suspend or status == SecuTradingStatus.LimitUp:
                    untradable.append(row_label)
            factor_data = factor_data.drop(untradable, axis=0)

            # Step 4: keep the top 10% of the remaining stocks by factor value.
            factor_data = factor_data.sort_values(by='factorvalue',
                                                  ascending=False)
            n_selected = int(len(factor_data) * 0.1)
            factor_data = factor_data.iloc[:n_selected]

            # Step 5: record each buy price (today's average price) and add
            # the buy-price-to-close return of the new portfolio.
            factor_data['buyprice'] = 0.0
            interval_ret = 0.0
            for row_label, candidate in factor_data.iterrows():
                quote = Utils.get_secu_daily_mkt(candidate.id,
                                                 trading_day,
                                                 fq=True,
                                                 range_lookup=False)
                assert len(quote) > 0
                factor_data.loc[row_label, 'buyprice'] = (
                    quote.amount / quote.vol * quote.factor)
                interval_ret += quote.close / factor_data.loc[
                    row_label, 'buyprice'] - 1.0
            interval_ret = interval_ret / float(len(factor_data))
            nav = nav * (1.0 + interval_ret)

            # Step 6: persist the new holdings.
            port_data_name = 'port_data_%s.csv' % Utils.datetimelike_to_str(
                trading_day, False)
            factor_data.to_csv(os.path.join(backtest_dir, port_data_name),
                               index=False)

        # Record today's NAV and move the "previous trading day" marker forward.
        nav_row = Series({'date': trading_day.strftime('%Y-%m-%d'), 'nav': nav})
        port_nav = port_nav.append(nav_row, ignore_index=True)
        prev_trading_day = trading_day

    # Persist the NAV table.
    port_nav.to_csv(os.path.join(backtest_dir, 'port_nav.csv'), index=False)
Exemple #15
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the BETA and HSIGMA factor loadings of one stock on a given date.

     The stock's daily returns are regressed on the benchmark's daily returns
     with weighted least squares; the window length, benchmark code and
     half-life are read from ``risk_ct.DBETA_CT`` and the weights come from
     ``Algo.ewma_weight``.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like, str
         Calculation date, format YYYY-MM-DD
     :return: pd.Series
     --------
         0. code: stock symbol as returned by Utils.code_to_symbol
         1. beta: slope coefficient of the regression
         2. hsigma: square root of the regression's residual mean squared error
         None is returned when the stock or benchmark quotes are unavailable,
         when fewer than 126 stock quotes are returned, or when the first stock
         quote is dated earlier than calc_date minus 3 * trailing calendar days.
     :raises ValueError: when the benchmark quotes filtered to the stock's
         dates do not have the same number of rows as the stock quotes
     """
     settings = risk_ct.DBETA_CT

     # Adjusted daily quotes of the stock over the trailing window.
     secu_quotes = Utils.get_secu_daily_mkt(code, end=calc_date, ndays=settings.trailing + 1, fq=True)
     # Require at least half a year (126 trading days) of history.
     if secu_quotes is None or len(secu_quotes) < 126:
         return None
     # Reject histories whose first quote lies too far back from calc_date.
     earliest_allowed = Utils.to_date(calc_date) - datetime.timedelta(days=settings.trailing * 3)
     if Utils.to_date(secu_quotes.iloc[0]['date']) < earliest_allowed:
         return None
     secu_quotes.reset_index(drop=True, inplace=True)

     # Adjusted daily quotes of the benchmark, restricted to the stock's dates.
     bench_quotes = Utils.get_secu_daily_mkt(settings.benchmark, end=calc_date, fq=True)
     if bench_quotes is None:
         return None
     on_secu_dates = bench_quotes['date'].isin(list(secu_quotes['date']))
     bench_quotes = bench_quotes[on_secu_dates]
     if len(bench_quotes) != len(secu_quotes):
         raise ValueError("[beta计算]基准和个股的历史行情长度不一致.")
     bench_quotes = bench_quotes.reset_index(drop=True)

     # Daily simple returns: close[t] / close[t-1] - 1 for every row after the first.
     secu_close = np.array(secu_quotes['close'])
     secu_ret = secu_close[1:] / secu_close[:-1] - 1.
     bench_close = np.array(bench_quotes['close'])
     bench_ret = bench_close[1:] / bench_close[:-1] - 1.

     # One weight per return observation.
     n_obs = len(bench_ret)
     weights = Algo.ewma_weight(n_obs, settings.half_life)

     # Weighted least squares of stock returns on [const, benchmark returns].
     design = sm.add_constant(bench_ret)
     fitted = sm.WLS(secu_ret, design, weights=weights).fit()
     beta = fitted.params[1]
     hsigma = np.sqrt(fitted.mse_resid)
     return pd.Series([Utils.code_to_symbol(code), beta, hsigma], index=['code', 'beta', 'hsigma'])