def _calc_factor_loading(cls, code, calc_date):
    """Compute the ETOP (trailing earnings-to-price) factor loading for one stock.

    Parameters:
    --------
    :param code: str
        Security code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format: YYYY-MM-DD
    :return: pd.Series
    --------
        ETOP loading of the stock
        0. code
        1. etop
        Returns None when the required data is unavailable.
    """
    symbol = Utils.code_to_symbol(code)
    # Trailing-twelve-month fundamentals; give up if they are missing.
    fin_ttm = Utils.get_ttm_fin_basic_data(symbol, calc_date)
    if fin_ttm is None:
        return None
    netprofit_ttm = fin_ttm['NetProfit']
    if pd.isnull(netprofit_ttm):
        return None
    # Market cap is recovered from the stored LNCAP (log market cap) loading.
    cap_file = os.path.join(factor_ct.FACTOR_DB.db_path, risk_ct.LNCAP_CT.db_file)
    cap_loading = Utils.read_factor_loading(
        cap_file, Utils.datetimelike_to_str(calc_date, dash=False), symbol)
    if cap_loading.empty:
        return None
    market_cap = np.exp(cap_loading['factorvalue'])
    # etop = ttm net profit / market cap; the 10000 factor rescales the
    # net-profit units (presumably 万元 -> 元 — TODO confirm against data source).
    etop = netprofit_ttm * 10000 / market_cap
    return pd.Series([symbol, etop], index=['code', 'etop'])
def _calc_factor_loading(cls, code, calc_date):
    """Compute the BTOP (book-to-price) factor loading for one stock.

    Parameters:
    --------
    :param code: str
        Security code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format: YYYY-MM-DD
    :return: pd.Series
    --------
        BTOP loading of the stock
        0. code
        1. btop
        Returns None when the required data is unavailable.
    """
    # Fundamentals from the latest financial report as of calc_date.
    report_date = Utils.get_fin_report_date(calc_date)
    fundamentals = Utils.get_fin_basic_data(code, report_date)
    if fundamentals is None:
        return None
    str_date = Utils.datetimelike_to_str(calc_date, dash=False)
    # LNCAP loadings are cached per date on the class to avoid re-reading the file.
    df_lncap = cls._LNCAP_Cache.get(str_date)
    if df_lncap is None:
        lncap_file = os.path.join(factor_ct.FACTOR_DB.db_path, risk_ct.LNCAP_CT.db_file)
        df_lncap = Utils.read_factor_loading(lncap_file, str_date)
        cls._LNCAP_Cache.set(str_date, df_lncap)
    symbol = Utils.code_to_symbol(code)
    matched = df_lncap[df_lncap['id'] == symbol]
    if matched.empty:
        return None
    flncap = matched.iloc[0]['factorvalue']
    # book-to-price = net asset / market cap; the 10000 factor rescales the
    # balance-sheet units (presumably 万元 -> 元 — TODO confirm against data source).
    net_asset = fundamentals['TotalAsset'] - fundamentals['TotalLiability']
    btop = net_asset * 10000 / np.exp(flncap)
    return pd.Series([symbol, btop], index=['code', 'btop'])
def _get_factorloading(factor_name, date, factor_type):
    """Read the factor-loading data of a single alpha factor.

    Parameters:
    --------
    :param factor_name: str
        Alpha factor name, e.g: SmartMoney
    :param date: datetime-like, str
        Date, e.g: YYYY-MM-DD, YYYYMMDD
    :param factor_type: str
        Factor type: 'raw' = raw factor, 'standardized' = winsorized and
        standardized factor, 'orthogonalized' = orthogonalized factor
    :return: pd.DataFrame
    --------
        Factor-loading data
        0. date
        1. id: security code
        2. factorvalue: factor value
    """
    date = Utils.datetimelike_to_str(date, dash=False)
    # Resolve alphafactor_ct.<FACTORNAME>.CT with getattr instead of the original
    # eval() on a built string — same lookup, no dynamic code execution.
    factor_const = getattr(getattr(alphafactor_ct, factor_name.upper()), 'CT')
    factorloading_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                      factor_const['db_file'],
                                      factor_type, factor_name)
    # Rows containing NaN are dropped by the reader.
    df_factorloading = Utils.read_factor_loading(factorloading_path, date, drop_na=True)
    return df_factorloading
def calc_factor_loading_(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
    """Calculate (and optionally persist) the ResVolatility factor loading.

    Parameters:
    --------
    :param start_date: datetime-like, str
        Start date, format: YYYY-MM-DD or YYYYMMDD
    :param end_date: datetime-like, str
        End date; when None only start_date is computed. Format: YYYY-MM-DD or YYYYMMDD
    :param month_end: bool, default True
        When True, only month-end dates are computed
    :param save: bool, default False
        Whether to persist loadings to the factor database
    :param kwargs:
        'multi_proc': bool, True = multiprocessing, False = single process, default False
    :return: dict
        Factor-loading data
    """
    # Build the trading-day sequence to iterate over.
    start_date = Utils.to_date(start_date)
    if end_date is not None:
        end_date = Utils.to_date(end_date)
        trading_days_series = Utils.get_trading_days(start=start_date, end=end_date)
    else:
        trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)
    if 'multi_proc' not in kwargs:
        kwargs['multi_proc'] = False
    for calc_date in trading_days_series:
        if month_end and (not Utils.is_month_end(calc_date)):
            continue
        # 1. Compute the loading of every component factor first.
        for com_factor in risk_ct.RESVOLATILITY_CT.component:
            # NOTE(review): eval() instantiates the component class by name;
            # the names come from internal config (risk_ct), not external input.
            factor = eval(com_factor + '()')
            factor.calc_factor_loading(start_date=calc_date, end_date=None,
                                       month_end=month_end, save=save,
                                       multi_proc=kwargs['multi_proc'])
        # 2. Merge the component loadings into the composite ResVolatility loading.
        resvol_factor = pd.DataFrame()
        for com_factor in risk_ct.RESVOLATILITY_CT.component:
            # getattr replaces the original eval('risk_ct.' + com_factor + '_CT').
            factor_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                       getattr(risk_ct, com_factor + '_CT')['db_file'])
            factor_loading = Utils.read_factor_loading(
                factor_path, Utils.datetimelike_to_str(calc_date, dash=False))
            factor_loading.drop(columns='date', inplace=True)
            # Winsorize + standardize each component before combining.
            factor_loading[com_factor] = Utils.normalize_data(
                Utils.clean_extreme_value(
                    np.array(factor_loading['factorvalue']).reshape((len(factor_loading), 1))))
            factor_loading.drop(columns='factorvalue', inplace=True)
            if resvol_factor.empty:
                resvol_factor = factor_loading
            else:
                resvol_factor = pd.merge(left=resvol_factor, right=factor_loading,
                                         how='inner', on='id')
        # Weighted sum of the components gives the composite loading.
        resvol_factor.set_index('id', inplace=True)
        weight = pd.Series(risk_ct.RESVOLATILITY_CT.weight)
        resvol_factor = (resvol_factor * weight).sum(axis=1)
        resvol_factor.name = 'factorvalue'
        resvol_factor.index.name = 'id'
        resvol_factor = pd.DataFrame(resvol_factor)
        resvol_factor.reset_index(inplace=True)
        # Loadings are stamped with the NEXT trading day after calc_date.
        resvol_factor['date'] = Utils.get_trading_days(start=calc_date, ndays=2)[1]
        # Persist the composite loading.
        if save:
            Utils.factor_loading_persistent(
                cls._db_file,
                Utils.datetimelike_to_str(calc_date, dash=False),
                resvol_factor.to_dict('list'), ['date', 'id', 'factorvalue'])
def _calc_factor_loading(cls, code, calc_date):
    """Compute the MLEV (market leverage) factor loading for one stock.

    Parameters:
    --------
    :param code: str
        Security code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format: YYYY-MM-DD
    :return: pd.Series
    --------
        MLEV loading of the stock
        0. code
        1. mlev
        Returns None when the required data is unavailable.
    """
    symbol = Utils.code_to_symbol(code)
    report_date = Utils.get_fin_report_date(calc_date)
    # Latest financial-report summary as of calc_date.
    fin_summary = Utils.get_fin_summary_data(symbol, report_date)
    if fin_summary is None:
        return None
    # Book value of long-term debt; fall back to total liabilities when missing.
    long_debt = fin_summary['TotalNonCurrentLiabilities']
    if np.isnan(long_debt):
        long_debt = fin_summary['TotalLiabilities']
    if np.isnan(long_debt):
        return None
    long_debt *= 10000.0
    # Book value of preferred equity; zero for A-shares.
    pref_equity = 0.0
    # Market value of equity, recovered from the stored LNCAP loading.
    cap_file = os.path.join(factor_ct.FACTOR_DB.db_path, risk_ct.LNCAP_CT.db_file)
    cap_loading = Utils.read_factor_loading(
        cap_file, Utils.datetimelike_to_str(calc_date, dash=False), symbol)
    if cap_loading.empty:
        return None
    equity_value = np.exp(cap_loading['factorvalue'])
    # mlev = (ME + PE + LD) / ME
    mlev = (equity_value + pref_equity + long_debt) / equity_value
    return pd.Series([symbol, mlev], index=['code', 'mlev'])
def _calc_factor_loading(cls, code, calc_date):
    """Compute the EPFWD (forward earnings-to-price) factor loading for one stock.

    Parameters:
    --------
    :param code: str
        Security code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format: YYYY-MM-DD
    :return: pd.Series
    --------
        EPFWD loading of the stock
        0. code
        1. epfwd
        Returns None when the required data is unavailable.
    """
    symbol = Utils.code_to_symbol(code)
    # Consensus predicted earnings; fall back to ttm net profit when absent.
    consensus = Utils.get_consensus_data(calc_date, symbol, ConsensusType.PredictedEarings)
    if consensus is None:
        fin_ttm = Utils.get_ttm_fin_basic_data(symbol, calc_date)
        if fin_ttm is None:
            return None
        consensus = pd.Series([symbol, fin_ttm['NetProfit']],
                              index=['code', 'predicted_earnings'])
    earnings = consensus['predicted_earnings']
    if np.isnan(earnings):
        return None
    # Market cap, recovered from the stored LNCAP loading.
    cap_file = os.path.join(factor_ct.FACTOR_DB.db_path, risk_ct.LNCAP_CT.db_file)
    cap_loading = Utils.read_factor_loading(
        cap_file, Utils.datetimelike_to_str(calc_date, dash=False), symbol)
    if cap_loading.empty:
        return None
    # epfwd = predicted earnings / market cap
    epfwd = earnings * 10000.0 / np.exp(cap_loading['factorvalue'])
    return pd.Series([symbol, epfwd], index=['code', 'epfwd'])
def _get_factor_loading(cls, db_file, str_key, factor_name=None, factor_type=None, **kwargs):
    """Read factor-loading data.

    Parameters:
    --------
    :param db_file: str
        Absolute path of the factor-loading data file
    :param str_key: str
        Key, usually a date, e.g: YYYY-MM-DD, YYYYMMDD
    :param factor_name: str, default None
        Factor name
    :param factor_type: str, default None
        Factor type, e.g: 'raw', 'standardized', 'orthogonalized'
    :param kwargs:
        kwargs['code']: str, default None; security code, e.g: SH600000, 600000
        kwargs['nan_value']: object, default None; when not None, NaN values are
            replaced by nan_value ('na_value' is also accepted for backward
            compatibility)
        kwargs['drop_na']: bool, default False; whether to drop rows containing NaN
    :return: pd.DataFrame or pd.Series, factor loading
    --------
        pd.DataFrame (code is None) or pd.Series (code is not None)
        0. date
        1. id
        2. factorvalue
    """
    # When a factor type is given, the loading lives in a per-type subdirectory.
    if factor_type is not None:
        db_file = os.path.join(db_file, factor_type, factor_name)
    if 'code' not in kwargs:
        kwargs['code'] = None
    # Bug fix: the docstring documents 'nan_value' but the code only looked up
    # 'na_value', silently ignoring the documented spelling. Accept both,
    # preferring the documented 'nan_value'.
    na_value = kwargs.get('nan_value', kwargs.get('na_value'))
    if 'drop_na' not in kwargs:
        kwargs['drop_na'] = False
    return Utils.read_factor_loading(db_file, str_key, kwargs['code'],
                                     na_value, kwargs['drop_na'])
def _calc_factor_loading(cls, code, calc_date):
    """Compute the CETOP (cash-earnings-to-price) factor loading for one stock.

    Parameters:
    --------
    :param code: str
        Security code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format: YYYY-MM-DD
    :return: pd.Series
    --------
        CETOP loading of the stock
        0. code
        1. cetop
        Returns None when the required data is unavailable.
    """
    symbol = Utils.code_to_symbol(code)
    # Trailing-twelve-month key financial indicators.
    fin_ttm = Utils.get_ttm_fin_basic_data(symbol, calc_date)
    if fin_ttm is None:
        return None
    operating_cash_ttm = fin_ttm['NetOperateCashFlow']
    if np.isnan(operating_cash_ttm):
        return None
    # Market cap, recovered from the stored LNCAP loading.
    cap_file = os.path.join(factor_ct.FACTOR_DB.db_path, risk_ct.LNCAP_CT.db_file)
    cap_loading = Utils.read_factor_loading(
        cap_file, Utils.datetimelike_to_str(calc_date, dash=False), symbol)
    if cap_loading.empty:
        return None
    market_cap = np.exp(cap_loading['factorvalue'])
    # cetop = ttm operating cash flow / market cap
    cetop = operating_cash_ttm * 10000 / market_cap
    return pd.Series([symbol, cetop], index=['code', 'cetop'])
def smartq_backtest(start, end):
    """Historical backtest of the SmartQ factor.

    Parameters:
    --------
    :param start: datetime-like, str
        Backtest start date, format: YYYY-MM-DD; should be the first day of a month
    :param end: datetime-like, str
        Backtest end date, format: YYYY-MM-DD
    :return:
    """
    # Trading-day sequence between start and end.
    trading_days = Utils.get_trading_days(start, end)
    # Load the most recent backtest state saved before the start date.
    prev_trading_day = Utils.get_prev_n_day(trading_days.iloc[0], 1)
    backtest_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                 alphafactor_ct.SMARTMONEY_CT.backtest_path)
    factor_data, port_nav = Utils.get_backtest_data(backtest_path, trading_days.iloc[0])
    # factor_data = None
    # factor_data records the SmartQ info of the stocks selected at the last
    # rebalance: pd.DataFrame<date, factorvalue, id, buyprice>
    if port_nav is None:
        # No prior NAV history: start at 1.0 on the day before the backtest.
        port_nav = DataFrame({
            'date': [prev_trading_day.strftime('%Y-%m-%d')],
            'nav': [1.0]
        })
    # Walk the trading days: on the first day of a month, reload the SmartQ
    # loadings and rebalance; otherwise just mark the portfolio to market.
    t = 0  # number of rebalances so far (used to throttle data requests)
    for trading_day in trading_days:
        # NAV base: last saved NAV (no holdings yet) or NAV as of the last rebalance.
        if factor_data is None:
            nav = port_nav[port_nav.date == prev_trading_day.strftime(
                '%Y-%m-%d')].iloc[0].nav
        else:
            nav = port_nav[port_nav.date == factor_data.iloc[0].date].iloc[0].nav
        interval_ret = 0.0
        # Rebalance on the first trading day of each month.
        if Utils.is_month_start(trading_day):
            logging.info('[%s] 月初调仓.' % Utils.datetimelike_to_str(trading_day, True))
            # Before rebalancing, value the old holdings as if sold at today's VWAP.
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id, trading_day,
                                                         fq=True, range_lookup=True)
                    if daily_mkt.date == trading_day.strftime('%Y-%m-%d'):
                        # Traded today: use adjusted VWAP (= amount / volume * adj factor).
                        vwap_price = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                    else:
                        # No trade today (e.g. suspended): use the last close.
                        vwap_price = daily_mkt.close
                    interval_ret += vwap_price / factor_info.buyprice - 1.0
                interval_ret /= float(len(factor_data))
                nav *= (1.0 + interval_ret)
            # Reload factor_data from the SmartMoney factor database.
            factor_data = Utils.read_factor_loading(
                SmartMoney.get_db_file(),
                Utils.datetimelike_to_str(prev_trading_day, False))
            # Compute each stock's 20-day return; drop stocks that are not
            # tradable today (suspended or limit-up).
            ind_to_be_deleted = []
            factor_data['ret20'] = np.zeros(len(factor_data))
            for ind, factor_info in factor_data.iterrows():
                trading_status = Utils.trading_status(factor_info.id, trading_day)
                if trading_status == SecuTradingStatus.Suspend or trading_status == SecuTradingStatus.LimitUp:
                    ind_to_be_deleted.append(ind)
                fret20 = Utils.calc_interval_ret(factor_info.id,
                                                 end=prev_trading_day, ndays=20)
                if fret20 is None:
                    if ind not in ind_to_be_deleted:
                        ind_to_be_deleted.append(ind)
                else:
                    factor_data.loc[ind, 'ret20'] = fret20
            factor_data = factor_data.drop(ind_to_be_deleted, axis=0)
            # Sort by 20-day return descending and drop the top-20% gainers.
            k = int(factor_data.shape[0] * 0.2)
            factor_data = factor_data.sort_values(by='ret20', ascending=False).iloc[k:]
            del factor_data['ret20']  # drop the temporary ret20 column
            # Sort by factor value ascending and keep the first 10% of stocks.
            factor_data = factor_data.sort_values(by='factorvalue', ascending=True)
            k = int(factor_data.shape[0] * 0.1)
            factor_data = factor_data.iloc[:k]
            # Record buy prices (adjusted VWAP) and value the new portfolio
            # at today's close.
            factor_data['buyprice'] = 0.0
            interval_ret = 0.0
            for ind, factor_info in factor_data.iterrows():
                daily_mkt = Utils.get_secu_daily_mkt(factor_info.id, trading_day,
                                                     fq=True, range_lookup=False)
                assert len(daily_mkt) > 0
                factor_data.loc[ind, 'buyprice'] = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                interval_ret += daily_mkt.close / factor_data.loc[ind, 'buyprice'] - 1.0
            interval_ret /= float(factor_data.shape[0])
            nav *= (1.0 + interval_ret)
            # Persist the rebalanced holdings.
            port_data_path = os.path.join(
                SETTINGS.FACTOR_DB_PATH,
                alphafactor_ct.SMARTMONEY_CT.backtest_path,
                'port_data_%s.csv' % Utils.datetimelike_to_str(trading_day, False))
            factor_data.to_csv(port_data_path, index=False)
            t += 1
            # Throttle: pause every 6 rebalances (presumably to respect data-API
            # rate limits — TODO confirm).
            if t % 6 == 0:
                logging.info('Suspended for 300s.')
                time.sleep(300)
        else:
            # Not a rebalance day: mark the portfolio to market at today's close.
            logging.info('[%s] 月中估值.' % Utils.datetimelike_to_str(trading_day, True))
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id, trading_day,
                                                         fq=True, range_lookup=True)
                    interval_ret += daily_mkt.close / factor_info.buyprice - 1.0
                interval_ret /= float(factor_data.shape[0])
                nav *= (1.0 + interval_ret)
        # Append today's NAV.
        port_nav = port_nav.append(Series({
            'date': Utils.datetimelike_to_str(trading_day, True),
            'nav': nav
        }), ignore_index=True)
        # Advance prev_trading_day.
        prev_trading_day = trading_day
    # Persist the NAV series.
    port_nav_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                 alphafactor_ct.SMARTMONEY_CT.backtest_path,
                                 'port_nav.csv')
    port_nav.to_csv(port_nav_path, index=False)
def calc_factor_loading(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
    """Calculate (and optionally persist) the LIQUIDITY factor loading.

    Parameters:
    --------
    :param start_date: datetime-like, str
        Start date, format: YYYY-MM-DD or YYYYMMDD
    :param end_date: datetime-like, str
        End date; when None only start_date is computed. Format: YYYY-MM-DD or YYYYMMDD
    :param month_end: bool, default True
        When True, only month-end dates are computed
    :param save: bool, default False
        Whether to persist loadings to the factor database
    :param kwargs:
        'multi_proc': bool, True = multiprocessing, False = single process, default False
    :return: dict
        Factor-loading data
    """
    # Trading-day sequence and the universe of stocks.
    start_date = Utils.to_date(start_date)
    if end_date is not None:
        end_date = Utils.to_date(end_date)
        trading_days_series = Utils.get_trading_days(start=start_date, end=end_date)
    else:
        trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)
    all_stock_basics = CDataHandler.DataApi.get_secu_basics()
    # Iterate the trading days and compute the LIQUIDITY loading on each.
    dict_raw_liquidity = None
    for calc_date in trading_days_series:
        if month_end and (not Utils.is_month_end(calc_date)):
            continue
        dict_stom = None
        dict_stoq = None
        dict_stoa = None
        dict_raw_liquidity = None
        logging.info('[%s] Calc LIQUIDITY factor loading.' % Utils.datetimelike_to_str(calc_date))
        # Keep only stocks listed for at least LIQUID_CT.listed_days.
        s = (calc_date - datetime.timedelta(
            days=risk_ct.LIQUID_CT.listed_days)).strftime('%Y%m%d')
        stock_basics = all_stock_basics[all_stock_basics.list_date < s]
        ids = []
        stoms = []
        stoqs = []
        stoas = []
        raw_liquidities = []
        if 'multi_proc' not in kwargs:
            kwargs['multi_proc'] = False
        if not kwargs['multi_proc']:
            # Single-process path: compute each stock's LIQUIDITY components in turn.
            for _, stock_info in stock_basics.iterrows():
                logging.info("[%s] Calc %s's LIQUIDITY factor loading." % (
                    Utils.datetimelike_to_str(calc_date, dash=True), stock_info.symbol))
                liquidity_data = cls._calc_factor_loading(stock_info.symbol, calc_date)
                if liquidity_data is not None:
                    ids.append(liquidity_data['code'])
                    stoms.append(liquidity_data['stom'])
                    stoqs.append(liquidity_data['stoq'])
                    stoas.append(liquidity_data['stoa'])
                    raw_liquidities.append(liquidity_data['liquidity'])
        else:
            # Multi-process path: fan out per-stock work to a pool of 4 workers
            # which push results onto a shared queue.
            q = Manager().Queue()
            p = Pool(4)
            for _, stock_info in stock_basics.iterrows():
                p.apply_async(cls._calc_factor_loading_proc,
                              args=(stock_info.symbol, calc_date, q,))
            p.close()
            p.join()
            while not q.empty():
                liquidity_data = q.get(True)
                ids.append(liquidity_data['code'])
                stoms.append(liquidity_data['stom'])
                stoqs.append(liquidity_data['stoq'])
                stoas.append(liquidity_data['stoa'])
                raw_liquidities.append(liquidity_data['liquidity'])
        # Loadings are stamped with the NEXT trading day after calc_date.
        date_label = Utils.get_trading_days(start=calc_date, ndays=2)[1]
        dict_stom = dict({'date': [date_label] * len(ids), 'id': ids,
                          'factorvalue': stoms})
        dict_stoq = dict({'date': [date_label] * len(ids), 'id': ids,
                          'factorvalue': stoqs})
        dict_stoa = dict({'date': [date_label] * len(ids), 'id': ids,
                          'factorvalue': stoas})
        dict_raw_liquidity = dict({'date': [date_label] * len(ids), 'id': ids,
                                   'factorvalue': raw_liquidities})
        # Orthogonalize the liquidity factor against the Size factor:
        # regress liquidity on size and keep the residual.
        size_factor_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                        risk_ct.SIZE_CT.db_file)
        df_size = Utils.read_factor_loading(
            size_factor_path, Utils.datetimelike_to_str(calc_date, dash=False))
        df_size.drop(columns='date', inplace=True)
        df_size.rename(columns={'factorvalue': 'size'}, inplace=True)
        df_liquidity = pd.DataFrame(dict({'id': ids, 'liquidity': raw_liquidities}))
        df_liquidity = pd.merge(left=df_liquidity, right=df_size, how='inner', on='id')
        # Winsorize + standardize both variables before the regression.
        arr_liquidity = Utils.normalize_data(
            Utils.clean_extreme_value(
                np.array(df_liquidity['liquidity']).reshape((len(df_liquidity), 1))))
        arr_size = Utils.normalize_data(
            Utils.clean_extreme_value(
                np.array(df_liquidity['size']).reshape((len(df_liquidity), 1))))
        model = sm.OLS(arr_liquidity, arr_size)
        results = model.fit()
        df_liquidity['liquidity'] = results.resid
        df_liquidity.drop(columns='size', inplace=True)
        df_liquidity.rename(columns={'liquidity': 'factorvalue'}, inplace=True)
        df_liquidity['date'] = date_label
        # Persist component loadings, the raw liquidity, and the final
        # (orthogonalized) liquidity loading.
        if save:
            str_date = Utils.datetimelike_to_str(calc_date, dash=False)
            factor_header = ['date', 'id', 'factorvalue']
            Utils.factor_loading_persistent(cls._db_file, 'stom_{}'.format(str_date),
                                            dict_stom, factor_header)
            Utils.factor_loading_persistent(cls._db_file, 'stoq_{}'.format(str_date),
                                            dict_stoq, factor_header)
            Utils.factor_loading_persistent(cls._db_file, 'stoa_{}'.format(str_date),
                                            dict_stoa, factor_header)
            Utils.factor_loading_persistent(cls._db_file,
                                            'rawliquidity_{}'.format(str_date),
                                            dict_raw_liquidity, factor_header)
            Utils.factor_loading_persistent(cls._db_file, str_date,
                                            df_liquidity.to_dict('list'), factor_header)
        # Pause 180s per date (presumably a data-API rate limit — TODO confirm).
        logging.info('Suspending for 180s.')
        time.sleep(180)
    return dict_raw_liquidity
def calc_factor_loading_(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
    """Calculate (and optionally persist) the Growth factor loading.

    Parameters:
    --------
    :param start_date: datetime-like, str
        Start date, format: YYYY-MM-DD or YYYYMMDD
    :param end_date: datetime-like, str
        End date; when None only start_date is computed. Format: YYYY-MM-DD or YYYYMMDD
    :param month_end: bool, default True
        When True, only month-end dates are computed
    :param save: bool, default False
        Whether to persist loadings to the factor database
    :param kwargs:
        'multi_proc': bool, True = multiprocessing, False = single process, default False
    :return: dict
        Factor-loading data
    """
    # Build the trading-day sequence to iterate over.
    start_date = Utils.to_date(start_date)
    if end_date is not None:
        end_date = Utils.to_date(end_date)
        trading_days_series = Utils.get_trading_days(start=start_date, end=end_date)
    else:
        trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)
    # Iterate trading days; compute each Growth component's loading first.
    if 'multi_proc' not in kwargs:
        kwargs['multi_proc'] = False
    for calc_date in trading_days_series:
        if month_end and (not Utils.is_month_end(calc_date)):
            continue
        # 1. Compute the loading of every component factor.
        for com_factor in risk_ct.GROWTH_CT.component:
            factor = eval(com_factor + '()')
            factor.calc_factor_loading(start_date=calc_date, end_date=None,
                                       month_end=month_end, save=save,
                                       multi_proc=kwargs['multi_proc'])
        # 2. Combine the components into the Growth loading.
        growth_factor = pd.DataFrame()
        df_industry_classify = Utils.get_industry_classify()  # industry classification per stock
        for com_factor in risk_ct.GROWTH_CT.component:
            factor_path = os.path.join(
                factor_ct.FACTOR_DB.db_path,
                eval('risk_ct.' + com_factor + '_CT')['db_file'])
            factor_loading = Utils.read_factor_loading(
                factor_path, Utils.datetimelike_to_str(calc_date, dash=False))
            factor_loading.drop(columns='date', inplace=True)
            # factor_loading[com_factor] = Utils.normalize_data(Utils.clean_extreme_value(np.array(factor_loading['factorvalue']).reshape((len(factor_loading), 1))))
            # factor_loading.drop(columns='factorvalue', inplace=True)
            factor_loading.rename(columns={'factorvalue': com_factor}, inplace=True)
            # Attach the industry classification.
            factor_loading = pd.merge(
                left=factor_loading,
                right=df_industry_classify[['id', 'ind_code']],
                how='inner', on='id')
            # Split off the rows whose component value is missing.
            missingdata_factor = factor_loading[factor_loading[com_factor].isna()]
            # Drop the missing rows from factor_loading.
            factor_loading.dropna(axis='index', how='any', inplace=True)
            # Winsorize + standardize (cap-weighted) the component loading.
            factor_loading = Utils.normalize_data(factor_loading, id='id',
                                                  columns=com_factor,
                                                  treat_outlier=True,
                                                  weight='cap',
                                                  calc_date=calc_date)
            # Replace missing values with the industry mean of the component.
            ind_codes = set(missingdata_factor['ind_code'])
            ind_mean_factor = {}
            for ind_code in ind_codes:
                ind_mean_factor[ind_code] = factor_loading[
                    factor_loading['ind_code'] == ind_code][com_factor].mean()
            for idx, missingdata in missingdata_factor.iterrows():
                missingdata_factor.loc[idx, com_factor] = ind_mean_factor[
                    missingdata['ind_code']]
            # Re-attach the imputed rows.
            factor_loading = pd.concat([factor_loading, missingdata_factor])
            # Drop the ind_code helper column.
            factor_loading.drop(columns='ind_code', inplace=True)
            if growth_factor.empty:
                growth_factor = factor_loading
            else:
                growth_factor = pd.merge(left=growth_factor, right=factor_loading,
                                         how='inner', on='id')
        # Legacy whole-matrix imputation approach, kept for reference:
        # # 读取个股行业分类数据, 添加至growth_factor中
        # df_industry_classify = Utils.get_industry_classify()
        # growth_factor = pd.merge(left=growth_factor, right=df_industry_classify[['id', 'ind_code']])
        # # 取得含缺失值的因子载荷数据
        # missingdata_factor = growth_factor.loc[[ind for ind, data in growth_factor.iterrows() if data.hasnans]]
        # # 删除growth_factot中的缺失值
        # growth_factor.dropna(axis='index', how='any', inplace=True)
        # # 对growth_factor去极值、标准化
        # growth_factor = Utils.normalize_data(growth_factor, id='id', columns=risk_ct.GROWTH_CT.component, treat_outlier=True, weight='cap', calc_date=calc_date)
        # # 把missingdata_factor中的缺失值替换为行业均值
        # ind_codes = set(missingdata_factor['ind_code'])
        # ind_mean_factor = {}
        # for ind_code in ind_codes:
        #     ind_mean_factor[ind_code] = growth_factor[growth_factor['ind_code'] == ind_code].mean()
        # missingdata_label = {ind: missingdata_factor.columns[missingdata.isna()].tolist() for ind, missingdata in missingdata_factor.iterrows()}
        # for ind, cols in missingdata_label.items():
        #     missingdata_factor.loc[ind, cols] = ind_mean_factor[missingdata_factor.loc[ind, 'ind_code']][cols]
        # # 把missingdata_factor和growth_factor合并
        # growth_factor = pd.concat([growth_factor, missingdata_factor])
        # # 删除ind_code列
        # growth_factor.drop(columns='ind_code', inplace=True)
        # Weighted sum of the components gives the Growth loading.
        growth_factor.set_index('id', inplace=True)
        weight = pd.Series(risk_ct.GROWTH_CT.weight)
        growth_factor = (growth_factor * weight).sum(axis=1)
        growth_factor.name = 'factorvalue'
        growth_factor.index.name = 'id'
        growth_factor = pd.DataFrame(growth_factor)
        growth_factor.reset_index(inplace=True)
        # Loadings are stamped with the NEXT trading day after calc_date.
        growth_factor['date'] = Utils.get_trading_days(start=calc_date, ndays=2)[1]
        # Persist the Growth loading.
        if save:
            Utils.factor_loading_persistent(
                cls._db_file,
                Utils.datetimelike_to_str(calc_date, dash=False),
                growth_factor.to_dict('list'), ['date', 'id', 'factorvalue'])
def apm_backtest(start, end, pure_factor=False):
    """Historical backtest of the APM factor.

    Parameters:
    --------
    :param start: datetime-like, str
        Backtest start date, format: YYYY-MM-DD; should be the trading day
        before a month start, i.e. a month-end trading day
    :param end: datetime-like, str
        Backtest end date, format: YYYY-MM-DD
    :param pure_factor: bool, default False
        Whether to backtest the purified factor
    :return:
    """
    # Trading-day sequence between start and end.
    trading_days = Utils.get_trading_days(start, end)
    # Load the most recent backtest state saved before the start date.
    prev_trading_day = Utils.get_prev_n_day(trading_days.iloc[0], 1)
    if pure_factor:
        backtest_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                     factor_ct.APM_CT.pure_backtest_path)
    else:
        backtest_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                     factor_ct.APM_CT.backtest_path)
    factor_data, port_nav = Utils.get_backtest_data(backtest_path, trading_days.iloc[0])
    # factor_data = None
    # factor_data records the APM info of the stocks selected at the last
    # rebalance: pd.DataFrame<date, factorvalue, id, buyprice>
    if port_nav is None:
        # No prior NAV history: start at 1.0 on the day before the backtest.
        port_nav = DataFrame({
            'date': [prev_trading_day.strftime('%Y-%m-%d')],
            'nav': [1.0]
        })
    # Walk the trading days: on the first day of a month, reload the APM
    # loadings and rebalance; otherwise mark the portfolio to market.
    for trading_day in trading_days:
        # NAV base: last saved NAV (no holdings yet) or NAV as of the last rebalance.
        if factor_data is None:
            nav = port_nav[port_nav.date == prev_trading_day.strftime(
                '%Y-%m-%d')].iloc[0].nav
        else:
            nav = port_nav[port_nav.date == factor_data.iloc[0].date].iloc[0].nav
        interval_ret = 0.0
        # Rebalance on the first trading day of each month.
        if Utils.is_month_start(trading_day):
            logging.info('[%s] 月初调仓.' % Utils.datetimelike_to_str(trading_day, True))
            # Before rebalancing, value the old holdings as if sold at today's VWAP.
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id, trading_day,
                                                         fq=True, range_lookup=True)
                    if daily_mkt.date == trading_day.strftime('%Y-%m-%d'):
                        # Traded today: adjusted VWAP (= amount / volume * adj factor).
                        vwap_price = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                    else:
                        # No trade today (e.g. suspended): use the last close.
                        vwap_price = daily_mkt.close
                    interval_ret += vwap_price / factor_info.buyprice - 1.0
                interval_ret /= float(len(factor_data))
                nav *= (1.0 + interval_ret)
            # Reload factor_data (pure or raw APM loadings).
            if pure_factor:
                factor_data_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                                factor_ct.APM_CT.pure_apm_db_file)
            else:
                factor_data_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                                factor_ct.APM_CT.apm_db_file)
            factor_data = Utils.read_factor_loading(
                factor_data_path, Utils.datetimelike_to_str(prev_trading_day, False))
            # Drop stocks not tradable today (suspended or limit-up).
            ind_to_be_delted = []
            for ind, factor_info in factor_data.iterrows():
                trading_status = Utils.trading_status(factor_info.id, trading_day)
                if trading_status == SecuTradingStatus.Suspend or trading_status == SecuTradingStatus.LimitUp:
                    ind_to_be_delted.append(ind)
            factor_data = factor_data.drop(ind_to_be_delted, axis=0)
            # Sort by factor value descending and keep the top 10% of stocks.
            factor_data = factor_data.sort_values(by='factorvalue', ascending=False)
            factor_data = factor_data.iloc[:int(len(factor_data) * 0.1)]
            # Record buy prices (adjusted VWAP) and value the new portfolio
            # at today's close.
            factor_data['buyprice'] = 0.0
            interval_ret = 0.0
            for ind, factor_info in factor_data.iterrows():
                daily_mkt = Utils.get_secu_daily_mkt(factor_info.id, trading_day,
                                                     fq=True, range_lookup=False)
                assert len(daily_mkt) > 0
                factor_data.loc[ind, 'buyprice'] = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                interval_ret += daily_mkt.close / factor_data.loc[ind, 'buyprice'] - 1.0
            interval_ret /= float(len(factor_data))
            nav *= (1.0 + interval_ret)
            # Persist the rebalanced holdings.
            if pure_factor:
                port_data_path = os.path.join(
                    factor_ct.FACTOR_DB.db_path,
                    factor_ct.APM_CT.pure_backtest_path,
                    'port_data_%s.csv' % Utils.datetimelike_to_str(trading_day, False))
            else:
                port_data_path = os.path.join(
                    factor_ct.FACTOR_DB.db_path,
                    factor_ct.APM_CT.backtest_path,
                    'port_data_%s.csv' % Utils.datetimelike_to_str(trading_day, False))
            factor_data.to_csv(port_data_path, index=False)
        else:
            # Not a rebalance day: mark the portfolio to market at today's close.
            logging.info('[%s] 月中估值.' % Utils.datetimelike_to_str(trading_day, True))
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id, trading_day,
                                                         fq=True, range_lookup=True)
                    interval_ret += daily_mkt.close / factor_info.buyprice - 1.0
                interval_ret /= float(len(factor_data))
                nav *= (1.0 + interval_ret)
        # Append today's NAV.
        port_nav = port_nav.append(Series({
            'date': trading_day.strftime('%Y-%m-%d'),
            'nav': nav
        }), ignore_index=True)
        # Advance prev_trading_day.
        prev_trading_day = trading_day
    # Persist the NAV series.
    if pure_factor:
        port_nav_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                     factor_ct.APM_CT.pure_backtest_path,
                                     'port_nav.csv')
    else:
        port_nav_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                     factor_ct.APM_CT.backtest_path,
                                     'port_nav.csv')
    port_nav.to_csv(port_nav_path, index=False)
def _calc_synthetic_factor_loading(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
    """Calculate (and optionally persist) a synthetic factor's loading.

    Parameters
    --------
    :param start_date: datetime-like, str
        Start date
    :param end_date: datetime-like, str, default None
        End date; when None only start_date is computed
    :param month_end: bool, default True
        Only compute month-end loadings; effective only when end_date is not
        None, and the first day's loading is computed regardless
    :param save: bool, default False
        Whether to persist loadings to the factor database
    :param kwargs:
        'multi_proc': bool, True = multiprocessing, False = single process, default False
        'com_factors': list, class instances of the component factors
    :return: DataFrame
    --------
        Factor loading
        0: ID, security ID, as index
        1: factorvalue, factor loading
    """
    # Build the trading-day sequence to iterate over.
    start_date = Utils.to_date(start_date)
    if end_date is not None:
        end_date = Utils.to_date(end_date)
        trading_days_series = Utils.get_trading_days(start=start_date, end=end_date)
    else:
        trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)
    # Iterate trading days; compute each component's loading, then synthesize.
    if 'multi_proc' not in kwargs:
        kwargs['multi_proc'] = False
    for calc_date in trading_days_series:
        if month_end and (not Utils.is_month_end(calc_date)):
            continue
        # 1. Compute the loading of every component factor (instances supplied
        # by the caller via kwargs['com_factors']).
        # for com_factor in eval('risk_ct.' + cls.__name__.upper() + '_CT')['component']:
        #     factor = eval(com_factor + '()')
        #     factor.calc_factor_loading(start_date=calc_date, end_date=None, month_end=month_end, save=save, multi_proc=kwargs['multi_proc'])
        for com_factor in kwargs['com_factors']:
            com_factor.calc_factor_loading(start_date=calc_date, end_date=None,
                                           month_end=month_end, save=save,
                                           multi_proc=kwargs['multi_proc'])
        # 2. Synthesize the composite factor from the component loadings.
        synthetic_factor = pd.DataFrame()
        df_industry_classify = Utils.get_industry_classify()  # industry classification per stock
        # NOTE(review): eval() resolves constants named after the class; names
        # come from internal config (risk_ct), not external input.
        for com_factor in eval('risk_ct.' + cls.__name__.upper() + '_CT')['component']:
            factor_path = os.path.join(
                factor_ct.FACTOR_DB.db_path,
                eval('risk_ct.' + com_factor + '_CT')['db_file'])
            factor_loading = Utils.read_factor_loading(
                factor_path, Utils.datetimelike_to_str(calc_date, dash=False))
            factor_loading.drop(columns='date', inplace=True)
            factor_loading.rename(columns={'factorvalue': com_factor}, inplace=True)
            # Attach the industry classification.
            factor_loading = pd.merge(
                left=factor_loading,
                right=df_industry_classify[['id', 'ind_code']],
                how='inner', on='id')
            # Split off the rows whose component value is missing.
            missingdata_factor = factor_loading[factor_loading[com_factor].isna()]
            # Drop the missing rows from factor_loading.
            factor_loading.dropna(axis='index', how='any', inplace=True)
            # Winsorize + standardize (cap-weighted) the component loading.
            factor_loading = Utils.normalize_data(factor_loading, id='id',
                                                  columns=com_factor,
                                                  treat_outlier=True,
                                                  weight='cap',
                                                  calc_date=calc_date)
            # Replace missing values with the industry mean of the component.
            ind_codes = set(missingdata_factor['ind_code'])
            ind_mean_factor = {}
            for ind_code in ind_codes:
                ind_mean_factor[ind_code] = factor_loading[
                    factor_loading['ind_code'] == ind_code][com_factor].mean()
            for idx, missingdata in missingdata_factor.iterrows():
                missingdata_factor.loc[idx, com_factor] = ind_mean_factor[
                    missingdata['ind_code']]
            # Re-attach the imputed rows.
            factor_loading = pd.concat([factor_loading, missingdata_factor])
            # Drop the ind_code helper column.
            factor_loading.drop(columns='ind_code', inplace=True)
            # Merge this component into the composite frame.
            if synthetic_factor.empty:
                synthetic_factor = factor_loading
            else:
                synthetic_factor = pd.merge(left=synthetic_factor,
                                            right=factor_loading,
                                            how='inner', on='id')
        # Weighted sum of the components gives the composite loading.
        synthetic_factor.set_index('id', inplace=True)
        weight = pd.Series(
            eval('risk_ct.' + cls.__name__.upper() + '_CT')['weight'])
        synthetic_factor = (synthetic_factor * weight).sum(axis=1)
        synthetic_factor.name = 'factorvalue'
        synthetic_factor.index.name = 'id'
        synthetic_factor = pd.DataFrame(synthetic_factor)
        synthetic_factor.reset_index(inplace=True)
        # Loadings are stamped with the NEXT trading day after calc_date.
        synthetic_factor['date'] = Utils.get_trading_days(start=calc_date, ndays=2)[1]
        # Persist the composite loading.
        if save:
            Utils.factor_loading_persistent(
                cls._db_file,
                Utils.datetimelike_to_str(calc_date, dash=False),
                synthetic_factor.to_dict('list'), ['date', 'id', 'factorvalue'])
def get_dependent_factors(cls, date):
    """Build the dependent-factor matrix used for factor purification.

    The matrix contains industry dummies (28 SWS level-1 industries) plus
    style factors: scale, value, growth, short-term momentum and long-term
    momentum.

    Parameters
    ----------
    date : datetime-like or str
        Calculation date.

    Returns
    -------
    pd.DataFrame
        Indexed by security id, columns = [industry dummies, 'scale',
        'value', 'growth', 'short_mom', 'long_mom'].
    """
    trade_date = Utils.to_date(date).strftime('%Y%m%d')

    def _load(db_file):
        # Read one factor family's raw loadings for trade_date (NaN -> 0).
        return Utils.read_factor_loading(
            os.path.join(factor_ct.FACTOR_DB.db_path, db_file),
            trade_date, nan_value=0)

    def _collapse(df_raw, columns):
        # Winsorize + standardize the given columns, then reduce the family
        # to a single loading per security via the row mean.
        arr = Utils.normalize_data(
            Utils.clean_extreme_value(np.array(df_raw[columns])))
        return Series(np.mean(arr, axis=1), index=df_raw['id'])

    # 1. Industry dummies from the industry-classification table.
    df_industry = Utils.get_industry_classify().set_index('id')
    industry_dummies = pd.get_dummies(df_industry['ind_code'])

    # 2. Scale factor.
    scale_factor = _collapse(_load(factor_ct.SCALE_CT.db_file),
                             ['LnLiquidMktCap', 'LnTotalMktCap'])

    # 3. Value factor.
    value_factor = _collapse(_load(factor_ct.VALUE_CT.db_file),
                             ['ep_ttm', 'bp_lr', 'ocf_ttm'])

    # 4. Growth factor.
    growth_factor = _collapse(_load(factor_ct.GROWTH_CT.db_file),
                              ['npg_ttm', 'opg_ttm'])

    # 5. Momentum factors: short- and long-horizon columns come from the
    #    same loading file; column names are derived from the configured
    #    '|'-separated day windows.
    df_momentum = _load(factor_ct.MOMENTUM_CT.db_file)
    short_cols = ['short_term_' + d
                  for d in factor_ct.MOMENTUM_CT.short_term_days.split('|')]
    long_cols = ['long_term_' + d
                 for d in factor_ct.MOMENTUM_CT.long_term_days.split('|')]
    short_mom = _collapse(df_momentum, short_cols)
    long_mom = _collapse(df_momentum, long_cols)

    # Join the style factors on their common security ids, then attach the
    # industry dummies (inner join keeps only ids present everywhere).
    df_style = pd.concat(
        [scale_factor, value_factor, growth_factor, short_mom, long_mom],
        axis=1,
        keys=['scale', 'value', 'growth', 'short_mom', 'long_mom'],
        join='inner')
    return pd.concat([industry_dummies, df_style], axis=1, join='inner')
def calc_factor_loading(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
    """Calculate intraday momentum factor loadings for sample stocks and
    optionally persist them to the factor database.

    Parameters
    ----------
    start_date : datetime-like or str
        Start date, format YYYY-MM-DD or YYYYMMDD.
    end_date : datetime-like or str, optional
        End date; if None, only start_date is calculated.
    month_end : bool, default True
        If True, only month-end trading days are calculated.
    save : bool, default False
        Whether to persist loadings to the factor database.
    kwargs['synthetic_factor'] : bool, default False
        If True, synthesize the composite intraday momentum factor from the
        previously stored per-period loadings instead of recomputing them.

    Returns
    -------
    dict or None
        Factor loadings with keys:
        date, id, m0 (overnight), m1..m4 (hourly sessions), m_normal
        (conventional momentum) — or {date, id, factorvalue} in the
        synthetic case. None when prerequisite data is missing.
    """
    # Resolve the trading-day series to iterate over.
    start_date = Utils.to_date(start_date)
    if end_date is not None:
        end_date = Utils.to_date(end_date)
        trading_days_series = Utils.get_trading_days(start=start_date, end=end_date)
    else:
        trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)
    all_stock_basics = CDataHandler.DataApi.get_secu_basics()

    dict_intraday_momentum = None
    for calc_date in trading_days_series:
        if month_end and (not Utils.is_month_end(calc_date)):
            continue
        logging.info('[%s] calc synthetic intraday momentum factor loading.'
                     % Utils.datetimelike_to_str(calc_date))

        if 'synthetic_factor' in kwargs and kwargs['synthetic_factor']:
            # --- Synthesize the composite factor from stored per-period loadings.
            dict_intraday_momentum = {'date': [], 'id': [], 'factorvalue': []}
            df_factor_loading = Utils.read_factor_loading(
                cls._db_file, Utils.datetimelike_to_str(calc_date, False))
            if df_factor_loading.shape[0] <= 0:
                logging.info("[%s] It doesn't exist intraday momentum factor loading."
                             % Utils.datetimelike_to_str(calc_date))
                return
            df_factor_loading.fillna(0, inplace=True)
            # Optimal per-period weights for this date.
            factor_weight = cls.get_factor_weight(calc_date)
            if factor_weight is None:
                # BUG FIX: the original logged a bare '%s' placeholder with no
                # argument, producing the literal text '[%s]' in the log.
                logging.info("[%s] It doesn't exist factor weight."
                             % Utils.datetimelike_to_str(calc_date))
                return
            # Composite = per-period loadings (n x 5) dot weights (5 x 1).
            arr_factor_loading = np.array(
                df_factor_loading[['m0', 'm1', 'm2', 'm3', 'm4']])
            arr_factor_weight = np.array(factor_weight.drop('date')).reshape((5, 1))
            arr_synthetic_factor = np.dot(arr_factor_loading, arr_factor_weight)
            dict_intraday_momentum['date'] = list(df_factor_loading['date'])
            dict_intraday_momentum['id'] = list(df_factor_loading['id'])
            dict_intraday_momentum['factorvalue'] = list(
                arr_synthetic_factor.astype(float).round(6).reshape(
                    (arr_synthetic_factor.shape[0],)))
            if save:
                synthetic_db_file = os.path.join(
                    factor_ct.FACTOR_DB.db_path,
                    factor_ct.INTRADAYMOMENTUM_CT.synthetic_db_file)
                Utils.factor_loading_persistent(
                    synthetic_db_file,
                    Utils.datetimelike_to_str(calc_date, False),
                    dict_intraday_momentum)
        else:
            # --- Compute per-period intraday momentum per stock.
            dict_intraday_momentum = {'date': [], 'id': [], 'm0': [], 'm1': [],
                                      'm2': [], 'm3': [], 'm4': [], 'm_normal': []}
            # Only stocks listed for at least ~90 days before calc_date.
            s = (calc_date - datetime.timedelta(days=90)).strftime('%Y%m%d')
            stock_basics = all_stock_basics[all_stock_basics.list_date < s]
            # Parallel computation: worker processes push per-stock results
            # onto a shared queue; pool capped at 4 concurrent processes.
            q = Manager().Queue()
            p = Pool(4)
            for _, stock_info in stock_basics.iterrows():
                p.apply_async(cls._calc_factor_loading_proc,
                              args=(stock_info.symbol, calc_date, q,))
            p.close()
            p.join()
            # Drain the queue: each item is
            # (id, m0, m1, m2, m3, m4, m_normal).
            while not q.empty():
                momentum_data = q.get(True)
                dict_intraday_momentum['id'].append(momentum_data[0])
                for col, val in zip(['m0', 'm1', 'm2', 'm3', 'm4', 'm_normal'],
                                    momentum_data[1:7]):
                    dict_intraday_momentum[col].append(round(val, 6))
            # Loadings are stamped with the NEXT trading day.
            date_label = Utils.get_trading_days(calc_date, ndays=2)[1]
            dict_intraday_momentum['date'] = \
                [date_label] * len(dict_intraday_momentum['id'])
            if save:
                Utils.factor_loading_persistent(
                    cls._db_file, calc_date.strftime('%Y%m%d'),
                    dict_intraday_momentum)
            # Throttle between dates to avoid hammering the data source.
            # NOTE(review): placement inside the per-stock branch assumed
            # from the original layout — confirm against version history.
            logging.info('Suspending for 360s.')
            time.sleep(360)
    return dict_intraday_momentum
def _calc_Orthogonalized_factorloading(factor_name, start_date, end_date=None, month_end=True, save=False):
    """Calculate an alpha factor's loadings after orthogonalization.

    The target factor's standardized loadings are regressed cross-sectionally
    on the risk model's style-factor loadings plus the (already
    orthogonalized) loadings of alpha factors that precede it in
    alphafactor_ct.ALPHA_FACTORS; the OLS residuals are the orthogonalized
    loadings.

    Parameters
    ----------
    factor_name : str
        Alpha factor name, e.g. 'SmartMoney'.
    start_date : datetime-like or str
        Start date, YYYY-MM-DD or YYYYMMDD.
    end_date : datetime-like or str, optional
        End date; if None, only start_date is calculated.
    month_end : bool, default True
        If True, only month-end trading days are calculated.
    save : bool, default False
        Whether to persist the result.

    Returns
    -------
    dict
        {'date': [...], 'id': [...], 'factorvalue': [...]} for the last
        calculated date (date is the next trading day after calc_date);
        an empty dict if no date was calculated.
    """
    start_date = Utils.to_date(start_date)
    if end_date is not None:
        end_date = Utils.to_date(end_date)
        trading_days_series = Utils.get_trading_days(start=start_date, end=end_date)
    else:
        trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)
    CRiskModel = Barra()
    orthog_factorloading = {}
    for calc_date in trading_days_series:
        if month_end and (not Utils.is_month_end(calc_date)):
            continue
        # Target factor: standardized raw loadings.
        # NOTE: getattr replaces the original eval() on a constructed
        # attribute path — same lookup, no code-execution surface.
        target_factor_path = os.path.join(
            SETTINGS.FACTOR_DB_PATH,
            getattr(alphafactor_ct, factor_name.upper() + '_CT')['db_file'],
            'standardized', factor_name)
        df_targetfactor_loading = Utils.read_factor_loading(
            target_factor_path,
            Utils.datetimelike_to_str(calc_date, dash=False), drop_na=True)
        df_targetfactor_loading.drop(columns='date', inplace=True)
        df_targetfactor_loading.rename(columns={'factorvalue': factor_name},
                                       inplace=True)
        # Style-factor loading matrix from the risk model.
        df_stylefactor_loading = CRiskModel.get_StyleFactorloading_matrix(calc_date)
        # BUG FIX: original called the non-existent method 'renmae', which
        # would raise AttributeError at runtime.
        df_stylefactor_loading.rename(columns={'code': 'id'}, inplace=True)
        # Orthogonalized loadings of alpha factors listed BEFORE factor_name.
        df_alphafactor_loading = pd.DataFrame()
        for alphafactor_name in alphafactor_ct.ALPHA_FACTORS:
            if alphafactor_name == factor_name:
                break
            factorloading_path = os.path.join(
                SETTINGS.FACTOR_DB_PATH,
                getattr(alphafactor_ct, alphafactor_name.upper() + '_CT')['db_file'],
                'orthogonalized', alphafactor_name)
            factor_loading = Utils.read_factor_loading(
                factorloading_path,
                Utils.datetimelike_to_str(calc_date, dash=False), drop_na=True)
            factor_loading.drop(columns='date', inplace=True)
            factor_loading.rename(columns={'factorvalue': alphafactor_name},
                                  inplace=True)
            if df_alphafactor_loading.empty:
                df_alphafactor_loading = factor_loading
            else:
                df_alphafactor_loading = pd.merge(
                    left=df_alphafactor_loading, right=factor_loading,
                    how='inner', on='id')
        # Merge target + style (+ preceding alpha) loadings on security id.
        df_factorloading = pd.merge(left=df_targetfactor_loading,
                                    right=df_stylefactor_loading,
                                    how='inner', on='id')
        if not df_alphafactor_loading.empty:
            # BUG FIX: original merged df_stylefactor_loading (not the
            # already-merged df_factorloading) with the alpha loadings,
            # dropping the target factor column and causing a KeyError below.
            df_factorloading = pd.merge(left=df_factorloading,
                                        right=df_alphafactor_loading,
                                        how='inner', on='id')
        # Build the regression: target loading vector vs. style+alpha matrix.
        df_factorloading.set_index('id', inplace=True)
        arr_targetfactor_loading = np.array(df_factorloading[factor_name])
        stylealphafactor_names = df_factorloading.columns.tolist()
        stylealphafactor_names.remove(factor_name)
        arr_stylealphafactor_loading = np.array(
            df_factorloading[stylealphafactor_names])
        # Cross-sectional OLS; residuals are the orthogonalized loadings.
        Y = arr_targetfactor_loading
        X = sm.add_constant(arr_stylealphafactor_loading)
        model = sm.OLS(Y, X)
        results = model.fit()
        # Stamp results with the NEXT trading day.
        datelabel = Utils.get_trading_days(start=calc_date, ndays=2)[1]
        orthog_factorloading = {'date': [datelabel] * len(results.resid),
                                'id': df_factorloading.index.tolist(),
                                'factorvalue': results.resid}
        if save:
            orthog_factorloading_path = os.path.join(
                SETTINGS.FACTOR_DB_PATH,
                getattr(alphafactor_ct, factor_name.upper() + '_CT')['db_file'],
                'orthogonalized', factor_name)
            Utils.factor_loading_persistent(
                orthog_factorloading_path,
                Utils.datetimelike_to_str(calc_date, dash=False),
                orthog_factorloading, ['date', 'id', 'factorvalue'])
    return orthog_factorloading