def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the STOM factor loading of one stock on one date.

    STOM is ``log(sum(turnover1) / months)`` where the sum runs over the most
    recent ``month_days * months`` rows of the daily quotes (both constants
    come from ``risk_ct.STOM_CT``). When fewer rows are available, all of them
    are used.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like or str
        Calculation date, format: YYYY-MM-DD
    :return: pd.Series
    --------
        STOM factor loading of the stock
        0. code
        1. stom
        Returns None if the loading cannot be computed: no market data, or a
        turnover sum that is not strictly positive (its log is undefined).
    """
    # Unadjusted daily quotes of the trailing 252 trading days.
    df_mkt_data = Utils.get_secu_daily_mkt(code, end=calc_date, ndays=252, fq=False)
    if df_mkt_data is None or df_mkt_data.empty:
        return None

    months = risk_ct.STOM_CT.months
    days = risk_ct.STOM_CT.month_days * months
    # A negative-start slice already returns the whole frame when it holds
    # fewer than `days` rows, so no explicit length branch is required.
    turnover_sum = df_mkt_data.iloc[-days:]['turnover1'].sum()
    # math.log raises ValueError for 0 (e.g. a stock with no turnover in the
    # window); report that as a failed calculation instead. `not x > 0` also
    # rejects NaN.
    if not turnover_sum > 0:
        return None

    stom = math.log(turnover_sum / months)
    return pd.Series([Utils.code_to_symbol(code), stom], index=['code', 'stom'])
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the DTOA (total debt / total assets) factor loading of one stock.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format: YYYY-MM-DD
    :return: pd.Series
    --------
        DTOA factor loading of the stock
        0. code
        1. dtoa
        Returns None if the calculation fails (no financial data, NaN inputs,
        or total assets smaller in magnitude than utils_con.TINY_ABS_VALUE).
    """
    symbol = Utils.code_to_symbol(code)

    # Latest main financial indicators for the report period of calc_date.
    fin_data = Utils.get_fin_basic_data(symbol, Utils.get_fin_report_date(calc_date))
    if fin_data is None:
        return None

    # Total liabilities.
    total_debt = fin_data['TotalLiability']
    if np.isnan(total_debt):
        return None

    # Total assets; also reject a denominator that is effectively zero.
    total_asset = fin_data['TotalAsset']
    if np.isnan(total_asset) or abs(total_asset) < utils_con.TINY_ABS_VALUE:
        return None

    return pd.Series([symbol, total_debt / total_asset], index=['code', 'dtoa'])
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the BETA and HSIGMA factor loadings of one stock on one date.

    Daily simple returns of the stock are regressed on the benchmark's daily
    returns (with an intercept) by weighted least squares, using
    exponentially decaying weights with half-life
    ``risk_ct.BETA_CT.half_life``. BETA is the slope; HSIGMA is the square
    root of the residual mean squared error.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. 600000 or SH600000
    :param calc_date: datetime-like, str
        Calculation date, format YYYY-MM-DD
    :return: pd.Series
    --------
        0. code: stock code
        1. beta: BETA factor loading
        2. hsigma: HSIGMA factor loading
        Returns None if the calculation fails (missing quotes, or fewer than
        three common return observations).
    """
    # Adjusted quotes of the stock (trailing window + 1 rows -> `trailing` returns).
    df_secu_quote = Utils.get_secu_daily_mkt(
        code, end=calc_date, ndays=risk_ct.BETA_CT.trailing + 1, fq=True)
    if df_secu_quote is None or df_secu_quote.empty:
        return None

    # Adjusted quotes of the benchmark.
    df_benchmark_quote = Utils.get_secu_daily_mkt(
        risk_ct.BETA_CT.benchmark, end=calc_date, fq=True)
    if df_benchmark_quote is None or df_benchmark_quote.empty:
        return None

    # Keep only dates present in BOTH series. Filtering just the benchmark
    # leaves the two arrays with different lengths whenever the benchmark
    # lacks one of the stock's dates, which makes the regression raise.
    # An inner merge keeps the row order of the left (stock) frame.
    df_quotes = pd.merge(df_secu_quote[['date', 'close']],
                         df_benchmark_quote[['date', 'close']],
                         on='date', how='inner',
                         suffixes=('_secu', '_benchmark'))
    # Two parameters are estimated, so at least three returns (four prices)
    # are needed for a defined residual variance.
    if len(df_quotes) < 4:
        return None

    # Daily simple returns of the stock and the benchmark.
    arr_secu_close = np.array(df_quotes['close_secu'])
    arr_benchmark_close = np.array(df_quotes['close_benchmark'])
    arr_secu_daily_ret = arr_secu_close[1:] / arr_secu_close[:-1] - 1.
    arr_benchmark_daily_ret = arr_benchmark_close[1:] / arr_benchmark_close[:-1] - 1.

    # Exponentially weighted moving-average weights: the exponent counts
    # down from T-1 (oldest observation) to 0 (newest).
    T = len(arr_benchmark_daily_ret)
    time_spans = list(range(T - 1, -1, -1))
    alpha = 1 - np.exp(np.log(0.5) / risk_ct.BETA_CT.half_life)
    y = [1] + [alpha] * (T - 1)
    weights = np.float_power([1 - alpha] * T, time_spans) * y

    # Weighted least squares: params[0] is the intercept, params[1] the slope.
    exog = sm.add_constant(arr_benchmark_daily_ret)
    result = sm.WLS(arr_secu_daily_ret, exog, weights=weights).fit()
    beta = result.params[1]
    hsigma = np.sqrt(result.mse_resid)
    return pd.Series([Utils.code_to_symbol(code), beta, hsigma],
                     index=['code', 'beta', 'hsigma'])
def load_ipo_info():
    """
    Download per-stock IPO information from NetEase Finance.

    Reads the URL template and target directory from ``config.ini``, then for
    every stock returned by ``Utils.get_stock_basics(all=True)`` that has no
    ``<symbol>.csv`` yet:
      * downloads the page and locates the table following the ``IPO资料`` heading,
      * stores the name/value pairs as ``<symbol>.csv``,
      * appends the same record (without header) to ``ipo_info.csv``.

    A stock whose download fails or whose page has no IPO table is skipped
    with a message; the remaining stocks are still processed.

    :return: None
    """
    cfg = ConfigParser()
    cfg.read('config.ini')
    ipo_info_url = cfg.get('ipo_info', 'ipo_info_url')
    db_path = Path(cfg.get('factor_db', 'db_path'), cfg.get('ipo_info', 'db_path'))

    # All listed stocks.
    df_stock_basics = Utils.get_stock_basics(all=True)

    # Records downloaded in this run; written to the summary file at the end.
    ipo_records = []
    for _, stock_info in df_stock_basics.iterrows():
        # Skip stocks whose IPO data was already downloaded.
        if db_path.joinpath('%s.csv' % stock_info.symbol).exists():
            continue
        print('下载%s的IPO数据.' % stock_info.symbol)

        secu_code = Utils.code_to_symbol(stock_info.symbol)
        url = ipo_info_url % stock_info.symbol[2:]
        try:
            # A timeout keeps one unresponsive request from hanging the whole run.
            resp = requests.get(url, timeout=30)
            resp.raise_for_status()
        except requests.RequestException as e:
            print('%s的IPO数据下载失败: %s' % (stock_info.symbol, e))
            continue

        soup = BeautifulSoup(resp.content, 'html.parser')
        for tag in soup.find_all(name='h2'):
            if tag.get_text().strip() != 'IPO资料':
                continue
            ipo_table = tag.find_next(name='table')
            if ipo_table is None:
                break

            ipo_info_header = []
            ipo_info_data = []
            for tr in ipo_table.find_all(name='tr'):
                tds = tr.find_all(name='td')
                # Rows without a name/value pair (e.g. header rows) carry no data.
                if len(tds) < 2:
                    continue
                name = tds[0].get_text().replace(' ', '').replace(
                    '\n', '').replace('\r', '')
                value = tds[1].get_text().replace(' ', '').replace(
                    ',', '').replace('\n', '').replace('\r', '')
                ipo_info_header.append(name)
                ipo_info_data.append(value)

            ipo_info = Series(ipo_info_data, index=ipo_info_header)
            ipo_info['代码'] = secu_code
            ipo_info.to_csv(db_path.joinpath('%s.csv' % secu_code))
            ipo_records.append(ipo_info)
            break

    if ipo_records:
        # Build the frame once from the collected records; DataFrame.append
        # no longer exists in pandas 2.x.
        # NOTE(review): column order follows the records' index labels;
        # confirm it matches the existing ipo_info.csv layout if pages differ.
        DataFrame(ipo_records).to_csv(db_path.joinpath('ipo_info.csv'),
                                      index=False, mode='a', header=False)
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the BTOP (book-to-price) factor loading of one stock on one date.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format: YYYY-MM-DD
    :return: pd.Series
    --------
        BTOP factor loading of the stock
        0. code
        1. btop
        Returns None if the calculation fails.
    """
    # Financial data of the stock for the report period of calc_date.
    fin_data = Utils.get_fin_basic_data(code, Utils.get_fin_report_date(calc_date))
    if fin_data is None:
        return None

    # LNCAP (log market cap) loadings of the date, read through the class cache.
    date_key = Utils.datetimelike_to_str(calc_date, dash=False)
    lncap_all = cls._LNCAP_Cache.get(date_key)
    if lncap_all is None:
        lncap_file = os.path.join(factor_ct.FACTOR_DB.db_path, risk_ct.LNCAP_CT.db_file)
        lncap_all = Utils.read_factor_loading(lncap_file, date_key)
        cls._LNCAP_Cache.set(date_key, lncap_all)

    symbol = Utils.code_to_symbol(code)
    matched = lncap_all[lncap_all['id'] == symbol]
    if matched.empty:
        return None
    ln_cap = matched.iloc[0]['factorvalue']

    # Book-to-price = net assets / market cap; exp() undoes the log of LNCAP.
    net_asset = fin_data['TotalAsset'] - fin_data['TotalLiability']
    btop = net_asset * 10000 / np.exp(ln_cap)
    return pd.Series([symbol, btop], index=['code', 'btop'])
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the LIQUIDITY factor loading of one stock on one date.

    Three log-turnover measures are built from the 'turnover1' column of the
    unadjusted daily quotes and combined with the weights configured in
    ``risk_ct.LIQUID_CT``:
      * stom = log(sum over the last stom_days rows)
      * stoq = log(sum over the last stom_days*stoq_months rows / stoq_months)
      * stoa = log(sum over all fetched rows / stoa_months)
    When fewer rows than a window are available, all rows are used.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format: YYYY-MM-DD
    :return: pd.Series
    --------
        LIQUIDITY factor loading of the stock
        0. code
        1. stom  monthly turnover
        2. stoq  quarterly turnover
        3. stoa  annual turnover
        4. liquidity
        Returns None if the calculation fails: no market data, or any of the
        turnover sums is not strictly positive (its log is undefined).
    """
    stom_days = risk_ct.LIQUID_CT.stom_days
    stoq_months = risk_ct.LIQUID_CT.stoq_months
    stoa_months = risk_ct.LIQUID_CT.stoa_months

    # Unadjusted daily quotes covering the annual window.
    df_mkt_data = Utils.get_secu_daily_mkt(code, end=calc_date,
                                           ndays=stoa_months * stom_days, fq=False)
    if df_mkt_data is None or df_mkt_data.empty:
        return None

    turnover = df_mkt_data['turnover1']
    # A negative-start slice returns the whole series when it is shorter
    # than the window, so no explicit length branches are needed.
    stom_sum = turnover.iloc[-stom_days:].sum()
    stoq_sum = turnover.iloc[-stom_days * stoq_months:].sum()
    stoa_sum = turnover.sum()
    # math.log raises ValueError for 0 (e.g. a stock with no turnover in the
    # last month); treat that as a failed calculation. `x > 0` is also False
    # for NaN.
    if not (stom_sum > 0 and stoq_sum > 0 and stoa_sum > 0):
        return None

    stom = math.log(stom_sum)
    stoq = math.log(stoq_sum / stoq_months)
    stoa = math.log(stoa_sum / stoa_months)

    # Weighted combination of the three measures.
    liquidity = (risk_ct.LIQUID_CT.stom_weight * stom
                 + risk_ct.LIQUID_CT.stoq_weight * stoq
                 + risk_ct.LIQUID_CT.stoa_weight * stoa)
    return pd.Series(
        [Utils.code_to_symbol(code), stom, stoq, stoa, liquidity],
        index=['code', 'stom', 'stoq', 'stoa', 'liquidity'])
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the chip (holding-cost) distribution of one stock up to a date.

    Each trading day contributes its turnover, discounted by the share of
    chips that later days have not yet turned over; the IPO price is added
    as the oldest price level. Contributions are summed per price.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. 600000 or SH600000
    :param calc_date: datetime-like, str
        Calculation date, format YYYY-MM-DD
    :return: tuple(code, close, cyq_data)
    --------
        1. code
        2. close: float
            Close of the stock on the last available row up to calc_date
        3. cyq_data: pd.Series
            Chip distribution from the IPO up to calc_date; the index holds
            the chip prices, the values the holding ratio at each price
            (ratios not above 0.00001 are dropped)
        Returns None if the calculation fails (no IPO data or price, no
        market data, or fewer than 30 distinct chip prices).
    """
    # IPO data of the stock; the last character of the price string is dropped.
    ipo_data = Utils.get_ipo_info(code)
    if ipo_data is None:
        return None
    ipo_price_text = ipo_data['发行价格'][:-1]
    if ipo_price_text == '--':
        return None
    ipo_price = float(ipo_price_text)

    # Adjusted daily quotes since listing.
    mkt_data = Utils.get_secu_daily_mkt(code, end=calc_date, fq=True)
    if mkt_data is None or mkt_data.empty:
        return None
    secu_close = mkt_data.iloc[-1]['close']

    # Work on a private copy so the frame handed out by the data layer is
    # not modified by the column additions and re-sorting below.
    mkt_data = mkt_data.copy()
    # Daily average price.
    mkt_data['vwap'] = np.around(
        mkt_data['amount'] / mkt_data['vol'] * mkt_data['factor'], 2)
    mkt_data = mkt_data.dropna(axis=0, how='any')
    # Newest day first, so the cumulative product below runs back in time.
    mkt_data = mkt_data.sort_values(by='date', ascending=False)
    mkt_data = mkt_data.reset_index(drop=True)

    # Chip distribution; .copy() makes the column assignments act on an
    # independent frame rather than on a slice of mkt_data.
    cyq_data = mkt_data[['vwap', 'turnover1']].copy()
    # Add the IPO price as the last (oldest) row with zero turnover. Row
    # enlargement via .loc replaces DataFrame.append, which pandas 2.x removed.
    cyq_data.loc[len(cyq_data)] = [ipo_price, 0]
    cyq_data['minusTR'] = 1 - cyq_data['turnover1']
    cyq_data['cumprod_TR'] = cyq_data['minusTR'].cumprod().shift(1)
    cyq_data.loc[0, 'cumprod_TR'] = 1.
    cyq_data['cyq'] = cyq_data['turnover1'] * cyq_data['cumprod_TR']
    secu_cyq = cyq_data['cyq'].groupby(cyq_data['vwap']).sum()

    # Too few chip prices to be meaningful.
    if len(secu_cyq) < 30:
        return None
    secu_cyq = secu_cyq[secu_cyq.values > 0.00001]
    return (Utils.code_to_symbol(code), secu_close, secu_cyq)
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the value factors (ep_ttm, bp_lr, ocf_ttm) of one stock on one date.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. 600000 or SH600000
    :param calc_date: datetime-like or str
        Calculation date, format YYYY-MM-DD or YYYYMMDD
    :return: pd.Series
    --------
        Value factors, each rounded to 6 decimals
        0. ep_ttm: TTM net profit / total market cap
        1. bp_lr: shareholder equity (latest report) / total market cap
        2. ocf_ttm: TTM net operating cash flow / total market cap
        Returns None if the calculation fails (missing financial, market or
        capital-structure data, or a market cap that is not positive).
    """
    code = Utils.code_to_symbol(code)
    calc_date = Utils.to_date(calc_date)

    # TTM financial data.
    ttm_fin_data = Utils.get_ttm_fin_basic_data(code, calc_date)
    if ttm_fin_data is None:
        return None

    # Latest financial report data.
    report_date = Utils.get_fin_report_date(calc_date)
    fin_basic_data = Utils.get_fin_basic_data(code, report_date)
    if fin_basic_data is None:
        return None

    # Total market cap.
    mkt_daily = Utils.get_secu_daily_mkt(code, calc_date, fq=False, range_lookup=True)
    # Other callers of get_secu_daily_mkt in this code base check for None,
    # so guard against it before touching .shape.
    if mkt_daily is None or mkt_daily.shape[0] == 0:
        return None
    cap_struct = Utils.get_cap_struct(code, calc_date)
    if cap_struct is None:
        return None
    total_cap = cap_struct.total - cap_struct.liquid_b - cap_struct.liquid_h
    total_mkt_cap = total_cap * mkt_daily.close
    # A zero, negative or NaN market cap cannot serve as a denominator.
    if not total_mkt_cap > 0:
        return None

    # Value factors; financial amounts are scaled by the configured unit.
    unit = util_ct.FIN_DATA_AMOUNT_UNIT
    ep_ttm = ttm_fin_data['NetProfit'] * unit / total_mkt_cap
    ocf_ttm = ttm_fin_data['NetOperateCashFlow'] * unit / total_mkt_cap
    bp_lr = fin_basic_data['ShareHolderEquity'] * unit / total_mkt_cap
    return Series([round(ep_ttm, 6), round(bp_lr, 6), round(ocf_ttm, 6)],
                  index=['ep_ttm', 'bp_lr', 'ocf_ttm'])
def load_fin_data_cwbbzy():
    """
    Download the financial-statement summary of every listed company.

    Reads the URL template and target directory from ``config.ini``. For each
    stock returned by ``Utils.get_stock_basics(all=True)`` the comma-separated
    response is transposed (one source row becomes one column), sorted by the
    first column and written to ``<symbol>.csv`` in the target directory.

    A stock whose download fails, or whose response has no data, is skipped;
    the remaining stocks are still processed.

    :return: None
    """
    cfg = ConfigParser()
    cfg.read('config.ini')
    cwbbzy_url = cfg.get('fin_data', 'cwbbzy_url')
    cwbbzy_path = os.path.join(cfg.get('factor_db', 'db_path'),
                               cfg.get('fin_data', 'cwbbzy_path'))

    # Stock codes.
    df_stock_basics = Utils.get_stock_basics(all=True)

    # Download the summary of each stock.
    for _, stock_info in df_stock_basics.iterrows():
        url = cwbbzy_url % stock_info.symbol[-6:]
        try:
            # A timeout keeps one unresponsive request from hanging the whole run.
            resp = requests.get(url, timeout=30)
        except requests.RequestException:
            print('%s的财务报表摘要数据下载失败!' % stock_info.symbol)
            continue
        if resp.status_code != requests.codes.ok:
            print('%s的财务报表摘要数据下载失败!' % stock_info.symbol)
            continue
        print('下载%s的财务报表摘要数据.' % stock_info.symbol)

        fin_data = resp.text
        if '暂无数据' in fin_data:
            continue

        # Drop whatever follows the last comma (trailing residue of the
        # response). Only the tail is cut: str.replace on that fragment would
        # also delete any identical text earlier in the document.
        head, sep, _ = fin_data.rpartition(',')
        if not sep:
            # No comma at all: nothing that can be parsed as a table.
            continue
        fin_data = head + sep

        # Each row ends with a comma, so the last split element is dropped.
        # Blank rows would otherwise force the common length to 0 and break
        # the row-name lookup below.
        fin_datas = [row for row in
                     (line.split(',')[:-1] for line in fin_data.split('\r\n'))
                     if row]
        if not fin_datas:
            continue

        # Truncate every row to the shortest one so all columns align.
        n = min(len(row) for row in fin_datas)
        dict_fin_data = {row[0]: row[1:n] for row in fin_datas}
        fin_header = [row[0] for row in fin_datas]
        df_fin_data = DataFrame(dict_fin_data, columns=fin_header)
        df_fin_data = df_fin_data.sort_values(by=fin_header[0])
        df_fin_data.to_csv(os.path.join(
            cwbbzy_path, '%s.csv' % Utils.code_to_symbol(stock_info.symbol)),
            index=False)
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the RSTR factor loading of one stock on one date.

    RSTR is the exponentially weighted sum of daily log returns, after the
    most recent ``risk_ct.RSTR_CT.trailing_end`` rows have been discarded.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format: YYYY-MM-DD
    :return: pd.Series
    --------
        RSTR factor loading of the stock
        0. code
        1. rstr
        Returns None if the calculation fails (no quotes, or fewer rows than
        twice the half-life).
    """
    rstr_ct = risk_ct.RSTR_CT

    # Adjusted quotes of the stock.
    quotes = Utils.get_secu_daily_mkt(
        code, end=calc_date, ndays=rstr_ct.trailing_start + 1, fq=True)
    if quotes is None:
        return None
    if len(quotes) < rstr_ct.half_life * 2:
        return None

    # Drop the most recent trailing_end rows.
    quotes = quotes.head(len(quotes) - rstr_ct.trailing_end).reset_index(drop=True)

    # Daily log returns: each close against the previous row's close.
    closes = np.array(quotes['close'])
    log_returns = np.log(closes[1:] / closes[:-1])

    # Exponentially weighted moving-average weights: exponents count down
    # from n_obs-1 (oldest return) to 0 (newest return).
    n_obs = len(log_returns)
    alpha = 1 - np.exp(np.log(0.5) / rstr_ct.half_life)
    exponents = list(range(n_obs - 1, -1, -1))
    scale = [1] + [alpha] * (n_obs - 1)
    weights = np.float_power([1 - alpha] * n_obs, exponents) * scale

    rstr = np.sum(log_returns * weights)
    return pd.Series([Utils.code_to_symbol(code), rstr], index=['code', 'rstr'])
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the SGRO (sales growth) factor loading of one stock on one date.

    Main operating revenue of the past five years is regressed (OLS, with
    intercept) on the year index 1..5; SGRO is the slope divided by the mean
    revenue.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format: YYYY-MM-DD
    :return: pd.Series
    --------
        SGRO factor loading of the stock
        0. code
        1. sgro
        Returns None if the calculation fails (no financial history, NaN
        revenue, or a mean revenue that is effectively zero).
    """
    symbol = Utils.code_to_symbol(code)
    as_of = Utils.to_date(calc_date)

    # Main financial indicators of the past 5 years.
    n_years = 5
    history = _get_prevN_years_finbasicdata(as_of, symbol, n_years)
    if history is None:
        return None

    revenues = np.asarray([item['MainOperateRevenue'] for item in history])
    if np.any(np.isnan(revenues)):
        return None

    # Regress revenue on the year index; params[1] is the slope.
    design = sm.add_constant(np.arange(1, n_years + 1))
    slope = sm.OLS(revenues, design).fit().params[1]

    # Normalise the slope by the average revenue.
    mean_revenue = np.mean(revenues)
    if abs(mean_revenue) < utils_con.TINY_ABS_VALUE:
        return None

    return pd.Series([symbol, slope / mean_revenue], index=['code', 'sgro'])
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the EPFWD (forward earnings-to-price) factor loading of one stock.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format: YYYY-MM-DD
    :return: pd.Series
    --------
        EPFWD factor loading of the stock
        0. code
        1. epfwd
        Returns None if the calculation fails.
    """
    symbol = Utils.code_to_symbol(code)

    # Consensus predicted earnings; fall back to TTM net profit when absent.
    consensus = Utils.get_consensus_data(
        calc_date, symbol, ConsensusType.PredictedEarings)
    if consensus is None:
        ttm = Utils.get_ttm_fin_basic_data(symbol, calc_date)
        if ttm is None:
            return None
        consensus = pd.Series([symbol, ttm['NetProfit']],
                              index=['code', 'predicted_earnings'])

    earnings = consensus['predicted_earnings']
    if np.isnan(earnings):
        return None

    # Market cap of the stock, from the stored LNCAP (log market cap) loading.
    lncap_file = os.path.join(factor_ct.FACTOR_DB.db_path, risk_ct.LNCAP_CT.db_file)
    date_key = Utils.datetimelike_to_str(calc_date, dash=False)
    lncap = Utils.read_factor_loading(lncap_file, date_key, symbol)
    if lncap.empty:
        return None

    # epfwd = predicted earnings / market cap
    market_cap = np.exp(lncap['factorvalue'])
    epfwd = earnings * 10000.0 / market_cap
    return pd.Series([symbol, epfwd], index=['code', 'epfwd'])
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the MLEV (market leverage) factor loading of one stock on one date.

    mlev = (me + pe + ld) / me, with me the market cap, pe the book value of
    preferred equity (fixed at 0 here) and ld the book value of long-term debt.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format: YYYY-MM-DD
    :return: pd.Series
    --------
        MLEV factor loading of the stock
        0. code
        1. mlev
        Returns None if the calculation fails.
    """
    symbol = Utils.code_to_symbol(code)

    # Latest financial-statement summary of the stock.
    summary = Utils.get_fin_summary_data(symbol, Utils.get_fin_report_date(calc_date))
    if summary is None:
        return None

    # Long-term debt; total liabilities stand in when it is missing.
    long_debt = summary['TotalNonCurrentLiabilities']
    if np.isnan(long_debt):
        long_debt = summary['TotalLiabilities']
    if np.isnan(long_debt):
        return None
    long_debt = long_debt * 10000.0

    # Preferred equity book value, set to 0 for A shares.
    preferred = 0.0

    # Market cap of the stock, from the stored LNCAP (log market cap) loading.
    lncap_file = os.path.join(factor_ct.FACTOR_DB.db_path, risk_ct.LNCAP_CT.db_file)
    date_key = Utils.datetimelike_to_str(calc_date, dash=False)
    lncap = Utils.read_factor_loading(lncap_file, date_key, symbol)
    if lncap.empty:
        return None
    market_cap = np.exp(lncap['factorvalue'])

    mlev = (market_cap + preferred + long_debt) / market_cap
    return pd.Series([symbol, mlev], index=['code', 'mlev'])
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the DASTD factor loading of one stock on one date.

    DASTD is the square root of the exponentially weighted sum of squared
    deviations of daily log returns from their plain mean.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format: YYYY-MM-DD
    :return: pd.Series
    --------
        DASTD factor loading of the stock
        0. code
        1. dastd
        Returns None if the calculation fails (no quotes, or fewer rows than
        half of the trailing window).
    """
    dastd_ct = risk_ct.DASTD_CT

    # Adjusted quotes of the stock.
    df_secu_quote = Utils.get_secu_daily_mkt(
        code, end=calc_date, ndays=dastd_ct.trailing + 1, fq=True)
    if df_secu_quote is None:
        return None
    # Require at least half of the trailing window.
    if len(df_secu_quote) < int(dastd_ct.trailing / 2):
        return None
    df_secu_quote.reset_index(drop=True, inplace=True)

    # Daily log returns (each close against the previous row's close) and their mean.
    closes = np.array(df_secu_quote['close'])
    log_returns = np.log(closes[1:] / closes[:-1])
    mean_return = np.mean(log_returns)

    # Exponentially weighted moving-average weights: exponents count down
    # from n_obs-1 (oldest return) to 0 (newest return).
    n_obs = len(log_returns)
    alpha = 1 - np.exp(np.log(0.5) / dastd_ct.half_life)
    exponents = list(range(n_obs - 1, -1, -1))
    scale = [1] + [alpha] * (n_obs - 1)
    weights = np.float_power([1 - alpha] * n_obs, exponents) * scale

    squared_dev = (log_returns - mean_return)**2
    dastd = np.sqrt(np.sum(squared_dev * weights))
    return pd.Series([Utils.code_to_symbol(code), dastd], index=['code', 'dastd'])
def _calc_factor_loading_proc(cls, code, calc_date, q):
    """
    Worker used for parallel computation of the BETA factor loading.

    Always puts exactly one pd.Series (code, beta, hsigma) on the queue; when
    the calculation fails or raises, beta and hsigma are NaN.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. 600000 or SH600000
    :param calc_date: datetime-like or str
        Calculation date, format: YYYY-MM-DD
    :param q: queue used for inter-process communication
    :return: None; the factor loading is added to the queue
    """
    logging.debug('[%s] Calc BETA factor of %s.',
                  Utils.datetimelike_to_str(calc_date), code)
    beta_data = None
    try:
        beta_data = cls._calc_factor_loading(code, calc_date)
    except Exception:
        # Worker boundary: one failing stock must not abort the batch, but
        # the failure is about to be masked as NaN, so record the stock and
        # the full traceback rather than only the exception text.
        logging.exception('Calc BETA factor of %s failed.', code)
    if beta_data is None:
        beta_data = pd.Series([Utils.code_to_symbol(code), np.nan, np.nan],
                              index=['code', 'beta', 'hsigma'])
    q.put(beta_data)
def _calc_factor_loading_proc(cls, code, calc_date, q):
    """
    Worker used for parallel computation of the EPFWD factor loading.

    Always puts exactly one pd.Series (code, epfwd) on the queue; when the
    calculation fails or raises, epfwd is NaN.

    Parameters:
    ---------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format: YYYY-MM-DD
    :param q: queue used for inter-process communication
    :return: None; the factor loading is added to the queue
    """
    logging.debug('[%s] Calc EPFWD factor of %s.',
                  Utils.datetimelike_to_str(calc_date), code)
    epfwd_data = None
    try:
        epfwd_data = cls._calc_factor_loading(code, calc_date)
    except Exception:
        # Worker boundary: one failing stock must not abort the batch, but
        # the failure is about to be masked as NaN, so record the stock and
        # the full traceback rather than only the exception text.
        logging.exception('Calc EPFWD factor of %s failed.', code)
    if epfwd_data is None:
        epfwd_data = pd.Series([Utils.code_to_symbol(code), np.nan],
                               index=['code', 'epfwd'])
    q.put(epfwd_data)
def _calc_factor_loading_proc(cls, code, calc_date, q):
    """
    Worker used for parallel computation of the intraday momentum loading.

    Puts a tuple (symbol, m0, m1, m2, m3, m4, m_normal) on the queue when the
    calculation succeeds; puts nothing when it fails or raises.

    Parameters
    --------
    :param code: str
        Stock code, e.g. 600000 or SH600000
    :param calc_date: datetime-like or str
        Calculation date, format: YYYY-MM-DD
    :param q: queue used for inter-process communication
    :return: None; the factor loading is added to the queue
    """
    date_label = Utils.datetimelike_to_str(calc_date)
    logging.info('[%s] Calc Intaday Momentum of %s.' % (date_label, code))

    try:
        loading = cls._calc_factor_loading(code, calc_date)
    except Exception as err:
        # Best effort: report the error and contribute nothing for this stock.
        print(err)
        return
    if loading is None:
        return

    symbol = Utils.code_to_symbol(code)
    fields = ('m0', 'm1', 'm2', 'm3', 'm4', 'm_normal')
    q.put((symbol,) + tuple(getattr(loading, name) for name in fields))
def _calc_factor_loading_proc(cls, code, calc_date, q):
    """
    Worker used for parallel computation of the DASTD factor loading.

    Always puts exactly one pd.Series (code, dastd) on the queue; when the
    calculation fails or raises, dastd is NaN.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format: YYYY-MM-DD
    :param q: queue used for inter-process communication
    :return: None; the factor loading is added to the queue
    """
    logging.info('[%s] Calc DASTD factor of %s.',
                 Utils.datetimelike_to_str(calc_date), code)
    dastd_data = None
    try:
        dastd_data = cls._calc_factor_loading(code, calc_date)
    except Exception:
        # Worker boundary: one failing stock must not abort the batch, but
        # the failure is about to be masked as NaN, so record the stock and
        # the full traceback rather than only the exception text.
        logging.exception('Calc DASTD factor of %s failed.', code)
    if dastd_data is None:
        dastd_data = pd.Series([Utils.code_to_symbol(code), np.nan],
                               index=['code', 'dastd'])
    q.put(dastd_data)
def _calc_factor_loading_proc(cls, code, calc_date, q):
    """
    Worker used for parallel computation of the SmartQ factor loading.

    Puts a tuple (symbol, smart_q) on the queue when the calculation
    succeeds; puts nothing when it fails or raises.

    Parameters
    --------
    :param code: str
        Stock code, e.g. 600000 or SH600000
    :param calc_date: datetime-like or str
        Calculation date
    :param q: queue used for inter-process communication
    :return: None; the factor loading is added to the queue q
    """
    # calc_date may be a str (see above), which has no strftime(); format it
    # through the same helper the sibling workers use for their log label.
    logging.info('[%s] Calc SmartQ of %s.',
                 Utils.datetimelike_to_str(calc_date), code)
    smart_q = None
    try:
        smart_q = cls._calc_factor_loading(code, calc_date)
    except Exception:
        # Worker boundary: one failing stock must not abort the batch; keep
        # the stock code and the traceback so the failure can be diagnosed.
        logging.exception('Calc SmartQ of %s failed.', code)
    if smart_q is not None:
        q.put((Utils.code_to_symbol(code), smart_q))
def load_st_info():
    """
    Import the ST flagging / un-flagging periods of each stock.

    Reads ``st_info.csv`` from the configured raw-data directory. Its
    ``st_info`` column holds comma-separated ``type:date`` events, which are
    processed in reverse order of appearance. An event whose type is in
    ``st_start_types`` opens a period (if none is open); an event whose type
    is in ``st_end_types`` closes the open period. A period still open at the
    end is closed with the placeholder date '20301231'. The result
    (code, st_start, st_end) is written to ``st_info.csv`` in the configured
    factor-database directory.

    :return: None
    """
    cfg = ConfigParser()
    cfg.read('config.ini')
    factor_db_path = cfg.get('factor_db', 'db_path')
    raw_data_path = cfg.get('st_info', 'raw_data_path')
    st_info_path = cfg.get('st_info', 'st_info_path')
    st_start_types = cfg.get('st_info', 'st_start_types').split(',')
    st_end_types = cfg.get('st_info', 'st_end_types').split(',')

    raw_file = os.path.join(raw_data_path, 'st_info.csv')
    if not os.path.isfile(raw_file):
        print('\033[1;31;40mst_info.csv原始文件不存在.\033[0m')
        return

    df_st_rawinfo = pd.read_csv(raw_file, header=0)
    # Keep only stocks that actually have ST events.
    df_st_rawinfo = df_st_rawinfo[(df_st_rawinfo['st_info'] != '0')
                                  & (~df_st_rawinfo['st_info'].isna())]

    # Collect the periods in a list and build the frame once at the end:
    # DataFrame.append no longer exists in pandas 2.x and copied the whole
    # frame on every call.
    st_periods = []
    for _, st_data in df_st_rawinfo.iterrows():
        st_start_date = None
        code = Utils.code_to_symbol(st_data['code'])
        for st_info in reversed(st_data['st_info'].split(',')):
            if ':' not in st_info:
                continue
            parts = st_info.split(':')
            st_type, st_date = parts[0], parts[1]
            if not (st_type in st_start_types or st_type in st_end_types):
                print('st type: {} is not counted.'.format(st_type))
                continue
            if st_type in st_start_types and st_start_date is None:
                st_start_date = st_date
            elif st_type in st_end_types and st_start_date is not None:
                st_periods.append((code, st_start_date, st_date))
                st_start_date = None
        # A period that was opened but never closed is still in force.
        if st_start_date is not None:
            st_periods.append((code, st_start_date, '20301231'))

    df_st_info = pd.DataFrame(st_periods, columns=['code', 'st_start', 'st_end'])
    df_st_info.to_csv(os.path.join(factor_db_path, st_info_path, 'st_info.csv'),
                      index=False)
def _calc_factor_loading_proc(cls, code, calc_date, q):
    """
    Worker used for parallel computation of the Momentum factor loading.

    On success the result gets an 'id' entry holding the stock symbol and is
    put on the queue; nothing is put when the calculation fails or raises.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. 600000 or SH600000
    :param calc_date: datetime-like or str
        Calculation date, format YYYY-MM-DD or YYYYMMDD
    :param q: queue used for inter-process communication
    :return: None; the factor loading is added to the queue
    """
    date_label = Utils.datetimelike_to_str(calc_date)
    logging.info('[%s] Calc Momentum factor of %s.' % (date_label, code))

    try:
        loading = cls._calc_factor_loading(code, calc_date)
    except Exception as err:
        # Best effort: report the error and contribute nothing for this stock.
        print(err)
        return
    if loading is None:
        return

    loading['id'] = Utils.code_to_symbol(code)
    q.put(loading)
def _calc_factor_loading_proc1(cls, code, calc_date, q):
    """
    Worker used for parallel computation of the CYQ factor loading.

    On success the result gets an 'id' entry holding the stock symbol and is
    put on the queue; nothing is put when the calculation fails or raises.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. 600000 or SH600000
    :param calc_date: datetime-like or str
        Calculation date, format: YYYY-MM-DD
    :param q: queue used for inter-process communication
    :return: None; the factor loading is added to the queue
    """
    date_label = Utils.datetimelike_to_str(calc_date)
    logging.info('[%s] Calc CYQ factor of %s.' % (date_label, code))

    try:
        loading = cls._calc_factor_loading(code, calc_date)
    except Exception as err:
        # Best effort: report the error and contribute nothing for this stock.
        print(err)
        return
    if loading is None:
        return

    loading['id'] = Utils.code_to_symbol(code)
    q.put(loading)
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the growth factors (npg_ttm, opg_ttm) of one stock on one date.

    Each factor is (latest TTM value - TTM value one year earlier) divided by
    the absolute value of the earlier figure.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. 600000 or SH600000
    :param calc_date: datetime-like or str
        Calculation date, format YYYY-MM-DD or YYYYMMDD
    :return: pd.Series
    --------
        Growth factors, each rounded to 4 decimals
        0. id: security code
        1. npg_ttm: TTM net profit growth rate
        2. opg_ttm: TTM operating revenue growth rate
        Returns None if the calculation fails (missing TTM data, or a base
        value whose magnitude is below 0.1).
    """
    symbol = Utils.code_to_symbol(code)
    as_of = Utils.to_date(calc_date)

    # Latest TTM financial data.
    latest = Utils.get_ttm_fin_basic_data(symbol, as_of)
    if latest is None:
        return None

    # TTM financial data of the same day one year earlier.
    try:
        year_ago = datetime.datetime(as_of.year - 1, as_of.month, as_of.day)
    except ValueError:
        # The same calendar day does not exist in the previous year (Feb 29).
        year_ago = as_of - datetime.timedelta(days=366)
    previous = Utils.get_ttm_fin_basic_data(symbol, year_ago)
    if previous is None:
        return None

    # Growth rates; a near-zero base would make the ratio meaningless.
    growth = []
    for field in ('NetProfit', 'MainOperateRevenue'):
        if abs(previous[field]) < 0.1:
            return None
        growth.append((latest[field] - previous[field]) / abs(previous[field]))
    npg_ttm, opg_ttm = growth

    return Series([symbol, round(npg_ttm, 4), round(opg_ttm, 4)],
                  index=['id', 'npg_ttm', 'opg_ttm'])
def _calc_factor_loading_proc(cls, code, calc_date, q):
    """
    Worker used for parallel computation of the market-cap factor loading.

    Puts a tuple (symbol, LnTotalMktCap, LnLiquidMktCap) on the queue when
    the calculation succeeds; puts nothing when it fails or raises.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. 600000 or SH600000
    :param calc_date: datetime-like or str
        Calculation date, format YYYY-MM-DD or YYYYMMDD
    :param q: queue used for inter-process communication
    :return: None; the factor loading is added to the queue
    """
    date_label = Utils.datetimelike_to_str(calc_date)
    logging.info('[%s] Calc market capitalization of %s' % (date_label, code))

    try:
        caps = cls._calc_factor_loading(code, calc_date)
    except Exception as err:
        # Best effort: report the error and contribute nothing for this stock.
        print(err)
        return
    if caps is None:
        return

    symbol = Utils.code_to_symbol(code)
    q.put((symbol, caps.LnTotalMktCap, caps.LnLiquidMktCap))
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the BLEV (book leverage) factor loading of one stock on one date.

    blev = (be + pe + ld) / be, with be the shareholder equity, pe fixed at 0
    and ld the non-current liabilities (total liabilities when missing).

    Parameters:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format: YYYY-MM-DD
    :return: pd.Series
    --------
        BLEV factor loading of the stock
        0. code
        1. blev
        Returns None if the calculation fails.
    """
    symbol = Utils.code_to_symbol(code)

    # Latest financial-statement summary of the stock.
    summary = Utils.get_fin_summary_data(symbol, Utils.get_fin_report_date(calc_date))
    if summary is None:
        return None

    # Book equity; reject NaN and a denominator that is effectively zero.
    book_equity = summary['TotalShareholderEquity']
    if np.isnan(book_equity) or abs(book_equity) < utils_con.TINY_ABS_VALUE:
        return None

    # Long-term debt; total liabilities stand in when it is missing.
    long_debt = summary['TotalNonCurrentLiabilities']
    if np.isnan(long_debt):
        long_debt = summary['TotalLiabilities']
    if np.isnan(long_debt):
        return None

    preferred = 0
    blev = (book_equity + preferred + long_debt) / book_equity
    return pd.Series([symbol, blev], index=['code', 'blev'])
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the ETOP (trailing earnings-to-price) factor loading of one stock.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format: YYYY-MM-DD
    :return: pd.Series
    --------
        ETOP factor loading of the stock
        0. code
        1. etop
        Returns None if the calculation fails.
    """
    symbol = Utils.code_to_symbol(code)

    # TTM net profit of the stock.
    ttm = Utils.get_ttm_fin_basic_data(symbol, calc_date)
    if ttm is None:
        return None
    net_profit = ttm['NetProfit']
    if np.isnan(net_profit):
        return None

    # Market cap of the stock, from the stored LNCAP (log market cap) loading.
    lncap_file = os.path.join(factor_ct.FACTOR_DB.db_path, risk_ct.LNCAP_CT.db_file)
    date_key = Utils.datetimelike_to_str(calc_date, dash=False)
    lncap = Utils.read_factor_loading(lncap_file, date_key, symbol)
    if lncap.empty:
        return None
    market_cap = np.exp(lncap['factorvalue'])

    # etop = TTM net profit / market cap
    etop = net_profit * 10000 / market_cap
    return pd.Series([symbol, etop], index=['code', 'etop'])
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the size factors of one stock on one date.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. 600000 or SH600000
    :param calc_date: datetime-like, str
        Calculation date of the size factor, format YYYY-MM-DD or YYYYMMDD
    :return: pd.Series
    --------
        Size factors of the stock, indexed as follows:
        0. LnTotalMktCap: log of total market cap
        1. LnLiquidMktCap: log of free-float market cap
        Returns None if the calculation fails (missing market or
        capital-structure data, or a market cap that is not positive).
    """
    # Latest unadjusted quote of the security up to calc_date.
    code = Utils.code_to_symbol(code)
    calc_date = Utils.to_date(calc_date)
    mkt_daily = Utils.get_secu_daily_mkt(code, calc_date, fq=False, range_lookup=True)
    # Other callers of get_secu_daily_mkt in this code base check for None,
    # so guard against it before touching .shape.
    if mkt_daily is None or mkt_daily.shape[0] == 0:
        return None

    # Latest capital structure of the security up to calc_date.
    cap_struct = Utils.get_cap_struct(code, calc_date)
    if cap_struct is None:
        return None

    total_cap = cap_struct.total - cap_struct.liquid_b - cap_struct.liquid_h
    total_mkt_cap = total_cap * mkt_daily.close
    liquid_mkt_cap = cap_struct.liquid_a * mkt_daily.close
    # math.log raises ValueError for zero or negative input; report that as
    # a failed calculation. `x > 0` is also False for NaN.
    if not (total_mkt_cap > 0 and liquid_mkt_cap > 0):
        return None

    # Build the Series in one step instead of growing a dtype-less empty one.
    return Series([math.log(total_mkt_cap), math.log(liquid_mkt_cap)],
                  index=['LnTotalMktCap', 'LnLiquidMktCap'])
def calc_factor_loading(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
    """
    Compute the SmartMoney factor loadings of the sample stocks for the given
    date(s) and optionally save them to the factor database.

    Parameters
    --------
    :param start_date: datetime-like, str
        Start date
    :param end_date: datetime-like, str, default None
        End date; if None, only the loading of start_date is computed
    :param month_end: bool, default True
        Compute only on month-end trading days
    :param save: bool, default False
        Whether to save the loadings to the factor database. After saving a
        date the function sleeps for 360 seconds.
    :param kwargs:
        'multi_proc': bool, True=compute in parallel with a process pool,
        False=single process, default False
    :return: dict
    --------
        Raw factor loadings of the last date that was actually computed:
        0. date: the trading day following the calculation date
        1. id: security code
        2. factorvalue: factor loading
        Returns None if no date was computed.
    """
    # 0. Trading-day series.
    start_date = Utils.to_date(start_date)
    if end_date is not None:
        end_date = Utils.to_date(end_date)
        trading_days_series = Utils.get_trading_days(start=start_date, end=end_date)
    else:
        trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)

    multi_proc = kwargs.get('multi_proc', False)

    # Loop over the trading days and compute the SmartQ factor loading.
    dict_factor = None
    for calc_date in trading_days_series:
        # Skip non-month-end days BEFORE touching dict_factor, so a skipped
        # day cannot overwrite the result of the last computed day.
        if month_end and (not Utils.is_month_end(calc_date)):
            continue

        # Sample stocks; the date passed is 90 calendar days before calc_date.
        s = (calc_date - datetime.timedelta(days=90)).strftime('%Y%m%d')
        stock_basics = Utils.get_stock_basics(s)

        # Compute the loading of every sample stock.
        dict_factor = {'date': None, 'id': [], 'factorvalue': []}
        if not multi_proc:
            # Single process.
            for _, stock_info in stock_basics.iterrows():
                factor_loading = cls._calc_factor_loading(stock_info.symbol, calc_date)
                print("[%s]Calculating %s's SmartMoney factor loading = %.4f."
                      % (calc_date.strftime('%Y-%m-%d'), stock_info.symbol,
                         -1.0 if factor_loading is None else factor_loading))
                if factor_loading is not None:
                    dict_factor['id'].append(Utils.code_to_symbol(stock_info.symbol))
                    dict_factor['factorvalue'].append(factor_loading)
        else:
            # Process pool; workers report their results through the queue.
            q = Manager().Queue()
            p = Pool(4)  # at most 4 concurrent worker processes
            for _, stock_info in stock_basics.iterrows():
                p.apply_async(cls._calc_factor_loading_proc,
                              args=(stock_info.symbol, calc_date, q,))
            p.close()
            p.join()
            while not q.empty():
                smart_q = q.get(True)
                dict_factor['id'].append(smart_q[0])
                dict_factor['factorvalue'].append(smart_q[1])

        # Label the loadings with the second of the two trading days returned here.
        date_label = Utils.get_trading_days(calc_date, ndays=2)[1]
        dict_factor['date'] = [date_label] * len(dict_factor['id'])

        # Loadings after outlier treatment and standardisation.
        df_std_factor = Utils.normalize_data(pd.DataFrame(dict_factor),
                                             columns='factorvalue',
                                             treat_outlier=True, weight='eq')

        # Persist raw and standardised loadings.
        if save:
            date_key = Utils.datetimelike_to_str(calc_date, dash=False)
            cls._save_factor_loading(cls._db_file, date_key, dict_factor,
                                     'SmartMoney', factor_type='raw',
                                     columns=['date', 'id', 'factorvalue'])
            cls._save_factor_loading(cls._db_file, date_key, df_std_factor,
                                     'SmartMoney', factor_type='standardized',
                                     columns=['date', 'id', 'factorvalue'])
            # Pause for 360 seconds before the next date.
            logging.info('Suspending for 360s.')
            time.sleep(360)
    return dict_factor
def calc_factor_loading(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
    """
    Compute the DASTD factor loadings of the sample stocks and optionally
    save them to the factor database.

    Parameters:
    --------
    :param start_date: datetime-like, str
        Start date, format: YYYY-MM-DD or YYYYMMDD
    :param end_date: datetime-like, str
        End date, format: YYYY-MM-DD or YYYYMMDD.
        If None, only the loadings for start_date are computed.
    :param month_end: bool, default True
        If True, only dates for which Utils.is_month_end() holds are processed.
    :param save: bool, default False
        Whether to save the loadings to the factor database.
    :param kwargs:
        'multi_proc': bool, True = multi-process, False = single process,
        defaults to False
    :return: dict
        Factor loadings of the last processed date, with keys 'date', 'id'
        and 'factorvalue'; None when no date was processed.
    """
    # Calculation dates and the full table of stock basics.
    start_date = Utils.to_date(start_date)
    if end_date is None:
        trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)
    else:
        end_date = Utils.to_date(end_date)
        trading_days_series = Utils.get_trading_days(start=start_date, end=end_date)
    all_stock_basics = CDataHandler.DataApi.get_secu_basics()

    use_multi_proc = kwargs.get('multi_proc', False)

    dict_dastd = None
    for calc_date in trading_days_series:
        if month_end and not Utils.is_month_end(calc_date):
            continue
        logging.info('[%s] Calc DASTD factor loading.' % Utils.datetimelike_to_str(calc_date))

        # Keep only stocks whose list_date is earlier than
        # calc_date - DASTD_CT.listed_days (calendar days).
        listed_before = calc_date - datetime.timedelta(days=risk_ct.DASTD_CT.listed_days)
        listed_before = listed_before.strftime('%Y%m%d')
        stock_basics = all_stock_basics[all_stock_basics.list_date < listed_before]

        codes = []      # security codes
        values = []     # DASTD factor values, aligned with codes
        if use_multi_proc:
            # Multi-process: workers put their results on a managed queue,
            # which is drained after the pool has finished.
            queue = Manager().Queue()
            pool = Pool(4)      # at most 4 worker processes
            for _, stock_info in stock_basics.iterrows():
                pool.apply_async(cls._calc_factor_loading_proc,
                                 args=(stock_info.symbol, calc_date, queue,))
            pool.close()
            pool.join()
            while not queue.empty():
                item = queue.get(True)
                codes.append(item['code'])
                values.append(item['dastd'])
        else:
            # Single process: a failed calculation is recorded as NaN.
            for _, stock_info in stock_basics.iterrows():
                symbol = stock_info.symbol
                logging.info("[%s] Calc %s's DASTD factor loading."
                             % (calc_date.strftime('%Y-%m-%d'), symbol))
                item = cls._calc_factor_loading(symbol, calc_date)
                if item is not None:
                    codes.append(item['code'])
                    values.append(item['dastd'])
                else:
                    codes.append(Utils.code_to_symbol(symbol))
                    values.append(np.nan)

        # Rows are labelled with the second of the two trading days starting at calc_date.
        date_label = Utils.get_trading_days(start=calc_date, ndays=2)[1]
        dict_dastd = {
            'date': [date_label] * len(codes),
            'id': codes,
            'factorvalue': values,
        }
        if save:
            str_date = Utils.datetimelike_to_str(calc_date, dash=False)
            Utils.factor_loading_persistent(cls._db_file, str_date, dict_dastd,
                                            ['date', 'id', 'factorvalue'])
        # NOTE: only the message is logged; the actual sleep call is disabled.
        logging.info('Suspending for 180s.')
    return dict_dastd
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the CMRA factor loading of a single stock on a given date.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format: YYYY-MM-DD
    :return: pd.Series
    --------
        CMRA factor loading of the stock
        0. code
        1. cmra
        None is returned if the calculation fails (no quote data, too few
        quotes inside the look-back window, or no period ratio available).
    """
    trailing = risk_ct.CMRA_CT.trailing
    days_scale = risk_ct.CMRA_CT.days_scale
    window = trailing * days_scale

    # Trading-day calendar of the look-back window (window + 1 days), as
    # 'YYYY-MM-DD' strings so they can be compared with the quote dates
    # (assumes the 'date' column holds strings in that format -- confirm).
    trading_days = Utils.get_trading_days(end=calc_date, ndays=window + 1)
    trading_days = [day.strftime('%Y-%m-%d') for day in trading_days]

    # Adjusted daily quotes of the stock, restricted to the look-back window.
    df_secu_quote = Utils.get_secu_daily_mkt(code, end=calc_date, fq=True)
    if df_secu_quote is None:
        return None
    df_secu_quote = df_secu_quote[df_secu_quote['date'].isin(trading_days)].reset_index(drop=True)

    # Require quotes for at least half of the look-back window.
    if len(df_secu_quote) < int(window / 2):
        return None

    first_date = df_secu_quote.iloc[0]['date']
    base_close = df_secu_quote.iloc[0]['close']

    # Price ratio (close / first close in the window) at the end of each period.
    z = []
    prev_trading_day = first_date
    for t in range(1, trailing + 1):
        trading_day = trading_days[t * days_scale]
        if trading_day < first_date:
            # The stock has no quote yet at this period end.
            continue
        # Latest date on or before the period end on which the stock has a quote.
        secu_trading_day = df_secu_quote[df_secu_quote['date'] <= trading_day].iloc[-1]['date']
        if secu_trading_day <= prev_trading_day:
            # No new quote since the previous period end.
            continue
        close = df_secu_quote[df_secu_quote['date'] == secu_trading_day].iloc[0]['close']
        z.append(close / base_close)
        prev_trading_day = secu_trading_day

    if not z:
        return None
    cmra = math.log(max(z)) - math.log(min(z))
    return pd.Series([Utils.code_to_symbol(code), cmra], index=['code', 'cmra'])