Ejemplo n.º 1
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the STOM factor loading of one stock on one date.

     STOM is the natural log of the turnover summed over the most recent
     ``month_days * months`` trading days, divided by ``months``.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. SH600000 or 600000
     :param calc_date: datetime-like or str
         Calculation date, format: YYYY-MM-DD
     :return: pd.Series
     --------
         STOM factor loading of the stock
         0. code
         1. stom
         None is returned when no quotes are available or when the
         averaged turnover is not positive (its log is undefined)
     """
     # Unadjusted daily quotes of the trailing 252 trading days.
     df_mkt_data = Utils.get_secu_daily_mkt(code, end=calc_date, ndays=252, fq=False)
     if df_mkt_data is None or df_mkt_data.empty:
         return None
     months = risk_ct.STOM_CT.months
     days = risk_ct.STOM_CT.month_days * months
     # A negative-start slice already returns every row when fewer than
     # `days` rows exist, so no separate short-history branch is needed.
     avg_turnover = df_mkt_data['turnover1'].iloc[-days:].sum() / months
     # math.log raises ValueError for values <= 0 (e.g. a stock with no
     # trading in the window); report that as a failed calculation.
     if avg_turnover <= 0:
         return None
     stom = math.log(avg_turnover)

     return pd.Series([Utils.code_to_symbol(code), stom], index=['code', 'stom'])
Ejemplo n.º 2
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the DTOA factor loading (total liability / total asset)
        of one stock on one date.

        Parameters:
        --------
        :param code: str
            Stock code, e.g. SH600000 or 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            DTOA factor loading of the stock
            0. code
            1. dtoa
            None is returned when the financial data is missing, either
            figure is NaN, or total asset is (almost) zero
        """
        symbol = Utils.code_to_symbol(code)
        # Financial figures come from the report date resolved for calc_date.
        latest_report = Utils.get_fin_report_date(calc_date)
        basics = Utils.get_fin_basic_data(symbol, latest_report)
        if basics is None:
            return None

        total_liability = basics['TotalLiability']
        if np.isnan(total_liability):
            return None

        total_asset = basics['TotalAsset']
        # A tiny denominator would make the ratio meaningless.
        if np.isnan(total_asset) or abs(total_asset) < utils_con.TINY_ABS_VALUE:
            return None

        return pd.Series([symbol, total_liability / total_asset],
                         index=['code', 'dtoa'])
Ejemplo n.º 3
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the BETA and HSIGMA factor loadings of one stock on one date.

     The stock's simple daily returns are regressed on the benchmark's
     simple daily returns by weighted least squares with exponentially
     decaying weights. The slope is reported as beta and the square root
     of the residual mean squared error as hsigma.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like, str
         Calculation date, format YYYY-MM-DD
     :return: pd.Series
     --------
         0. code: stock symbol
         1. beta: BETA factor loading
         2. hsigma: HSIGMA factor loading
         None is returned when either quote series is unavailable
     """
     # Adjusted quotes of the stock; one extra row is needed to form returns.
     stock_quotes = Utils.get_secu_daily_mkt(
         code, end=calc_date, ndays=risk_ct.BETA_CT.trailing + 1, fq=True)
     if stock_quotes is None:
         return None
     stock_quotes.reset_index(drop=True, inplace=True)

     # Adjusted quotes of the benchmark, restricted to the stock's dates.
     bench_quotes = Utils.get_secu_daily_mkt(
         risk_ct.BETA_CT.benchmark, end=calc_date, fq=True)
     if bench_quotes is None:
         return None
     on_stock_dates = bench_quotes['date'].isin(list(stock_quotes['date']))
     bench_quotes = bench_quotes[on_stock_dates].reset_index(drop=True)

     # Simple daily returns: close[t] / close[t-1] - 1.
     stock_close = np.array(stock_quotes['close'])
     stock_ret = stock_close[1:] / stock_close[:-1] - 1.
     bench_close = np.array(bench_quotes['close'])
     bench_ret = bench_close[1:] / bench_close[:-1] - 1.

     # Exponential weights: the newest observation has exponent 0; every
     # term except the oldest is additionally scaled by alpha.
     n_obs = len(bench_ret)
     alpha = 1 - np.exp(np.log(0.5) / risk_ct.BETA_CT.half_life)
     decay = np.float_power(1 - alpha, np.arange(n_obs - 1, -1, -1))
     scale = np.array([1] + [alpha] * (n_obs - 1))
     weights = decay * scale

     # Weighted least squares of stock returns on [const, benchmark returns].
     design = sm.add_constant(bench_ret)
     fit = sm.WLS(stock_ret, design, weights=weights).fit()
     beta = fit.params[1]
     hsigma = np.sqrt(fit.mse_resid)
     return pd.Series([Utils.code_to_symbol(code), beta, hsigma],
                      index=['code', 'beta', 'hsigma'])
Ejemplo n.º 4
0
def load_ipo_info():
    """Download per-stock IPO data and store it as CSV files.

    Settings are read from ``config.ini``: the URL template
    (``ipo_info.ipo_info_url``) and the output directory
    (``factor_db.db_path`` joined with ``ipo_info.db_path``).

    For every stock returned by ``Utils.get_stock_basics(all=True)`` that
    does not yet have a ``<symbol>.csv`` file, the page is fetched, the
    table following the ``IPO资料`` heading is parsed into name/value
    pairs, and the result is written to ``<symbol>.csv``. All newly
    downloaded records are finally appended to ``ipo_info.csv``.

    Network errors (including a timeout) propagate to the caller.
    """
    cfg = ConfigParser()
    cfg.read('config.ini')
    ipo_info_url = cfg.get('ipo_info', 'ipo_info_url')
    db_path = Path(cfg.get('factor_db', 'db_path'),
                   cfg.get('ipo_info', 'db_path'))

    df_stock_basics = Utils.get_stock_basics(all=True)
    # Collect rows in a list and build the frame once: DataFrame.append
    # copies the whole frame on each call and no longer exists in pandas 2.
    ipo_rows = []
    for _, stock_info in df_stock_basics.iterrows():
        # Skip stocks whose IPO data has already been downloaded.
        if db_path.joinpath('%s.csv' % stock_info.symbol).exists():
            continue

        print('下载%s的IPO数据.' % stock_info.symbol)
        secu_code = Utils.code_to_symbol(stock_info.symbol)
        # The URL template takes the symbol without its first two characters.
        url = ipo_info_url % stock_info.symbol[2:]
        # A timeout keeps one stalled connection from blocking the whole run.
        html = requests.get(url, timeout=30).content
        soup = BeautifulSoup(html, 'html.parser')
        for tag in soup.find_all(name='h2'):
            if tag.get_text().strip() != 'IPO资料':
                continue
            ipo_table = tag.find_next(name='table')
            if ipo_table is None:
                # Heading without a table: nothing to parse for this stock.
                break
            ipo_info_header = []
            ipo_info_data = []
            for tr in ipo_table.find_all(name='tr'):
                tds = tr.find_all(name='td')
                # Header/spacer rows have no name/value cell pair.
                if len(tds) < 2:
                    continue
                name = tds[0].get_text().replace(' ', '').replace(
                    '\n', '').replace('\r', '')
                value = tds[1].get_text().replace(' ', '').replace(
                    ',', '').replace('\n', '').replace('\r', '')
                ipo_info_header.append(name)
                ipo_info_data.append(value)
            ipo_info = Series(ipo_info_data, index=ipo_info_header)
            ipo_info['代码'] = secu_code
            ipo_info.to_csv(db_path.joinpath('%s.csv' % secu_code))
            ipo_rows.append(ipo_info)
            break
    if ipo_rows:
        DataFrame(ipo_rows).to_csv(db_path.joinpath('ipo_info.csv'),
                                   index=False,
                                   mode='a',
                                   header=False)
Ejemplo n.º 5
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the BTOP (book-to-price) factor loading of one stock.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. SH600000 or 600000
     :param calc_date: datetime-like, str
         Calculation date, format: YYYY-MM-DD
     :return: pd.Series
     --------
         BTOP factor loading of the stock
         0. code
         1. btop
         None is returned when the financial data or the stock's LNCAP
         value is missing
     """
     # Financial data of the report date resolved for calc_date.
     report_date = Utils.get_fin_report_date(calc_date)
     fin_data = Utils.get_fin_basic_data(code, report_date)
     if fin_data is None:
         return None

     # LNCAP (log market cap) loadings of the date, cached per date string.
     date_key = Utils.datetimelike_to_str(calc_date, dash=False)
     lncap_table = cls._LNCAP_Cache.get(date_key)
     if lncap_table is None:
         lncap_file = os.path.join(factor_ct.FACTOR_DB.db_path,
                                   risk_ct.LNCAP_CT.db_file)
         lncap_table = Utils.read_factor_loading(lncap_file, date_key)
         cls._LNCAP_Cache.set(date_key, lncap_table)

     symbol = Utils.code_to_symbol(code)
     matched = lncap_table[lncap_table['id'] == symbol]
     if matched.empty:
         return None
     log_cap = matched.iloc[0]['factorvalue']

     # Book-to-price = net asset / market cap; exp() undoes the log in LNCAP.
     # NOTE(review): the 10000 factor presumably converts the report's
     # amount unit to the market-cap unit - confirm against the data source.
     net_asset = fin_data['TotalAsset'] - fin_data['TotalLiability']
     btop = net_asset * 10000 / np.exp(log_cap)
     return pd.Series([symbol, btop], index=['code', 'btop'])
Ejemplo n.º 6
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the LIQUIDITY factor loading of one stock on one date.

        Three log-turnover measures are built from unadjusted daily quotes
        and combined with the weights configured in ``risk_ct.LIQUID_CT``.

        Parameters:
        --------
        :param code: str
            Stock code, e.g. SH600000 or 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            LIQUIDITY factor loading of the stock
            0. code
            1. stom: monthly turnover (log)
            2. stoq: quarterly turnover (log of the monthly average)
            3. stoa: annual turnover (log of the monthly average)
            4. liquidity: weighted sum of the three
            None is returned when no quotes are available or when any of
            the turnover sums is not positive (its log is undefined)
        """
        stom_days = risk_ct.LIQUID_CT.stom_days
        stoq_months = risk_ct.LIQUID_CT.stoq_months
        stoa_months = risk_ct.LIQUID_CT.stoa_months
        # Unadjusted daily quotes covering the whole annual window.
        df_mkt_data = Utils.get_secu_daily_mkt(code,
                                               end=calc_date,
                                               ndays=stoa_months * stom_days,
                                               fq=False)
        if df_mkt_data is None or df_mkt_data.empty:
            return None

        turnover = df_mkt_data['turnover1']
        # A negative-start slice returns every row when the history is
        # shorter than the window, so short histories need no extra branch.
        stom_sum = turnover.iloc[-stom_days:].sum()
        stoq_sum = turnover.iloc[-stom_days * stoq_months:].sum()
        stoa_sum = turnover.sum()
        # math.log raises ValueError for values <= 0 (e.g. no trading in
        # the window); report that as a failed calculation instead.
        if stom_sum <= 0 or stoq_sum <= 0 or stoa_sum <= 0:
            return None

        stom = math.log(stom_sum)
        stoq = math.log(stoq_sum / stoq_months)
        stoa = math.log(stoa_sum / stoa_months)

        # liquidity = w_stom * stom + w_stoq * stoq + w_stoa * stoa
        stom_weight = risk_ct.LIQUID_CT.stom_weight
        stoq_weight = risk_ct.LIQUID_CT.stoq_weight
        stoa_weight = risk_ct.LIQUID_CT.stoa_weight
        liquidity = stom_weight * stom + stoq_weight * stoq + stoa_weight * stoa

        return pd.Series(
            [Utils.code_to_symbol(code), stom, stoq, stoa, liquidity],
            index=['code', 'stom', 'stoq', 'stoa', 'liquidity'])
Ejemplo n.º 7
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the chip (cost) distribution of one stock up to a given date.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like, str
         Calculation date, format YYYY-MM-DD
     :return: tuple(code, close, cyq_data)
     --------
         1. code
         2. close: float
         Close price of the last quote row up to calc_date
         3. cyq_data: pd.Series
         Chip distribution built from the IPO price and all daily quotes
         up to calc_date; the index holds the chip prices, the values the
         share of holdings at each price
         None is returned when IPO data or quotes are missing, the IPO
         price is unknown, or fewer than 30 distinct prices exist
     """
     # IPO data of the stock.
     ipo_data = Utils.get_ipo_info(code)
     if ipo_data is None:
         return None
     # The last character of the price text is dropped before parsing
     # (presumably a unit suffix - confirm); '--' marks a missing price.
     ipo_price_text = ipo_data['发行价格'][:-1]
     if ipo_price_text == '--':
         return None
     ipo_price = float(ipo_price_text)
     # Adjusted daily quotes up to calc_date.
     mkt_data = Utils.get_secu_daily_mkt(code, end=calc_date, fq=True)
     if mkt_data is None or mkt_data.empty:
         return None
     secu_close = mkt_data.iloc[-1]['close']
     # Daily average price, scaled by the adjustment factor.
     mkt_data['vwap'] = np.around(
         mkt_data['amount'] / mkt_data['vol'] * mkt_data['factor'], 2)
     mkt_data.dropna(axis=0, how='any', inplace=True)
     # Newest day first, with a fresh 0..n-1 index.
     mkt_data.sort_values(by='date', ascending=False, inplace=True)
     mkt_data.reset_index(drop=True, inplace=True)
     # Work on an independent copy so the new columns are not written
     # through a slice of mkt_data.
     cyq_data = mkt_data[['vwap', 'turnover1']].copy()
     # Append the IPO price as the oldest row (index n, since the index is
     # 0..n-1 here); this replaces the removed DataFrame.append.
     cyq_data.loc[len(cyq_data)] = [ipo_price, 0]
     # Share still held at each day's price: that day's turnover times the
     # product of (1 - turnover) over all more recent days.
     cyq_data['minusTR'] = 1 - cyq_data['turnover1']
     cyq_data['cumprod_TR'] = cyq_data['minusTR'].cumprod().shift(1)
     cyq_data.loc[0, 'cumprod_TR'] = 1.
     cyq_data['cyq'] = cyq_data['turnover1'] * cyq_data['cumprod_TR']
     secu_cyq = cyq_data['cyq'].groupby(cyq_data['vwap']).sum()
     # Too few distinct chip prices: treat as failure.
     if len(secu_cyq) < 30:
         return None
     # Drop negligible holdings.
     secu_cyq = secu_cyq[secu_cyq.values > 0.00001]
     return (Utils.code_to_symbol(code), secu_close, secu_cyq)
Ejemplo n.º 8
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the value factors ep_ttm, bp_lr and ocf_ttm of one stock.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like or str
         Calculation date, format YYYY-MM-DD or YYYYMMDD
     :return: pd.Series
     --------
         Value factor values, each rounded to 6 decimals
         0. ep_ttm: TTM net profit / total market cap
         1. bp_lr: shareholder equity (latest report) / total market cap
         2. ocf_ttm: TTM net operating cash flow / total market cap
         None is returned when any required input is missing
     """
     code = Utils.code_to_symbol(code)
     calc_date = Utils.to_date(calc_date)
     # TTM financial data.
     ttm_fin_data = Utils.get_ttm_fin_basic_data(code, calc_date)
     if ttm_fin_data is None:
         return None
     # Latest report data.
     report_date = Utils.get_fin_report_date(calc_date)
     fin_basic_data = Utils.get_fin_basic_data(code, report_date)
     if fin_basic_data is None:
         return None
     # Unadjusted quote used to price the share capital.
     mkt_daily = Utils.get_secu_daily_mkt(code,
                                          calc_date,
                                          fq=False,
                                          range_lookup=True)
     # The quote lookup may yield None as well as an empty result.
     if mkt_daily is None or mkt_daily.shape[0] == 0:
         return None
     cap_struct = Utils.get_cap_struct(code, calc_date)
     if cap_struct is None:
         return None
     # NOTE(review): liquid_b / liquid_h are subtracted from the total,
     # presumably to exclude B and H shares - confirm.
     total_cap = cap_struct.total - cap_struct.liquid_b - cap_struct.liquid_h
     total_mkt_cap = total_cap * mkt_daily.close
     # Financial amounts are scaled by the project's amount unit first.
     unit = util_ct.FIN_DATA_AMOUNT_UNIT
     ep_ttm = ttm_fin_data['NetProfit'] * unit / total_mkt_cap
     ocf_ttm = ttm_fin_data['NetOperateCashFlow'] * unit / total_mkt_cap
     bp_lr = fin_basic_data['ShareHolderEquity'] * unit / total_mkt_cap
     return Series([round(ep_ttm, 6),
                    round(bp_lr, 6),
                    round(ocf_ttm, 6)],
                   index=['ep_ttm', 'bp_lr', 'ocf_ttm'])
Ejemplo n.º 9
0
def load_fin_data_cwbbzy():
    """Download the financial-statement summary of every stock as CSV.

    Settings are read from ``config.ini``: the URL template
    (``fin_data.cwbbzy_url``) and the output directory
    (``factor_db.db_path`` joined with ``fin_data.cwbbzy_path``).

    For each stock returned by ``Utils.get_stock_basics(all=True)`` the
    comma-separated response is parsed line by line. The first field of
    each line becomes a column name and the remaining fields its values,
    so the stored table is the transpose of the downloaded text. The
    table is sorted by its first column and written to ``<symbol>.csv``.
    """
    cfg = ConfigParser()
    cfg.read('config.ini')
    cwbbzy_url = cfg.get('fin_data', 'cwbbzy_url')
    cwbbzy_path = os.path.join(cfg.get('factor_db', 'db_path'),
                               cfg.get('fin_data', 'cwbbzy_path'))

    # Stock list to iterate over.
    df_stock_basics = Utils.get_stock_basics(all=True)
    # Download the statement summary stock by stock.
    for _, stock_info in df_stock_basics.iterrows():
        # The URL template takes the last six characters of the symbol.
        url = cwbbzy_url % stock_info.symbol[-6:]
        # NOTE(review): no timeout is passed, so a stalled connection can
        # block the whole run.
        resp = requests.get(url)
        if resp.status_code != requests.codes.ok:
            print('%s的财务报表摘要数据下载失败!' % stock_info.symbol)
            continue
        print('下载%s的财务报表摘要数据.' % stock_info.symbol)
        fin_data = resp.text
        # The service answers with this marker when it has no data.
        if '暂无数据' in fin_data:
            continue
        # Text after the last comma is treated as trailing residue.
        # NOTE(review): str.replace removes EVERY occurrence of that text,
        # not only the trailing one - confirm this is safe for the payload.
        tmp = fin_data.split(',')[-1]
        fin_data = fin_data.replace(tmp, '')
        fin_data = fin_data.split('\r\n')
        fin_datas = []
        for data in fin_data:
            s = data.split(',')
            # The field after the last comma of each line is discarded.
            fin_datas.append(s[:-1])
        # Truncate every line to the shortest one so all columns have the
        # same length.
        n = min([len(data) for data in fin_datas])
        dict_fin_data = {data[0]: data[1:n] for data in fin_datas}
        fin_header = [data[0] for data in fin_datas]
        df_fin_data = DataFrame(dict_fin_data, columns=fin_header)
        df_fin_data = df_fin_data.sort_values(by=fin_header[0])
        df_fin_data.to_csv(os.path.join(
            cwbbzy_path, '%s.csv' % Utils.code_to_symbol(stock_info.symbol)),
                           index=False)
Ejemplo n.º 10
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the RSTR factor loading of one stock on one date.

     RSTR is the exponentially weighted sum of daily log returns over the
     trailing window, after dropping the most recent ``trailing_end`` rows.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. SH600000 or 600000
     :param calc_date: datetime-like, str
         Calculation date, format: YYYY-MM-DD
     :return: pd.Series
     --------
         RSTR factor loading of the stock
         0. code
         1. rstr
         None is returned when quotes are missing or shorter than twice
         the half life
     """
     cfg = risk_ct.RSTR_CT
     # Adjusted quotes; one extra row is needed to form returns.
     quotes = Utils.get_secu_daily_mkt(code,
                                       end=calc_date,
                                       ndays=cfg.trailing_start + 1,
                                       fq=True)
     if quotes is None or len(quotes) < cfg.half_life * 2:
         return None
     # Drop the most recent `trailing_end` rows.
     quotes = quotes.head(len(quotes) - cfg.trailing_end).reset_index(drop=True)

     # Daily log returns: log(close[t] / close[t-1]).
     close = np.array(quotes['close'])
     log_ret = np.log(close[1:] / close[:-1])

     # Exponential weights: the newest observation has exponent 0; every
     # term except the oldest is additionally scaled by alpha.
     n_obs = len(log_ret)
     alpha = 1 - np.exp(np.log(0.5) / cfg.half_life)
     decay = np.float_power(1 - alpha, np.arange(n_obs - 1, -1, -1))
     scale = np.array([1] + [alpha] * (n_obs - 1))
     weights = decay * scale

     rstr = np.sum(log_ret * weights)
     return pd.Series([Utils.code_to_symbol(code), rstr],
                      index=['code', 'rstr'])
Ejemplo n.º 11
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the SGRO factor loading of one stock on one date.

        SGRO is the OLS slope of main operating revenue against the year
        index 1..5, divided by the mean revenue of those years.

        Parameters:
        --------
        :param code: str
            Stock code, e.g. SH600000 or 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            SGRO factor loading of the stock
            0. code
            1. sgro
            None is returned when the history is missing, any revenue is
            NaN, or the mean revenue is (almost) zero
        """
        symbol = Utils.code_to_symbol(code)
        as_of = Utils.to_date(calc_date)
        # Main financial figures of the previous five years.
        n_years = 5
        history = _get_prevN_years_finbasicdata(as_of, symbol, n_years)
        if history is None:
            return None

        revenues = np.asarray(
            [yearly['MainOperateRevenue'] for yearly in history])
        if np.isnan(revenues).any():
            return None

        # Regress revenue on [const, year index]; params[1] is the slope.
        design = sm.add_constant(np.arange(1, n_years + 1))
        slope = sm.OLS(revenues, design).fit().params[1]

        mean_revenue = np.mean(revenues)
        # A tiny denominator would make the ratio meaningless.
        if abs(mean_revenue) < utils_con.TINY_ABS_VALUE:
            return None

        return pd.Series([symbol, slope / mean_revenue],
                         index=['code', 'sgro'])
Ejemplo n.º 12
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the EPFWD factor loading (predicted earnings / market cap)
        of one stock on one date.

        Parameters:
        --------
        :param code: str
            Stock code, e.g. SH600000 or 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            EPFWD factor loading of the stock
            0. code
            1. epfwd
            None is returned when no earnings figure or no LNCAP loading
            is available
        """
        symbol = Utils.code_to_symbol(code)
        # Consensus earnings forecast; fall back to the TTM net profit when
        # no forecast exists.
        consensus = Utils.get_consensus_data(
            calc_date, symbol, ConsensusType.PredictedEarings)
        if consensus is not None:
            earnings = consensus['predicted_earnings']
        else:
            ttm = Utils.get_ttm_fin_basic_data(symbol, calc_date)
            if ttm is None:
                return None
            earnings = ttm['NetProfit']
        if np.isnan(earnings):
            return None

        # Market cap is recovered from the stored LNCAP (log cap) loading.
        lncap_file = os.path.join(factor_ct.FACTOR_DB.db_path,
                                  risk_ct.LNCAP_CT.db_file)
        date_key = Utils.datetimelike_to_str(calc_date, dash=False)
        lncap = Utils.read_factor_loading(lncap_file, date_key, symbol)
        if lncap.empty:
            return None

        # epfwd = predicted earnings / market cap
        # NOTE(review): the 10000.0 factor presumably converts the earnings
        # unit to the market-cap unit - confirm against the data source.
        epfwd = earnings * 10000.0 / np.exp(lncap['factorvalue'])

        return pd.Series([symbol, epfwd], index=['code', 'epfwd'])
Ejemplo n.º 13
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the MLEV (market leverage) factor loading of one stock.

        mlev = (me + pe + ld) / me, where me is the market cap, pe the
        preferred-equity book value (fixed at 0 here) and ld the
        long-term debt.

        Parameters:
        --------
        :param code: str
            Stock code, e.g. SH600000 or 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            MLEV factor loading of the stock
            0. code
            1. mlev
            None is returned when the financial summary, every debt
            figure, or the LNCAP loading is missing
        """
        symbol = Utils.code_to_symbol(code)
        latest_report = Utils.get_fin_report_date(calc_date)
        # Latest financial-statement summary of the stock.
        summary = Utils.get_fin_summary_data(symbol, latest_report)
        if summary is None:
            return None

        # Long-term debt; total liabilities stand in when it is missing.
        long_debt = summary['TotalNonCurrentLiabilities']
        if np.isnan(long_debt):
            long_debt = summary['TotalLiabilities']
        if np.isnan(long_debt):
            return None
        # NOTE(review): the 10000.0 factor presumably converts the report
        # unit to the market-cap unit - confirm against the data source.
        long_debt = long_debt * 10000.0

        # Preferred equity is set to zero.
        preferred = 0.0

        # Market cap is recovered from the stored LNCAP (log cap) loading.
        lncap_file = os.path.join(factor_ct.FACTOR_DB.db_path,
                                  risk_ct.LNCAP_CT.db_file)
        date_key = Utils.datetimelike_to_str(calc_date, dash=False)
        lncap = Utils.read_factor_loading(lncap_file, date_key, symbol)
        if lncap.empty:
            return None
        market_cap = np.exp(lncap['factorvalue'])

        mlev = (market_cap + preferred + long_debt) / market_cap

        return pd.Series([symbol, mlev], index=['code', 'mlev'])
Ejemplo n.º 14
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the DASTD factor loading of one stock on one date.

     DASTD is the square root of the exponentially weighted sum of squared
     deviations of the daily log returns from their unweighted mean.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. SH600000 or 600000
     :param calc_date: datetime-like, str
         Calculation date, format: YYYY-MM-DD
     :return: pd.Series
     --------
         DASTD factor loading of the stock
         0. code
         1. dastd
         None is returned when quotes are missing or cover less than half
         of the trailing window
     """
     cfg = risk_ct.DASTD_CT
     # Adjusted quotes; one extra row is needed to form returns.
     quotes = Utils.get_secu_daily_mkt(
         code, end=calc_date, ndays=cfg.trailing + 1, fq=True)
     if quotes is None:
         return None
     # Require at least half of the trailing window.
     if len(quotes) < int(cfg.trailing / 2):
         return None
     quotes.reset_index(drop=True, inplace=True)

     # Daily log returns and their deviation from the plain mean.
     close = np.array(quotes['close'])
     log_ret = np.log(close[1:] / close[:-1])
     deviation = log_ret - np.mean(log_ret)

     # Exponential weights: the newest observation has exponent 0; every
     # term except the oldest is additionally scaled by alpha.
     n_obs = len(log_ret)
     alpha = 1 - np.exp(np.log(0.5) / cfg.half_life)
     decay = np.float_power(1 - alpha, np.arange(n_obs - 1, -1, -1))
     scale = np.array([1] + [alpha] * (n_obs - 1))
     weights = decay * scale

     dastd = np.sqrt(np.sum(deviation**2 * weights))
     return pd.Series([Utils.code_to_symbol(code), dastd],
                      index=['code', 'dastd'])
Ejemplo n.º 15
0
 def _calc_factor_loading_proc(cls, code, calc_date, q):
     """
     Worker entry point for computing the BETA loading in parallel.

     Exactly one pd.Series is always put on the queue; when the
     calculation fails or raises, beta and hsigma are NaN.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like or str
         Calculation date, format: YYYY-MM-DD
     :param q: queue used to hand the result back to the parent process
     :return: None; the factor loading is put on the queue
     """
     date_str = Utils.datetimelike_to_str(calc_date)
     logging.debug('[%s] Calc BETA factor of %s.' % (date_str, code))
     try:
         loading = cls._calc_factor_loading(code, calc_date)
     except Exception as err:
         # Best effort: report the error and fall through to the NaN row.
         print(err)
         loading = None
     if loading is None:
         loading = pd.Series([Utils.code_to_symbol(code), np.nan, np.nan],
                             index=['code', 'beta', 'hsigma'])
     q.put(loading)
Ejemplo n.º 16
0
 def _calc_factor_loading_proc(cls, code, calc_date, q):
     """
     Worker entry point for computing the EPFWD loading in parallel.

     Exactly one pd.Series is always put on the queue; when the
     calculation fails or raises, epfwd is NaN.

     Parameters:
     ---------
     :param code: str
         Stock code, e.g. SH600000 or 600000
     :param calc_date: datetime-like, str
         Calculation date, format: YYYY-MM-DD
     :param q: queue used to hand the result back to the parent process
     :return: None; the factor loading is put on the queue
     """
     date_str = Utils.datetimelike_to_str(calc_date)
     logging.debug('[{}] Calc EPFWD factor of {}.'.format(date_str, code))
     try:
         loading = cls._calc_factor_loading(code, calc_date)
     except Exception as err:
         # Best effort: report the error and fall through to the NaN row.
         print(err)
         loading = None
     if loading is None:
         loading = pd.Series([Utils.code_to_symbol(code), np.nan],
                             index=['code', 'epfwd'])
     q.put(loading)
Ejemplo n.º 17
0
 def _calc_factor_loading_proc(cls, code, calc_date, q):
     """
     Worker entry point for computing intraday momentum in parallel.

     A tuple (symbol, m0, m1, m2, m3, m4, m_normal) is put on the queue
     only when the calculation succeeds; on failure nothing is queued.

     Parameters
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like or str
         Calculation date, format: YYYY-MM-DD
     :param q: queue used to hand the result back to the parent process
     :return: None; the factor loading is put on the queue
     """
     date_str = Utils.datetimelike_to_str(calc_date)
     logging.info('[%s] Calc Intaday Momentum of %s.' % (date_str, code))
     try:
         momentum = cls._calc_factor_loading(code, calc_date)
     except Exception as err:
         # Best effort: report the error and queue nothing.
         print(err)
         return
     if momentum is None:
         return
     q.put((Utils.code_to_symbol(code), momentum.m0, momentum.m1,
            momentum.m2, momentum.m3, momentum.m4, momentum.m_normal))
Ejemplo n.º 18
0
 def _calc_factor_loading_proc(cls, code, calc_date, q):
     """
     Worker entry point for computing the DASTD loading in parallel.

     Exactly one pd.Series is always put on the queue; when the
     calculation fails or raises, dastd is NaN.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. SH600000 or 600000
     :param calc_date: datetime-like, str
         Calculation date, format: YYYY-MM-DD
     :param q: queue used to hand the result back to the parent process
     :return: None; the factor loading is put on the queue
     """
     date_str = Utils.datetimelike_to_str(calc_date)
     logging.info('[%s] Calc DASTD factor of %s.' % (date_str, code))
     try:
         loading = cls._calc_factor_loading(code, calc_date)
     except Exception as err:
         # Best effort: report the error and fall through to the NaN row.
         print(err)
         loading = None
     if loading is None:
         loading = pd.Series([Utils.code_to_symbol(code), np.nan],
                             index=['code', 'dastd'])
     q.put(loading)
Ejemplo n.º 19
0
 def _calc_factor_loading_proc(cls, code, calc_date, q):
     """
     Worker entry point for computing the SmartQ factor in parallel.

     A tuple (symbol, smart_q) is put on the queue only when the
     calculation succeeds; on failure nothing is queued.

     Parameters
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like or str
         Calculation date
     :param q: queue used to hand the result back to the parent process
     :return: None; the factor loading is put on the queue q
     """
     # calc_date may be a str, which has no strftime(); use the shared
     # date formatter so logging cannot crash the worker.
     logging.info('[%s] Calc SmartQ of %s.' %
                  (Utils.datetimelike_to_str(calc_date), code))
     smart_q = None
     try:
         smart_q = cls._calc_factor_loading(code, calc_date)
     except Exception as e:
         # Best effort: report the error and queue nothing.
         print(e)
     if smart_q is not None:
         q.put((Utils.code_to_symbol(code), smart_q))
Ejemplo n.º 20
0
def load_st_info():
    """Import the ST flagging / un-flagging date ranges of individual stocks.

    Paths and the lists of ST "start" and "end" event types are read from
    ``config.ini`` (sections ``factor_db`` and ``st_info``). The raw file
    ``st_info.csv`` under ``raw_data_path`` is parsed row by row: each row's
    ``st_info`` field is a comma-separated list of ``type:date`` events that is
    walked in reverse order, pairing every start event with the next end event.
    A start event without a matching end gets the sentinel end date
    ``'20301231'``. The resulting (code, st_start, st_end) table is written to
    ``<db_path>/<st_info_path>/st_info.csv``.

    :return: None. Prints a message and returns early when the raw file is missing.
    """
    cfg = ConfigParser()
    cfg.read('config.ini')
    factor_db_path = cfg.get('factor_db', 'db_path')
    raw_data_path = cfg.get('st_info', 'raw_data_path')
    st_info_path = cfg.get('st_info', 'st_info_path')
    st_start_types = cfg.get('st_info', 'st_start_types').split(',')
    st_end_types = cfg.get('st_info', 'st_end_types').split(',')

    raw_file = os.path.join(raw_data_path, 'st_info.csv')
    if not os.path.isfile(raw_file):
        print('\033[1;31;40mst_info.csv原始文件不存在.\033[0m')
        return
    df_st_rawinfo = pd.read_csv(raw_file, header=0)
    df_st_rawinfo = df_st_rawinfo[(df_st_rawinfo['st_info'] != '0') & (~df_st_rawinfo['st_info'].isna())]

    # Collect rows in a plain list and build the frame once at the end:
    # DataFrame.append copied the whole frame on every call and no longer
    # exists in pandas >= 2.0.
    rows = []
    for _, st_data in df_st_rawinfo.iterrows():
        st_start_date = None
        code = Utils.code_to_symbol(st_data['code'])
        for st_info in st_data['st_info'].split(',')[::-1]:
            if ':' not in st_info:
                continue
            fields = st_info.split(':')
            st_type, st_date = fields[0], fields[1]
            if not (st_type in st_start_types or st_type in st_end_types):
                print('st type: {} is not counted.'.format(st_type))
                continue
            if st_type in st_start_types and st_start_date is None:
                # Open a new ST period; repeated start events are ignored
                # until the period is closed.
                st_start_date = st_date
            elif st_type in st_end_types and st_start_date is not None:
                # Close the currently open ST period.
                rows.append((code, st_start_date, st_date))
                st_start_date = None
        if st_start_date is not None:
            # Period still open: use the far-future sentinel end date.
            rows.append((code, st_start_date, '20301231'))

    df_st_info = pd.DataFrame(rows, columns=['code', 'st_start', 'st_end'])
    df_st_info.to_csv(os.path.join(factor_db_path, st_info_path, 'st_info.csv'), index=False)
Ejemplo n.º 21
0
 def _calc_factor_loading_proc(cls, code, calc_date, q):
     """
     Worker used to compute the Momentum factor loading in parallel.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like or str
         Calculation date, format YYYY-MM-DD or YYYYMMDD
     :param q: queue used for inter-process communication
     :return: None
         On success the result of cls._calc_factor_loading, tagged with the
         stock symbol under the key 'id', is put on the queue; nothing is
         queued when the calculation fails or returns None.
     """
     date_str = Utils.datetimelike_to_str(calc_date)
     logging.info('[%s] Calc Momentum factor of %s.' % (date_str, code))
     try:
         result = cls._calc_factor_loading(code, calc_date)
     except Exception as err:
         # Best effort: report the failure and queue nothing for this stock.
         print(err)
         return
     if result is None:
         return
     result['id'] = Utils.code_to_symbol(code)
     q.put(result)
Ejemplo n.º 22
0
 def _calc_factor_loading_proc1(cls, code, calc_date, q):
     """
     Worker used to compute the CYQ factor loading in parallel.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like or str
         Calculation date, format: YYYY-MM-DD
     :param q: queue used for inter-process communication
     :return: None
         On success the result of cls._calc_factor_loading, tagged with the
         stock symbol under the key 'id', is put on the queue; nothing is
         queued when the calculation fails or returns None.
     """
     date_str = Utils.datetimelike_to_str(calc_date)
     logging.info('[%s] Calc CYQ factor of %s.' % (date_str, code))
     try:
         result = cls._calc_factor_loading(code, calc_date)
     except Exception as err:
         # Best effort: report the failure and queue nothing for this stock.
         print(err)
         return
     if result is None:
         return
     result['id'] = Utils.code_to_symbol(code)
     q.put(result)
Ejemplo n.º 23
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the growth factors (npg_ttm, opg_ttm) of one stock on one date.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like or str
         Calculation date, format YYYY-MM-DD or YYYYMMDD
     :return: pd.Series
     --------
         Growth factor values
         0. id: security code
         1. npg_ttm: TTM net profit growth rate (rounded to 4 decimals)
         2. opg_ttm: TTM operating revenue growth rate (rounded to 4 decimals)
         Returns None when TTM data is missing for either period or when a
         prior-year base value is smaller than 0.1 in absolute value.
     """
     code = Utils.code_to_symbol(code)
     calc_date = Utils.to_date(calc_date)

     # Latest TTM financial data
     latest = Utils.get_ttm_fin_basic_data(code, calc_date)
     if latest is None:
         return None

     # Same date one year earlier; Feb 29 has no counterpart in the previous
     # year, so step back 366 days in that case.
     try:
         pre_date = datetime.datetime(calc_date.year-1, calc_date.month, calc_date.day)
     except ValueError:
         pre_date = calc_date - datetime.timedelta(days=366)
     previous = Utils.get_ttm_fin_basic_data(code, pre_date)
     if previous is None:
         return None

     # Net profit growth; a near-zero base would make the ratio meaningless.
     base_profit = previous['NetProfit']
     if abs(base_profit) < 0.1:
         return None
     npg_ttm = (latest['NetProfit'] - base_profit) / abs(base_profit)

     # Operating revenue growth, same rule for the base.
     base_revenue = previous['MainOperateRevenue']
     if abs(base_revenue) < 0.1:
         return None
     opg_ttm = (latest['MainOperateRevenue'] - base_revenue) / abs(base_revenue)

     values = [code, round(npg_ttm, 4), round(opg_ttm, 4)]
     return Series(values, index=['id', 'npg_ttm', 'opg_ttm'])
Ejemplo n.º 24
0
 def _calc_factor_loading_proc(cls, code, calc_date, q):
     """
     Worker used to compute the market-capitalization factor loading in parallel.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like or str
         Calculation date, format YYYY-MM-DD or YYYYMMDD
     :param q: queue used for inter-process communication
     :return: None
         On success the tuple (symbol, LnTotalMktCap, LnLiquidMktCap) is put
         on the queue; nothing is queued when the calculation fails or
         returns None.
     """
     date_str = Utils.datetimelike_to_str(calc_date)
     logging.info('[%s] Calc market capitalization of %s' % (date_str, code))
     try:
         caps = cls._calc_factor_loading(code, calc_date)
     except Exception as err:
         # Best effort: report the failure and queue nothing for this stock.
         print(err)
         return
     if caps is None:
         return
     q.put((Utils.code_to_symbol(code), caps.LnTotalMktCap, caps.LnLiquidMktCap))
Ejemplo n.º 25
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the BLEV (book leverage) factor loading of one stock on one date.

        Parameters:
        --------
        :param code: str
            Stock code, e.g. SH600000 or 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            BLEV factor loading of the stock
            0. code
            1. blev
            Returns None when the calculation fails (missing summary data,
            NaN or near-zero shareholder equity, or no usable liability figure).
        """
        code = Utils.code_to_symbol(code)
        report_date = Utils.get_fin_report_date(calc_date)
        # Latest financial statement summary for the stock
        summary = Utils.get_fin_summary_data(code, report_date)
        if summary is None:
            return None

        # Book equity is the denominator, so it must be a usable non-zero number.
        book_equity = summary['TotalShareholderEquity']
        if np.isnan(book_equity) or abs(book_equity) < utils_con.TINY_ABS_VALUE:
            return None

        # Long-term debt: non-current liabilities, falling back to total liabilities.
        long_debt = summary['TotalNonCurrentLiabilities']
        if np.isnan(long_debt):
            long_debt = summary['TotalLiabilities']
            if np.isnan(long_debt):
                return None

        # The preferred-equity term is fixed at zero here.
        preferred_equity = 0
        # blev = (be + pe + ld) / be
        blev = (book_equity + preferred_equity + long_debt) / book_equity

        return pd.Series([code, blev], index=['code', 'blev'])
Ejemplo n.º 26
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the ETOP (earnings-to-price) factor loading of one stock on one date.

        Parameters:
        --------
        :param code: str
            Stock code, e.g. SH600000 or 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            ETOP factor loading of the stock
            0. code
            1. etop
            Returns None when the calculation fails (no TTM data, NaN net
            profit, or no stored LNCAP loading for the date).
        """
        code = Utils.code_to_symbol(code)

        # TTM net profit of the stock
        ttm_fin_data = Utils.get_ttm_fin_basic_data(code, calc_date)
        if ttm_fin_data is None:
            return None
        net_profit = ttm_fin_data['NetProfit']
        if np.isnan(net_profit):
            return None

        # Market cap is recovered from the stored log-cap (LNCAP) factor loading.
        lncap_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                  risk_ct.LNCAP_CT.db_file)
        date_key = Utils.datetimelike_to_str(calc_date, dash=False)
        lncap_data = Utils.read_factor_loading(lncap_path, date_key, code)
        if lncap_data.empty:
            return None
        market_cap = np.exp(lncap_data['factorvalue'])

        # etop = TTM net profit / market cap
        # NOTE(review): the 10000 multiplier presumably converts net profit
        # into the market cap's unit - confirm against the data source.
        etop = net_profit * 10000 / market_cap

        return pd.Series([code, etop], index=['code', 'etop'])
Ejemplo n.º 27
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the size factor values of one stock on one date.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like, str
         Calculation date of the size factor, format YYYY-MM-DD or YYYYMMDD
     :return: pd.Series
     --------
         Size factor values of the stock, indexed as follows:
         0. LnTotalMktCap: log of total market cap
         1. LnLiquidMktCap: log of liquid (tradable) market cap
         Returns None when the calculation fails (no market data, no capital
         structure data, or a non-positive market cap whose log is undefined).
     """
     # Latest unadjusted market data up to the given date
     code = Utils.code_to_symbol(code)
     calc_date = Utils.to_date(calc_date)
     mkt_daily = Utils.get_secu_daily_mkt(code,
                                          calc_date,
                                          fq=False,
                                          range_lookup=True)
     if mkt_daily is None or mkt_daily.shape[0] == 0:
         return None
     # Latest capital structure data up to the given date
     cap_struct = Utils.get_cap_struct(code, calc_date)
     if cap_struct is None:
         return None
     # Total share count excludes the liquid_b and liquid_h share classes.
     total_cap = cap_struct.total - cap_struct.liquid_b - cap_struct.liquid_h
     try:
         ln_total = math.log(total_cap * mkt_daily.close)
         ln_liquid = math.log(cap_struct.liquid_a * mkt_daily.close)
     except ValueError:
         # math.log raises ValueError for zero/negative input; honour the
         # documented "return None on failure" contract instead of raising.
         return None
     # Build the Series in one call rather than growing a dtype-less empty
     # Series(), whose default dtype is deprecated/changed across pandas versions.
     return Series([ln_total, ln_liquid],
                   index=['LnTotalMktCap', 'LnLiquidMktCap'])
Ejemplo n.º 28
0
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Compute the SmartMoney factor loadings of the sample stocks for the
        given date(s) and optionally save them to the factor database.

        Parameters
        --------
        :param start_date: datetime-like, str
            Start date
        :param end_date: datetime-like, str, default None
            End date; if None, only the loading for start_date is computed
        :param month_end: bool, default True
            Only compute loadings on month-end dates
        :param save: bool, default False
            Whether to save the loadings to the factor database
        :param kwargs:
            'multi_proc': bool, True=compute in parallel with a process pool,
            False=single process, default False
        :return: dict or None
        --------
            Raw (non-standardized) factor loadings of the last date that was
            actually computed, as a dict of equal-length lists:
            0. date: label date (per the original documentation, the trading
               day following the calculation date)
            1. id: security code
            2. factorvalue: factor loading
            Returns None when no date was computed.
        """
        # 0. Build the sequence of trading days
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        multi_proc = kwargs.get('multi_proc', False)

        # Iterate over trading days and compute the SmartQ factor loading.
        # dict_factor is only replaced once a date is actually computed, so
        # skipped (non month-end) days never wipe out the last result.
        dict_factor = None
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            # 1. Sample stocks, selected by the date 90 days before calc_date
            s = (calc_date - datetime.timedelta(days=90)).strftime('%Y%m%d')
            stock_basics = Utils.get_stock_basics(s)
            # 2. Compute the factor loading of every sample stock
            dict_factor = {'date': None, 'id': [], 'factorvalue': []}

            if not multi_proc:
                # Single-process computation
                for _, stock_info in stock_basics.iterrows():
                    factor_loading = cls._calc_factor_loading(
                        stock_info.symbol, calc_date)
                    print(
                        "[%s]Calculating %s's SmartMoney factor loading = %.4f."
                        % (calc_date.strftime('%Y-%m-%d'), stock_info.symbol,
                           -1.0 if factor_loading is None else factor_loading))
                    if factor_loading is not None:
                        dict_factor['id'].append(
                            Utils.code_to_symbol(stock_info.symbol))
                        dict_factor['factorvalue'].append(factor_loading)
            else:
                # Parallel computation; workers report (id, loading) tuples
                # through a managed queue.
                q = Manager().Queue()
                p = Pool(4)  # at most 4 worker processes
                for _, stock_info in stock_basics.iterrows():
                    p.apply_async(cls._calc_factor_loading_proc,
                                  args=(
                                      stock_info.symbol,
                                      calc_date,
                                      q,
                                  ))
                p.close()
                p.join()
                # All workers have finished, so the queue can be drained.
                while not q.empty():
                    smart_q = q.get(True)
                    dict_factor['id'].append(smart_q[0])
                    dict_factor['factorvalue'].append(smart_q[1])

            date_label = Utils.get_trading_days(calc_date, ndays=2)[1]
            dict_factor['date'] = [date_label] * len(dict_factor['id'])
            # 3. Outlier-treated, standardized factor loadings
            df_std_factor = Utils.normalize_data(pd.DataFrame(dict_factor),
                                                 columns='factorvalue',
                                                 treat_outlier=True,
                                                 weight='eq')
            # 4. Persist both raw and standardized loadings
            if save:
                date_key = Utils.datetimelike_to_str(calc_date, dash=False)
                cls._save_factor_loading(cls._db_file,
                                         date_key,
                                         dict_factor,
                                         'SmartMoney',
                                         factor_type='raw',
                                         columns=['date', 'id', 'factorvalue'])
                cls._save_factor_loading(cls._db_file,
                                         date_key,
                                         df_std_factor,
                                         'SmartMoney',
                                         factor_type='standardized',
                                         columns=['date', 'id', 'factorvalue'])
            # Pause for 360 seconds between computed dates
            logging.info('Suspending for 360s.')
            time.sleep(360)
        return dict_factor
Ejemplo n.º 29
0
    def calc_factor_loading(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
        """
        Compute the DASTD factor loadings of the sample stocks for the given
        date(s) and optionally save them to the factor database.

        Parameters:
        --------
        :param start_date: datetime-like, str
            Start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            End date, format: YYYY-MM-DD or YYYYMMDD; if None, only the
            loading for start_date is computed
        :param month_end: bool, default True
            If True, only compute loadings on month-end dates
        :param save: bool, default False
            Whether to save the loadings to the factor database
        :param kwargs:
            'multi_proc': bool, True=multi-process, False=single process, default False
        :return: dict
            Factor loadings of the last computed date with keys 'date', 'id'
            and 'factorvalue'; None when no date was computed.
        """
        # Trading-day sequence and the full stock basics table
        start_date = Utils.to_date(start_date)
        if end_date is None:
            trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)
        else:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date, end=end_date)
        all_stock_basics = CDataHandler.DataApi.get_secu_basics()
        use_multi_proc = kwargs.get('multi_proc', False)

        dict_dastd = None
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            logging.info('[%s] Calc DASTD factor loading.' % Utils.datetimelike_to_str(calc_date))
            # Keep only stocks listed before the cutoff date
            # (calc_date minus DASTD_CT.listed_days calendar days).
            cutoff = (calc_date - datetime.timedelta(days=risk_ct.DASTD_CT.listed_days)).strftime('%Y%m%d')
            stock_basics = all_stock_basics[all_stock_basics.list_date < cutoff]
            ids, dastds = [], []    # stock codes and their DASTD values

            if use_multi_proc:
                # Parallel computation; workers push their result Series
                # onto a managed queue.
                q = Manager().Queue()
                p = Pool(4)         # at most 4 worker processes
                for _, stock_info in stock_basics.iterrows():
                    p.apply_async(cls._calc_factor_loading_proc, args=(stock_info.symbol, calc_date, q,))
                p.close()
                p.join()
                while not q.empty():
                    item = q.get(True)
                    ids.append(item['code'])
                    dastds.append(item['dastd'])
            else:
                # Single-process computation; failed stocks are recorded as NaN.
                for _, stock_info in stock_basics.iterrows():
                    logging.info("[%s] Calc %s's DASTD factor loading." % (calc_date.strftime('%Y-%m-%d'), stock_info.symbol))
                    dastd_data = cls._calc_factor_loading(stock_info.symbol, calc_date)
                    if dastd_data is not None:
                        ids.append(dastd_data['code'])
                        dastds.append(dastd_data['dastd'])
                    else:
                        ids.append(Utils.code_to_symbol(stock_info.symbol))
                        dastds.append(np.nan)

            date_label = Utils.get_trading_days(start=calc_date, ndays=2)[1]
            dict_dastd = {'date': [date_label]*len(ids), 'id': ids, 'factorvalue': dastds}
            if save:
                Utils.factor_loading_persistent(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False), dict_dastd, ['date', 'id', 'factorvalue'])
            # Only the message is logged; no pause is actually performed here.
            logging.info('Suspending for 180s.')
        return dict_dastd
Ejemplo n.º 30
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the CMRA factor loading of one stock on one date.

        The adjusted close is sampled every CMRA_CT.days_scale trading days over
        a window of CMRA_CT.trailing periods; each sample is divided by the
        first close in the window, and CMRA is log(max ratio) - log(min ratio).

        Parameter:
        --------
        :param code: str
            Stock code, e.g. SH600000 or 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            CMRA factor loading of the stock
            0. code
            1. cmra
            Returns None when the calculation fails: no quote data, fewer
            quotes than half of the required window, or no usable sample point.
        """
        trailing = risk_ct.CMRA_CT.trailing
        days_scale = risk_ct.CMRA_CT.days_scale
        window = trailing * days_scale

        # Trading-day calendar covering the window (window + 1 days)
        trading_days = Utils.get_trading_days(end=calc_date, ndays=window+1)
        trading_days = [day.strftime('%Y-%m-%d') for day in trading_days]
        # Adjusted daily quotes of the stock
        df_secu_quote = Utils.get_secu_daily_mkt(code, end=calc_date, fq=True)
        if df_secu_quote is None:
            return None
        # Keep only the quotes that fall on the calendar days of the window.
        # reset_index returns a new frame, avoiding in-place mutation of a slice.
        df_secu_quote = df_secu_quote[df_secu_quote['date'].isin(trading_days)].reset_index(drop=True)
        if df_secu_quote.empty or len(df_secu_quote) < int(window/2):
            # Fewer quotes than half of the required window: not computable.
            return None

        first_date = df_secu_quote.iloc[0]['date']
        base_close = df_secu_quote.iloc[0]['close']
        prev_trading_day = first_date
        # Price change ratios (1 + r) relative to the first close, one per period
        z = []
        for t in range(1, trailing+1):
            trading_day = trading_days[t * days_scale]
            if trading_day < first_date:
                # Sample point lies before the stock's first available quote.
                continue
            # Last day on or before the sample point on which the stock has a quote
            secu_trading_day = df_secu_quote[df_secu_quote['date'] <= trading_day].iloc[-1]['date']
            if secu_trading_day <= prev_trading_day:
                # No new quote since the previous sample point.
                continue
            close = df_secu_quote[df_secu_quote['date'] == secu_trading_day].iloc[0]['close']
            z.append(close / base_close)
            prev_trading_day = secu_trading_day

        if not z:
            # max()/min() raise ValueError on an empty list; report failure instead.
            return None
        cmra = math.log(max(z)) - math.log(min(z))
        return pd.Series([Utils.code_to_symbol(code), cmra], index=['code', 'cmra'])