Exemple #1
0
def industry_neutral(factor, data_source, config):
    """Industry-neutral grouping.

    For every date cross-section the stocks are split by industry, and
    inside each industry ``_add_group_id`` assigns the group ids. The
    combined result is stored in
    ``factor.grouping_info['industry_neutral']``; nothing is returned.

    Args:
        factor: object exposing ``data`` (a frame indexed by date at level 0
            and stock id at level 1), ``direction``, ``axe`` (passed to
            ``_add_group_id`` as the factor name) and a ``grouping_info``
            mapping.
        data_source: object exposing
            ``sector.get_stock_industry_info(ids=..., industry=..., dates=...)``.
        config: object exposing ``total_groups`` and
            ``neutral_industry_used``.
    """
    n_groups = config.total_groups
    industry_name = config.neutral_industry_used
    industry_axe = parse_industry(industry_name)
    data = factor.data
    # Grouping is done in ascending order only when the direction is -1.
    is_ascending = factor.direction == -1

    # Use the labels actually present in the index. ``index.levels`` can
    # still hold labels that were filtered out of the data, which would make
    # the per-date lookup below raise KeyError.
    all_ids = data.index.get_level_values(1).unique().tolist()
    all_dates = data.index.get_level_values(0).unique().tolist()
    industry_info = data_source.sector.get_stock_industry_info(
        ids=all_ids, industry=industry_name, dates=all_dates)
    common = data.merge(industry_info,
                        left_index=True,
                        right_index=True,
                        how='left')

    group_info_list = []
    for date in all_dates:
        # ``.ix`` was removed from pandas; ``.loc`` performs the same
        # label-based cross-section on the first index level.
        section_factor = common.loc[date]
        group_id = section_factor.groupby(
            industry_axe, group_keys=False).apply(_add_group_id,
                                                  factor_name=factor.axe,
                                                  n_groups=n_groups,
                                                  ascending=is_ascending)
        # Put the date back as the first index level before stacking.
        group_id.index = pd.MultiIndex.from_product([[date], group_id.index])
        group_info_list.append(group_id)
    grouping_info = pd.concat(group_info_list)
    grouping_info.index.names = ['date', 'IDs']
    factor.grouping_info['industry_neutral'] = grouping_info
 def get_stock_industry_info(self,
                             ids,
                             industry='中信一级',
                             start_date=None,
                             end_date=None,
                             dates=None):
     """Stock industry information.

     Loads the factor named by ``parse_industry(industry)`` from the
     '/indexes/' location of ``self.h5DB`` for the requested ids and dates,
     then passes it through ``get_industry_names``.

     When ``dates`` is None the dates come from
     ``self.trade_calendar.get_trade_days(start_date, end_date)``. A
     non-list ``dates`` value is wrapped in a one-element list.
     """
     if dates is None:
         dates = self.trade_calendar.get_trade_days(start_date, end_date)
     if not isinstance(dates, list):
         dates = [dates]
     symbol = parse_industry(industry)
     raw_info = self.h5DB.load_factor(symbol,
                                      '/indexes/',
                                      ids=ids,
                                      dates=dates)
     return get_industry_names(symbol, raw_info)
Exemple #3
0
def score_by_industry(factor_data, industry_name, factor_names=None, **kwargs):
    """Apply ``ScoringFactors`` to the factor data, one industry at a time.

    Args:
        factor_data: frame indexed by date (level 0) and stock id (level 1);
            after ``reset_index`` it must expose 'date' and 'IDs' columns.
        industry_name: industry classification name, translated into a
            column name by ``parse_industry``.
        factor_names: factors to score; defaults to every column of
            ``factor_data``.
        **kwargs: forwarded unchanged to ``ScoringFactors``.

    Returns:
        Whatever the grouped ``apply`` of ``ScoringFactors`` produces.
    """
    if factor_names is None:
        factor_names = list(factor_data.columns)
    industry_str = parse_industry(industry_name)

    index = factor_data.index
    all_ids = index.get_level_values(1).unique().tolist()
    all_dates = index.get_level_values(0).unique().tolist()

    # Match each stock's industry information with the factor data.
    industry_info = data_source.sector.get_stock_industry_info(
        all_ids, industry=industry_name, dates=all_dates)
    merged = pd.merge(factor_data.reset_index(),
                      industry_info.reset_index(),
                      how='left')

    by_industry = merged.set_index(['date', 'IDs']).groupby(
        industry_str, group_keys=False)
    return by_industry.apply(ScoringFactors, factors=factor_names, **kwargs)
 def get_index_industry_weight(self,
                               ids,
                               industry_name='中信一级',
                               start_date=None,
                               end_date=None,
                               dates=None):
     """Get the industry weights of an index.

     The result of ``self.get_index_weight`` is inner-joined with the
     industry information of its stocks, then the first weight column is
     summed per ('date', industry) pair.

     When ``dates`` is None the dates come from
     ``self.trade_calendar.get_trade_days(start_date, end_date)``.
     """
     symbol = parse_industry(industry_name)
     if dates is None:
         dates = self.trade_calendar.get_trade_days(start_date, end_date)

     index_weight = self.get_index_weight(ids, dates=dates)
     weight_column = index_weight.columns[0]
     member_stocks = list(index_weight.index.levels[1].unique())
     industry_data = self.get_stock_industry_info(member_stocks,
                                                  industry=industry_name,
                                                  dates=dates)

     # Keep only the (date, stock) rows present in both frames.
     merged = pd.concat([index_weight, industry_data], join='inner', axis=1)
     flat = merged.reset_index()
     return flat.groupby(['date', symbol])[weight_column].sum()
Exemple #5
0
def industry_neutral(factor, **kwargs):
    """Industry-neutral grouping.

    For every date cross-section the stocks are split by industry, and
    inside each industry ``_add_group_id`` assigns the group ids. The
    combined frame gets a two-level column index whose top level is
    'industry_neutral' and is handed to ``factor.group_info.from_frame``;
    nothing is returned.

    Args:
        factor: object exposing ``data`` (a frame indexed by date at level 0
            and stock id at level 1), ``direction``, ``name`` and
            ``group_info``.
        **kwargs: must contain ``total_groups``, ``industry_name`` and
            ``env`` (whose ``_data_source`` supplies the industry data).
    """
    n_groups = kwargs['total_groups']
    industry_name = kwargs['industry_name']
    industry_axe = parse_industry(industry_name)
    data = factor.data
    # Grouping is done in ascending order only when the direction is -1.
    is_ascending = factor.direction == -1
    data_source = kwargs['env']._data_source

    all_ids = data.index.get_level_values(1).unique().tolist()
    all_dates = data.index.get_level_values(0).unique().tolist()
    industry_info = data_source.sector.get_stock_industry_info(
        ids=all_ids, industry=industry_name, dates=all_dates)
    common = data.merge(industry_info,
                        left_index=True,
                        right_index=True,
                        how='left')

    group_info_list = []
    for date in all_dates:
        # ``.ix`` was removed from pandas; ``.loc`` performs the same
        # label-based cross-section on the first index level.
        section_factor = common.loc[date]
        group_id = section_factor.groupby(
            industry_axe, group_keys=False).apply(_add_group_id,
                                                  factor_name=factor.name,
                                                  n_groups=n_groups,
                                                  ascending=is_ascending)
        # Put the date back as the first index level before stacking.
        group_id.index = pd.MultiIndex.from_product([[date], group_id.index])
        group_info_list.append(group_id)
    grouping_info = pd.concat(group_info_list)
    grouping_info.index.names = ['date', 'IDs']
    grouping_info.columns = pd.MultiIndex.from_product([['industry_neutral'],
                                                        grouping_info.columns])
    factor.group_info.from_frame(grouping_info)
Exemple #6
0
def typical(factor,
            name,
            direction=None,
            industry_neutral=True,
            benchmark=None,
            industry_name='中信一级',
            prc=0.05,
            **kwargs):
    """Select the extreme-quantile stocks of a factor and weight them.

    Args:
        factor: frame whose ``reset_index()`` yields 'date' and 'IDs'
            columns plus the factor column ``name``.
        name: column holding the factor values.
        direction: 1 (or None) keeps stocks at or above the ``1 - prc``
            quantile; any other value keeps stocks at or below the ``prc``
            quantile.
        industry_neutral: when true, quantiles are taken per
            (date, industry) and weights follow the benchmark's industry
            weights; otherwise quantiles are taken per date and the
            selected stocks are equally weighted.
        benchmark: index id passed to ``get_index_industry_weight``
            (industry-neutral mode only).
        industry_name: industry classification name.
        prc: tail fraction used for the quantile cut.
        **kwargs: accepted and ignored.

    Returns:
        A one-column ('Weight') frame indexed by ('date', 'IDs').
    """
    # A missing direction is handled like direction 1 for BOTH the quantile
    # level and the comparison. Previously None flipped the quantile to
    # 1 - prc but still used ``<=``, selecting almost the whole universe.
    pick_top = direction == 1 or direction is None

    factor_data = factor.reset_index()
    prc = 1 - prc if pick_top else prc
    if industry_neutral:
        industry_str = parse_industry(industry_name)

        all_ids = factor_data['IDs'].unique().tolist()
        all_dates = factor_data['date'].unique().tolist()

        # Match each stock's industry information with the factor data.
        industry_info = data_source.sector.get_stock_industry_info(
            all_ids, industry=industry_name, dates=all_dates).reset_index()
        factor_data = pd.merge(factor_data, industry_info, how='left')
        quantile_per_industry = factor_data.groupby(['date', industry_str
                                                     ])[name].quantile(prc)
        quantile_per_industry.name = 'quantile_value'
        factor_data = factor_data.join(quantile_per_industry,
                                       on=['date', industry_str],
                                       how='left')

        # Stock selection; columns: date, IDs, factor value, industry,
        # industry quantile.
        if pick_top:
            stocks = factor_data[
                factor_data[name] >= factor_data['quantile_value']]
        else:
            stocks = factor_data[
                factor_data[name] <= factor_data['quantile_value']]

        # Weights: each industry's benchmark weight is split equally among
        # the stocks selected in that industry.
        benchmark_weight = data_source.sector.get_index_industry_weight(
            benchmark, industry_name=industry_name,
            dates=all_dates)  # industry weights of the benchmark index
        stock_counts_per_industry = stocks.groupby(['date', industry_str
                                                    ])['IDs'].count()
        weight = (benchmark_weight /
                  stock_counts_per_industry).rename('Weight')
        stocks = stocks.join(weight, on=['date', industry_str],
                             how='left').set_index(['date', 'IDs'])[['Weight']]
        stocks = stocks.dropna()
        # Rescale so the weights of each date sum to 1. ``transform`` gives
        # a series already aligned with ``stocks``, so no implicit
        # MultiIndex-vs-flat-index alignment is needed.
        sum_weight = stocks.groupby(level=0)['Weight'].transform('sum')
        stocks['Weight'] = stocks['Weight'] / sum_weight
    else:
        quantile_per_date = factor_data.groupby('date')[name].quantile(prc)
        quantile_per_date.name = 'quantile_value'
        factor_data = factor_data.join(quantile_per_date,
                                       on='date',
                                       how='left')

        # Stock selection; columns: date, IDs, factor value, quantile.
        if pick_top:
            stocks = factor_data[
                factor_data[name] >= factor_data['quantile_value']]
        else:
            stocks = factor_data[
                factor_data[name] <= factor_data['quantile_value']]
        # Equal weight among the stocks selected on each date.
        stock_counts_per_date = stocks.groupby('date')['IDs'].count()
        stocks = stocks.join((1 / stock_counts_per_date).rename('Weight'),
                             on='date').set_index(['date', 'IDs'])
    return stocks[['Weight']]
Exemple #7
0
def typical(factor,
            industry_neutral=True,
            industry_name='中信一级',
            prc=0.05,
            **kwargs):
    """Select the extreme-quantile stocks of a factor and store the weights.

    The weights are merged into ``factor.stock_list['typical']``; when a
    (date, IDs) pair already exists there, the newly computed row replaces
    it. Nothing is returned.

    Args:
        factor: object exposing ``data`` (a frame whose ``reset_index()``
            yields 'date' and 'IDs' columns plus a column named
            ``factor.name``), ``direction``, ``name`` and a ``stock_list``
            mapping.
        industry_neutral: when true, quantiles are taken per
            (date, industry) and weights follow the benchmark's industry
            weights; otherwise quantiles are taken per date and the
            selected stocks are equally weighted.
        industry_name: industry classification name.
        prc: tail fraction used for the quantile cut.
        **kwargs: in industry-neutral mode must contain ``env``, which
            supplies ``_config.benchmark`` and ``_data_source``.
    """
    factor_data = factor.data.reset_index()
    prc = 1 - prc if factor.direction == 1 else prc
    if industry_neutral:
        industry_str = parse_industry(industry_name)

        all_ids = factor_data['IDs'].unique().tolist()
        all_dates = factor_data['date'].unique().tolist()

        env = kwargs['env']
        benchmark = env._config.benchmark

        # Match each stock's industry information with the factor data.
        industry_info = env._data_source.sector.get_stock_industry_info(
            all_ids, industry=industry_name, dates=all_dates).reset_index()
        factor_data = pd.merge(factor_data, industry_info, how='left')
        quantile_per_industry = factor_data.groupby(
            ['date', industry_str])[factor.name].quantile(prc)
        quantile_per_industry.name = 'quantile_value'
        factor_data = factor_data.join(quantile_per_industry,
                                       on=['date', industry_str],
                                       how='left')

        # Stock selection; columns: date, IDs, factor value, industry,
        # industry quantile.
        if factor.direction == 1:
            stocks = factor_data[
                factor_data[factor.name] >= factor_data['quantile_value']]
        else:
            stocks = factor_data[
                factor_data[factor.name] <= factor_data['quantile_value']]

        # Weights: each industry's benchmark weight is split equally among
        # the stocks selected in that industry.
        benchmark_weight = env._data_source.sector.get_index_industry_weight(
            benchmark, industry_name=industry_name,
            dates=all_dates)  # industry weights of the benchmark index
        stock_counts_per_industry = stocks.groupby(['date', industry_str
                                                    ])['IDs'].count()
        weight = (benchmark_weight /
                  stock_counts_per_industry).rename('Weight')
        stocks = stocks.join(weight, on=['date', industry_str],
                             how='left').set_index(['date', 'IDs'])[['Weight']]
        stocks = stocks.dropna()
        # Rescale so the weights of each date sum to 1. ``transform`` gives
        # a series already aligned with ``stocks``.
        sum_weight = stocks.groupby(level=0)['Weight'].transform('sum')
        stocks['Weight'] = stocks['Weight'] / sum_weight
    else:
        quantile_per_date = factor_data.groupby('date')[factor.name].quantile(
            prc)
        quantile_per_date.name = 'quantile_value'
        factor_data = factor_data.join(quantile_per_date,
                                       on='date',
                                       how='left')

        # Stock selection; columns: date, IDs, factor value, quantile.
        if factor.direction == 1:
            stocks = factor_data[
                factor_data[factor.name] >= factor_data['quantile_value']]
        else:
            stocks = factor_data[
                factor_data[factor.name] <= factor_data['quantile_value']]
        # Equal weight among the stocks selected on each date. The weights
        # are joined on the 'date' column BEFORE the index is set: assigning
        # a date-indexed series to a (date, IDs)-indexed frame does not
        # broadcast over the date level and would leave NaN weights.
        stock_counts_per_date = stocks.groupby('date')['IDs'].count()
        stocks = stocks.join((1 / stock_counts_per_date).rename('Weight'),
                             on='date').set_index(['date', 'IDs'])

    new_weights = stocks[['Weight']]
    previous = (factor.stock_list['typical']
                if 'typical' in factor.stock_list else None)
    if previous is None or previous.empty:
        # Nothing stored yet: skip the concat so the index names survive.
        merged = new_weights
    else:
        # ``DataFrame.append`` was removed from pandas; ``pd.concat`` is the
        # equivalent that works on old and new versions alike.
        merged = pd.concat([previous, new_weights])
    # On duplicated (date, IDs) pairs the most recent weights win.
    merged = merged[~merged.index.duplicated(keep='last')]
    factor.stock_list['typical'] = merged