def industry_neutral(factor, data_source, config):
    """Group stocks within each industry on every date (industry-neutral).

    For each date present in ``factor.data`` the cross-section is split by
    industry and ``_add_group_id`` is applied to every industry slice. The
    per-date results are stacked into one frame indexed by ``(date, IDs)``
    and stored in ``factor.grouping_info['industry_neutral']``.

    Args:
        factor: object exposing ``data`` (frame with a two-level index whose
            first level is the date and second the stock id), ``direction``,
            ``axe`` (passed to ``_add_group_id`` as ``factor_name``) and a
            writable ``grouping_info`` mapping.
        data_source: object exposing ``sector.get_stock_industry_info``.
        config: object exposing ``total_groups`` and
            ``neutral_industry_used``.

    Returns:
        None. The result is written to ``factor.grouping_info``.
    """
    n_groups = config.total_groups
    industry_name = config.neutral_industry_used
    industry_axe = parse_industry(industry_name)
    data = factor.data
    # A direction of -1 is ranked ascending; anything else descending.
    is_ascending = bool(factor.direction == -1)

    # ``levels`` keeps labels that no longer occur once the frame has been
    # filtered; looking such a date up below would raise KeyError, so drop
    # the unused labels first (the order of the remaining ones is kept).
    index = data.index.remove_unused_levels()
    all_ids = index.levels[1].unique().tolist()
    all_dates = index.levels[0].tolist()

    industry_info = data_source.sector.get_stock_industry_info(
        ids=all_ids, industry=industry_name, dates=all_dates)
    common = data.merge(industry_info, left_index=True, right_index=True,
                        how='left')

    group_info_list = []
    for date in all_dates:
        # ``.loc`` replaces the ``.ix`` indexer, which pandas removed in 1.0.
        section_factor = common.loc[date]
        group_id = section_factor.groupby(
            industry_axe, group_keys=False).apply(
                _add_group_id, factor_name=factor.axe, n_groups=n_groups,
                ascending=is_ascending)
        # Put the date back as the outer index level before stacking.
        group_id.index = pd.MultiIndex.from_product([[date], group_id.index])
        group_info_list.append(group_id)

    grouping_info = pd.concat(group_info_list)
    grouping_info.index.names = ['date', 'IDs']
    factor.grouping_info['industry_neutral'] = grouping_info
def get_stock_industry_info(self, ids, industry='中信一级', start_date=None,
                            end_date=None, dates=None):
    """Load the industry classification of the given stocks.

    Args:
        ids: stock ids forwarded to ``self.h5DB.load_factor``.
        industry: name of the industry classification; translated to a
            storage symbol by ``parse_industry``.
        start_date, end_date: bounds passed to
            ``self.trade_calendar.get_trade_days``; only used when ``dates``
            is None.
        dates: explicit date or list of dates; a non-list value is wrapped
            in a single-element list.

    Returns:
        Whatever ``get_industry_names`` returns for the loaded data.
    """
    if dates is None:
        dates = self.trade_calendar.get_trade_days(start_date, end_date)
    if not isinstance(dates, list):
        dates = [dates]

    symbol = parse_industry(industry)
    raw_industry = self.h5DB.load_factor(symbol, '/indexes/', ids=ids,
                                         dates=dates)
    return get_industry_names(symbol, raw_industry)
def score_by_industry(factor_data, industry_name, factor_names=None, **kwargs):
    """Apply ``ScoringFactors`` to the factor data one industry at a time.

    Args:
        factor_data: frame with a two-level index; level 0 is read as the
            dates and level 1 as the stock ids. After ``reset_index`` the
            frame must expose ``date`` and ``IDs`` columns.
        industry_name: industry classification name, resolved to a column
            name through ``parse_industry``.
        factor_names: factor columns to score; defaults to every column of
            ``factor_data``.
        **kwargs: forwarded unchanged to ``ScoringFactors``.

    Returns:
        The result of the per-industry ``groupby(...).apply``.
    """
    if factor_names is None:
        factor_names = list(factor_data.columns)
    industry_str = parse_industry(industry_name)

    index = factor_data.index
    all_ids = index.get_level_values(1).unique().tolist()
    all_dates = index.get_level_values(0).unique().tolist()

    # Attach each stock's industry to its factor rows.
    industry_info = data_source.sector.get_stock_industry_info(
        all_ids, industry=industry_name, dates=all_dates)
    industry_flat = industry_info.reset_index()
    factor_flat = factor_data.reset_index()
    merged = pd.merge(factor_flat, industry_flat, how='left')

    by_industry = merged.set_index(['date', 'IDs']).groupby(
        industry_str, group_keys=False)
    return by_industry.apply(ScoringFactors, factors=factor_names, **kwargs)
def get_index_industry_weight(self, ids, industry_name='中信一级',
                              start_date=None, end_date=None, dates=None):
    """Aggregate an index's constituent weights by date and industry.

    Args:
        ids: index id(s) forwarded to ``self.get_index_weight``.
        industry_name: industry classification name, resolved to a column
            name through ``parse_industry``.
        start_date, end_date: bounds passed to
            ``self.trade_calendar.get_trade_days``; only used when ``dates``
            is None.
        dates: explicit dates to use instead of the trade calendar.

    Returns:
        Series holding the sum of the first column of the index-weight frame
        for every ``(date, industry)`` pair.
    """
    symbol = parse_industry(industry_name)
    if dates is None:
        dates = self.trade_calendar.get_trade_days(start_date, end_date)

    index_weight = self.get_index_weight(ids, dates=dates)
    member_stocks = list(index_weight.index.levels[1].unique())
    industry_data = self.get_stock_industry_info(
        member_stocks, industry=industry_name, dates=dates)

    # Keep only the rows that have both a weight and an industry.
    joined = pd.concat([index_weight, industry_data], join='inner', axis=1)
    weight_column = index_weight.columns[0]
    grouped = joined.reset_index().groupby(['date', symbol])
    return grouped[weight_column].sum()
def industry_neutral(factor, **kwargs):
    """Group stocks within each industry on every date (industry-neutral).

    For each date present in ``factor.data`` the cross-section is split by
    industry and ``_add_group_id`` is applied to every industry slice. The
    stacked result, indexed by ``(date, IDs)`` and with its columns nested
    under ``'industry_neutral'``, is handed to
    ``factor.group_info.from_frame``.

    Args:
        factor: object exposing ``data`` (frame with a two-level index whose
            first level is the date and second the stock id), ``direction``,
            ``name`` and ``group_info``.
        **kwargs: must contain ``total_groups``, ``industry_name`` and
            ``env`` (whose ``_data_source`` attribute is used).

    Returns:
        None. The result is passed to ``factor.group_info.from_frame``.

    Raises:
        KeyError: if one of the required keyword arguments is missing.
    """
    n_groups = kwargs['total_groups']
    industry_name = kwargs['industry_name']
    industry_axe = parse_industry(industry_name)
    data = factor.data
    # A direction of -1 is ranked ascending; anything else descending.
    is_ascending = bool(factor.direction == -1)
    data_source = kwargs['env']._data_source

    all_ids = data.index.get_level_values(1).unique().tolist()
    all_dates = data.index.get_level_values(0).unique().tolist()
    industry_info = data_source.sector.get_stock_industry_info(
        ids=all_ids, industry=industry_name, dates=all_dates)
    common = data.merge(industry_info, left_index=True, right_index=True,
                        how='left')

    group_info_list = []
    for date in all_dates:
        # ``.loc`` replaces the ``.ix`` indexer, which pandas removed in 1.0.
        section_factor = common.loc[date]
        group_id = section_factor.groupby(
            industry_axe, group_keys=False).apply(
                _add_group_id, factor_name=factor.name, n_groups=n_groups,
                ascending=is_ascending)
        # Put the date back as the outer index level before stacking.
        group_id.index = pd.MultiIndex.from_product([[date], group_id.index])
        group_info_list.append(group_id)

    grouping_info = pd.concat(group_info_list)
    grouping_info.index.names = ['date', 'IDs']
    grouping_info.columns = pd.MultiIndex.from_product(
        [['industry_neutral'], grouping_info.columns])
    factor.group_info.from_frame(grouping_info)
def typical(factor, name, direction=None, industry_neutral=True,
            benchmark=None, industry_name='中信一级', prc=0.05, **kwargs):
    """Select the extreme-quantile stocks of a factor and weight them.

    Args:
        factor: frame whose index levels are named ``date`` and ``IDs`` and
            which has a column called ``name``.
        name: factor column to rank on.
        direction: ``1`` (or ``None``, treated the same way) keeps the
            stocks at or above the ``1 - prc`` quantile; any other value
            keeps the stocks at or below the ``prc`` quantile.
        industry_neutral: when true, quantiles are taken per
            ``(date, industry)`` and the weights follow the benchmark's
            industry weights; otherwise quantiles are taken per date and the
            selected stocks are equally weighted.
        benchmark: index id passed to ``get_index_industry_weight``; only
            used in the industry-neutral branch.
        industry_name: industry classification name.
        prc: tail fraction to select.
        **kwargs: accepted for interface compatibility; unused.

    Returns:
        Frame indexed by ``(date, IDs)`` with a single ``Weight`` column.
    """
    factor_data = factor.reset_index()
    # ``None`` means the default direction (1). The quantile level and the
    # comparison below must agree on that; otherwise the default call keeps
    # everything *below* the upper quantile instead of the top tail.
    select_top = direction == 1 or direction is None
    prc = 1 - prc if select_top else prc

    if industry_neutral:
        industry_str = parse_industry(industry_name)
        all_ids = factor_data['IDs'].unique().tolist()
        all_dates = factor_data['date'].unique().tolist()

        # Match each stock's industry to its factor rows.
        industry_info = data_source.sector.get_stock_industry_info(
            all_ids, industry=industry_name, dates=all_dates).reset_index()
        factor_data = pd.merge(factor_data, industry_info, how='left')
        quantile_per_industry = factor_data.groupby(
            ['date', industry_str])[name].quantile(prc)
        quantile_per_industry.name = 'quantile_value'
        factor_data = factor_data.join(
            quantile_per_industry, on=['date', industry_str], how='left')

        # Stock selection; columns: date, IDs, factor value, industry,
        # industry quantile.
        if select_top:
            stocks = factor_data[
                factor_data[name] >= factor_data['quantile_value']]
        else:
            stocks = factor_data[
                factor_data[name] <= factor_data['quantile_value']]

        # Weighting: spread each industry's benchmark weight evenly over the
        # stocks selected in that industry.
        benchmark_weight = data_source.sector.get_index_industry_weight(
            benchmark, industry_name=industry_name, dates=all_dates)
        stock_counts_per_industry = stocks.groupby(
            ['date', industry_str])['IDs'].count()
        weight = (benchmark_weight / stock_counts_per_industry).rename(
            'Weight')
        stocks = stocks.join(weight, on=['date', industry_str],
                             how='left').set_index(['date', 'IDs'])[['Weight']]
        # Reassign instead of dropping in place so the result is a frame of
        # its own rather than a slice of the joined frame.
        stocks = stocks.dropna()
        # Rescale so the remaining weights sum to one on every date.
        sum_weight = stocks.groupby(level=0)['Weight'].sum()
        stocks['Weight'] = stocks['Weight'] / sum_weight
    else:
        quantile_per_date = factor_data.groupby('date')[name].quantile(prc)
        quantile_per_date.name = 'quantile_value'
        factor_data = factor_data.join(quantile_per_date, on='date',
                                       how='left')

        # Stock selection; columns: date, IDs, factor value, quantile.
        if select_top:
            stocks = factor_data[
                factor_data[name] >= factor_data['quantile_value']]
        else:
            stocks = factor_data[
                factor_data[name] <= factor_data['quantile_value']]

        # Equal weight among the stocks selected on each date.
        stock_counts_per_date = stocks.groupby('date')['IDs'].count()
        stocks = stocks.join(1 / stock_counts_per_date.rename('Weight'),
                             on='date').set_index(['date', 'IDs'])
    return stocks[['Weight']]
def typical(factor, industry_neutral=True, industry_name='中信一级', prc=0.05,
            **kwargs):
    """Select the extreme-quantile stocks of a factor and store their weights.

    Args:
        factor: object exposing ``data`` (frame whose index levels are named
            ``date`` and ``IDs`` and which has a column named
            ``factor.name``), ``direction``, ``name`` and a ``stock_list``
            mapping.
        industry_neutral: when true, quantiles are taken per
            ``(date, industry)`` and the weights follow the benchmark's
            industry weights; otherwise quantiles are taken per date and the
            selected stocks are equally weighted.
        industry_name: industry classification name.
        prc: tail fraction to select. With ``factor.direction == 1`` the
            stocks at or above the ``1 - prc`` quantile are kept, otherwise
            those at or below the ``prc`` quantile.
        **kwargs: must contain ``env`` when ``industry_neutral`` is true
            (its ``_config.benchmark`` and ``_data_source`` are used).

    Returns:
        None. The ``(date, IDs)``-indexed ``Weight`` frame is merged into
        ``factor.stock_list['typical']``; rows for an index entry that was
        already stored are replaced by the new ones.
    """
    factor_data = factor.data.reset_index()
    select_top = factor.direction == 1
    prc = 1 - prc if select_top else prc

    if industry_neutral:
        industry_str = parse_industry(industry_name)
        all_ids = factor_data['IDs'].unique().tolist()
        all_dates = factor_data['date'].unique().tolist()
        env = kwargs['env']
        benchmark = env._config.benchmark

        # Match each stock's industry to its factor rows.
        industry_info = env._data_source.sector.get_stock_industry_info(
            all_ids, industry=industry_name, dates=all_dates).reset_index()
        factor_data = pd.merge(factor_data, industry_info, how='left')
        quantile_per_industry = factor_data.groupby(
            ['date', industry_str])[factor.name].quantile(prc)
        quantile_per_industry.name = 'quantile_value'
        factor_data = factor_data.join(
            quantile_per_industry, on=['date', industry_str], how='left')

        # Stock selection; columns: date, IDs, factor value, industry,
        # industry quantile.
        if select_top:
            stocks = factor_data[
                factor_data[factor.name] >= factor_data['quantile_value']]
        else:
            stocks = factor_data[
                factor_data[factor.name] <= factor_data['quantile_value']]

        # Weighting: spread each industry's benchmark weight evenly over the
        # stocks selected in that industry.
        benchmark_weight = env._data_source.sector.get_index_industry_weight(
            benchmark, industry_name=industry_name, dates=all_dates)
        stock_counts_per_industry = stocks.groupby(
            ['date', industry_str])['IDs'].count()
        weight = (benchmark_weight / stock_counts_per_industry).rename(
            'Weight')
        stocks = stocks.join(weight, on=['date', industry_str],
                             how='left').set_index(['date', 'IDs'])[['Weight']]
        # Reassign instead of dropping in place so the result is a frame of
        # its own rather than a slice of the joined frame.
        stocks = stocks.dropna()
        # Rescale so the remaining weights sum to one on every date.
        sum_weight = stocks.groupby(level=0)['Weight'].sum()
        stocks['Weight'] = stocks['Weight'] / sum_weight
    else:
        quantile_per_date = factor_data.groupby('date')[factor.name].quantile(
            prc)
        quantile_per_date.name = 'quantile_value'
        factor_data = factor_data.join(quantile_per_date, on='date',
                                       how='left')

        # Stock selection; columns: date, IDs, factor value, quantile.
        if select_top:
            stocks = factor_data[
                factor_data[factor.name] >= factor_data['quantile_value']]
        else:
            stocks = factor_data[
                factor_data[factor.name] <= factor_data['quantile_value']]

        # Equal weight among the stocks selected on each date. The counts
        # are indexed by date only, so they are joined on the ``date``
        # column *before* the (date, IDs) index is built; assigning them to
        # the multi-indexed frame directly cannot align and yields NaN.
        stock_counts_per_date = stocks.groupby('date')['IDs'].count()
        stocks = stocks.join((1 / stock_counts_per_date).rename('Weight'),
                             on='date').set_index(['date', 'IDs'])

    new_weights = stocks[['Weight']]
    previous = (factor.stock_list['typical']
                if 'typical' in factor.stock_list else None)
    if previous is not None and len(previous):
        # ``DataFrame.append`` was removed in pandas 2.0; ``concat`` is the
        # supported way to stack the frames.
        combined = pd.concat([previous, new_weights])
    else:
        # Nothing stored yet: skip the concat so the index names and dtypes
        # of the new frame are kept as they are.
        combined = new_weights
    # Newly computed rows win over rows stored earlier for the same key.
    combined = combined[~combined.index.duplicated(keep='last')]
    factor.stock_list['typical'] = combined