def get_shenwan_industry_label(stock_list, date):
    """Return the Shenwan industry name of each stock in ``stock_list``.

    The whole list is first sent to ``rqdatac.shenwan_instrument_industry``
    in one call.  Stocks that do not appear in the index of that result are
    then queried again one by one, and every successful lookup is added to
    the result.

    :param stock_list: order book ids to classify (a single id given as a
        plain string is accepted as well).
    :param date: date forwarded unchanged to ``rqdatac``.
    :return: ``pandas.Series`` named ``index_name``, indexed by order book id.
    """
    # A bare string would otherwise be split into characters by set()/list().
    stocks = [stock_list] if isinstance(stock_list, str) else list(stock_list)

    industry_classification = rqdatac.shenwan_instrument_industry(
        stock_list, date)

    # The per-stock lookups below rely on a single-stock query returning an
    # (index_code, index_name) tuple.  If the initial query comes back in that
    # shape there is no index to inspect, so build the Series directly instead
    # of failing on ``tuple.index.tolist()``.
    if isinstance(industry_classification, tuple):
        return pd.Series(industry_classification[1], index=stocks,
                         name='index_name')

    known_stocks = set(industry_classification.index.tolist())
    # dict.fromkeys de-duplicates while keeping the caller's order, so the
    # recovered rows are appended deterministically.
    missing_stocks = [s for s in dict.fromkeys(stocks) if s not in known_stocks]

    # When many stocks are passed at once, RQD currently does not search any
    # further for stocks lacking an industry label, so each of those stocks
    # has to be looked up individually.
    recovered_ids = []
    recovered_rows = []
    for stock in missing_stocks:
        single_classification = rqdatac.shenwan_instrument_industry(
            stock, date)
        if single_classification is not None:
            recovered_ids.append(stock)
            recovered_rows.append(
                [single_classification[0], single_classification[1]])

    if recovered_rows:
        # DataFrame.append was removed in pandas 2.0; concatenate once instead
        # of growing the frame row by row.
        recovered = pd.DataFrame(recovered_rows,
                                 index=recovered_ids,
                                 columns=['index_code', 'index_name'])
        industry_classification = pd.concat(
            [industry_classification, recovered])

    return industry_classification['index_name']
def get_shenwan_industry_exposure(stock_list, date):
    """Build a 0/1 Shenwan industry exposure matrix for ``stock_list``.

    NOTE(review): a second function with this exact name appears later in
    this source; if both live in the same module the later definition wins.

    :param stock_list: order book ids forwarded to ``rqdatac``.
    :param date: classification date; compared as a string against
        ``'2014-01-01'`` to choose the industry name list
        (assumes a 'YYYY-MM-DD' string -- TODO confirm).
    :return: tuple ``(stocks, exposure)`` where ``exposure`` is a DataFrame
        with one row per classified stock and one column per industry name,
        and ``stocks`` is the list of its row labels.
    """
    names_after_2014 = [
        '农林牧渔', '采掘', '化工', '钢铁', '有色金属', '电子', '家用电器',
        '食品饮料', '纺织服装', '轻工制造', '医药生物', '公用事业', '交通运输',
        '房地产', '商业贸易', '休闲服务', '综合', '建筑材料', '建筑装饰',
        '电气设备', '国防军工', '计算机', '传媒', '通信', '银行', '非银金融',
        '汽车', '机械设备',
    ]
    names_up_to_2014 = [
        '金融服务', '房地产', '医药生物', '有色金属', '餐饮旅游', '综合',
        '建筑建材', '家用电器', '交运设备', '食品饮料', '电子', '信息设备',
        '交通运输', '轻工制造', '公用事业', '机械设备', '纺织服装', '农林牧渔',
        '商业贸易', '化工', '信息服务', '采掘', '黑色金属',
    ]

    classification = rqdatac.shenwan_instrument_industry(stock_list, date)

    # Two different industry name lists are used on either side of 2014-01-01.
    industry_names = (names_after_2014 if date > '2014-01-01'
                      else names_up_to_2014)

    exposure = pd.DataFrame(0,
                            index=classification.index,
                            columns=industry_names)
    stock_industry = classification['index_name']
    for name in industry_names:
        # Flag every stock whose industry name equals this column.
        members = stock_industry[stock_industry == name].index
        exposure.loc[members, name] = 1

    return exposure.index.tolist(), exposure
def get_industry_matching(clean_order_book_ids,
                          matching_date,
                          matching_index='000300.XSHG'):
    """Split the portfolio weight between index matching and the optimizer.

    Industries that are present in ``matching_index`` but not covered by
    ``clean_order_book_ids`` keep the weights of their index components; the
    remaining weight is what is left for the optimizer.

    :param clean_order_book_ids: order book ids that were selected.
    :param matching_date: date used for the industry lookups and for the
        index component query.
    :param matching_index: index whose components are matched against.
    :return: tuple ``(optimizer_total_weight, industry_matching_weight)``;
        the second item is the ``percent`` column of the index components
        whose industry is not among the selected ones.
    """
    # Industry of every selected stock, looked up one stock at a time.
    selected = pd.DataFrame(index=clean_order_book_ids)
    industries = []
    for order_book_id in selected.index:
        industries.append(
            rqdatac.shenwan_instrument_industry(order_book_id, matching_date))
    selected['industry'] = industries
    covered_industries = set(selected['industry'])

    # Index components whose industry is not covered by the selection.
    index_components = get_index_component_industry_and_marketcap(
        matching_index, matching_date)
    is_covered = index_components['industry'].isin(covered_industries)
    uncovered_components = index_components.loc[~is_covered]
    industry_matching_weight = uncovered_components['percent']

    # Whatever weight is not used for matching is left for the optimizer.
    optimizer_total_weight = 1 - sum(industry_matching_weight)
    return optimizer_total_weight, industry_matching_weight
def to_constraint(self, order_book_ids, date):
    """Turn the Shenwan industry membership of ``order_book_ids`` into a constraint.

    A stock-by-industry matrix of 0/1 flags is built from the ``index_name``
    column returned by ``rqdatac.shenwan_instrument_industry`` and handed to
    ``_to_constraint2`` together with ``self._rules``.

    :param order_book_ids: order book ids to classify.
    :param date: date forwarded unchanged to ``rqdatac``.
    :return: tuple of the ``_to_constraint2`` result and the set of
        ``order_book_ids`` that have no row in the flag matrix.
    """
    classification = rqdatac.shenwan_instrument_industry(order_book_ids, date)
    membership = pd.DataFrame(classification['index_name'])
    # NOTE(review): the pivot below expects reset_index() to produce a column
    # literally called "index", i.e. an unnamed index -- confirm.
    membership = membership.reset_index()
    membership['values'] = 1

    pivoted = membership.pivot(index="index",
                               columns="index_name",
                               values="values")
    # Stocks that do not belong to an industry get 0 instead of NaN.
    dummy_variables = pivoted.replace(np.nan, 0)

    constraint = _to_constraint2(dummy_variables, self._rules)
    unclassified = set(order_book_ids) - set(dummy_variables.index.tolist())
    return constraint, unclassified
def get_index_component_industry_and_marketcap(matching_index, matching_date):
    """Return industry, market cap and cap weight of every index component.

    :param matching_index: index whose components are queried.
    :param matching_date: date used for the component list, the industry
        lookups and the market cap query.
    :return: DataFrame indexed by component with the columns ``industry``,
        ``market_cap`` and ``percent`` (market cap divided by the total
        market cap of all components), sorted by industry and market cap.
    """
    # Index components and the industry of each of them.
    components = rqdatac.index_components(matching_index, matching_date)
    matching_index_df = pd.DataFrame(index=components)
    matching_index_df['industry'] = [
        rqdatac.shenwan_instrument_industry(s, matching_date)
        for s in matching_index_df.index
    ]

    # Market cap of the components.
    market_cap = rqdatac.get_fundamentals(
        query(fundamentals.eod_derivative_indicator.market_cap).filter(
            fundamentals.eod_derivative_indicator.stockcode.in_(
                matching_index_df.index)),
        entry_date=matching_date)
    # NOTE(review): ``.items`` suggests the result is a Panel-like object
    # whose first item holds the market cap -- confirm against the rqdatac
    # version in use.
    market_cap_ = market_cap.loc[market_cap.items[0]].transpose()

    # Put industry and market cap side by side.
    matching_index_cap_df = pd.concat([matching_index_df, market_cap_],
                                      axis=1)

    # Rename the second column to 'market_cap'.  Assign a fresh label list
    # rather than writing into ``columns.values``: an Index is meant to be
    # immutable, and mutating its backing array in place does not refresh the
    # lookup structures pandas may have cached for it.
    column_labels = matching_index_cap_df.columns.tolist()
    column_labels[1] = 'market_cap'
    matching_index_cap_df.columns = column_labels

    # Weight of each component by market cap.
    total_market_cap = sum(matching_index_cap_df['market_cap'])
    matching_index_cap_df['percent'] = (
        matching_index_cap_df['market_cap'] / total_market_cap)

    # Sort by industry, then by market cap.
    return matching_index_cap_df.sort_values(['industry', 'market_cap'])
def get_shenwan_industry_exposure(stock_list, date):
    """Build a 0/1 Shenwan industry exposure matrix for ``stock_list``.

    The whole list is first sent to ``rqdatac.shenwan_instrument_industry``
    in one call.  Stocks missing from that result are queried again one by
    one, and every successful lookup is added before the matrix is built.

    :param stock_list: order book ids to classify (a single id given as a
        plain string is accepted as well).
    :param date: classification date; compared as a string against
        ``'2014-01-01'`` to choose the industry name list
        (assumes a 'YYYY-MM-DD' string -- TODO confirm).
    :return: tuple ``(stocks, exposure)`` where ``exposure`` is a DataFrame
        with one row per classified stock and one column per industry name,
        and ``stocks`` is the list of its row labels.
    """
    # A bare string would otherwise be split into characters by set() and be
    # rejected by pandas as an index.
    stocks = [stock_list] if isinstance(stock_list, str) else list(stock_list)

    industry_classification = rqdatac.shenwan_instrument_industry(
        stock_list, date)

    # Two different industry name lists are used on either side of 2014-01-01.
    if date > '2014-01-01':
        shenwan_industry_name = [
            '农林牧渔', '采掘', '化工', '钢铁', '有色金属', '电子', '家用电器',
            '食品饮料', '纺织服装', '轻工制造', '医药生物', '公用事业',
            '交通运输', '房地产', '商业贸易', '休闲服务', '综合', '建筑材料',
            '建筑装饰', '电气设备', '国防军工', '计算机', '传媒', '通信',
            '银行', '非银金融', '汽车', '机械设备',
        ]
    else:
        shenwan_industry_name = [
            '金融服务', '房地产', '医药生物', '有色金属', '餐饮旅游', '综合',
            '建筑建材', '家用电器', '交运设备', '食品饮料', '电子', '信息设备',
            '交通运输', '轻工制造', '公用事业', '机械设备', '纺织服装',
            '农林牧渔', '商业贸易', '化工', '信息服务', '采掘', '黑色金属',
        ]

    # When stock_list holds a single stock the result is a tuple.  This has
    # to be checked before anything touches ``.index``: on a tuple ``.index``
    # is a method, so ``.index.tolist()`` raises and this case could never be
    # reached when the check came later.
    if isinstance(industry_classification, tuple):
        industry_name = industry_classification[1]
        industry_exposure_df = pd.DataFrame(0,
                                            index=stocks,
                                            columns=shenwan_industry_name)
        industry_exposure_df[industry_name] = 1
        return industry_exposure_df.index.tolist(), industry_exposure_df

    known_stocks = set(industry_classification.index.tolist())
    # dict.fromkeys de-duplicates while keeping the caller's order, so the
    # recovered rows are appended deterministically.
    industry_classification_missing_stocks = [
        s for s in dict.fromkeys(stocks) if s not in known_stocks
    ]

    # When many stocks are passed at once, RQD currently does not search any
    # further for stocks lacking an industry label, so each of those stocks
    # has to be looked up individually.
    if industry_classification_missing_stocks:
        print(date, 'industry_classification_missing_stocks',
              industry_classification_missing_stocks)

        recovered_ids = []
        recovered_rows = []
        for stock in industry_classification_missing_stocks:
            single_classification = rqdatac.shenwan_instrument_industry(
                stock, date)
            if single_classification is not None:
                recovered_ids.append(stock)
                recovered_rows.append(
                    [single_classification[0], single_classification[1]])

        if recovered_rows:
            # DataFrame.append was removed in pandas 2.0; concatenate once
            # instead of growing the frame row by row.
            recovered = pd.DataFrame(recovered_rows,
                                     index=recovered_ids,
                                     columns=['index_code', 'index_name'])
            industry_classification = pd.concat(
                [industry_classification, recovered])

    industry_exposure_df = pd.DataFrame(0,
                                        index=industry_classification.index,
                                        columns=shenwan_industry_name)
    stock_industry = industry_classification['index_name']
    for industry in shenwan_industry_name:
        # Flag every stock whose industry name equals this column.
        members = stock_industry[stock_industry == industry].index
        industry_exposure_df.loc[members, industry] = 1

    return industry_exposure_df.index.tolist(), industry_exposure_df
def get_industry(universe, date):
    """Return the Shenwan industry name of each instrument in ``universe``.

    :param universe: instruments forwarded to ``rqd.shenwan_instrument_industry``.
    :param date: date forwarded unchanged to the same call.
    :return: the ``index_name`` column of the query result, renamed to
        ``industry``.
    """
    classification = rqd.shenwan_instrument_industry(universe, date)
    industry_names = classification.loc[:, 'index_name']
    return industry_names.rename('industry')