def func(omega):
    """Add up the entries of ``omega`` whose column matches ``industries[i]``.

    For every column of ``returns`` the industry record is fetched with
    ``jq.get_industry`` (date fixed to 2021-04-12) and compared for equality
    with ``industries[i]``; the matching positions of ``omega`` are summed.

    ``returns``, ``industries``, ``i`` and ``jq`` are not defined here and
    must be available in the enclosing scope.

    :param omega: values indexable by the column position in ``returns``
    :return: the accumulated sum (``0`` when no column matches)
    """
    total = 0
    for position, column in enumerate(returns.columns):
        industry = jq.get_industry(column, date="2021-04-12")
        if industries[i] == industry:
            total += omega[position]
    return total
def get_industry_by_security(security):
    """Return the industry names a security belongs to.

    :param security: security code handed to ``jq.get_industry``
    :return: tuple with the ``industry_name`` of the ``sw_l1``, ``sw_l2``
        and ``sw_l3`` entries, in that order
    """
    classification = jq.get_industry(security)[security]
    return tuple(
        classification[level]['industry_name']
        for level in ('sw_l1', 'sw_l2', 'sw_l3')
    )
def get_day_industry(security, date):
    """Build a one-row frame with the industry labels of a security on a date.

    Every cell is formatted as ``"<industry_code>_<industry_name>"``.

    :param security: security code handed to ``jq.get_industry``
    :param date: forwarded to ``jq.get_industry`` as ``date``
    :return: DataFrame with the columns ``sw_l1``, ``sw_l2``, ``sw_l3``,
        ``zjw``, ``jq_l2`` and ``jq_l1``; it has no rows when the lookup
        result is empty or does not contain ``security``
    """
    fields = ["sw_l1", "sw_l2", "sw_l3", "zjw", "jq_l2", "jq_l1"]
    # Per the original author: a value comes back even for non-trading dates.
    data_dict = jq.get_industry(security, date=date)
    if not data_dict or security not in data_dict:
        return pd.DataFrame({col: [] for col in fields})

    per_field = data_dict[security]
    data_df = pd.DataFrame(columns=fields)
    for field in fields:
        entry = per_field[field]
        data_df[field] = [f"{entry['industry_code']}_{entry['industry_name']}"]
    return data_df
def init_stock_industries():
    """Load the industry classifications of every security into ``stock_industry``.

    Reads all codes from the ``security`` table, queries
    ``sdk.get_industry`` for each one (date fixed to 2020-10-30), prints the
    raw result, and collects ``(code, type, industry_code, industry_name)``
    rows which are written with a single ``my.insert_many`` call.

    Per security the rows are appended in this order: ``zjw`` (always read),
    ``sw_l1`` and ``sw_l2`` (both only when ``sw_l1`` is present), ``jq_l1``
    and ``jq_l2`` (each only when present). Securities whose industry data is
    empty are skipped.
    """

    def as_record(code, kind, info):
        # One row for the stock_industry table.
        entry = info[kind]
        return (code, kind, entry['industry_code'], entry['industry_name'])

    security_rows = my.select_all("select code from security", ())
    jq.login()

    records = []
    for security_row in security_rows:
        code = security_row['code']
        data = sdk.get_industry(code, date='2020-10-30')
        print(data)
        info = data[code]
        if not info:
            continue

        records.append(as_record(code, 'zjw', info))

        # sw_l2 is read whenever sw_l1 is present.
        if 'sw_l1' in info:
            sw_l1_record = as_record(code, 'sw_l1', info)
            sw_l2_record = as_record(code, 'sw_l2', info)
            records.extend([sw_l1_record, sw_l2_record])

        for kind in ('jq_l1', 'jq_l2'):
            if kind in info:
                records.append(as_record(code, kind, info))

    my.insert_many(
        "insert into stock_industry(code, type, industry_code, industry_name) values (%s, %s, %s, %s)",
        records,
    )
def normlizeCode(codes):
    """Pair every code with its six-character prefix and its industry name.

    The industry of each code is looked up with ``get_industry`` as of today,
    using the classification named by the module-level ``industry_type``.
    For example ``000906.XSHG`` is shortened to ``000906``.

    :param codes: iterable of security codes such as ``'000906.XSHG'``
    :return: list of ``[code[:6], code, industry_name]`` lists, one per code,
        in the order of ``codes``
    """
    today = datetime.today().strftime('%Y-%m-%d')
    zz800_industry = get_industry(codes, date=today)
    zz800_industry = pd.DataFrame(zz800_industry).T[[industry_type]]
    zz800_industry[industry_type] = zz800_industry[industry_type].apply(
        lambda x: x['industry_name'])

    # Look the name up by label. The previous ``.loc[code][0]`` used an
    # integer key positionally on a label-indexed Series, which pandas has
    # deprecated.
    industry_names = zz800_industry[industry_type]
    return [[code[:6], code, industry_names.loc[code]] for code in codes]
def get_securities_day_industry(security_list, date):
    """Collect the industry names of several securities on one date.

    :param security_list: securities handed to ``jq.get_industry``
    :param date: forwarded to ``jq.get_industry`` and written to the
        ``date`` column of the result
    :return: DataFrame sorted by ``security`` with one row per security that
        has industry data and the columns ``security``, ``sw_l1``, ``sw_l2``,
        ``sw_l3``, ``jq_l1``, ``jq_l2``, ``zjw`` and ``date``; a missing
        classification is stored as the string ``"None"``. When no security
        has data an empty frame with the same columns is returned.
    """
    # Shenwan level 1/2/3, JoinQuant level 1/2, and CSRC industry
    name_list = ["sw_l1", "sw_l2", "sw_l3", "jq_l1", "jq_l2", "zjw"]
    data_dict = jq.get_industry(security_list, date=date)

    frames = []
    for security, classification in data_dict.items():
        if not classification:
            continue
        row = {"security": security}
        for name in name_list:
            row[name] = (
                classification[name]["industry_name"]
                if name in classification
                else "None"
            )
        frames.append(pd.DataFrame(row, index=[0]))

    if not frames:
        return pd.DataFrame({name: [] for name in ["security"] + name_list + ["date"]})

    combined = pd.concat(frames, axis=0)
    result_df = combined.sort_values(by=["security"], ascending=True).reset_index(drop=True)
    result_df["date"] = date
    return result_df
def QA_fetch_get_factor_groupby(factor: pd.Series, industry_cls: str = "sw_l1", detailed: bool = False) -> pd.DataFrame:
    """
    Attach an industry label to every factor value.

    The result is a pd.DataFrame holding the original factor values in a
    ``factor`` column plus a ``group`` column with the industry name of each
    asset. The JoinQuant local sdk must be imported and logged in by the
    caller beforehand; a ``UserWarning`` is emitted as a reminder on every
    call.

    Parameters
    ---
    :param factor: factor values; after ``QA_fmt_factor`` the index levels
        ``datetime`` and ``code`` are read
    :param industry_cls: industry classification key, Shenwan level 1
        (``"sw_l1"``) by default
    :param detailed: when True, industry data is requested for every distinct
        datetime of the factor; when False (default) only for the last
        factor date

    Returns
    ---
    :return: factor data with the factor value and its industry
    """
    warnings.warn("请先自行导入聚宽本地 sdk 并登陆", UserWarning)
    # Normalise the factor
    factor = QA_fmt_factor(factor)
    merged_data = pd.DataFrame(factor.copy().rename("factor"))
    # Convert the stock codes to the "jq" style
    stock_list = QA_fmt_code_list(
        factor.index.get_level_values("code").drop_duplicates(), style="jq")
    if detailed:
        # Detailed mode: one industry lookup per distinct datetime of the
        # factor index.
        # Earlier variant, kept for reference:
        # start_time = str(min(factor.index.get_level_values("datetime")))[:10]
        # end_time = str(max(factor.index.get_level_values("datetime")))[:10]
        # date_range = list(
        #     map(pd.Timestamp, QA_util_get_trade_range(start_time, end_time))
        # )
        date_range = (factor.index.get_level_values(
            "datetime").drop_duplicates().tolist())
        df_local = pd.DataFrame()
        industries = map(partial(jqdatasdk.get_industry, stock_list), date_range)
        # {date: {stock: industry name}}; "NA" when the classification or its
        # name is absent for a stock
        industries = {
            d: {
                s: ind.get(s).get(industry_cls, dict()).get("industry_name", "NA")
                for s in stock_list
            }
            for d, ind in zip(date_range, industries)
        }
    else:
        # Non-detailed mode: a single lookup as of the latest datetime in the
        # factor index (first 10 characters of its string form).
        end_time = str(max(factor.index.get_level_values("datetime")))[:10]
        date_range = [pd.Timestamp(end_time)]
        industries = jqdatasdk.get_industry(stock_list, end_time)
        # Same {date: {stock: industry name}} shape, with one date only
        industries = {
            d: {
                s: industries.get(s).get(industry_cls, dict()).get("industry_name", "NA")
                for s in stock_list
            }
            for d in date_range
        }
    # A stock may lack industry information in the past; later industry
    # information is used to fill backwards (see the bfill below)
    df_local = pd.DataFrame(industries).T.sort_index()
    # Keep only the first six characters of each code
    df_local.columns = df_local.columns.map(str).str.slice(0, 6)
    ss_local = df_local.stack(level=-1)
    ss_local.index.names = ["date", "code"]
    # NOTE(review): this column holds ``x.date()`` values while the "date"
    # level of ss_local comes from the keys of ``industries`` - confirm the
    # two align as intended in ``assign`` below.
    merged_data["date"] = merged_data.index.get_level_values("datetime").map(
        lambda x: x.date())
    # Align the industry labels on (date, code), then restore the
    # (datetime, code) index and drop the helper column
    merged_data = (merged_data.reset_index().set_index([
        "date", "code"
    ]).assign(group=ss_local).reset_index().set_index(["datetime",
                                                       "code"]).drop("date", axis=1))
    # Back-fill missing labels per code along the datetime axis
    group = merged_data["group"].unstack().bfill().stack()
    merged_data["group"] = group
    return merged_data
matplotlib.rc("font", **font)
from datetime import datetime
import seaborn as sns

# Index whose constituents are analysed
index_code = '000906.XSHG'
# Industry classification standard
industry_type = 'sw_l1'
# Number of most recent trading days covered by the breadth table
market_breadth_days = 30

# Constituents of the index and the industry each of them belongs to.
# The index is taken from index_code instead of repeating the literal, so
# changing the setting above actually takes effect.
zz800 = get_index_stocks(index_code)
zz800_industry = get_industry(zz800, date=datetime.today().strftime('%Y-%m-%d'))
zz800_industry = pd.DataFrame(zz800_industry).T[[industry_type]]
zz800_industry[industry_type] = zz800_industry[industry_type].apply(lambda x: x['industry_name'])
# Sorted list of the distinct industry names
industries = sorted(set(zz800_industry[industry_type]))

# Breadth table: one row for 'zz800' plus one per industry; dates are meant
# to become the columns, and the table needs to be transposed at the end
market_breadth = pd.DataFrame(index=(['zz800'] + industries))

# Breadth is computed per trading day: take all trading days since
# 2015-01-01, reverse them so the newest comes first, and keep the first
# market_breadth_days entries
trade_days = get_trade_days(start_date='2015-01-01', end_date=None)[::-1][:market_breadth_days]
# Dump the industry codes of the stock universe on one date to
# data/industry/<country>/<date>.txt (tab separated).
# Usage: ./dump_ind.py date cn
import sys
import os

# pandas is used below as ``pd``; imported here so the script is
# self-contained.
import pandas as pd

root_path = os.path.dirname(os.path.realpath(__file__)) + '/../'
sys.path.append(root_path + 'scripts/')
import utils
import utils_jq
import jqdatasdk

if len(sys.argv) < 3:
    print("usage: ./dump_ind.py date cn")
    # sys.exit() replaces quit(), which is a helper installed by the site
    # module; the exit status is unchanged.
    sys.exit()

d = sys.argv[1]
country = sys.argv[2]
if not utils.is_bday(d, country):
    print('{} is {} holiday'.format(d, country))
    sys.exit()

file_path = root_path + 'data/industry/{}/'.format(country)
os.makedirs(file_path, 0o777, exist_ok=True)

date_str = utils.date_str(d)
stocks = utils_jq.get_universe(date_str)
t = jqdatasdk.get_industry(stocks, date=date_str)
# One row per security: 'ric', 'date', then the industry_code of every
# classification returned for it.
t = pd.DataFrame([
    {
        'ric': r,
        'date': d,
        **{name: entry['industry_code'] for name, entry in classes.items()},
    }
    for r, classes in t.items()
])
if t.size == 0:
    # Nothing to write.
    sys.exit()

out_path = file_path + '{}.txt'.format(d)
print(out_path)
t.to_csv(out_path, sep='\t', index=False)
def get_instrument_industry(self, order_book_ids, dt=None):
    """Look up the industry data of the given instruments and return it parsed.

    :param order_book_ids: passed to ``get_industry`` as ``security``
    :param dt: passed to ``get_industry`` as ``date``; ``None`` by default
    :return: the result of ``parse_industry_code_dict`` applied to the
        lookup result
    """
    return parse_industry_code_dict(
        get_industry(security=order_book_ids, date=dt))
def getindustry(self, code, tday):
    """Return the result of ``jq.get_industry(code, tday)`` unchanged.

    :param code: first positional argument of ``jq.get_industry``
    :param tday: second positional argument of ``jq.get_industry``
    :return: whatever ``jq.get_industry`` returns
    """
    industry_info = jq.get_industry(code, tday)
    return industry_info