def get_czce_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    Fetch the Zhengzhou Commodity Exchange (CZCE) top-20 member ranking tables.

    Note: the exchange publishes rankings both per variety and per contract.
    According to the original documentation the data starts on 2005-05-09 and
    is refreshed around 16:30 on every trading day.

    NOTE(review): a second definition with the same name appears later in this
    file; if both live in one module the later one replaces this one.

    :param date: trading day as YYYY-MM-DD / YYYYMMDD string or
        ``datetime.date``; ``None`` means today.
    :param vars_list: list of variety codes (e.g. RB, AL) to keep; defaults to
        ``cons.contract_symbols``. Only applied to pages dated after
        2010-08-25; the older page layout returns every table found.
    :return: ``dict`` mapping symbol -> ``pd.DataFrame``. An empty dict is
        returned for dates before 2005-05-09, for non-trading days and when
        the downloaded table has fewer than 6 columns.
        Per the original documentation each table carries:
        rank (int), vol_party_name (str, Chinese), vol (int), vol_chg (int),
        long_party_name (str, Chinese), long_open_interest (int),
        long_open_interest_chg (int), short_party_name (str, Chinese),
        short_open_interest (int), short_open_interest_chg (int),
        symbol (str), variety (str) and date (str, YYYYMMDD).
        The pre-2010 branch below writes ``symbol`` and ``variety`` itself;
        for later dates the final columns come from ``_table_cut_cal``.
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2005, 5, 9):
        print("czce数据源开始日期为20050509,跳过")
        return {}
    day = date.strftime('%Y%m%d')
    if day not in calendar:
        warnings.warn('%s非交易日' % day)
        return {}

    if date <= datetime.date(2010, 8, 25):
        # Oldest page layout: one HTML table per symbol, symbol names live in <b> tags.
        url = cons.CZCE_VOL_RANK_URL_1 % day
        data = _czce_df_read(url, skip_rows=0)
        r = requests_link(url, 'utf-8')
        r.encoding = 'utf-8'
        # r.text is already decoded text, so no from_encoding is passed
        # (Beautiful Soup ignores it for str input and emits a warning).
        soup = BeautifulSoup(r.text, 'lxml')
        symbols = []
        for tag in soup.find_all('b'):
            parts = str(tag).split(' ')
            if len(parts) > 5:
                try:
                    symbol = chinese_to_english(parts[4])
                except Exception:
                    # Best effort: keep the raw token when it cannot be translated.
                    symbol = parts[4]
                symbols.append(symbol)
        big_dict = {}
        for i, symbol in enumerate(symbols):
            # The ranking tables start at offset 2 in the list of parsed tables.
            table_cut = data[i + 2]
            table_cut.columns = rank_columns
            # Drop the last row; copy so the assignments below do not write into a slice.
            table_cut = table_cut.iloc[:-1, :].copy()
            table_cut.loc[:, 'rank'] = table_cut.index
            # The "合计" (total) row gets the sentinel rank 999 and no member names.
            table_cut.loc['合计', 'rank'] = 999
            table_cut.loc[
                '合计',
                ['vol_party_name', 'long_party_name', 'short_party_name']
            ] = None
            table_cut.loc[:, 'symbol'] = symbol
            table_cut.loc[:, 'variety'] = symbol_varieties(symbol)
            # Normalise numeric columns: NaN -> 0, strip thousands separators, "-" -> 0.
            table_cut[intColumns] = table_cut[intColumns].fillna(0)
            table_cut[intColumns] = table_cut[intColumns].astype(str)
            table_cut[intColumns] = table_cut[intColumns].applymap(
                lambda x: x.replace(',', ''))
            table_cut = table_cut.applymap(lambda x: 0 if x == '-' else x)
            table_cut[intColumns] = table_cut[intColumns].astype(float)
            table_cut[intColumns] = table_cut[intColumns].astype(int)
            big_dict[symbol] = table_cut.reset_index(drop=True)
        return big_dict
    elif date <= datetime.date(2015, 11, 11):
        url = cons.CZCE_VOL_RANK_URL_2 % (date.year, day)
        data = _czce_df_read(url, skip_rows=1)[1]
    elif date < datetime.date(2017, 12, 28):
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, day)
        data = _czce_df_read(url, skip_rows=1)[0]
    else:
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, day)
        data = _czce_df_read(url, skip_rows=0)[0]

    if len(data.columns) < 6:
        return {}

    table = pd.DataFrame(data.iloc[:, :9])
    table.columns = rank_columns
    table.loc[:, 'rank'] = table.index
    table[intColumns] = table[intColumns].astype(str)
    table[intColumns] = table[intColumns].applymap(lambda x: x.replace(',', ''))
    table = table.applymap(lambda x: 0 if x == '-' else x)

    # Rows whose index label mentions "合约" (contract) or "品种" (variety) open a
    # new section; the leading 0 stands for the first section, whose title is
    # stored in the index name instead of in a row.
    indexes = [
        label for label in table.index
        if isinstance(label, str) and ('合约' in label or '品种' in label)
    ]
    indexes.insert(0, 0)

    title_pattern = re.compile(r':(.*) ')
    rank_labels = {str(n) for n in range(21)}
    last = len(indexes) - 1
    big_dict = {}
    for i, start in enumerate(indexes):
        if start == 0:
            # With no further section header the whole table is one section.
            table_cut = table.loc[:indexes[i + 1], :] if i < last else table.loc[:, :]
            string = table_cut.index.name
        elif i < last:
            table_cut = table.loc[start:indexes[i + 1], :]
            string = table_cut.index[0]
        else:
            table_cut = table.loc[start:, :]
            string = table_cut.index[0]

        if 'PTA' in string:
            symbol = 'TA'
        else:
            raw_name = title_pattern.findall(string)[0]
            try:
                symbol = chinese_to_english(find_chinese(raw_name))
            except Exception:
                # Best effort: fall back to the text captured from the title.
                symbol = raw_name

        var = symbol_varieties(symbol)
        if var in vars_list:
            table_cut = table_cut.dropna(how='any').iloc[1:, :]
            # Keep only the rows whose label is a rank number "0".."20".
            table_cut = table_cut.loc[
                [x for x in table_cut.index if x in rank_labels], :]
            table_cut = _table_cut_cal(table_cut, symbol)
            big_dict[symbol] = table_cut.reset_index(drop=True)
    return big_dict
def get_czce_rank_table(date="20081015", vars_list=cons.contract_symbols):
    """
    Fetch the Zhengzhou Commodity Exchange (CZCE) top-20 member ranking tables.

    Note: the exchange publishes rankings both per variety and per contract.
    According to the original documentation the data starts on 2005-05-09 and
    is refreshed around 16:30 on every trading day.

    :param date: trading day as YYYY-MM-DD / YYYYMMDD string or
        ``datetime.date``. The default is the fixed day "20081015"; pass
        ``None`` explicitly to query today.
    :param vars_list: list of variety codes (e.g. RB, AL) to keep; defaults to
        ``cons.contract_symbols``. Only applied to pages dated after
        2010-08-25; the older page layout returns every table found.
    :return: ``dict`` mapping symbol -> ``pd.DataFrame``. An empty dict is
        returned for dates before 2005-05-09, for non-trading days, when the
        2010-2015 page yields no ranking tables and when the downloaded table
        has fewer than 6 columns.
        Per the original documentation each table carries:
        rank (int), vol_party_name (str, Chinese), vol (int), vol_chg (int),
        long_party_name (str, Chinese), long_open_interest (int),
        long_open_interest_chg (int), short_party_name (str, Chinese),
        short_open_interest (int), short_open_interest_chg (int),
        symbol (str), variety (str) and date (str, YYYYMMDD).
        The pre-2010 branch below writes ``symbol`` and ``variety`` itself;
        for later dates the final columns come from ``_table_cut_cal``.
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2005, 5, 9):
        print("czce数据源开始日期为20050509,跳过")
        return {}
    day = date.strftime("%Y%m%d")
    if day not in calendar:
        warnings.warn("%s非交易日" % day)
        return {}

    if date <= datetime.date(2010, 8, 25):
        # Oldest page layout: one HTML table per symbol, symbol names live in <b> tags.
        url = cons.CZCE_VOL_RANK_URL_1 % day
        r = requests.get(url)
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "lxml")
        data = _czce_df_read(url, skip_rows=0)
        # The page used to be fetched a second time here via requests_link, but
        # that response was never read, so the redundant request was removed.
        symbols = []
        for tag in soup.find_all("b"):
            parts = str(tag).split(" ")
            if len(parts) > 5:
                try:
                    symbol = chinese_to_english(parts[4])
                except Exception:
                    # Best effort: keep the raw token when it cannot be translated.
                    symbol = parts[4]
                # One known title glues the following "日期:" label onto the symbol.
                if symbol == "SR905日期:":
                    symbol = "SR905"
                symbols.append(symbol)
        big_dict = {}
        for i, symbol in enumerate(symbols):
            # The ranking tables start at offset 1 in the list of parsed tables.
            table_cut = data[i + 1]
            table_cut.columns = rank_columns
            # Drop the last row; copy so the assignments below do not write into a slice.
            table_cut = table_cut.iloc[:-1, :].copy()
            table_cut.loc[:, "rank"] = table_cut.index
            # The "合计" (total) row gets the sentinel rank 999 and no member names.
            table_cut.loc["合计", "rank"] = 999
            table_cut.loc[
                "合计",
                ["vol_party_name", "long_party_name", "short_party_name"]
            ] = None
            table_cut.loc[:, "symbol"] = symbol
            table_cut.loc[:, "variety"] = symbol_varieties(symbol)
            # Normalise numeric columns: NaN -> 0, strip thousands separators, "-" -> 0.
            table_cut[intColumns] = table_cut[intColumns].fillna(0)
            table_cut[intColumns] = table_cut[intColumns].astype(str)
            table_cut[intColumns] = table_cut[intColumns].applymap(
                lambda x: x.replace(",", ""))
            table_cut = table_cut.applymap(lambda x: 0 if x == "-" else x)
            table_cut[intColumns] = table_cut[intColumns].astype(float)
            table_cut[intColumns] = table_cut[intColumns].astype(int)
            big_dict[symbol] = table_cut.reset_index(drop=True)
        return big_dict
    elif date <= datetime.date(2015, 11, 11):
        # Format fix of 2020-03-11: the page is split into several tables; the
        # first three are skipped and the rest are stacked into one frame.
        url = cons.CZCE_VOL_RANK_URL_2 % (date.year, day)
        frames = list(_czce_df_read(url, skip_rows=0, header=None)[3:])
        if not frames:
            return {}
        # A single concat avoids quadratic copying and the empty-frame seed.
        big_df = pd.concat(frames, axis=0, ignore_index=False)
        # The first stacked row holds the column titles.
        big_df.columns = big_df.iloc[0, :].tolist()
        data = big_df.iloc[1:, :]
    elif date < datetime.date(2017, 12, 28):
        # Format fix of 2020-03-11.
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, day)
        data = _czce_df_read(url, skip_rows=0, header=0)[1]
    else:
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, day)
        data = _czce_df_read(url, skip_rows=0)[0]

    if len(data.columns) < 6:
        return {}

    table = pd.DataFrame(data.iloc[:, :9])
    # Keep the first column title: it is read back below as the first section's title.
    table.index.name = table.columns[0]
    table.columns = rank_columns
    table.loc[:, "rank"] = table.index
    table[intColumns] = table[intColumns].astype(str)
    table[intColumns] = table[intColumns].applymap(lambda x: x.replace(",", ""))
    table = table.applymap(lambda x: 0 if x == "-" else x)

    # Rows whose index label mentions "合约" (contract) or "品种" (variety) open a
    # new section; the leading 0 stands for the first section, whose title is
    # stored in the index name instead of in a row.
    indexes = [
        label for label in table.index
        if isinstance(label, str) and ("合约" in label or "品种" in label)
    ]
    indexes.insert(0, 0)

    title_pattern = re.compile(r":(.*) ")
    rank_labels = {str(n) for n in range(21)}
    last = len(indexes) - 1
    big_dict = {}
    for i, start in enumerate(indexes):
        if start == 0:
            # With no further section header the whole table is one section.
            table_cut = table.loc[:indexes[i + 1], :] if i < last else table.loc[:, :]
            string = table_cut.index.name
        elif i < last:
            table_cut = table.loc[start:indexes[i + 1], :]
            string = table_cut.index[0]
        else:
            table_cut = table.loc[start:, :]
            string = table_cut.index[0]

        if "PTA" in string:
            symbol = "TA"
        else:
            raw_name = title_pattern.findall(string)[0]
            try:
                symbol = chinese_to_english(find_chinese(raw_name))
            except Exception:
                # Best effort: fall back to the text captured from the title.
                symbol = raw_name

        var = symbol_varieties(symbol)
        if var in vars_list:
            table_cut = table_cut.dropna(how="any").iloc[1:, :]
            # Keep only the rows whose label is a rank number "0".."20".
            table_cut = table_cut.loc[
                [x for x in table_cut.index if x in rank_labels], :]
            table_cut = _table_cut_cal(table_cut, symbol)
            # The captured title text may carry surrounding whitespace.
            big_dict[symbol.strip()] = table_cut.reset_index(drop=True)
    return big_dict