def futures_dce_position_rank_other(date="20160104"): date = cons.convert_date(date) if date is not None else datetime.date.today() if date.strftime('%Y%m%d') not in calendar: warnings.warn('%s非交易日' % date.strftime('%Y%m%d')) return {} url = "http://www.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html" payload = { "memberDealPosiQuotes.variety": "c", "memberDealPosiQuotes.trade_type": "0", "year": date.year, "month": date.month - 1, "day": date.day, "contract.contract_id": "all", "contract.variety_id": "c", "contract": "", } r = requests.post(url, data=payload) soup = BeautifulSoup(r.text, "lxml") symbol_list = [item["onclick"].strip("javascript:setVariety(").strip("');") for item in soup.find_all(attrs={"class": "selBox"})[-3].find_all("input")] big_df = dict() for symbol in symbol_list: payload = { "memberDealPosiQuotes.variety": symbol, "memberDealPosiQuotes.trade_type": "0", "year": date.year, "month": date.month - 1, "day": date.day, "contract.contract_id": "all", "contract.variety_id": symbol, "contract": "", } r = requests.post(url, data=payload) soup = BeautifulSoup(r.text, "lxml") contract_list = [item["onclick"].strip("javascript:setContract_id('").strip("');") for item in soup.find_all(attrs={"name": "contract"})] if contract_list: if len(contract_list[0]) == 4: contract_list = [symbol + item for item in contract_list] for contract in contract_list: payload = { "memberDealPosiQuotes.variety": symbol, "memberDealPosiQuotes.trade_type": "0", "year": date.year, "month": date.month - 1, "day": date.day, "contract.contract_id": contract, "contract.variety_id": symbol, "contract": "", } r = requests.post(url, data=payload) temp_df = pd.read_html(r.text)[1].iloc[:-1, :] temp_df.columns = ["rank", "vol_party_name", "vol", "vol_chg", "_", "long_party_name", "long_open_interest", "long_open_interest_chg", "_", "short_party_name", "short_open_interest", "short_open_interest_chg"] temp_df["variety"] = symbol.upper() temp_df["symbol"] = contract temp_df = temp_df[["long_open_interest", "long_open_interest_chg", "long_party_name", "rank", "short_open_interest", "short_open_interest_chg", "short_party_name", "vol", "vol_chg", "vol_party_name", "symbol", "variety"]] big_df[contract] = temp_df return big_df
def get_cffex_daily(date="20200416"): """ 中国金融期货交易所日交易数据 :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象; 为空时为当天 :return: pandas.DataFrame 中国金融期货交易所日: symbol 合约代码 date 日期 open 开盘价 high 最高价 low 最低价 close 收盘价 volume 成交量 open_interest 持仓量 turnover 成交额 settle 结算价 pre_settle 前结算价 variety 合约类别 或 None(给定日期没有交易数据) """ day = cons.convert_date( date) if date is not None else datetime.date.today() if day.strftime("%Y%m%d") not in calendar: warnings.warn("%s非交易日" % day.strftime("%Y%m%d")) return None try: html = requests_link( cons.CFFEX_DAILY_URL.format(day.strftime("%Y%m"), day.strftime("%d"), day.strftime("%Y%m%d")), encoding="gbk", headers=cons.headers, ).text except requests.exceptions.HTTPError as reason: if reason.response != 404: print( cons.CFFEX_DAILY_URL % (day.strftime("%Y%m"), day.strftime("%d"), day.strftime("%Y%m%d")), reason, ) return if html.find("网页错误") >= 0: return html = [ i.replace(" ", "").split(",") for i in html.split("\n")[:-2] if i[0][0] != "小" ] if html[0][0] != "合约代码": return dict_data = list() day_const = day.strftime("%Y%m%d") for row in html[1:]: m = cons.FUTURES_SYMBOL_PATTERN.match(row[0]) if not m: continue row_dict = {"date": day_const, "symbol": row[0], "variety": m.group(1)} for i, field in enumerate(cons.CFFEX_COLUMNS): if row[i + 1] == "": row_dict[field] = 0.0 elif field in ["volume", "open_interest", "oi_chg"]: row_dict[field] = int(row[i + 1]) else: try: row_dict[field] = float(row[i + 1]) except: pass row_dict["pre_settle"] = row_dict["close"] - row_dict["change1"] dict_data.append(row_dict) return pd.DataFrame(dict_data)[cons.OUTPUT_COLUMNS]
def get_rank_sum(date="20200727", vars_list=cons.contract_symbols): """ 抓取四个期货交易所前5、前10、前15、前20会员持仓排名数据 注1:由于上期所和中金所只公布每个品种内部的标的排名, 没有公布品种的总排名; 所以函数输出的品种排名是由品种中的每个标的加总获得, 并不是真实的品种排名列表 注2:大商所只公布了品种排名, 未公布标的排名 :param date: 日期 format: YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天 :param vars_list: 合约品种如 RB, AL等列表 为空时为所有商品 :return: pd.DataFrame: 展期收益率数据 symbol 标的合约 string var 商品品种 string vol_top5 成交量前5会员成交量总和 int vol_chg_top5 成交量前5会员成交量变化总和 int long_open_interest_top5 持多单前5会员持多单总和 int long_open_interest_chg_top5 持多单前5会员持多单变化总和 int short_open_interest_top5 持空单前5会员持空单总和 int short_open_interest_chg_top5 持空单前5会员持空单变化总和 int vol_top10 成交量前10会员成交量总和 int ... date 日期 string YYYYMMDD """ date = cons.convert_date(date) if date is not None else datetime.date.today() if date.strftime('%Y%m%d') not in calendar: warnings.warn('%s非交易日' % date.strftime('%Y%m%d')) return None dce_var = [i for i in vars_list if i in cons.market_exchange_symbols['dce']] shfe_var = [i for i in vars_list if i in cons.market_exchange_symbols['shfe']] czce_var = [i for i in vars_list if i in cons.market_exchange_symbols['czce']] cffex_var = [i for i in vars_list if i in cons.market_exchange_symbols['cffex']] big_dict = {} if len(dce_var) > 0: data = get_dce_rank_table(date, dce_var) if data is False: return False big_dict.update(data) if len(shfe_var) > 0: data = get_shfe_rank_table(date, shfe_var) if data is False: return False big_dict.update(data) if len(czce_var) > 0: data = get_czce_rank_table(date, czce_var) if data is False: return False big_dict.update(data) if len(cffex_var) > 0: data = get_cffex_rank_table(date, cffex_var) if data is False: return False big_dict.update(data) records = pd.DataFrame() for symbol, table in big_dict.items(): table = table.applymap(lambda x: 0 if x == '' else x) for symbol_inner in set(table['symbol']): var = symbol_varieties(symbol_inner) if var in vars_list: if var in czce_var: for col in [item for item in table.columns if item.find('open_interest') > -1] + ['vol', 'vol_chg']: table[col] = [float(value.replace(',', '')) if value != '-' else 0.0 for value in table[col]] table_cut = table[table['symbol'] == symbol_inner] table_cut['rank'] = table_cut['rank'].astype('float') table_cut_top5 = table_cut[table_cut['rank'] <= 5] table_cut_top10 = table_cut[table_cut['rank'] <= 10] table_cut_top15 = table_cut[table_cut['rank'] <= 15] table_cut_top20 = table_cut[table_cut['rank'] <= 20] big_dict = {'symbol': symbol_inner, 'variety': var, 'vol_top5': table_cut_top5['vol'].sum(), 'vol_chg_top5': table_cut_top5['vol_chg'].sum(), 'long_open_interest_top5': table_cut_top5['long_open_interest'].sum(), 'long_open_interest_chg_top5': table_cut_top5['long_open_interest_chg'].sum(), 'short_open_interest_top5': table_cut_top5['short_open_interest'].sum(), 'short_open_interest_chg_top5': table_cut_top5['short_open_interest_chg'].sum(), 'vol_top10': table_cut_top10['vol'].sum(), 'vol_chg_top10': table_cut_top10['vol_chg'].sum(), 'long_open_interest_top10': table_cut_top10['long_open_interest'].sum(), 'long_open_interest_chg_top10': table_cut_top10['long_open_interest_chg'].sum(), 'short_open_interest_top10': table_cut_top10['short_open_interest'].sum(), 'short_open_interest_chg_top10': table_cut_top10['short_open_interest_chg'].sum(), 'vol_top15': table_cut_top15['vol'].sum(), 'vol_chg_top15': table_cut_top15['vol_chg'].sum(), 'long_open_interest_top15': table_cut_top15['long_open_interest'].sum(), 'long_open_interest_chg_top15': table_cut_top15['long_open_interest_chg'].sum(), 'short_open_interest_top15': table_cut_top15['short_open_interest'].sum(), 'short_open_interest_chg_top15': table_cut_top15['short_open_interest_chg'].sum(), 'vol_top20': table_cut_top20['vol'].sum(), 'vol_chg_top20': table_cut_top20['vol_chg'].sum(), 'long_open_interest_top20': table_cut_top20['long_open_interest'].sum(), 'long_open_interest_chg_top20': table_cut_top20['long_open_interest_chg'].sum(), 'short_open_interest_top20': table_cut_top20['short_open_interest'].sum(), 'short_open_interest_chg_top20': table_cut_top20['short_open_interest_chg'].sum(), 'date': date.strftime('%Y%m%d') } records = records.append(pd.DataFrame(big_dict, index=[0])) if len(big_dict.items()) > 0: add_vars = [i for i in cons.market_exchange_symbols['dce'] + cons.market_exchange_symbols['shfe'] + cons.market_exchange_symbols['cffex'] if i in records['variety'].tolist()] for var in add_vars: records_cut = records[records['variety'] == var] var_record = pd.DataFrame(records_cut.sum()).T var_record['date'] = date.strftime('%Y%m%d') var_record.loc[:, ['variety', 'symbol']] = var records = records.append(var_record) return records.reset_index(drop=True)
def get_czce_receipt_3(date: str = None, vars_list: List = cons.contract_symbols): """ 抓取郑州商品交易所注册仓单数据 适用20151112(包括)至今 Parameters ------ date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天 vars_list: 合约品种如CF、TA等列表 为空时为所有商品 Return ------- DataFrame: 展期收益率数据(DataFrame):`1 var 商品品种 string receipt 仓单数 int date 日期 string YYYYMMDD """ date = (cons.convert_date(date).strftime("%Y%m%d") if date is not None else datetime.date.today()) if date not in calendar: warnings.warn("%s非交易日" % date.strftime("%Y%m%d")) return None url = cons.CZCE_RECEIPT_URL_3 % (date[:4], date) r = requests_link(url, encoding="utf-8") r.encoding = "utf-8" data = pd.read_html(r.text, encoding="gb2312") records = pd.DataFrame() if len(data) < 4: return records if int(date) <= 20171227: data = data[1:] for data_cut in data: if len(data_cut.columns) > 3 and len(data_cut.index) > 7: last_indexes = [ x for x in data_cut.index if "注:" in str(data_cut[0].tolist()[x]) ] if len(last_indexes) > 0: last_index = last_indexes[0] - 1 data_cut = data_cut.loc[:last_index, :] if "PTA" in data_cut[0].tolist()[0]: var = "TA" else: strings = data_cut[0].tolist()[0] string = strings.split(" ")[0][3:] if len(string) > 7: continue print(string) var = chinese_to_english(re.sub("[A-Z]+", "", string)) data_cut.columns = data_cut.loc[1, :] data_cut = data_cut.fillna(method="pad") try: receipt = data_cut.loc[:, "仓单数量"].tolist()[-1] except: receipt = data_cut.loc[:, "仓单数量(保税)"].tolist()[-1] receipt_chg = data_cut.loc[:, "当日增减"].tolist()[-1] data_dict = { "var": var, "receipt": int(receipt), "receipt_chg": int(receipt_chg), "date": date, } records = records.append(pd.DataFrame(data_dict, index=[0])) if len(records.index) != 0: records.index = records["var"] vars_in_market = [i for i in vars_list if i in records.index] records = records.loc[vars_in_market, :] return records.reset_index(drop=True)
def get_dce_daily(date: str = "20030115") -> pd.DataFrame: """ 大连商品交易所日交易数据 http://www.dce.com.cn/dalianshangpin/xqsj/tjsj26/rtj/rxq/index.html :param date: 交易日, e.g., 20200416 :type date: str :return: 具体交易日的个品种行情数据 :rtype: pandas.DataFrame """ day = cons.convert_date(date) if date is not None else datetime.date.today() if day.strftime("%Y%m%d") not in calendar: warnings.warn("%s非交易日" % day.strftime("%Y%m%d")) return None url = "http://www.dce.com.cn/publicweb/quotesdata/exportDayQuotesChData.html" headers = { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", "Accept-Encoding": "gzip, deflate", "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", "Cache-Control": "no-cache", "Connection": "keep-alive", "Content-Length": "86", "Content-Type": "application/x-www-form-urlencoded", "Host": "www.dce.com.cn", "Origin": "http://www.dce.com.cn", "Pragma": "no-cache", "Referer": "http://www.dce.com.cn/publicweb/quotesdata/dayQuotesCh.html", "Upgrade-Insecure-Requests": "1", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36", } params = { "dayQuotes.variety": "all", "dayQuotes.trade_type": "0", "year": date[:4], "month": str(int(date[4:6]) - 1), "day": date[6:], "exportFlag": "excel", } r = requests.post(url, data=params, headers=headers) data_df = pd.read_excel(BytesIO(r.content)) data_df = data_df[~data_df["商品名称"].str.contains("小计")] data_df = data_df[~data_df["商品名称"].str.contains("总计")] data_df["variety"] = data_df["商品名称"].map(lambda x: cons.DCE_MAP[x]) data_df["symbol"] = data_df["variety"] + data_df["交割月份"].astype(int).astype(str) del data_df["商品名称"] del data_df["交割月份"] data_df.columns = ["open", "high", "low", "close", "pre_settle", "settle", "_", "_", "volume", "open_interest", "_", "turnover", "variety", "symbol"] data_df["date"] = date data_df = data_df[ ["symbol", "date", "open", "high", "low", "close", "volume", "open_interest", "turnover", "settle", "pre_settle", "variety"]] data_df = data_df.applymap(lambda x: x.replace(",", "")) data_df = data_df.astype({"open": "float", "high": "float", "low": "float", "close": "float", "volume": "float", "open_interest": "float", "turnover": "float", "settle": "float", "pre_settle": "float", }) return data_df
def get_dce_rank_table(date="20201026", vars_list=cons.contract_symbols): """ 大连商品交易所前 20 会员持仓排名数据明细 注: 该交易所只公布标的合约排名 :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date 对象, 为空时为当天 :param vars_list: 合约品种如 RB、AL等列表为空时为所有商品, 数据从 20060104 开始,每交易日 16:30 左右更新数据 :return: pandas.DataFrame rank 排名 int vol_party_name 成交量排序的当前名次会员 string(中文) vol 该会员成交量 int vol_chg 该会员成交量变化量 int long_party_name 持多单排序的当前名次会员 string(中文) long_open_interest 该会员持多单 int long_open_interest_chg 该会员持多单变化量 int short_party_name 持空单排序的当前名次会员 string(中文) short_open_interest 该会员持空单 int short_open_interest_chg 该会员持空单变化量 int symbol 标的合约 string var 品种 string date 日期 string YYYYMMDD """ date = cons.convert_date(date) if date is not None else datetime.date.today() if date < datetime.date(2006, 1, 4): print(Exception("大连商品交易所数据源开始日期为20060104,跳过")) return {} if date.strftime('%Y%m%d') not in calendar: warnings.warn('%s非交易日' % date.strftime('%Y%m%d')) return {} vars_list = [i for i in vars_list if i in cons.market_exchange_symbols['dce']] big_dict = {} for var in vars_list: symbol_list = _get_dce_contract_list(date, var) for symbol in symbol_list: url = cons.DCE_VOL_RANK_URL_1 % (var.lower(), symbol, var.lower(), date.year, date.month - 1, date.day) list_60_name = [] list_60 = [] list_60_chg = [] rank = [] texts = requests_link(url).content.splitlines() # print(texts) if not texts: return False if len(texts) > 30: for text in texts: line = text.decode("utf-8") string_list = line.split() try: if int(string_list[0]) <= 20: list_60_name.append(string_list[1]) list_60.append(string_list[2]) list_60_chg.append(string_list[3]) rank.append(string_list[0]) except: pass table_cut = pd.DataFrame({'rank': rank[0:20], 'vol_party_name': list_60_name[0:20], 'vol': list_60[0:20], 'vol_chg': list_60_chg[0:20], 'long_party_name': list_60_name[20:40], 'long_open_interest': list_60[20:40], 'long_open_interest_chg': list_60_chg[20:40], 'short_party_name': list_60_name[40:60], 'short_open_interest': list_60[40:60], 'short_open_interest_chg': list_60_chg[40:60] }) table_cut = table_cut.applymap(lambda x: x.replace(',', '')) table_cut = _table_cut_cal(table_cut, symbol) big_dict[symbol] = table_cut.reset_index(drop=True) return big_dict
def get_receipt(start_day: str = None, end_day: str = None, vars_list: List = cons.contract_symbols): """ 获取大宗商品注册仓单数量 :param start_day: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天 :type start_day: str :param end_day: 结束数据 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天 :type end_day: str :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品 :type vars_list: str :return: 展期收益率数据 :rtype: pandas.DataFrame """ start_day = cons.convert_date( start_day) if start_day is not None else datetime.date.today() end_day = cons.convert_date( end_day) if end_day is not None else cons.convert_date( cons.get_latest_data_date(datetime.datetime.now())) records = pd.DataFrame() while start_day <= end_day: if start_day.strftime('%Y%m%d') not in calendar: warnings.warn(f"{start_day.strftime('%Y%m%d')}非交易日") else: print(start_day) for market, market_vars in cons.market_exchange_symbols.items(): if market == 'dce': if start_day >= datetime.date(2009, 4, 7): f = get_dce_receipt else: print('20090407起,dce每交易日更新仓单数据') f = None elif market == 'shfe': if datetime.date(2008, 10, 6) <= start_day <= datetime.date( 2014, 5, 16): f = get_shfe_receipt_1 elif start_day > datetime.date(2014, 5, 16): f = get_shfe_receipt_2 else: f = None print('20081006起,shfe每交易日更新仓单数据') elif market == 'czce': if datetime.date(2008, 3, 3) <= start_day <= datetime.date( 2010, 8, 24): f = get_czce_receipt_1 elif datetime.date(2010, 8, 24) < start_day <= datetime.date( 2015, 11, 11): f = get_czce_receipt_2 elif start_day > datetime.date(2015, 11, 11): f = get_czce_receipt_3 else: f = None print('20080303起,czce每交易日更新仓单数据') get_vars = [var for var in vars_list if var in market_vars] if market != 'cffex' and get_vars != []: if f is not None: records = records.append(f(start_day, get_vars)) start_day += datetime.timedelta(days=1) records.reset_index(drop=True, inplace=True) if "MA" in records["var"].to_list(): replace_index = records[records["var"] == "MA"]["receipt"].astype( str).str.split("0", expand=True)[0].index records.loc[replace_index, "receipt"] = records[ records["var"] == "MA"]["receipt"].astype(str).str.split( "0", expand=True)[0] return records
def get_czce_daily(date="20200901"): """ 郑州商品交易所-日频率-量价数据 :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象,默认为当前交易日; 日期需要大于200100824 :type date: str or datetime.date :return: 郑州商品交易所-日频率-量价数据 :rtype: pandas.DataFrame or None """ day = cons.convert_date(date) if date is not None else datetime.date.today() if day.strftime("%Y%m%d") not in calendar: warnings.warn(f"{day.strftime('%Y%m%d')}非交易日") return None if day > datetime.date(2010, 8, 24): if day > datetime.date(2015, 9, 19): u = cons.CZCE_DAILY_URL_3 url = u % (day.strftime("%Y"), day.strftime("%Y%m%d")) elif day < datetime.date(2015, 9, 19): u = cons.CZCE_DAILY_URL_2 url = u % (day.strftime("%Y"), day.strftime("%Y%m%d")) listed_columns = cons.CZCE_COLUMNS output_columns = cons.OUTPUT_COLUMNS try: r = requests.get(url) html = r.text except requests.exceptions.HTTPError as reason: if reason.response.status_code != 404: print( cons.CZCE_DAILY_URL_3 % (day.strftime("%Y"), day.strftime("%Y%m%d")), reason, ) return if html.find("您的访问出错了") >= 0 or html.find("无期权每日行情交易记录") >= 0: return html = [ i.replace(" ", "").split("|") for i in html.split("\n")[:-4] if i[0][0] != "小" ] if day > datetime.date(2015, 9, 19): if html[1][0] not in ["品种月份", "品种代码"]: return dict_data = list() day_const = int(day.strftime("%Y%m%d")) for row in html[2:]: m = cons.FUTURES_SYMBOL_PATTERN.match(row[0]) if not m: continue row_dict = {"date": day_const, "symbol": row[0], "variety": m.group(1)} for i, field in enumerate(listed_columns): if row[i + 1] == "\r": row_dict[field] = 0.0 elif field in [ "volume", "open_interest", "oi_chg", "exercise_volume", ]: row[i + 1] = row[i + 1].replace(",", "") row_dict[field] = int(row[i + 1]) else: row[i + 1] = row[i + 1].replace(",", "") row_dict[field] = float(row[i + 1]) dict_data.append(row_dict) return pd.DataFrame(dict_data)[output_columns] elif day < datetime.date(2015, 9, 19): dict_data = list() day_const = int(day.strftime("%Y%m%d")) for row in html[1:]: row = row[0].split(",") m = cons.FUTURES_SYMBOL_PATTERN.match(row[0]) if not m: continue row_dict = {"date": day_const, "symbol": row[0], "variety": m.group(1)} for i, field in enumerate(listed_columns): if row[i + 1] == "\r": row_dict[field] = 0.0 elif field in [ "volume", "open_interest", "oi_chg", "exercise_volume", ]: row_dict[field] = int(float(row[i + 1])) else: row_dict[field] = float(row[i + 1]) dict_data.append(row_dict) return pd.DataFrame(dict_data)[output_columns] if day <= datetime.date(2010, 8, 24): u = cons.CZCE_DAILY_URL_1 url = u % day.strftime("%Y%m%d") listed_columns = cons.CZCE_COLUMNS_2 output_columns = cons.OUTPUT_COLUMNS df = pd.read_html(url)[1].dropna(how="any") dict_data = list() day_const = int(day.strftime("%Y%m%d")) for row in df.to_dict(orient="records")[1:]: m = cons.FUTURES_SYMBOL_PATTERN.match(row[0]) if not m: continue row_dict = {"date": day_const, "symbol": row[0], "variety": m.group(1)} for i, field in enumerate(listed_columns): if row[i + 1] == "\r": row_dict[field] = 0.0 elif field in ["volume", "open_interest", "oi_chg", "exercise_volume"]: row_dict[field] = int(row[i + 1]) else: row_dict[field] = float(row[i + 1]) dict_data.append(row_dict) return pd.DataFrame(dict_data)[output_columns]
def get_roll_yield_bar(type_method="symbol", var="RB", date=None, start_day=None, end_day=None, plot=False): """ 获取展期收益率 :param type_method: 'symbol':获取某天某品种所有交割月合约的收盘价, 'var':获取某天所有品种两个主力合约的展期收益率(展期收益率横截面), ‘date’:获取某品种每天的两个主力合约的展期收益率(展期收益率时间序列) :param var: 合约品种如RB、AL等 :param date: 某一天日期 format: YYYYMMDD :param start_day: 开始日期 format:YYYYMMDD :param end_day: 结束数据 format:YYYYMMDD :param plot: True or False作图 :return: pd.DataFrame 展期收益率数据(DataFrame): ry 展期收益率 index 日期或品种 """ date = cons.convert_date( date) if date is not None else datetime.date.today() start_day = (cons.convert_date(start_day) if start_day is not None else datetime.date.today()) end_day = (cons.convert_date(end_day) if end_day is not None else cons.convert_date( cons.get_latest_data_date(datetime.datetime.now()))) if type_method == "symbol": df = get_futures_daily(start_day=date, end_day=date, market=symbol_market(var)) df = df[df["variety"] == var] if plot: _plot_bar_2(df[["symbol", "close"]]) return df if type_method == "var": df = pd.DataFrame() for market in ["dce", "cffex", "shfe", "czce"]: df = df.append( get_futures_daily(start_day=date, end_day=date, market=market)) var_list = list(set(df["variety"])) df_l = pd.DataFrame() for var in var_list: ry = get_roll_yield(date, var, df=df) if ry: df_l = df_l.append( pd.DataFrame([ry], index=[var], columns=["roll_yield", "near_by", "deferred"])) df_l["date"] = date df_l = df_l.sort_values("roll_yield") if plot: _plot_bar(df_l["roll_yield"]) return df_l if type_method == "date": df_l = pd.DataFrame() while start_day <= end_day: try: ry = get_roll_yield(start_day, var) if ry: df_l = df_l.append( pd.DataFrame( [ry], index=[start_day], columns=["roll_yield", "near_by", "deferred"], )) except: pass start_day += datetime.timedelta(days=1) if plot: _plot(df_l["roll_yield"]) return df_l
def get_czce_receipt_3(date: str = None, vars_list: List = cons.contract_symbols): """ 抓取郑州商品交易所注册仓单数据 适用20151112(包括)至今 Parameters ------ date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天 vars_list: 合约品种如CF、TA等列表 为空时为所有商品 Return ------- DataFrame: 展期收益率数据(DataFrame):`1 var 商品品种 string receipt 仓单数 int date 日期 string YYYYMMDD """ date = cons.convert_date(date).strftime( '%Y%m%d') if date is not None else datetime.date.today() if date not in calendar: warnings.warn('%s非交易日' % date.strftime('%Y%m%d')) return None url = cons.CZCE_RECEIPT_URL_3 % (date[:4], date) r = requests_link(url, encoding='utf-8') r.encoding = 'utf-8' data = pd.read_html(r.text, encoding='gb2312') records = pd.DataFrame() if len(data) < 4: return records if int(date) <= 20171227: data = data[1:] for data_cut in data: if len(data_cut.columns) > 3 and len(data_cut.index) > 7: last_indexes = [ x for x in data_cut.index if '注:' in str(data_cut[0].tolist()[x]) ] if len(last_indexes) > 0: last_index = last_indexes[0] - 1 data_cut = data_cut.loc[:last_index, :] if 'PTA' in data_cut[0].tolist()[0]: var = 'TA' else: strings = data_cut[0].tolist()[0] string = strings.split(' ')[0][3:] if len(string) > 7: continue print(string) var = chinese_to_english(re.sub('[A-Z]+', '', string)) data_cut.columns = data_cut.loc[1, :] data_cut = data_cut.fillna(method='pad') try: receipt = data_cut.loc[:, '仓单数量'].tolist()[-1] except: receipt = data_cut.loc[:, '仓单数量(保税)'].tolist()[-1] receipt_chg = data_cut.loc[:, '当日增减'].tolist()[-1] data_dict = { 'var': var, 'receipt': int(receipt), 'receipt_chg': int(receipt_chg), 'date': date } records = records.append(pd.DataFrame(data_dict, index=[0])) if len(records.index) != 0: records.index = records['var'] vars_in_market = [i for i in vars_list if i in records.index] records = records.loc[vars_in_market, :] return records.reset_index(drop=True)
def get_receipt(start_day: str = None, end_day: str = None, vars_list: List = cons.contract_symbols): """ 获取大宗商品注册仓单数量 Parameters ------ start_day: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天 end_day: 结束数据 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天 vars_list: 合约品种如RB、AL等列表 为空时为所有商品 Return ------- DataFrame 展期收益率数据(DataFrame): var 商品品种 string receipt 仓单数量 int date 日期 string YYYYMMDD """ start_day = cons.convert_date( start_day) if start_day is not None else datetime.date.today() end_day = cons.convert_date( end_day) if end_day is not None else cons.convert_date( cons.get_latest_data_date(datetime.datetime.now())) records = pd.DataFrame() while start_day <= end_day: if start_day.strftime('%Y%m%d') not in calendar: warnings.warn(f"{start_day.strftime('%Y%m%d')}非交易日") else: print(start_day) for market, market_vars in cons.market_exchange_symbols.items(): if market == 'dce': if start_day >= datetime.date(2009, 4, 7): f = get_dce_receipt else: print('20090407起,dce每交易日更新仓单数据') f = None elif market == 'shfe': if datetime.date(2008, 10, 6) <= start_day <= datetime.date( 2014, 5, 16): f = get_shfe_receipt_1 elif start_day > datetime.date(2014, 5, 16): f = get_shfe_receipt_2 else: f = None print('20081006起,shfe每交易日更新仓单数据') elif market == 'czce': if datetime.date(2008, 3, 3) <= start_day <= datetime.date( 2010, 8, 24): f = get_czce_receipt_1 elif datetime.date(2010, 8, 24) < start_day <= datetime.date( 2015, 11, 11): f = get_czce_receipt_2 elif start_day > datetime.date(2015, 11, 11): f = get_czce_receipt_3 else: f = None print('20080303起,czce每交易日更新仓单数据') get_vars = [var for var in vars_list if var in market_vars] if market != 'cffex' and get_vars != []: if f is not None: records = records.append(f(start_day, get_vars)) start_day += datetime.timedelta(days=1) return records.reset_index(drop=True)
def get_czce_receipt_1(date: str = None, vars_list: List = cons.contract_symbols): """ 抓取郑州商品交易所注册仓单数据 适用20080222至20100824(包括) Parameters ------ date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天 vars_list: 合约品种如CF、TA等列表 为空时为所有商品 Return ------- DataFrame: 展期收益率数据(DataFrame): var 商品品种 string receipt 仓单数 int date 日期 string YYYYMMDD """ date = cons.convert_date(date).strftime( '%Y%m%d') if date is not None else datetime.date.today() if date not in calendar: warnings.warn('%s非交易日' % date.strftime('%Y%m%d')) return None if date == '20090820': return pd.DataFrame() url = cons.CZCE_RECEIPT_URL_1 % date r = requests_link(url, encoding='utf-8') context = r.text data = pd.read_html(context)[1] records = pd.DataFrame() indexes = [x for x in data.index if '品种:' in str(data[0].tolist()[x])] ends = [x for x in data.index if '总计' in str(data[0].tolist()[x])] for i in list(range(len(indexes))): if i != len(indexes) - 1: data_cut = data.loc[indexes[i]:ends[i], :] data_cut = data_cut.fillna(method='pad') else: data_cut = data.loc[indexes[i]:, :] data_cut = data_cut.fillna(method='pad') if 'PTA' in data_cut[0].tolist()[0]: var = 'TA' else: var = chinese_to_english( re.sub(r'[A-Z]+', '', data_cut[0].tolist()[0][3:])) if var == 'CF': receipt = data_cut[6].tolist()[-1] receipt_chg = data_cut[7].tolist()[-1] else: receipt = data_cut[5].tolist()[-1] receipt_chg = data_cut[6].tolist()[-1] data_dict = { 'var': var, 'receipt': int(receipt), 'receipt_chg': int(receipt_chg), 'date': date } records = records.append(pd.DataFrame(data_dict, index=[0])) if len(records.index) != 0: records.index = records['var'] vars_in_market = [i for i in vars_list if i in records.index] records = records.loc[vars_in_market, :] return records.reset_index(drop=True)
def get_shfe_receipt_1(date: str = None, vars_list: List = cons.contract_symbols): """ 抓取上海期货交易所注册仓单数据, 适用20081006至20140518(包括) 20100126、20101029日期交易所格式混乱,直接回复脚本中DataFrame, 20100416、20130821日期交易所数据丢失 :param date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天 :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品 :return: pd.DataFrame 展期收益率数据(DataFrame): var 商品品种 string receipt 仓单数 int date 日期 string YYYYMMDD """ date = (cons.convert_date(date).strftime("%Y%m%d") if date is not None else datetime.date.today()) if date not in calendar: warnings.warn(f"{date.strftime('%Y%m%d')}非交易日") return None if date == "20100126": shfe_20100126["date"] = date return shfe_20100126 elif date == "20101029": shfe_20101029["date"] = date return shfe_20101029 elif date in ["20100416", "20130821"]: return warnings.warn("20100416、20130821日期交易所数据丢失") else: var_list = [ "天然橡胶", "沥青仓库", "沥青厂库", "热轧卷板", "燃料油", "白银", "线材", "螺纹钢", "铅", "铜", "铝", "锌", "黄金", "锡", "镍", ] url = cons.SHFE_RECEIPT_URL_1 % date data = pandas_read_html_link(url)[0] indexes = [x for x in data.index if (data[0].tolist()[x] in var_list)] last_index = [ x for x in data.index if "注" in str(data[0].tolist()[x]) ][0] - 1 records = pd.DataFrame() for i in list(range(len(indexes))): if i != len(indexes) - 1: data_cut = data.loc[indexes[i]:indexes[i + 1] - 1, :] else: data_cut = data.loc[indexes[i]:last_index, :] data_cut = data_cut.fillna(method="pad") data_dict = dict() data_dict["var"] = chinese_to_english(data_cut[0].tolist()[0]) data_dict["receipt"] = int(data_cut[2].tolist()[-1]) data_dict["receipt_chg"] = int(data_cut[3].tolist()[-1]) data_dict["date"] = date records = records.append(pd.DataFrame(data_dict, index=[0])) if len(records.index) != 0: records.index = records["var"] vars_in_market = [i for i in vars_list if i in records.index] records = records.loc[vars_in_market, :] return records.reset_index(drop=True)
def get_dce_daily(date="20200416", symbol_type="futures", retries=0): """ 大连商品交易所日交易数据 Parameters ------ date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天 symbol_type: 数据类型, 为'futures'期货 或 'option'期权二者之一 retries: int, 当前重试次数,达到3次则获取数据失败 Return ------- DataFrame 大商所日交易数据(DataFrame): symbol 合约代码 date 日期 open 开盘价 high 最高价 low 最低价 close 收盘价 volume 成交量 open_interest 持仓量 turnover 成交额 settle 结算价 pre_settle 前结算价 variety 合约类别 或 DataFrame 郑商所每日期权交易数据 symbol 合约代码 date 日期 open 开盘价 high 最高价 low 最低价 close 收盘价 pre_settle 前结算价 settle 结算价 delta 对冲值 volume 成交量 open_interest 持仓量 oi_change 持仓变化 turnover 成交额 implied_volatility 隐含波动率 exercise_volume 行权量 variety 合约类别 或 None(给定日期没有交易数据) """ day = cons.convert_date( date) if date is not None else datetime.date.today() if day.strftime("%Y%m%d") not in calendar: warnings.warn("%s非交易日" % day.strftime("%Y%m%d")) return None if retries > 3: print("maximum retires for DCE market data: ", day.strftime("%Y%m%d")) return if symbol_type == "futures": url = (cons.DCE_DAILY_URL + "?" + urllib.parse.urlencode({ "currDate": day.strftime("%Y%m%d"), "year": day.strftime("%Y"), "month": str(int(day.strftime("%m")) - 1), "day": day.strftime("%d"), })) listed_columns = cons.DCE_COLUMNS output_columns = cons.OUTPUT_COLUMNS elif symbol_type == "option": url = (cons.DCE_DAILY_URL + "?" + urllib.parse.urlencode({ "currDate": day.strftime("%Y%m%d"), "year": day.strftime("%Y"), "month": str(int(day.strftime("%m")) - 1), "day": day.strftime("%d"), "dayQuotes.trade_type": "1", })) listed_columns = cons.DCE_OPTION_COLUMNS output_columns = cons.OPTION_OUTPUT_COLUMNS else: print("invalid symbol_type :" + symbol_type + ', should be one of "futures" or "option"') return try: response = requests_link(url, method="post", headers=cons.headers).text except requests.exceptions.ContentDecodingError as reason: return get_dce_daily(day, retries=retries + 1) except requests.exceptions.HTTPError as reason: if reason.response == 504: return get_dce_daily(day, retries=retries + 1) elif reason.response != 404: print(cons.DCE_DAILY_URL, reason) return if "错误:您所请求的网址(URL)无法获取" in response: return get_dce_daily(day, retries=retries + 1) elif "暂无数据" in response: return data = BeautifulSoup(response, "html.parser").find_all("tr") if len(data) == 0: return dict_data = list() implied_data = list() for i_data in data[1:]: if "小计" in i_data.text or "总计" in i_data.text: continue x = i_data.find_all("td") if symbol_type == "futures": row_dict = {"variety": cons.DCE_MAP[x[0].text.strip()]} row_dict["symbol"] = row_dict["variety"] + x[1].text.strip() for i, field in enumerate(listed_columns): field_content = x[i + 2].text.strip() if "-" in field_content: row_dict[field] = 0 elif field in ["volume", "open_interest"]: row_dict[field] = int(field_content.replace(",", "")) else: row_dict[field] = float(field_content.replace(",", "")) dict_data.append(row_dict) elif len(x) == 16: m = cons.FUTURES_SYMBOL_PATTERN.match(x[1].text.strip()) if not m: continue row_dict = { "symbol": x[1].text.strip(), "variety": m.group(1).upper(), "contract_id": m.group(0), } for i, field in enumerate(listed_columns): field_content = x[i + 2].text.strip() if "-" in field_content: row_dict[field] = 0 elif field in ["volume", "open_interest"]: row_dict[field] = int(field_content.replace(",", "")) else: row_dict[field] = float(field_content.replace(",", "")) dict_data.append(row_dict) elif len(x) == 2: implied_data.append({ "contract_id": x[0].text.strip(), "implied_volatility": float(x[1].text.strip()), }) df = pd.DataFrame(dict_data) df["date"] = day.strftime("%Y%m%d") if symbol_type == "futures": return df[output_columns] else: return pd.merge( df, pd.DataFrame(implied_data), on="contract_id", how="left", indicator=False, )[output_columns]
def get_czce_rank_table(date=None, vars_list=cons.contract_symbols): """ 郑州商品交易所前 20 会员持仓排名数据明细 注:该交易所既公布了品种排名, 也公布了标的排名 :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天 :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品, 数据从20050509开始,每交易日16:30左右更新数据 :return: pd.DataFrame rank 排名 int vol_party_name 成交量排序的当前名次会员 string(中文) vol 该会员成交量 int vol_chg 该会员成交量变化量 int long_party_name 持多单排序的当前名次会员 string(中文) long_open_interest 该会员持多单 int long_open_interest_chg 该会员持多单变化量 int short_party_name 持空单排序的当前名次会员 string(中文) short_open_interest 该会员持空单 int short_open_interest_chg 该会员持空单变化量 int symbol 标的合约 string var 品种 string date 日期 string YYYYMMDD """ date = cons.convert_date(date) if date is not None else datetime.date.today() if date < datetime.date(2005, 5, 9): print("czce数据源开始日期为20050509,跳过") return {} if date.strftime('%Y%m%d') not in calendar: warnings.warn('%s非交易日' % date.strftime('%Y%m%d')) return {} if date <= datetime.date(2010, 8, 25): url = cons.CZCE_VOL_RANK_URL_1 % (date.strftime('%Y%m%d')) data = _czce_df_read(url, skip_rows=0) r = requests_link(url, 'utf-8') r.encoding = 'utf-8' soup = BeautifulSoup(r.text, 'lxml', from_encoding="gb2312") symbols = [] for link in soup.find_all('b'): strings = (str(link).split(' ')) if len(strings) > 5: try: symbol = chinese_to_english(strings[4]) except: symbol = strings[4] symbols.append(symbol) big_dict = {} for i in range(len(symbols)): symbol = symbols[i] table_cut = data[i + 2] table_cut.columns = rank_columns table_cut = table_cut.iloc[:-1, :] table_cut.loc[:, 'rank'] = table_cut.index table_cut.loc['合计', 'rank'] = 999 table_cut.loc['合计', ['vol_party_name', 'long_party_name', 'short_party_name']] = None table_cut.loc[:, 'symbol'] = symbol table_cut.loc[:, 'variety'] = symbol_varieties(symbol) table_cut[intColumns] = table_cut[intColumns].fillna(0) table_cut[intColumns] = table_cut[intColumns].astype(str) table_cut[intColumns] = table_cut[intColumns].applymap(lambda x: x.replace(',', '')) table_cut = table_cut.applymap(lambda x: 0 if x == '-' else x) table_cut[intColumns] = table_cut[intColumns].astype(float) table_cut[intColumns] = table_cut[intColumns].astype(int) big_dict[symbol] = table_cut.reset_index(drop=True) return big_dict elif date <= datetime.date(2015, 11, 11): url = cons.CZCE_VOL_RANK_URL_2 % (date.year, date.strftime('%Y%m%d')) data = _czce_df_read(url, skip_rows=1)[1] elif date < datetime.date(2017, 12, 28): url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d')) data = _czce_df_read(url, skip_rows=1)[0] else: url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d')) data = _czce_df_read(url, skip_rows=0)[0] if len(data.columns) < 6: return {} table = pd.DataFrame(data.iloc[:, :9]) table.columns = rank_columns table.loc[:, 'rank'] = table.index table[intColumns] = table[intColumns].astype(str) table[intColumns] = table[intColumns].applymap(lambda x: x.replace(',', '')) table = table.applymap(lambda x: 0 if x == '-' else x) indexes = [i for i in table.index if '合约' in i or '品种' in i] indexes.insert(0, 0) big_dict = {} for i in range(len(indexes)): if indexes[i] == 0: table_cut = table.loc[:indexes[i + 1], :] string = table_cut.index.name elif i < len(indexes) - 1: table_cut = table.loc[indexes[i]:indexes[i + 1], :] string = table_cut.index[0] else: table_cut = table.loc[indexes[i]:, :] string = table_cut.index[0] if 'PTA' in string: symbol = 'TA' else: try: symbol = chinese_to_english(find_chinese(re.compile(r':(.*) ').findall(string)[0])) except: symbol = re.compile(r':(.*) ').findall(string)[0] var = symbol_varieties(symbol) if var in vars_list: table_cut = table_cut.dropna(how='any').iloc[1:, :] table_cut = table_cut.loc[[x for x in table_cut.index if x in [str(i) for i in range(21)]], :] table_cut = _table_cut_cal(table_cut, symbol) big_dict[symbol] = table_cut.reset_index(drop=True) return big_dict
def get_futures_daily(start_day="20200413", end_day="20200416", market="CFFEX", index_bar=False): """ 交易所日交易数据 Parameters ------ start_day: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天 end_day: 结束数据 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天 market: 'CFFEX' 中金所, 'CZCE' 郑商所, 'SHFE' 上期所, 'DCE' 大商所 之一, 'INE' 上海国际能源交易中心。默认为中金所 index_bar: bool 是否合成指数K线 Return ------- DataFrame 中金所日交易数据(DataFrame): symbol 合约代码 date 日期 open 开盘价 high 最高价 low 最低价 close 收盘价 volume 成交量 open_interest 持仓量 turnover 成交额 settle 结算价 pre_settle 前结算价 variety 合约类别 或 None(给定日期没有交易数据) """ if market.upper() == "CFFEX": f = get_cffex_daily elif market.upper() == "CZCE": f = get_czce_daily elif market.upper() == "SHFE": f = get_shfe_daily elif market.upper() == "DCE": f = get_dce_daily elif market.upper() == "INE": f = get_ine_daily else: print("Invalid Market Symbol") return start_day = (cons.convert_date(start_day) if start_day is not None else datetime.date.today()) end_day = (cons.convert_date(end_day) if end_day is not None else cons.convert_date( cons.get_latest_data_date(datetime.datetime.now()))) df_list = list() while start_day <= end_day: df = f(start_day) if df is not None: df_list.append(df) if index_bar: df_list.append(get_futures_index(df)) start_day += datetime.timedelta(days=1) if len(df_list) > 0: return pd.concat(df_list).reset_index(drop=True)
def get_cffex_daily(date=None): """ 获取中国金融期货交易所日交易数据 :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象; 为空时为当天 :return: pd.DataFrame 中国金融期货交易所日(pd.DataFrame): symbol 合约代码 date 日期 open 开盘价 high 最高价 low 最低价 close 收盘价 volume 成交量 open_interest 持仓量 turnover 成交额 settle 结算价 pre_settle 前结算价 variety 合约类别 或 None(给定日期没有交易数据) """ day = cons.convert_date( date) if date is not None else datetime.date.today() if day.strftime('%Y%m%d') not in calendar: warnings.warn('%s非交易日' % day.strftime('%Y%m%d')) return None try: html = requests_link(cons.CFFEX_DAILY_URL.format( day.strftime('%Y%m'), day.strftime('%d'), day.strftime('%Y%m%d')), encoding="gbk", headers=cons.headers).text except requests.exceptions.HTTPError as reason: if reason.response != 404: print( cons.CFFEX_DAILY_URL % (day.strftime('%Y%m'), day.strftime('%d'), day.strftime('%Y%m%d')), reason) return if html.find('网页错误') >= 0: return html = [ i.replace(' ', '').split(',') for i in html.split('\n')[:-2] if i[0][0] != '小' ] if html[0][0] != '合约代码': return dict_data = list() day_const = day.strftime('%Y%m%d') for row in html[1:]: m = cons.FUTURES_SYMBOL_PATTERN.match(row[0]) if not m: continue row_dict = {'date': day_const, 'symbol': row[0], 'variety': m.group(1)} for i, field in enumerate(cons.CFFEX_COLUMNS): if row[i + 1] == "": row_dict[field] = 0.0 elif field in ['volume', 'open_interest', 'oi_chg']: row_dict[field] = int(row[i + 1]) else: try: row_dict[field] = float(row[i + 1]) except: pass row_dict['pre_settle'] = row_dict['close'] - row_dict['change1'] dict_data.append(row_dict) return pd.DataFrame(dict_data)[cons.OUTPUT_COLUMNS]
def get_shfe_daily(date="20131016"): """ 上海期货交易所-日频率-量价数据 :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象, 默认为当前交易日 :type date: str or datetime.date :return: 上海期货交易所-日频率-量价数据 :rtype: pandas.DataFrame or None 上期所日交易数据(DataFrame): symbol 合约代码 date 日期 open 开盘价 high 最高价 low 最低价 close 收盘价 volume 成交量 open_interest 持仓量 turnover 成交额 settle 结算价 pre_settle 前结算价 variety 合约类别 或 None(给定交易日没有交易数据) """ day = cons.convert_date(date) if date is not None else datetime.date.today() if day.strftime("%Y%m%d") not in calendar: warnings.warn("%s非交易日" % day.strftime("%Y%m%d")) return None try: json_data = json.loads( requests_link( cons.SHFE_DAILY_URL % (day.strftime("%Y%m%d")), headers=cons.shfe_headers, ).text ) except requests.HTTPError as reason: if reason.response != 404: print(cons.SHFE_DAILY_URL % (day.strftime("%Y%m%d")), reason) return if len(json_data["o_curinstrument"]) == 0: return df = pd.DataFrame( [ row for row in json_data["o_curinstrument"] if row["DELIVERYMONTH"] not in ["小计", "合计"] and row["DELIVERYMONTH"] != "" ] ) df["variety"] = df.PRODUCTID.str.slice(0, -6).str.upper() df["symbol"] = df["variety"] + df["DELIVERYMONTH"] df["date"] = day.strftime("%Y%m%d") v_wap_df = get_shfe_v_wap(day) if v_wap_df is not None: df = pd.merge( df, v_wap_df[v_wap_df.time_range == "9:00-15:00"], on=["date", "symbol"], how="left", ) df["turnover"] = df.v_wap * df.VOLUME else: df["VOLUME"] = df["VOLUME"].apply(lambda x: 0 if x == "" else x) df["turnover"] = df["VOLUME"] * df["SETTLEMENTPRICE"] df.rename(columns=cons.SHFE_COLUMNS, inplace=True) return df[cons.OUTPUT_COLUMNS]
def get_czce_daily(date=None): """ 获取郑商所日交易数据 Parameters ------ date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天 Return ------- DataFrame 郑商所每日期货交易数据: symbol 合约代码 date 日期 open 开盘价 high 最高价 low 最低价 close 收盘价 volume 成交量 open_interest 持仓量 turnover 成交额 settle 结算价 pre_settle 前结算价 variety 合约类别 或 DataFrame 郑商所每日期权交易数据 symbol 合约代码 date 日期 open 开盘价 high 最高价 low 最低价 close 收盘价 pre_settle 前结算价 settle 结算价 delta 对冲值 volume 成交量 open_interest 持仓量 oi_change 持仓变化 turnover 成交额 implied_volatility 隐含波动率 exercise_volume 行权量 variety 合约类别 None(类型错误或给定日期没有交易数据) """ day = cons.convert_date( date) if date is not None else datetime.date.today() if day.strftime('%Y%m%d') not in calendar: warnings.warn('%s非交易日' % day.strftime('%Y%m%d')) return None if day > datetime.date(2010, 8, 24): if day > datetime.date(2015, 9, 19): u = cons.CZCE_DAILY_URL_3 url = u % (day.strftime('%Y'), day.strftime('%Y%m%d')) elif day < datetime.date(2015, 9, 19): u = cons.CZCE_DAILY_URL_2 url = u % (day.strftime('%Y'), day.strftime('%Y%m%d')) listed_columns = cons.CZCE_COLUMNS output_columns = cons.OUTPUT_COLUMNS try: r = requests.get(url) html = r.text except requests.exceptions.HTTPError as reason: if reason.response.status_code != 404: print( cons.CZCE_DAILY_URL_3 % (day.strftime('%Y'), day.strftime('%Y%m%d')), reason) return if html.find('您的访问出错了') >= 0 or html.find('无期权每日行情交易记录') >= 0: return html = [ i.replace(' ', '').split('|') for i in html.split('\n')[:-4] if i[0][0] != u'小' ] if day > datetime.date(2015, 9, 19): if html[1][0] not in ['品种月份', u'品种代码']: return dict_data = list() day_const = int(day.strftime('%Y%m%d')) for row in html[2:]: m = cons.FUTURES_SYMBOL_PATTERN.match(row[0]) if not m: continue row_dict = { 'date': day_const, 'symbol': row[0], 'variety': m.group(1) } for i, field in enumerate(listed_columns): if row[i + 1] == "\r": row_dict[field] = 0.0 elif field in [ 'volume', 'open_interest', 'oi_chg', 'exercise_volume' ]: row[i + 1] = row[i + 1].replace(',', '') row_dict[field] = int(row[i + 1]) else: row[i + 1] = row[i + 1].replace(',', '') row_dict[field] = float(row[i + 1]) dict_data.append(row_dict) return pd.DataFrame(dict_data)[output_columns] elif day < datetime.date(2015, 9, 19): dict_data = list() day_const = int(day.strftime('%Y%m%d')) for row in html[1:]: row = row[0].split(',') m = cons.FUTURES_SYMBOL_PATTERN.match(row[0]) if not m: continue row_dict = { 'date': day_const, 'symbol': row[0], 'variety': m.group(1) } for i, field in enumerate(listed_columns): if row[i + 1] == "\r": row_dict[field] = 0.0 elif field in [ 'volume', 'open_interest', 'oi_chg', 'exercise_volume' ]: row_dict[field] = int(float(row[i + 1])) else: row_dict[field] = float(row[i + 1]) dict_data.append(row_dict) return pd.DataFrame(dict_data)[output_columns] if day <= datetime.date(2010, 8, 24): u = cons.CZCE_DAILY_URL_1 url = u % day.strftime('%Y%m%d') listed_columns = cons.CZCE_COLUMNS_2 output_columns = cons.OUTPUT_COLUMNS df = pd.read_html(url)[1].dropna(how='any') dict_data = list() day_const = int(day.strftime('%Y%m%d')) for row in df.to_dict(orient='records')[1:]: m = cons.FUTURES_SYMBOL_PATTERN.match(row[0]) if not m: continue row_dict = { 'date': day_const, 'symbol': row[0], 'variety': m.group(1) } for i, field in enumerate(listed_columns): if row[i + 1] == "\r": row_dict[field] = 0.0 elif field in [ 'volume', 'open_interest', 'oi_chg', 'exercise_volume' ]: row_dict[field] = int(row[i + 1]) else: row_dict[field] = float(row[i + 1]) dict_data.append(row_dict) return pd.DataFrame(dict_data)[output_columns]
def get_czce_rank_table(date="20201026", vars_list=cons.contract_symbols): """ 郑州商品交易所前 20 会员持仓排名数据明细 注:该交易所既公布了品种排名, 也公布了标的排名 :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天 :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品, 数据从20050509开始,每交易日16:30左右更新数据 :return: pd.DataFrame rank 排名 int vol_party_name 成交量排序的当前名次会员 string(中文) vol 该会员成交量 int vol_chg 该会员成交量变化量 int long_party_name 持多单排序的当前名次会员 string(中文) long_open_interest 该会员持多单 int long_open_interest_chg 该会员持多单变化量 int short_party_name 持空单排序的当前名次会员 string(中文) short_open_interest 该会员持空单 int short_open_interest_chg 该会员持空单变化量 int symbol 标的合约 string var 品种 string date 日期 string YYYYMMDD """ date = cons.convert_date(date) if date is not None else datetime.date.today() if date < datetime.date(2015, 10, 8): print("CZCE可获取的数据源开始日期为 20151008, 请输入合适的日期参数") return {} if date.strftime('%Y%m%d') not in calendar: warnings.warn('%s非交易日' % date.strftime('%Y%m%d')) return {} if date >= datetime.date(2015, 10, 8): url = f"http://www.czce.com.cn/cn/DFSStaticFiles/Future/{date.year}/{date.isoformat().replace('-', '')}/FutureDataHolding.xls" r = requests.get(url) temp_df = pd.read_excel(BytesIO(r.content)) temp_pinzhong_index = [item + 1 for item in temp_df[temp_df.iloc[:, 0].str.contains("合计")].index.to_list()] temp_pinzhong_index.insert(0, 0) temp_pinzhong_index.pop() temp_symbol_index = temp_df.iloc[temp_pinzhong_index, 0].str.split(" ", expand=True).iloc[:, 0] symbol_list = [re.compile(r"[0-9a-zA-Z_]+").findall(item)[0] for item in temp_symbol_index.values] temp_symbol_index_list = temp_symbol_index.index.to_list() big_dict = {} for i in range(len(temp_symbol_index_list)-1): inner_temp_df = temp_df[temp_symbol_index_list[i]+2: temp_symbol_index_list[i+1]-1] inner_temp_df.columns = ["rank", "vol_party_name", "vol", "vol_chg", "long_party_name", "long_open_interest", "long_open_interest_chg", "short_party_name", "short_open_interest", "short_open_interest_chg", ] inner_temp_df.reset_index(inplace=True, drop=True) big_dict[symbol_list[i]] = inner_temp_df inner_temp_df = temp_df[temp_symbol_index_list[i+1]+2:-1] inner_temp_df.columns = ["rank", "vol_party_name", "vol", "vol_chg", "long_party_name", "long_open_interest", "long_open_interest_chg", "short_party_name", "short_open_interest", "short_open_interest_chg", ] inner_temp_df.reset_index(inplace=True, drop=True) big_dict[symbol_list[-1]] = inner_temp_df new_big_dict = {} for key, value in big_dict.items(): value["symbol"] = key value["variety"] = re.compile(r"[a-zA-Z_]+").findall(key)[0] new_big_dict[key] = value return new_big_dict
def get_cffex_daily(date="20100401"): """ 中国金融期货交易所日交易数据 http://www.cffex.com.cn/rtj/ :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象; 为空时为当天 :return: pandas.DataFrame 中国金融期货交易所日: symbol 合约代码 date 日期 open 开盘价 high 最高价 low 最低价 close 收盘价 volume 成交量 open_interest 持仓量 turnover 成交额 settle 结算价 pre_settle 前结算价 variety 合约类别 或 None(给定日期没有交易数据) """ day = cons.convert_date( date) if date is not None else datetime.date.today() if day.strftime("%Y%m%d") not in calendar: warnings.warn("%s非交易日" % day.strftime("%Y%m%d")) return None url = f"http://www.cffex.com.cn/sj/historysj/{date[:-2]}/zip/{date[:-2]}.zip" r = requests.get(url) try: with zipfile.ZipFile(BytesIO(r.content)) as file: with file.open(f"{date}_1.csv") as my_file: data = my_file.read().decode("gb2312") data_df = pd.read_csv(StringIO(data)) except: return None data_df = data_df[data_df["合约代码"] != "小计"] data_df = data_df[data_df["合约代码"] != "合计"] data_df = data_df[~data_df["合约代码"].str.contains("IO")] data_df.reset_index(inplace=True, drop=True) data_df["合约代码"] = data_df["合约代码"].str.strip() symbol_list = data_df["合约代码"].to_list() variety_list = [ re.compile(r"[a-zA-Z_]+").findall(item)[0] for item in symbol_list ] if data_df.shape[1] == 15: data_df.columns = [ "symbol", "open", "high", "low", "volume", "turnover", "open_interest", "_", "close", "settle", "pre_settle", "_", "_", "_", "_" ] else: data_df.columns = [ "symbol", "open", "high", "low", "volume", "turnover", "open_interest", "_", "close", "settle", "pre_settle", "_", "_", "_" ] data_df["date"] = date data_df["variety"] = variety_list data_df = data_df[[ "symbol", "date", "open", "high", "low", "close", "volume", "open_interest", "turnover", "settle", "pre_settle", "variety" ]] return data_df
def futures_dce_position_rank(date: str = "20160104") -> pd.DataFrame: """ 大连商品交易日每日持仓排名-具体合约 http://www.dce.com.cn/dalianshangpin/xqsj/tjsj26/rtj/rcjccpm/index.html :param date: 指定交易日; e.g., "20200511" :type date: str :return: 指定日期的持仓排名数据 :rtype: pandas.DataFrame """ date = cons.convert_date(date) if date is not None else datetime.date.today() if date.strftime('%Y%m%d') not in calendar: warnings.warn('%s非交易日' % date.strftime('%Y%m%d')) return {} url = "http://www.dce.com.cn/publicweb/quotesdata/exportMemberDealPosiQuotesBatchData.html" headers = { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", "Accept-Encoding": "gzip, deflate", "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", "Cache-Control": "no-cache", "Connection": "keep-alive", "Content-Length": "160", "Content-Type": "application/x-www-form-urlencoded", "Host": "www.dce.com.cn", "Origin": "http://www.dce.com.cn", "Pragma": "no-cache", "Referer": "http://www.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html", "Upgrade-Insecure-Requests": "1", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36", } payload = { "memberDealPosiQuotes.variety": "a", "memberDealPosiQuotes.trade_type": "0", "contract.contract_id": "a2009", "contract.variety_id": "a", "year": date.year, "month": date.month - 1, "day": date.day, "batchExportFlag": "batch", } r = requests.post(url, payload, headers=headers) big_dict = dict() with zipfile.ZipFile(BytesIO(r.content), "r") as z: for i in z.namelist(): file_name = i.encode('cp437').decode('GBK') try: data = pd.read_table(z.open(i), header=None, sep="\t").iloc[:-6] if len(data) < 12: # 处理没有活跃合约的情况 big_dict[file_name.split("_")[1]] = pd.DataFrame() continue start_list = data[data.iloc[:, 0].str.find("名次") == 0].index.tolist() data = data.iloc[start_list[0]:, data.columns[data.iloc[start_list[0], :].notnull()]] data.reset_index(inplace=True, drop=True) start_list = data[data.iloc[:, 0].str.find("名次") == 0].index.tolist() end_list = data[data.iloc[:, 0].str.find("总计") == 0].index.tolist() part_one = data[start_list[0]: end_list[0]].iloc[1:, :] part_two = data[start_list[1]: end_list[1]].iloc[1:, :] part_three = data[start_list[2]: end_list[2]].iloc[1:, :] temp_df = pd.concat([part_one.reset_index(drop=True), part_two.reset_index(drop=True), part_three.reset_index(drop=True)], axis=1, ignore_index=True) temp_df.columns = ["名次", "会员简称", "成交量", "增减", "名次", "会员简称", "持买单量", "增减", "名次", "会员简称", "持卖单量", "增减"] temp_df["rank"] = range(1, len(temp_df) + 1) del temp_df["名次"] temp_df.columns = ["vol_party_name", "vol", "vol_chg", "long_party_name", "long_open_interest", "long_open_interest_chg", "short_party_name", "short_open_interest", "short_open_interest_chg", "rank"] temp_df["symbol"] = file_name.split("_")[1] temp_df["variety"] = file_name.split("_")[1][:-4].upper() temp_df = temp_df[["long_open_interest", "long_open_interest_chg", "long_party_name", "rank", "short_open_interest", "short_open_interest_chg", "short_party_name", "vol", "vol_chg", "vol_party_name", "symbol", "variety"]] big_dict[file_name.split("_")[1]] = temp_df except UnicodeDecodeError as e: try: data = pd.read_table(z.open(i), header=None, sep="\\s+", encoding="gb2312", skiprows=3) except: data = pd.read_table(z.open(i), header=None, sep="\\s+", encoding="gb2312", skiprows=4) start_list = data[data.iloc[:, 0].str.find("名次") == 0].index.tolist() end_list = data[data.iloc[:, 0].str.find("总计") == 0].index.tolist() part_one = data[start_list[0]: end_list[0]].iloc[1:, :] part_two = data[start_list[1]: end_list[1]].iloc[1:, :] part_three = data[start_list[2]: end_list[2]].iloc[1:, :] temp_df = pd.concat([part_one.reset_index(drop=True), part_two.reset_index(drop=True), part_three.reset_index(drop=True)], axis=1, ignore_index=True) temp_df.columns = ["名次", "会员简称", "成交量", "增减", "名次", "会员简称", "持买单量", "增减", "名次", "会员简称", "持卖单量", "增减"] temp_df["rank"] = range(1, len(temp_df) + 1) del temp_df["名次"] temp_df.columns = ["vol_party_name", "vol", "vol_chg", "long_party_name", "long_open_interest", "long_open_interest_chg", "short_party_name", "short_open_interest", "short_open_interest_chg", "rank"] temp_df["symbol"] = file_name.split("_")[1] temp_df["variety"] = file_name.split("_")[1][:-4].upper() temp_df = temp_df[["long_open_interest", "long_open_interest_chg", "long_party_name", "rank", "short_open_interest", "short_open_interest_chg", "short_party_name", "vol", "vol_chg", "vol_party_name", "symbol", "variety"]] big_dict[file_name.split("_")[1]] = temp_df return big_dict
def get_dce_rank_table(date: str = "20210302", vars_list=cons.contract_symbols): """ 大连商品交易所前 20 会员持仓排名数据明细, 由于交易所网站问题, 需要 20200720 之后才有数据 注: 该交易所只公布标的合约排名 :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date 对象, 为空时为当天 :param vars_list: 合约品种如 RB、AL 等列表为空时为所有商品, 数据从 20060104 开始,每交易日 16:30 左右更新数据 :return: pandas.DataFrame rank 排名 int vol_party_name 成交量排序的当前名次会员 string(中文) vol 该会员成交量 int vol_chg 该会员成交量变化量 int long_party_name 持多单排序的当前名次会员 string(中文) long_open_interest 该会员持多单 int long_open_interest_chg 该会员持多单变化量 int short_party_name 持空单排序的当前名次会员 string(中文) short_open_interest 该会员持空单 int short_open_interest_chg 该会员持空单变化量 int symbol 标的合约 string var 品种 string date 日期 string YYYYMMDD """ date_string = date date = cons.convert_date(date) if date is not None else datetime.date.today() if date < datetime.date(2006, 1, 4): print(Exception("大连商品交易所数据源开始日期为 20060104,跳过")) return {} if date.strftime('%Y%m%d') not in calendar: warnings.warn('%s非交易日' % date.strftime('%Y%m%d')) return {} vars_list = [i for i in vars_list if i in cons.market_exchange_symbols['dce']] big_dict = {} for var in vars_list: # var = 'V' symbol_list = _get_dce_contract_list(date, var) for symbol in symbol_list: url = cons.DCE_VOL_RANK_URL_1 % (var.lower(), symbol, var.lower(), date.year, date.month - 1, date.day) temp_df = pd.read_excel(url[:-3] + 'excel', header=0, skiprows=3) temp_df.dropna(how='any', axis=0, inplace=True) temp_df = temp_df.applymap(lambda x: str(x).replace(',', '')) del temp_df["名次.1"] del temp_df["名次.2"] temp_df.rename(columns={"名次": "rank", "会员简称": "vol_party_name", "成交量": "vol", "增减": "vol_chg", "会员简称.1": "long_party_name", "持买单量": "long_open_interest", "增减.1": "long_open_interest_chg", "会员简称.2": "short_party_name", "持卖单量": "short_open_interest", "增减.2": "short_open_interest_chg", }, inplace=True) temp_df['symbol'] = symbol.upper() temp_df['var'] = var temp_df['date'] = date_string temp_df = temp_df.applymap(lambda x: str(x).replace("-", "0") if x == "-" else x) temp_df['rank'] = range(1, len(temp_df)+1) temp_df['vol'] = temp_df['vol'].astype(float) temp_df['vol_chg'] = temp_df['vol_chg'].astype(float) temp_df['long_open_interest'] = temp_df['long_open_interest'].astype(float) temp_df['long_open_interest_chg'] = temp_df['long_open_interest_chg'].astype(float) temp_df['short_open_interest'] = temp_df['short_open_interest'].astype(float) temp_df['short_open_interest_chg'] = temp_df['short_open_interest_chg'].astype(float) big_dict[symbol] = temp_df return big_dict
def get_shfe_rank_table(date=None, vars_list=cons.contract_symbols): """ 上海期货交易所前 20 会员持仓排名数据明细 注:该交易所只公布每个品种内部的标的排名,没有公布品种的总排名 数据从20020107开始,每交易日16:30左右更新数据 :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天 :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品 :return: pd.DataFrame rank 排名 int vol_party_name 成交量排序的当前名次会员 string(中文) vol 该会员成交量 int vol_chg 该会员成交量变化量 int long_party_name 持多单排序的当前名次会员 string(中文) long_open_interest 该会员持多单 int long_open_interest_chg 该会员持多单变化量 int short_party_name 持空单排序的当前名次会员 string(中文) short_open_interest 该会员持空单 int short_open_interest_chg 该会员持空单变化量 int symbol 标的合约 string var 品种 string date 日期 string YYYYMMDD """ date = cons.convert_date( date) if date is not None else datetime.date.today() if date < datetime.date(2002, 1, 7): print("shfe数据源开始日期为20020107,跳过") return {} if date.strftime('%Y%m%d') not in calendar: warnings.warn('%s非交易日' % date.strftime('%Y%m%d')) return {} url = cons.SHFE_VOL_RANK_URL % (date.strftime('%Y%m%d')) r = requests_link(url, 'utf-8') try: context = json.loads(r.text) except: return {} df = pd.DataFrame(context['o_cursor']) df = df.rename( columns={ 'CJ1': 'vol', 'CJ1_CHG': 'vol_chg', 'CJ2': 'long_open_interest', 'CJ2_CHG': 'long_open_interest_chg', 'CJ3': 'short_open_interest', 'CJ3_CHG': 'short_open_interest_chg', 'PARTICIPANTABBR1': 'vol_party_name', 'PARTICIPANTABBR2': 'long_party_name', 'PARTICIPANTABBR3': 'short_party_name', 'PRODUCTNAME': 'product1', 'RANK': 'rank', 'INSTRUMENTID': 'symbol', 'PRODUCTSORTNO': 'product2' }) if len(df.columns) < 3: return {} df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x) df = df.applymap(lambda x: None if x == '' else x) df['variety'] = df['symbol'].apply(lambda x: symbol_varieties(x)) df = df[df['rank'] > 0] for col in [ 'PARTICIPANTID1', 'PARTICIPANTID2', 'PARTICIPANTID3', 'product1', 'product2' ]: try: del df[col] except: pass get_vars = [var for var in vars_list if var in df['variety'].tolist()] big_dict = {} for var in get_vars: df_var = df[df['variety'] == var] for symbol in set(df_var['symbol']): df_symbol = df_var[df_var['symbol'] == symbol] big_dict[symbol] = df_symbol.reset_index(drop=True) return big_dict
def get_czce_receipt_3( date: str = None, vars_list: List = cons.contract_symbols) -> pd.DataFrame: """ 郑州商品交易所-注册仓单数据 适用 20151008-至今 http://www.czce.com.cn/cn/jysj/cdrb/H770310index_1.htm :param date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天 :type date: str :param vars_list: 合约品种如 CF、TA 等列表为空时为所有商品 :type vars_list: list :return: 注册仓单数据 :rtype: pandas.DataFrame """ if not isinstance(vars_list, list): return warnings.warn("vars_list: 必须是列表") date = cons.convert_date(date).strftime( '%Y%m%d') if date is not None else datetime.date.today() if date not in calendar: warnings.warn('%s非交易日' % date.strftime('%Y%m%d')) return None url = f"http://www.czce.com.cn/cn/DFSStaticFiles/Future/{date[:4]}/{date}/FutureDataWhsheet.xls" r = requests_link(url, encoding='utf-8') temp_df = pd.read_excel(BytesIO(r.content)) temp_df = temp_df[[ bool(1 - item) for item in [ item if item is not np.NAN else False for item in temp_df.iloc[:, 0].str.contains("非农产品") ] ]] temp_df.reset_index(inplace=True, drop=True) range_list_one = list(temp_df[[ item if item is not np.NAN else False for item in temp_df.iloc[:, 0].str.contains("品种") ]].index) range_list_two = list(temp_df[[ item if item is not np.NAN else False for item in temp_df.iloc[:, 0].str.contains("品种") ]].index)[1:] range_list_two.append(None) symbol_list = [] receipt_list = [] receipt_chg_list = [] for page in range(len(range_list_one)): inner_df = temp_df[range_list_one[page]:range_list_two[page]] reg = re.compile(r'[A-Z]+') try: symbol = reg.findall(inner_df.iloc[0, 0])[0] except: continue symbol_list.append(symbol) inner_df.columns = inner_df.iloc[1, :] inner_df = inner_df.iloc[2:, :] inner_df = inner_df.dropna(axis=1, how='all') if symbol == "PTA": try: receipt_list.append( inner_df['仓单数量(完税)'].iloc[-1] + inner_df['仓单数量(保税)'].iloc[-1]) # 20210316 TA 分为保税和完税 except: receipt_list.append(0) elif symbol == "MA": try: try: receipt_list.append( inner_df['仓单数量(完税)'].iloc[-2] + inner_df['仓单数量(保税)'].iloc[-2]) # 20210316 MA 分为保税和完税 except: receipt_list.append( inner_df['仓单数量(完税)'].iloc[-2]) # 处理 MA 的特殊格式 except: receipt_list.append(0) else: try: receipt_list.append(inner_df['仓单数量'].iloc[-1]) except: receipt_list.append(0) if symbol == "MA": receipt_chg_list.append(inner_df['当日增减'].iloc[-2]) else: receipt_chg_list.append(inner_df['当日增减'].iloc[-1]) data_df = pd.DataFrame([ symbol_list, receipt_list, receipt_chg_list, [date] * len(receipt_chg_list) ]).T data_df.columns = ['var', 'receipt', 'receipt_chg', 'date'] temp_list = data_df['var'].tolist() data_df['var'] = [item if item != "PTA" else "TA" for item in temp_list] if len(data_df.index) != 0: data_df.index = data_df['var'] vars_in_market = [i for i in vars_list if i in data_df.index] records = data_df.loc[vars_in_market, :] return records.reset_index(drop=True)