def get_futures_daily(start_day=None, end_day=None, market="CFFEX", index_bar=False):
    """
    Fetch daily trading data from one futures exchange over a date range.

    :param start_day: first day to fetch; YYYY-MM-DD or YYYYMMDD string or
        datetime.date object; today when None
    :param end_day: last day to fetch (inclusive), same formats; when None the
        date returned by ``cons.get_latest_data_date`` for the current time is used
    :param market: one of 'CFFEX', 'CZCE', 'SHFE', 'DCE' (case-insensitive);
        defaults to 'CFFEX'
    :param index_bar: when True, the result of ``get_futures_index`` for each
        daily frame is appended right after that frame
    :return: the per-day frames concatenated with a fresh 0..n-1 index, or None
        when the market is not recognised or no day produced any data
    :rtype: pandas.DataFrame or None
    """
    exchange = market.upper()
    if exchange == "CFFEX":
        fetch_one_day = get_cffex_daily
    elif exchange == "CZCE":
        fetch_one_day = get_czce_daily
    elif exchange == "SHFE":
        fetch_one_day = get_shfe_daily
    elif exchange == "DCE":
        fetch_one_day = get_dce_daily
    else:
        print("Invalid Market Symbol")
        return None

    if start_day is None:
        current = datetime.date.today()
    else:
        current = cons.convert_date(start_day)
    if end_day is None:
        last = cons.convert_date(
            cons.get_latest_data_date(datetime.datetime.now()))
    else:
        last = cons.convert_date(end_day)

    one_day = datetime.timedelta(days=1)
    frames = []
    while current <= last:
        daily = fetch_one_day(current)
        if daily is not None:
            frames.append(daily)
            if index_bar:
                frames.append(get_futures_index(daily))
        current += one_day

    if not frames:
        return None
    return pd.concat(frames).reset_index(drop=True)
def get_czce_receipt_1(date: str = None, vars_list: List = cons.contract_symbols):
    """
    Scrape registered warehouse receipts from the Zhengzhou Commodity Exchange.

    Applies to the page format used from 20080222 to 20100824 (inclusive).

    :param date: YYYY-MM-DD or YYYYMMDD string or datetime.date object; today when None
    :param vars_list: variety codes to keep (e.g. CF, TA); the result follows this order
    :return: DataFrame with columns ``var`` (variety code), ``receipt`` (int),
        ``receipt_chg`` (int) and ``date`` (YYYYMMDD string); an empty DataFrame
        for 20090820; None when the date is not in the trading calendar
    :rtype: pandas.DataFrame or None
    """
    # Normalise to a YYYYMMDD string in BOTH branches. Previously the default
    # branch left a datetime.date here, which never matched the string entries
    # of `calendar`, while the explicit branch produced a str on which the
    # warning then called .strftime().
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    date = day.strftime('%Y%m%d')
    if date not in calendar:
        warnings.warn('%s非交易日' % date)
        return None
    if date == '20090820':
        return pd.DataFrame()

    url = cons.CZCE_RECEIPT_URL_1 % date
    r = requests_link(url, encoding='utf-8')
    data = pd.read_html(r.text)[1]

    # Column 0 is scanned for section markers; convert it once instead of per row.
    first_col = data[0].tolist()
    indexes = [x for x in data.index if '品种:' in str(first_col[x])]
    ends = [x for x in data.index if '总计' in str(first_col[x])]

    rows = []
    last_section = len(indexes) - 1
    for i, start in enumerate(indexes):
        if i != last_section:
            data_cut = data.loc[start:ends[i], :]
        else:
            # The final section runs to the end of the table.
            data_cut = data.loc[start:, :]
        data_cut = data_cut.ffill()

        header = data_cut[0].tolist()[0]
        if 'PTA' in header:
            var = 'TA'
        else:
            var = chinese_to_english(re.sub(r'[A-Z]+', '', header[3:]))

        # CF sections keep the receipt figures one column further right.
        if var == 'CF':
            receipt_col, change_col = 6, 7
        else:
            receipt_col, change_col = 5, 6
        rows.append({
            'var': var,
            'receipt': int(data_cut[receipt_col].tolist()[-1]),
            'receipt_chg': int(data_cut[change_col].tolist()[-1]),
            'date': date,
        })

    # Build the frame once; DataFrame.append was removed in pandas 2.0.
    records = pd.DataFrame(rows)
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
def get_czce_receipt_2(date: str = None, vars_list: List = cons.contract_symbols):
    """
    Scrape registered warehouse receipts from the Zhengzhou Commodity Exchange.

    Applies to the page format used from 20100825 to 20151111 (inclusive).

    :param date: YYYY-MM-DD or YYYYMMDD string or datetime.date object; today when None
    :param vars_list: variety codes to keep (e.g. CF, TA); the result follows this order
    :return: DataFrame with columns ``var``, ``receipt`` (int), ``receipt_chg`` (int)
        and ``date`` (YYYYMMDD string); None when the date is not in the trading calendar
    :rtype: pandas.DataFrame or None
    """
    # Always work with a YYYYMMDD string: the old default branch kept a
    # datetime.date, which broke the calendar lookup and the date[:4] slice,
    # and the explicit branch crashed in the warning by calling str.strftime.
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    date = day.strftime('%Y%m%d')
    if date not in calendar:
        warnings.warn('%s非交易日' % date)
        return None

    url = cons.CZCE_RECEIPT_URL_2 % (date[:4], date)
    r = requests.get(url)
    r.encoding = 'utf-8'
    # The first three tables on the page are skipped.
    data = pd.read_html(r.text)[3:]

    rows = []
    for data_cut in data:
        if len(data_cut.columns) <= 3:
            continue
        first_col = data_cut[0].tolist()
        # Trim everything from the first '注:' row onwards.
        last_indexes = [x for x in data_cut.index if '注:' in str(first_col[x])]
        if len(last_indexes) > 0:
            last_index = last_indexes[0] - 1
            data_cut = data_cut.loc[:last_index, :]

        header = data_cut[0].tolist()[0]
        if 'PTA' in header:
            var = 'TA'
        else:
            string = header.split(' ')[0][3:]
            var = chinese_to_english(re.sub(r'[A-Z]+', '', string))

        # The row labelled 1 carries the column captions.
        data_cut.columns = data_cut.T[1].tolist()
        receipt = data_cut['仓单数量'].tolist()[-1]
        receipt_chg = data_cut['当日增减'].tolist()[-1]
        rows.append({
            'var': var,
            'receipt': int(receipt),
            'receipt_chg': int(receipt_chg),
            'date': date,
        })

    # Build the frame once; DataFrame.append was removed in pandas 2.0.
    records = pd.DataFrame(rows)
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
def get_spot_price(date=None, vars_list=cons.contract_symbols):
    """
    Fetch commodity spot prices and the matching basis figures for one trading day.

    :param date: YYYY-MM-DD or YYYYMMDD string or datetime.date object; today when None
    :param vars_list: variety codes to keep (e.g. RB, AL); the result follows this order
    :return: the rows of ``_check_information``'s result whose ``symbol`` is in
        ``vars_list``, re-indexed 0..n-1; None when the date is not in the trading
        calendar; False after five failed fetch attempts
    :rtype: pandas.DataFrame, None or bool
    :raises Exception: when the date is earlier than 2011-01-04
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date < datetime.date(2011, 1, 4):
        raise Exception("数据源开始日期为 20110104, 请将获取数据时间点设置在 20110104 后")
    day_compact = date.strftime("%Y%m%d")
    if day_compact not in calendar:
        warnings.warn(f"{day_compact}非交易日")
        return None

    day_dashed = date.strftime("%Y-%m-%d")
    u1 = cons.SYS_SPOT_PRICE_LATEST_URL
    u2 = cons.SYS_SPOT_PRICE_URL.format(day_dashed)
    attempt = 1
    # NOTE(review): a page that loads but shows a different date only sleeps and
    # retries without counting as an attempt, so this loop has no upper bound in
    # that case. Left unchanged; confirm whether polling is intended.
    while True:
        # The dated URL is tried first, then the "latest" page.
        for url in [u2, u1]:
            try:
                r = pandas_read_html_link(url)
                string = r[0].loc[1, 1]
                # Keep only the digits of the header cell and compare the
                # eight digits after the first three with the requested day.
                news = "".join(re.findall(r"[0-9]", string))
                if news[3:11] == day_compact:
                    records = _check_information(r[1], date)
                    records.index = records["symbol"]
                    var_list_in_market = [
                        v for v in vars_list if v in records.index
                    ]
                    temp_df = records.loc[var_list_in_market, :]
                    temp_df.reset_index(drop=True, inplace=True)
                    return temp_df
                else:
                    time.sleep(3)
            # Was a bare `except:`, which also trapped KeyboardInterrupt and
            # SystemExit and made the retry loop impossible to interrupt.
            except Exception:
                print(
                    f"{day_dashed}日生意社数据连接失败,第{str(attempt)}次尝试,最多5次")
                attempt += 1
                if attempt > 5:
                    print(
                        f"{day_dashed}日生意社数据连接失败, 如果当前交易日是 2018-09-12, 由于生意社源数据缺失, 无法访问, 否则为重复访问已超过5次,您的地址被网站墙了,请保存好返回数据,稍后从该日期起重试"
                    )
                    return False
def get_shfe_receipt_1(date: str = None, vars_list: List = cons.contract_symbols):
    """
    Scrape registered warehouse receipts from the Shanghai Futures Exchange.

    Applies to 20081006 through 20140518 (inclusive). For 20100126 and 20101029
    the module-level frames ``shfe_20100126`` / ``shfe_20101029`` are returned
    instead of scraping; for 20100416 and 20130821 a warning is issued and
    None is returned.

    :param date: YYYY-MM-DD or YYYYMMDD string or datetime.date object; today when None
    :param vars_list: variety codes to keep (e.g. RB, AL); the result follows this order
    :return: DataFrame with columns ``var``, ``receipt`` (int), ``receipt_chg`` (int)
        and ``date`` (YYYYMMDD string); None for non-trading days and the two
        dates noted above
    :rtype: pandas.DataFrame or None
    """
    # Normalise to a YYYYMMDD string in both branches. The default branch used
    # to keep a datetime.date (never found in `calendar`), and the explicit
    # branch crashed in the warning by calling .strftime on a str.
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    date = day.strftime('%Y%m%d')
    if date not in calendar:
        warnings.warn(f"{date}非交易日")
        return None

    if date == '20100126':
        shfe_20100126['date'] = date
        return shfe_20100126
    if date == '20101029':
        shfe_20101029['date'] = date
        return shfe_20101029
    if date in ['20100416', '20130821']:
        warnings.warn('20100416、20130821日期交易所数据丢失')
        return None

    var_list = [
        '天然橡胶', '沥青仓库', '沥青厂库', '热轧卷板', '燃料油', '白银', '线材', '螺纹钢', '铅',
        '铜', '铝', '锌', '黄金', '锡', '镍'
    ]
    url = cons.SHFE_RECEIPT_URL_1 % date
    data = pandas_read_html_link(url)[0]

    # Column 0 carries the section names; convert it once for both scans.
    first_col = data[0].tolist()
    indexes = [x for x in data.index if first_col[x] in var_list]
    # The row before the first '注' row closes the last section.
    last_index = [x for x in data.index if '注' in str(first_col[x])][0] - 1

    rows = []
    final_section = len(indexes) - 1
    for i, start in enumerate(indexes):
        stop = indexes[i + 1] - 1 if i != final_section else last_index
        data_cut = data.loc[start:stop, :].ffill()
        rows.append({
            'var': chinese_to_english(data_cut[0].tolist()[0]),
            'receipt': int(data_cut[2].tolist()[-1]),
            'receipt_chg': int(data_cut[3].tolist()[-1]),
            'date': date,
        })

    # Build the frame once; DataFrame.append was removed in pandas 2.0.
    records = pd.DataFrame(rows)
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
def get_roll_yield(date=None, var="CU", symbol1=None, symbol2=None, df=None):
    """
    Compute the annualised roll yield between two contracts on one trading day.

    When both ``symbol1`` and ``symbol2`` are given, those two contracts are
    used. Otherwise the two contracts of ``var`` with the largest open interest
    are picked from the daily bars.

    :param date: YYYY-MM-DD or YYYYMMDD string or datetime.date object; today when None
    :param var: variety code such as RB or AL; replaced by the variety of
        ``symbol1`` when ``symbol1`` is given
    :param symbol1: first contract, e.g. rb1810
    :param symbol2: second contract, e.g. rb1812
    :param df: daily bars to read prices from; fetched through
        ``get_futures_daily`` when not a DataFrame (passing it in avoids the download)
    :return: ``(roll_yield, near_by, deferred)``; None when the date is not in
        the trading calendar, no bars are available, or the contracts cannot be
        found; False when either close price is 0
    :rtype: tuple, None or bool
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % date.strftime("%Y%m%d"))
        return None
    if symbol1:
        var = symbol_varieties(symbol1)
    if not isinstance(df, pd.DataFrame):
        market = symbol_market(var)
        df = get_futures_daily(start_day=date, end_day=date, market=market)
        if df is None:
            # get_futures_daily returns None when nothing could be fetched.
            return None

    # Only auto-select when the caller did not name both contracts. Previously
    # this ran whenever `var` was truthy -- i.e. always -- and silently
    # replaced the caller's symbol1/symbol2.
    if var and not (symbol1 and symbol2):
        # Exchange data may include rows such as "CUefp"; drop them first.
        df = df[~df["symbol"].str.contains("efp")]
        df = df[df["variety"] == var].sort_values("open_interest",
                                                  ascending=False)
        ranked = df["symbol"].tolist()
        if len(ranked) < 2:
            return None
        symbol1, symbol2 = ranked[0], ranked[1]

    closes = []
    for symbol in (symbol1, symbol2):
        matches = df["close"][df["symbol"] == symbol.upper()].tolist()
        if not matches:
            return None
        closes.append(float(matches[0]))
    close1, close2 = closes

    # Digits of the symbol: everything but the last two is the year part,
    # the last two are the month.
    a = re.sub(r"\D", "", symbol1)
    a_1 = int(a[:-2])
    a_2 = int(a[-2:])
    b = re.sub(r"\D", "", symbol2)
    b_1 = int(b[:-2])
    b_2 = int(b[-2:])
    # Signed month distance from symbol2 to symbol1.
    c = (a_1 - b_1) * 12 + (a_2 - b_2)
    if close1 == 0 or close2 == 0:
        return False
    roll_yield = np.log(close2 / close1) / c * 12
    if c > 0:
        # symbol1 expires later, so symbol2 is the near-by contract.
        return roll_yield, symbol2, symbol1
    return roll_yield, symbol1, symbol2
def get_rank_sum_daily(start_day=None, end_day=None, vars_list=cons.contract_symbols):
    """
    Collect the top-5/10/15/20 member position summaries for every trading day in a range.

    Each trading day is delegated to ``get_rank_sum``; days that are not in the
    trading calendar only produce a warning.

    :param start_day: first day; YYYY-MM-DD or YYYYMMDD string or datetime.date; today when None
    :param end_day: last day (inclusive), same formats; when None the date returned
        by ``cons.get_latest_data_date`` for the current time is used
    :param vars_list: variety codes to include (e.g. RB, AL)
    :return: the daily results concatenated with a fresh index. If a day returns
        False (connection failure), collection stops and what was gathered so far
        is returned; an empty DataFrame when nothing was gathered
    :rtype: pandas.DataFrame
    """
    start_day = cons.convert_date(start_day) if start_day is not None else datetime.date.today()
    end_day = cons.convert_date(end_day) if end_day is not None else cons.convert_date(
        cons.get_latest_data_date(datetime.datetime.now()))

    one_day = datetime.timedelta(days=1)
    # Collect the daily frames and concatenate once: repeated DataFrame.append
    # copied the accumulated data every day and was removed in pandas 2.0.
    frames = []
    while start_day <= end_day:
        print(start_day)
        if start_day.strftime('%Y%m%d') in calendar:
            data = get_rank_sum(start_day, vars_list)
            if data is False:
                print(f"{start_day.strftime('%Y-%m-%d')}日交易所数据连接失败,已超过20次,您的地址被网站墙了,请保存好返回数据,稍后从该日期起重试")
                break
            if data is not None:
                frames.append(data)
        else:
            warnings.warn(f"{start_day.strftime('%Y%m%d')}非交易日")
        start_day += one_day

    if not frames:
        # pd.concat refuses an empty list; keep returning an empty frame.
        return pd.DataFrame()
    return pd.concat(frames).reset_index(drop=True)
def get_spot_price_daily(start_day=None, end_day=None, vars_list=cons.contract_symbols):
    """
    Fetch commodity spot prices and basis figures for every day in a range.

    Each day is delegated to ``get_spot_price``.

    :param start_day: first day; YYYY-MM-DD or YYYYMMDD string or datetime.date; today when None
    :param end_day: last day (inclusive), same formats; when None the date returned
        by ``cons.get_latest_data_date`` for the current time is used
    :param vars_list: variety codes to include, e.g. [RB, AL]
    :return: the daily frames concatenated with a fresh index. If a day returns
        False (fetch failed), collection stops and what was gathered so far is
        returned; None when nothing was gathered
    :rtype: pandas.DataFrame or None
    """
    start_day = (cons.convert_date(start_day)
                 if start_day is not None else datetime.date.today())
    end_day = (cons.convert_date(end_day) if end_day is not None else
               cons.convert_date(
                   cons.get_latest_data_date(datetime.datetime.now())))

    one_day = datetime.timedelta(days=1)
    df_list = []
    while start_day <= end_day:
        print(start_day)
        temp_df = get_spot_price(start_day, vars_list)
        if temp_df is False:
            # Stop and fall through to the shared return below. The old early
            # `return pd.concat(df_list)` raised ValueError when the very first
            # day failed and the list was still empty.
            break
        if temp_df is not None:
            df_list.append(temp_df)
        start_day += one_day

    if len(df_list) > 0:
        return pd.concat(df_list).reset_index(drop=True)
    return None
def get_dce_receipt(date: str = None, symbol_list: List = cons.contract_symbols):
    """
    Collect registered warehouse receipts from the Dalian Commodity Exchange.

    Per the original notes, data starts at 20060106 with weekly (Friday)
    updates and is updated every trading day from 20090407 onwards.

    :param date: YYYY-MM-DD or YYYYMMDD string or datetime.date object; today when None
    :param symbol_list: variety codes to keep (e.g. RB, AL); must be a list;
        the result follows this order
    :return: DataFrame with columns ``var``, ``receipt`` (int), ``receipt_chg`` (int)
        and ``date`` (YYYYMMDD string); None (after a warning) when ``symbol_list``
        is not a list or the date is not in the trading calendar
    :rtype: pandas.DataFrame or None
    """
    if not isinstance(symbol_list, list):
        warnings.warn("symbol_list: 必须是列表")
        return None
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    day_text = date.strftime('%Y%m%d')
    if day_text not in calendar:
        warnings.warn(f"{day_text}非交易日")
        return None

    payload = {
        "weekQuotes.variety": "all",
        "year": date.year,
        "month": date.month - 1,  # the site numbers months one lower than the calendar
        "day": date.day
    }
    data = pandas_read_html_link(cons.DCE_RECEIPT_URL,
                                 method="post",
                                 data=payload,
                                 headers=cons.dce_headers)[0]

    # Only the per-variety subtotal rows (name ending in '小计') are kept.
    rows = []
    for x in data.to_dict(orient='records'):
        name = x['品种']
        if isinstance(name, str) and name[-2:] == '小计':
            rows.append({
                'var': chinese_to_english(name[:-2]),
                'receipt': int(x['今日仓单量']),
                'receipt_chg': int(x['增减']),
                'date': day_text,
            })

    # Build the frame once; DataFrame.append was removed in pandas 2.0.
    records = pd.DataFrame(rows)
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in symbol_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
def get_shfe_receipt_2(date: str = None, vars_list: List = cons.contract_symbols):
    """
    Scrape registered warehouse receipts from the Shanghai Futures Exchange.

    Applies to the JSON format used from 20140519 (inclusive) onwards.

    :param date: YYYY-MM-DD or YYYYMMDD string or datetime.date object; today when None
    :param vars_list: variety codes to keep (e.g. RB, AL); the result follows this order
    :return: DataFrame with columns ``var``, ``receipt`` (int), ``receipt_chg`` (int)
        and ``date`` (YYYYMMDD string); an empty DataFrame when the response is
        not usable JSON or holds no rows; None when the date is not in the
        trading calendar
    :rtype: pandas.DataFrame or None
    """
    # Normalise to a YYYYMMDD string in both branches. The default branch used
    # to keep a datetime.date (never found in `calendar`), and the explicit
    # branch crashed in the warning by calling .strftime on a str.
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    date = day.strftime('%Y%m%d')
    if date not in calendar:
        warnings.warn('%s非交易日' % date)
        return None

    url = cons.SHFE_RECEIPT_URL_2 % date
    r = requests_link(url, encoding='utf-8')
    try:
        context = r.json()
    # Best-effort: any failure to obtain JSON means "no data". Was a bare
    # `except:`, which also trapped KeyboardInterrupt/SystemExit.
    except Exception:
        return pd.DataFrame()
    data = pd.DataFrame(context['o_cursor'])
    if len(data.columns) < 1:
        return pd.DataFrame()

    rows = []
    # unique() keeps first-appearance order, so the result is reproducible.
    for var in data['VARNAME'].unique():
        data_cut = data[data['VARNAME'] == var]
        rows.append({
            # Strip non-word characters and Latin letters before translating.
            'var': chinese_to_english(re.sub(r"\W|[a-zA-Z]", "", var)),
            'receipt': int(data_cut['WRTWGHTS'].tolist()[-1]),
            'receipt_chg': int(data_cut['WRTCHANGE'].tolist()[-1]),
            'date': date,
        })

    # Build the frame once; DataFrame.append was removed in pandas 2.0.
    records = pd.DataFrame(rows)
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
def get_cffex_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    Fetch the CFFEX top-20 member ranking tables, one per contract.

    Only the entries of ``vars_list`` that appear in
    ``cons.market_exchange_symbols['cffex']`` are requested. Per the original
    notes, data starts at 20100416 and is updated around 16:30 each trading day.

    :param date: YYYY-MM-DD or YYYYMMDD string or datetime.date object; today when None
    :param vars_list: variety codes to include
    :return: dict mapping contract symbol to the DataFrame produced by
        ``_table_cut_cal`` (re-indexed 0..n-1); an empty dict when the date is
        before 2010-04-16 or not in the trading calendar; False when a request
        yields a falsy response
    :rtype: dict or bool
    """
    cffex_vars = [v for v in vars_list if v in cons.market_exchange_symbols['cffex']]
    if date is None:
        trade_day = datetime.date.today()
    else:
        trade_day = cons.convert_date(date)

    if trade_day < datetime.date(2010, 4, 16):
        print("cffex数据源开始日期为20100416,跳过")
        return {}
    day_text = trade_day.strftime('%Y%m%d')
    if day_text not in calendar:
        warnings.warn('%s非交易日' % day_text)
        return {}

    year_month = trade_day.strftime('%Y%m')
    day_of_month = trade_day.strftime('%d')
    tables = {}
    for var in cffex_vars:
        url = cons.CFFEX_VOL_RANK_URL % (year_month, day_of_month, var)
        response = requests_link(url, encoding='gbk')
        if not response:
            return False
        if '网页错误' in response.text:
            # Error page for this variety; move on to the next one.
            continue
        # The CSV payload starts right after the '\n交易日,' marker.
        csv_text = response.text.split('\n交易日,')[1]
        table = pd.read_csv(StringIO(csv_text))
        table = table.dropna(how='any')
        table = table.applymap(
            lambda cell: cell.strip() if isinstance(cell, str) else cell)
        for symbol in set(table['合约']):
            table_cut = table[table['合约'] == symbol]
            table_cut.columns = ['symbol', 'rank'] + rank_columns
            table_cut = _table_cut_cal(pd.DataFrame(table_cut), symbol)
            tables[symbol] = table_cut.reset_index(drop=True)
    return tables
def get_shfe_v_wap(date=None):
    """
    Fetch the Shanghai Futures Exchange daily volume-weighted average prices.

    :param date: YYYY-MM-DD or YYYYMMDD string or datetime.date object; today when None
    :return: the ``o_currefprice`` records renamed and restricted to the values
        of ``cons.SHFE_V_WAP_COLUMNS``; None when the date is not in the trading
        calendar, the request raises an HTTP error, the payload is empty, or the
        records cannot be shaped
    :rtype: pandas.DataFrame or None
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    day_text = day.strftime("%Y%m%d")
    if day_text not in calendar:
        warnings.warn("%s非交易日" % day_text)
        return None

    url = cons.SHFE_V_WAP_URL % day_text
    try:
        json_data = json.loads(
            requests_link(
                url,
                headers=cons.headers,
                encoding="utf-8",
            ).text)
    except requests.HTTPError as reason:
        # Compare the numeric status code: `reason.response` is a Response
        # object (or None) and never equals 404/403, so the old check always
        # printed. The message now shows the URL actually requested instead of
        # the unrelated SHFE_DAILY_URL.
        status = getattr(reason.response, "status_code", None)
        if status not in [404, 403]:
            print(url, reason)
        return None

    if len(json_data["o_currefprice"]) == 0:
        return None
    try:
        df = pd.DataFrame(json_data["o_currefprice"])
        df["INSTRUMENTID"] = df["INSTRUMENTID"].str.strip()
        # Result intentionally unused: a missing or non-integer ":B1" column
        # raises here and lands in the handler below.
        df[":B1"].astype("int16")
        return df.rename(columns=cons.SHFE_V_WAP_COLUMNS)[list(
            cons.SHFE_V_WAP_COLUMNS.values())]
    # Was a bare `except:`, which also trapped KeyboardInterrupt/SystemExit.
    except Exception:
        return None
def get_ine_daily(date="20200312"):
    """
    Fetch daily price/volume data from the Shanghai International Energy Exchange.

    Per the original notes: crude oil futures listed 20180326, No. 20 rubber
    futures listed 20190812.
    trade_price: http://www.ine.cn/statements/daily/?paramid=kx
    trade_note: http://www.ine.cn/data/datanote.dat

    :param date: YYYY-MM-DD or YYYYMMDD string or datetime.date object;
        today when None (the declared default is "20200312")
    :type date: str or datetime.date
    :return: DataFrame with columns symbol, date, open, high, low, close, volume,
        open_interest, turnover (always 0), settle, pre_settle, variety; None
        when the date is not in the trading calendar
    :rtype: pandas.DataFrame or None
    """
    if date is None:
        day = datetime.date.today()
    else:
        day = cons.convert_date(date)
    day_text = day.strftime("%Y%m%d")
    if day_text not in calendar:
        warnings.warn(f"{day_text}非交易日")
        return None

    url = f"http://www.ine.cn/data/dailydata/kx/kx{day_text}.dat"
    response = requests.get(url)
    # The last record is dropped, then the '小计' (subtotal) rows.
    raw = pd.DataFrame(response.json()["o_curinstrument"]).iloc[:-1, :]
    raw = raw[raw["DELIVERYMONTH"] != "小计"]
    # Upper-cased product id up to the first underscore.
    variety = raw["PRODUCTID"].str.upper().str.split("_", expand=True)[0]

    result_df = pd.DataFrame()
    result_df["symbol"] = variety + raw["DELIVERYMONTH"]
    result_df["date"] = day_text
    price_columns = (
        ("open", "OPENPRICE"),
        ("high", "HIGHESTPRICE"),
        ("low", "LOWESTPRICE"),
        ("close", "CLOSEPRICE"),
        ("volume", "VOLUME"),
        ("open_interest", "OPENINTEREST"),
    )
    for target, source in price_columns:
        result_df[target] = raw[source]
    result_df["turnover"] = 0
    result_df["settle"] = raw["SETTLEMENTPRICE"]
    result_df["pre_settle"] = raw["PRESETTLEMENTPRICE"]
    result_df["variety"] = variety
    return result_df
def get_czce_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    Fetch the Zhengzhou Commodity Exchange top-20 member ranking tables.

    Per the original notes, this exchange publishes both variety-level and
    contract-level rankings; data starts at 20050509 and is updated around
    16:30 each trading day.

    :param date: YYYY-MM-DD or YYYYMMDD string or datetime.date object; today when None
    :param vars_list: variety codes to include; only applied on the
        post-2010-08-25 path (the older path keeps every table it finds)
    :return: dict mapping symbol to a ranking DataFrame (re-indexed 0..n-1);
        an empty dict when the date is before 2005-05-09, not in the trading
        calendar, or the downloaded table has fewer than 6 columns
    :rtype: dict
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2005, 5, 9):
        print("czce数据源开始日期为20050509,跳过")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    if date <= datetime.date(2010, 8, 25):
        # Oldest page layout: the page is fetched twice, once through
        # _czce_df_read for the tables and once as raw HTML for the names.
        url = cons.CZCE_VOL_RANK_URL_1 % (date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0)
        r = requests_link(url, 'utf-8')
        r.encoding = 'utf-8'
        soup = BeautifulSoup(r.text, 'lxml', from_encoding="gb2312")
        symbols = []
        # Each <b> element whose text splits into more than 5 pieces
        # contributes its 5th piece as a symbol name.
        for link in soup.find_all('b'):
            strings = (str(link).split(' '))
            if len(strings) > 5:
                try:
                    symbol = chinese_to_english(strings[4])
                except:
                    # Fall back to the raw text when it cannot be translated.
                    symbol = strings[4]
                symbols.append(symbol)
        big_dict = {}
        for i in range(len(symbols)):
            symbol = symbols[i]
            # The i-th symbol is paired with table i + 2 of the parsed page.
            table_cut = data[i + 2]
            table_cut.columns = rank_columns
            # Drop the last row of the table.
            table_cut = table_cut.iloc[:-1, :]
            table_cut.loc[:, 'rank'] = table_cut.index
            # The row labelled '合计' (total) gets rank 999 and no party names.
            table_cut.loc['合计', 'rank'] = 999
            table_cut.loc['合计', ['vol_party_name', 'long_party_name', 'short_party_name']] = None
            table_cut.loc[:, 'symbol'] = symbol
            table_cut.loc[:, 'variety'] = symbol_varieties(symbol)
            # Clean the numeric columns: fill gaps, strip thousands separators,
            # map '-' to 0, then cast through float to int.
            table_cut[intColumns] = table_cut[intColumns].fillna(0)
            table_cut[intColumns] = table_cut[intColumns].astype(str)
            table_cut[intColumns] = table_cut[intColumns].applymap(lambda x: x.replace(',', ''))
            table_cut = table_cut.applymap(lambda x: 0 if x == '-' else x)
            table_cut[intColumns] = table_cut[intColumns].astype(float)
            table_cut[intColumns] = table_cut[intColumns].astype(int)
            big_dict[symbol] = table_cut.reset_index(drop=True)
        return big_dict
    elif date <= datetime.date(2015, 11, 11):
        url = cons.CZCE_VOL_RANK_URL_2 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=1)[1]
    elif date < datetime.date(2017, 12, 28):
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=1)[0]
    else:
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0)[0]
    if len(data.columns) < 6:
        return {}
    # Newer layouts: one long table holding every section; keep the first 9 columns.
    table = pd.DataFrame(data.iloc[:, :9])
    table.columns = rank_columns
    table.loc[:, 'rank'] = table.index
    table[intColumns] = table[intColumns].astype(str)
    table[intColumns] = table[intColumns].applymap(lambda x: x.replace(',', ''))
    table = table.applymap(lambda x: 0 if x == '-' else x)
    # Index labels containing '合约' or '品种' start a new section.
    # NOTE(review): this relies on the index labels being strings -- confirm
    # against _czce_df_read.
    indexes = [i for i in table.index if '合约' in i or '品种' in i]
    # 0 is a sentinel for the first section, whose header text is read from
    # the index *name* rather than from an index label.
    indexes.insert(0, 0)
    big_dict = {}
    for i in range(len(indexes)):
        if indexes[i] == 0:
            table_cut = table.loc[:indexes[i + 1], :]
            string = table_cut.index.name
        elif i < len(indexes) - 1:
            table_cut = table.loc[indexes[i]:indexes[i + 1], :]
            string = table_cut.index[0]
        else:
            table_cut = table.loc[indexes[i]:, :]
            string = table_cut.index[0]
        if 'PTA' in string:
            symbol = 'TA'
        else:
            # The name sits between a full-width colon and a space in the header.
            try:
                symbol = chinese_to_english(find_chinese(re.compile(r':(.*) ').findall(string)[0]))
            except:
                symbol = re.compile(r':(.*) ').findall(string)[0]
        var = symbol_varieties(symbol)
        if var in vars_list:
            # Drop incomplete rows and the first remaining row, then keep only
            # rows whose label is one of '0'..'20'.
            table_cut = table_cut.dropna(how='any').iloc[1:, :]
            table_cut = table_cut.loc[[x for x in table_cut.index if x in [str(i) for i in range(21)]], :]
            table_cut = _table_cut_cal(table_cut, symbol)
            big_dict[symbol] = table_cut.reset_index(drop=True)
    return big_dict
def get_dce_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    Fetch the Dalian Commodity Exchange top-20 member ranking table per variety.

    Only the entries of ``vars_list`` that appear in
    ``cons.market_exchange_symbols['dce']`` are requested. Per the original
    notes, data starts at 20060104 and is updated around 16:30 each trading day.

    :param date: YYYY-MM-DD or YYYYMMDD string or datetime.date object; today when None
    :param vars_list: variety codes to include
    :return: dict mapping variety code to the DataFrame produced by
        ``_table_cut_cal`` (re-indexed 0..n-1); an empty dict when the date is
        before 2006-01-04 or not in the trading calendar; False when a response
        body is empty
    :rtype: dict or bool
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2006, 1, 4):
        print("dce数据源开始日期为20060104,跳过")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    vars_list = [i for i in vars_list if i in cons.market_exchange_symbols['dce']]

    big_dict = {}
    for var in vars_list:
        # The URL takes a month value one lower than the calendar month.
        url = cons.DCE_VOL_RANK_URL % (var.lower(), var.lower(), date.year, date.month - 1, date.day)
        texts = requests_link(url).content.splitlines()
        if not texts:
            return False
        if len(texts) <= 30:
            # Too short to hold a ranking table; nothing for this variety.
            continue

        # Three 20-row rankings (volume, long, short) come back to back, so
        # each list ends up with 60 entries.
        rank = []
        list_60_name = []
        list_60 = []
        list_60_chg = []
        for text in texts:
            fields = text.decode("utf-8").split()
            # Unpack all four fields before appending anything. The old code
            # appended field by field under a bare `except: pass`, so a short
            # line could leave the lists misaligned (and the bare except also
            # swallowed KeyboardInterrupt).
            try:
                rank_text, name, value, change = fields[:4]
                if int(rank_text) > 20:
                    continue
            except ValueError:
                # Fewer than four fields, or the first one is not an integer.
                continue
            rank.append(rank_text)
            list_60_name.append(name)
            list_60.append(value)
            list_60_chg.append(change)

        table_cut = pd.DataFrame({'rank': rank[0:20],
                                  'vol_party_name': list_60_name[0:20],
                                  'vol': list_60[0:20],
                                  'vol_chg': list_60_chg[0:20],
                                  'long_party_name': list_60_name[20:40],
                                  'long_open_interest': list_60[20:40],
                                  'long_open_interest_chg': list_60_chg[20:40],
                                  'short_party_name': list_60_name[40:60],
                                  'short_open_interest': list_60[40:60],
                                  'short_open_interest_chg': list_60_chg[40:60]
                                  })
        # Strip thousands separators from every cell (all cells are strings here).
        table_cut = table_cut.applymap(lambda x: x.replace(',', ''))
        table_cut = _table_cut_cal(table_cut, var)
        big_dict[var] = table_cut.reset_index(drop=True)
    return big_dict
def get_rank_sum(date=None, vars_list=cons.contract_symbols):
    """
    Summarise top-5/10/15/20 member positions across the four futures exchanges for one day.

    Ranking tables are collected from DCE, SHFE, CZCE and CFFEX (each only for
    the entries of ``vars_list`` listed for it in ``cons.market_exchange_symbols``),
    then the volume / long / short figures of the top 5, 10, 15 and 20 ranks are
    summed per symbol. For SHFE and CFFEX varieties an additional variety-level
    row is added by summing that variety's symbol rows (per the original notes,
    those exchanges publish contract rankings only, so this is not an official
    variety ranking).

    :param date: YYYY-MM-DD or YYYYMMDD string or datetime.date object; today when None
    :param vars_list: variety codes to include, e.g. RB, AL
    :return: DataFrame with ``symbol``, ``variety``, ``<field>_top<N>`` for the
        six fields and N in 5/10/15/20, and ``date`` (YYYYMMDD string); None when
        the date is not in the trading calendar; False when any exchange fetch
        returns False
    :rtype: pandas.DataFrame, None or bool
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    date_text = date.strftime('%Y%m%d')
    if date_text not in calendar:
        warnings.warn('%s非交易日' % date_text)
        return None

    # Same fetch order as before: DCE, SHFE, CZCE, CFFEX.
    fetchers = (
        ('dce', get_dce_rank_table),
        ('shfe', get_shfe_rank_table),
        ('czce', get_czce_rank_table),
        ('cffex', get_cffex_rank_table),
    )
    tables = {}
    for exchange, fetch in fetchers:
        exchange_vars = [i for i in vars_list if i in cons.market_exchange_symbols[exchange]]
        if len(exchange_vars) > 0:
            data = fetch(date, exchange_vars)
            if data is False:
                return False
            tables.update(data)

    fields = ('vol', 'vol_chg',
              'long_open_interest', 'long_open_interest_chg',
              'short_open_interest', 'short_open_interest_chg')
    rows = []
    for table in tables.values():
        table = table.applymap(lambda x: 0 if x == '' else x)
        for symbol_inner in set(table['symbol']):
            var = symbol_varieties(symbol_inner)
            if var not in vars_list:
                continue
            table_cut = table[table['symbol'] == symbol_inner]
            rank = table_cut['rank'].astype('float')
            # The old code named this row dict `big_dict`, rebinding the very
            # dict being iterated and breaking the emptiness check below.
            row = {'symbol': symbol_inner, 'variety': var}
            for top_n in (5, 10, 15, 20):
                top = table_cut[rank <= top_n]
                for field in fields:
                    row[f'{field}_top{top_n}'] = top[field].sum()
            row['date'] = date_text
            rows.append(row)

    # Build the frame once; DataFrame.append was removed in pandas 2.0.
    records = pd.DataFrame(rows)
    # Guard on the rows actually produced: with tables present but no matching
    # variety, the old check passed and records['variety'] raised KeyError.
    if rows:
        present = set(records['variety'])
        add_vars = [i for i in cons.market_exchange_symbols['shfe'] + cons.market_exchange_symbols['cffex']
                    if i in present]
        for var in add_vars:
            records_cut = records[records['variety'] == var]
            # Column-wise sum of the symbol rows; the text columns are
            # overwritten right after.
            var_record = pd.DataFrame(records_cut.sum()).T
            var_record['date'] = date_text
            var_record.loc[:, ['variety', 'symbol']] = var
            records = pd.concat([records, var_record])
    return records.reset_index(drop=True)
def get_shfe_daily(date=None):
    """
    Fetch daily price/volume data from the Shanghai Futures Exchange.

    Turnover is computed as v_wap * volume when ``get_shfe_v_wap`` returns data
    for the day, otherwise as volume * settlement price.

    :param date: YYYY-MM-DD or YYYYMMDD string or datetime.date object; today when None
    :type date: str or datetime.date
    :return: DataFrame restricted to ``cons.OUTPUT_COLUMNS`` after renaming with
        ``cons.SHFE_COLUMNS``; None when the date is not in the trading calendar,
        the request raises an HTTP error, or the payload holds no instruments
    :rtype: pandas.DataFrame or None
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    day_text = day.strftime("%Y%m%d")
    if day_text not in calendar:
        warnings.warn("%s非交易日" % day_text)
        return None

    url = cons.SHFE_DAILY_URL % day_text
    try:
        json_data = json.loads(
            requests_link(
                url,
                headers=cons.shfe_headers,
            ).text)
    except requests.HTTPError as reason:
        # Compare the numeric status code: `reason.response` is a Response
        # object (or None), so the old `!= 404` test was always true and 404s
        # were never silenced.
        status = getattr(reason.response, "status_code", None)
        if status != 404:
            print(url, reason)
        return None

    if len(json_data["o_curinstrument"]) == 0:
        return None
    # Drop subtotal/total rows and rows without a delivery month.
    df = pd.DataFrame([
        row for row in json_data["o_curinstrument"]
        if row["DELIVERYMONTH"] not in ["小计", "合计", ""]
    ])
    # The variety is the product id minus its last six characters, upper-cased.
    df["variety"] = df.PRODUCTID.str.slice(0, -6).str.upper()
    df["symbol"] = df["variety"] + df["DELIVERYMONTH"]
    df["date"] = day_text

    v_wap_df = get_shfe_v_wap(day)
    if v_wap_df is not None:
        df = pd.merge(
            df,
            v_wap_df[v_wap_df.time_range == "9:00-15:00"],
            on=["date", "symbol"],
            how="left",
        )
        df["turnover"] = df.v_wap * df.VOLUME
    else:
        # Empty-string volumes count as 0 before multiplying.
        df["VOLUME"] = df["VOLUME"].apply(lambda x: 0 if x == "" else x)
        df["turnover"] = df["VOLUME"] * df["SETTLEMENTPRICE"]
    df.rename(columns=cons.SHFE_COLUMNS, inplace=True)
    return df[cons.OUTPUT_COLUMNS]
def get_shfe_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    Fetch the SHFE top-20 member position ranking, split per contract.

    The exchange publishes rankings per contract only, not per variety.
    Per the original notes, data starts on 2002-01-07 and is refreshed around
    16:30 each trading day.

    :param date: trading day as YYYY-MM-DD, YYYYMMDD or a datetime.date;
        defaults to today
    :param vars_list: variety codes (e.g. RB, AL) to keep; defaults to all
        symbols in ``cons.contract_symbols``
    :return: dict mapping contract symbol -> DataFrame with columns
        rank, vol_party_name, vol, vol_chg,
        long_party_name, long_open_interest, long_open_interest_chg,
        short_party_name, short_open_interest, short_open_interest_chg,
        symbol, variety.
        An empty dict is returned for dates before 2002-01-07, non-trading
        days, unparsable responses, or responses with fewer than 3 columns.
    :rtype: dict
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2002, 1, 7):
        print("shfe数据源开始日期为20020107,跳过")
        return {}
    day_str = date.strftime('%Y%m%d')
    if day_str not in calendar:
        warnings.warn('%s非交易日' % day_str)
        return {}

    url = cons.SHFE_VOL_RANK_URL % day_str
    r = requests_link(url, 'utf-8')
    try:
        context = json.loads(r.text)
    except (ValueError, AttributeError, TypeError):
        # Invalid JSON (ValueError) or a missing/odd response object: keep the
        # best-effort "no data" result, but let KeyboardInterrupt and friends
        # propagate instead of being swallowed by a bare except.
        return {}

    df = pd.DataFrame(context['o_cursor'])
    df = df.rename(columns={
        'CJ1': 'vol',
        'CJ1_CHG': 'vol_chg',
        'CJ2': 'long_open_interest',
        'CJ2_CHG': 'long_open_interest_chg',
        'CJ3': 'short_open_interest',
        'CJ3_CHG': 'short_open_interest_chg',
        'PARTICIPANTABBR1': 'vol_party_name',
        'PARTICIPANTABBR2': 'long_party_name',
        'PARTICIPANTABBR3': 'short_party_name',
        'PRODUCTNAME': 'product1',
        'RANK': 'rank',
        'INSTRUMENTID': 'symbol',
        'PRODUCTSORTNO': 'product2',
    })
    if len(df.columns) < 3:
        return {}

    # Trim whitespace in text cells, then turn empty strings into None.
    df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
    df = df.applymap(lambda x: None if x == '' else x)
    df['variety'] = df['symbol'].apply(symbol_varieties)
    df = df[df['rank'] > 0]
    # Bookkeeping columns are not part of the output; tolerate their absence.
    df = df.drop(
        columns=['PARTICIPANTID1', 'PARTICIPANTID2', 'PARTICIPANTID3',
                 'product1', 'product2'],
        errors='ignore',
    )

    # Build the membership set once instead of once per candidate variety.
    present = set(df['variety'].tolist())
    big_dict = {}
    for var in vars_list:
        if var not in present:
            continue
        df_var = df[df['variety'] == var]
        # unique() keeps order of appearance, so the result order is stable.
        for symbol in df_var['symbol'].unique():
            df_symbol = df_var[df_var['symbol'] == symbol]
            big_dict[symbol] = df_symbol.reset_index(drop=True)
    return big_dict
def _czce_row_to_record(cells, day_const, listed_columns, to_int, clean=None):
    """
    Turn one CZCE quote row into an output record.

    :param cells: indexable row; ``cells[0]`` is the contract code and
        ``cells[i + 1]`` is the raw value for ``listed_columns[i]``
    :param day_const: value stored under the ``date`` key
    :param listed_columns: field names, in the order they appear in the row
    :param to_int: converter applied to the integer-valued fields
    :param clean: optional callable applied to a raw value before conversion
    :return: dict for the row, or None when ``cells[0]`` does not match
        ``cons.FUTURES_SYMBOL_PATTERN``
    """
    matched = cons.FUTURES_SYMBOL_PATTERN.match(cells[0])
    if not matched:
        return None
    int_fields = ("volume", "open_interest", "oi_chg", "exercise_volume")
    record = {
        "date": day_const,
        "symbol": cells[0],
        "variety": matched.group(1),
    }
    for offset, field in enumerate(listed_columns, start=1):
        raw = cells[offset]
        if raw == "\r":
            # A bare carriage return marks an empty trailing cell.
            record[field] = 0.0
            continue
        if clean is not None:
            raw = clean(raw)
        record[field] = to_int(raw) if field in int_fields else float(raw)
    return record


def get_czce_daily(date=None):
    """
    Fetch Zhengzhou Commodity Exchange (CZCE) daily price/volume data.

    Three source formats are handled, selected by date:
    up to 2010-08-24 (HTML table read with ``pandas.read_html``),
    after 2010-08-24 and up to 2015-09-19 (comma-separated text), and
    after 2015-09-19 (pipe-separated text).

    :param date: trading day as YYYY-MM-DD, YYYYMMDD or a datetime.date;
        defaults to today
    :type date: str or datetime.date
    :return: one row per contract, restricted to ``cons.OUTPUT_COLUMNS``
        (documented as: symbol, date, open, high, low, close, volume,
        open_interest, turnover, settle, pre_settle, variety).
        None on a non-trading day, on an HTTP error, when the page reports an
        access error / no records, or when the expected header row is missing.
    :rtype: pandas.DataFrame or None
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    day_str = day.strftime("%Y%m%d")
    if day_str not in calendar:
        warnings.warn(f"{day.strftime('%Y%m%d')}非交易日")
        return None

    day_const = int(day_str)
    output_columns = cons.OUTPUT_COLUMNS

    if day <= datetime.date(2010, 8, 24):
        # Oldest format: the second HTML table on the page holds the quotes.
        url = cons.CZCE_DAILY_URL_1 % day_str
        table = pd.read_html(url)[1].dropna(how="any")
        records = []
        for row in table.to_dict(orient="records")[1:]:
            record = _czce_row_to_record(
                row, day_const, cons.CZCE_COLUMNS_2, int)
            if record is not None:
                records.append(record)
        return pd.DataFrame(records)[output_columns]

    # The cutover date itself (2015-09-19, a Saturday) used to match neither
    # the ``>`` nor the ``<`` test and left ``url`` unbound; it is now handled
    # by the legacy branch so every date takes exactly one path.
    new_format = day > datetime.date(2015, 9, 19)
    template = cons.CZCE_DAILY_URL_3 if new_format else cons.CZCE_DAILY_URL_2
    url = template % (day.strftime("%Y"), day_str)
    listed_columns = cons.CZCE_COLUMNS

    try:
        r = requests.get(url)
        html = r.text
    except requests.exceptions.HTTPError as reason:
        if getattr(reason.response, "status_code", None) != 404:
            # Report the URL that was actually requested.
            print(url, reason)
        return None

    if html.find("您的访问出错了") >= 0 or html.find("无期权每日行情交易记录") >= 0:
        return None

    # Drop the 4 trailing lines and any subtotal line (starting with "小").
    # ``line[:1]`` tolerates blank lines, which ``line[0]`` would not.
    rows = [
        line.replace(" ", "").split("|")
        for line in html.split("\n")[:-4]
        if line[:1] != "小"
    ]

    records = []
    if new_format:
        if len(rows) < 2 or rows[1][0] not in ["品种月份", "品种代码"]:
            return None
        for row in rows[2:]:
            record = _czce_row_to_record(
                row, day_const, listed_columns, int,
                clean=lambda value: value.replace(",", ""))
            if record is not None:
                records.append(record)
    else:
        for row in rows[1:]:
            # Legacy files are comma-separated; integer fields may be written
            # with a decimal part, hence int(float(...)).
            record = _czce_row_to_record(
                row[0].split(","), day_const, listed_columns,
                lambda value: int(float(value)))
            if record is not None:
                records.append(record)
    return pd.DataFrame(records)[output_columns]
def get_roll_yield_bar(type_method="symbol", var="RB", date=None,
                       start_day=None, end_day=None, plot=False):
    """
    Fetch roll-yield data in one of three shapes.

    :param type_method:
        ``'symbol'`` - daily rows of every delivery month of ``var`` on ``date``;
        ``'var'`` - roll yield of every variety on ``date`` (cross-section);
        ``'date'`` - roll yield of ``var`` for each day in
        ``[start_day, end_day]`` (time series)
    :param var: variety code such as RB or AL
    :param date: single day, format YYYYMMDD; defaults to today
    :param start_day: first day, format YYYYMMDD; defaults to today
    :param end_day: last day, format YYYYMMDD; defaults to
        ``cons.get_latest_data_date(now)``
    :param plot: when True, also draw the result with the module's plot helpers
    :return: for ``'symbol'`` the filtered daily frame (None when no daily data
        is available); for ``'var'`` a frame indexed by variety with columns
        roll_yield, near_by, deferred, date, sorted by roll_yield; for
        ``'date'`` a frame indexed by day with columns roll_yield, near_by,
        deferred; None for any other ``type_method``
    :rtype: pandas.DataFrame or None
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    start_day = (cons.convert_date(start_day)
                 if start_day is not None else datetime.date.today())
    end_day = (cons.convert_date(end_day) if end_day is not None else
               cons.convert_date(
                   cons.get_latest_data_date(datetime.datetime.now())))
    ry_columns = ["roll_yield", "near_by", "deferred"]

    if type_method == "symbol":
        df = get_futures_daily(start_day=date, end_day=date,
                               market=symbol_market(var))
        if df is None:
            # No daily data for that day/market.
            return None
        df = df[df["variety"] == var]
        if plot:
            _plot_bar_2(df[["symbol", "close"]])
        return df

    if type_method == "var":
        # DataFrame.append was removed in pandas 2.0; collect and concat once.
        daily_frames = []
        for market in ["dce", "cffex", "shfe", "czce"]:
            market_df = get_futures_daily(start_day=date, end_day=date,
                                          market=market)
            if market_df is not None:
                daily_frames.append(market_df)
        df = pd.concat(daily_frames) if daily_frames else pd.DataFrame()
        varieties = set(df["variety"]) if daily_frames else set()

        ry_frames = []
        for variety in varieties:
            ry = get_roll_yield(date, variety, df=df)
            if ry:
                ry_frames.append(
                    pd.DataFrame([ry], index=[variety], columns=ry_columns))
        df_l = (pd.concat(ry_frames) if ry_frames
                else pd.DataFrame(columns=ry_columns))
        df_l["date"] = date
        df_l = df_l.sort_values("roll_yield")
        if plot:
            _plot_bar(df_l["roll_yield"])
        return df_l

    if type_method == "date":
        ry_frames = []
        current = start_day
        while current <= end_day:
            try:
                ry = get_roll_yield(current, var)
                if ry:
                    ry_frames.append(
                        pd.DataFrame([ry], index=[current],
                                     columns=ry_columns))
            except Exception:
                # Best effort: a day that cannot be computed is skipped, but
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                pass
            current += datetime.timedelta(days=1)
        df_l = (pd.concat(ry_frames) if ry_frames
                else pd.DataFrame(columns=ry_columns))
        if plot:
            _plot(df_l["roll_yield"])
        return df_l

    return None
def _receipt_fetcher_for(market, day):
    """
    Pick the warehouse-receipt scraper valid for ``market`` on ``day``.

    Returns None (after printing the data start date) when ``day`` is earlier
    than the exchange's first published receipt data, and None for any market
    that has no receipt scraper.
    """
    if market == 'dce':
        if day >= datetime.date(2009, 4, 7):
            return get_dce_receipt
        print('20090407起,dce每交易日更新仓单数据')
        return None
    if market == 'shfe':
        if datetime.date(2008, 10, 6) <= day <= datetime.date(2014, 5, 16):
            return get_shfe_receipt_1
        if day > datetime.date(2014, 5, 16):
            return get_shfe_receipt_2
        print('20081006起,shfe每交易日更新仓单数据')
        return None
    if market == 'czce':
        if datetime.date(2008, 3, 3) <= day <= datetime.date(2010, 8, 24):
            return get_czce_receipt_1
        if datetime.date(2010, 8, 24) < day <= datetime.date(2015, 11, 11):
            return get_czce_receipt_2
        if day > datetime.date(2015, 11, 11):
            return get_czce_receipt_3
        print('20080303起,czce每交易日更新仓单数据')
        return None
    return None


def get_receipt(start_day: str = None,
                end_day: str = None,
                vars_list: List = cons.contract_symbols):
    """
    Collect registered warehouse-receipt counts for commodity futures.

    Every calendar day in ``[start_day, end_day]`` is visited; non-trading days
    only emit a warning. For each trading day the scraper matching the
    exchange and date is called for the requested varieties of that exchange.

    :param start_day: first day as YYYY-MM-DD, YYYYMMDD or a datetime.date;
        defaults to today
    :type start_day: str
    :param end_day: last day, same formats; defaults to
        ``cons.get_latest_data_date(now)``
    :type end_day: str
    :param vars_list: variety codes such as RB or AL; defaults to all symbols
        in ``cons.contract_symbols``
    :type vars_list: list
    :return: concatenated receipt records with a fresh 0..n-1 index; an empty
        DataFrame when nothing was collected
    :rtype: pandas.DataFrame
    """
    start_day = cons.convert_date(
        start_day) if start_day is not None else datetime.date.today()
    end_day = cons.convert_date(
        end_day) if end_day is not None else cons.convert_date(
            cons.get_latest_data_date(datetime.datetime.now()))

    # DataFrame.append was removed in pandas 2.0; gather pieces, concat once.
    frames = []
    while start_day <= end_day:
        if start_day.strftime('%Y%m%d') not in calendar:
            warnings.warn(f"{start_day.strftime('%Y%m%d')}非交易日")
        else:
            print(start_day)
            for market, market_vars in cons.market_exchange_symbols.items():
                # Resolved per market, so a market without a scraper can never
                # reuse the function chosen for the previous market.
                fetch = _receipt_fetcher_for(market, start_day)
                get_vars = [var for var in vars_list if var in market_vars]
                if market != 'cffex' and get_vars != [] and fetch is not None:
                    frame = fetch(start_day, get_vars)
                    if frame is not None:
                        frames.append(frame)
        start_day += datetime.timedelta(days=1)

    records = pd.concat(frames) if frames else pd.DataFrame()
    records.reset_index(drop=True, inplace=True)

    if "var" in records.columns:
        ma_rows = records["var"] == "MA"
        if ma_rows.any():
            # Keep only the text before the first "0" of each MA receipt value.
            # NOTE(review): this truncation is carried over unchanged; the
            # reason for it is not visible here - confirm it is still wanted.
            trimmed = records.loc[ma_rows, "receipt"].astype(str).str.split(
                "0", expand=True)[0]
            records.loc[trimmed.index, "receipt"] = trimmed
    return records
def get_cffex_daily(date=None):
    """
    Fetch China Financial Futures Exchange (CFFEX) daily trading data.

    :param date: trading day as YYYY-MM-DD, YYYYMMDD or a datetime.date;
        defaults to today
    :return: one row per contract, restricted to ``cons.OUTPUT_COLUMNS``
        (documented as: symbol, date, open, high, low, close, volume,
        open_interest, turnover, settle, pre_settle, variety).
        None on a non-trading day, on an HTTP error, when the page reports an
        error, or when the expected header row is missing.
    :rtype: pandas.DataFrame or None
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    day_str = day.strftime("%Y%m%d")
    if day_str not in calendar:
        warnings.warn("%s非交易日" % day_str)
        return None

    # The template uses str.format placeholders. The error path used to apply
    # "%" to it, which would itself raise, so build the URL once and reuse it.
    url = cons.CFFEX_DAILY_URL.format(
        day.strftime("%Y%m"), day.strftime("%d"), day_str)
    try:
        html = requests_link(url, encoding="gbk", headers=cons.headers).text
    except requests.exceptions.HTTPError as reason:
        # Compare the numeric status code, not the Response object.
        if getattr(reason.response, "status_code", None) != 404:
            print(url, reason)
        return None

    if html.find("网页错误") >= 0:
        return None

    # Drop the 2 trailing lines and any subtotal line (starting with "小").
    # ``line[:1]`` tolerates blank lines, which ``line[0]`` would not.
    rows = [
        line.replace(" ", "").split(",")
        for line in html.split("\n")[:-2]
        if line[:1] != "小"
    ]
    if not rows or rows[0][0] != "合约代码":
        return None

    dict_data = []
    for row in rows[1:]:
        m = cons.FUTURES_SYMBOL_PATTERN.match(row[0])
        if not m:
            continue
        row_dict = {"date": day_str, "symbol": row[0], "variety": m.group(1)}
        for i, field in enumerate(cons.CFFEX_COLUMNS):
            value = row[i + 1]
            if value == "":
                row_dict[field] = 0.0
            elif field in ["volume", "open_interest", "oi_chg"]:
                row_dict[field] = int(value)
            else:
                try:
                    row_dict[field] = float(value)
                except ValueError:
                    # Non-numeric cell: leave the field unset, as before.
                    pass
        # The feed carries close and change; previous settle is derived.
        row_dict["pre_settle"] = row_dict["close"] - row_dict["change1"]
        dict_data.append(row_dict)
    return pd.DataFrame(dict_data)[cons.OUTPUT_COLUMNS]
def get_dce_daily(date=None, symbol_type="futures", retries=0):
    """
    Fetch Dalian Commodity Exchange (DCE) daily trading data.

    :param date: trading day as YYYY-MM-DD, YYYYMMDD or a datetime.date;
        defaults to today
    :param symbol_type: ``'futures'`` or ``'option'``
    :param retries: current retry count; the call gives up once it exceeds 3
    :return: for futures, one row per contract restricted to
        ``cons.OUTPUT_COLUMNS`` (documented as: symbol, date, open, high, low,
        close, volume, open_interest, turnover, settle, pre_settle, variety);
        for options, rows restricted to ``cons.OPTION_OUTPUT_COLUMNS``
        (documented as: symbol, date, open, high, low, close, pre_settle,
        settle, delta, volume, open_interest, oi_change, turnover,
        implied_volatility, exercise_volume, variety).
        None on a non-trading day, an invalid ``symbol_type``, exhausted
        retries, an HTTP error, or when no rows are available.
    :rtype: pandas.DataFrame or None
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    day_str = day.strftime("%Y%m%d")
    if day_str not in calendar:
        warnings.warn("%s非交易日" % day_str)
        return None
    if retries > 3:
        print("maximum retires for DCE market data: ", day_str)
        return None

    query = {
        "currDate": day_str,
        "year": day.strftime("%Y"),
        # The month is sent zero-based (January -> "0").
        "month": str(int(day.strftime("%m")) - 1),
        "day": day.strftime("%d"),
    }
    if symbol_type == "futures":
        listed_columns = cons.DCE_COLUMNS
        output_columns = cons.OUTPUT_COLUMNS
    elif symbol_type == "option":
        query["dayQuotes.trade_type"] = "1"
        listed_columns = cons.DCE_OPTION_COLUMNS
        output_columns = cons.OPTION_OUTPUT_COLUMNS
    else:
        print("invalid symbol_type :" + symbol_type +
              ', should be one of "futures" or "option"')
        return None
    url = cons.DCE_DAILY_URL + "?" + urllib.parse.urlencode(query)

    def retry():
        # Carry symbol_type along: it used to be dropped, so a retried option
        # request silently came back as futures data.
        return get_dce_daily(day, symbol_type=symbol_type,
                             retries=retries + 1)

    try:
        response = requests_link(url, method="post", headers=cons.headers).text
    except requests.exceptions.ContentDecodingError:
        return retry()
    except requests.exceptions.HTTPError as reason:
        # Compare the numeric status code, not the Response object.
        status = getattr(reason.response, "status_code", None)
        if status == 504:
            return retry()
        if status != 404:
            print(cons.DCE_DAILY_URL, reason)
        return None

    if "错误:您所请求的网址(URL)无法获取" in response:
        return retry()
    if "暂无数据" in response:
        return None

    data = BeautifulSoup(response, "html.parser").find_all("tr")
    if len(data) == 0:
        return None

    dict_data = []
    implied_data = []
    for i_data in data[1:]:
        # Skip subtotal ("小计") and total ("总计") rows.
        if "小计" in i_data.text or "总计" in i_data.text:
            continue
        x = i_data.find_all("td")
        if symbol_type == "futures":
            variety = cons.DCE_MAP[x[0].text.strip()]
            row_dict = {
                "variety": variety,
                "symbol": variety + x[1].text.strip(),
            }
        elif len(x) == 16:
            # 16-cell rows of the option page are quote rows.
            m = cons.FUTURES_SYMBOL_PATTERN.match(x[1].text.strip())
            if not m:
                continue
            row_dict = {
                "symbol": x[1].text.strip(),
                "variety": m.group(1).upper(),
                "contract_id": m.group(0),
            }
        elif len(x) == 2:
            # 2-cell rows carry (contract id, implied volatility).
            implied_data.append({
                "contract_id": x[0].text.strip(),
                "implied_volatility": float(x[1].text.strip()),
            })
            continue
        else:
            continue

        for i, field in enumerate(listed_columns):
            field_content = x[i + 2].text.strip()
            if "-" in field_content:
                # Any cell containing "-" is stored as 0.
                # NOTE(review): this also zeroes negative numbers - confirm
                # that is intended for the change columns.
                row_dict[field] = 0
            elif field in ["volume", "open_interest"]:
                row_dict[field] = int(field_content.replace(",", ""))
            else:
                row_dict[field] = float(field_content.replace(",", ""))
        dict_data.append(row_dict)

    if not dict_data:
        # Nothing parsed: report "no data" instead of failing on the column
        # selection below.
        return None

    df = pd.DataFrame(dict_data)
    df["date"] = day_str
    if symbol_type == "futures":
        return df[output_columns]
    # Explicit columns keep the merge key present even without volatility rows.
    implied_df = pd.DataFrame(
        implied_data, columns=["contract_id", "implied_volatility"])
    return pd.merge(
        df,
        implied_df,
        on="contract_id",
        how="left",
        indicator=False,
    )[output_columns]