def get_rollYield_bar(type='symbol', var='RB', date=None, start=None, end=None, plot=False):
    """
        Fetch roll-yield data in one of three shapes.
    Parameters
    ------
        type: str, selects what is returned
            'symbol': closing price of every delivery month of ``var`` on ``date``
            'var': roll yield of every variety on ``date`` (cross-section)
            'date': daily roll yield of ``var`` from ``start`` to ``end`` (time series)
        var: variety code such as RB or AL
        date: a single day, format YYYYMMDD; today when omitted
        start: first day, format YYYYMMDD; today when omitted
        end: last day, format YYYYMMDD; the latest data date when omitted
        plot: when True the result is also handed to the module's plotting helper
    Return
    -------
        DataFrame
            type 'symbol': the daily bars of ``var`` (one row per contract)
            type 'var': column ``ry`` indexed by variety, sorted ascending
                        (empty when no exchange returned data for ``date``)
            type 'date': column ``ry`` indexed by date
        None for any other ``type``
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    start = cons.convert_date(start) if start is not None else datetime.date.today()
    end = cons.convert_date(end) if end is not None else cons.convert_date(
        cons.get_latestDataDate(datetime.datetime.now()))

    if type == 'symbol':
        df = get_future_daily(start=date, end=date, market=symbolMarket(var))
        df = df[df['variety'] == var]
        if plot:
            _plot_bar(df['close'].tolist(), df['symbol'].tolist())
        return df

    if type == 'var':
        # Gather every exchange first and concatenate once; DataFrame.append
        # was removed in pandas 2.0 and copied the frame on every call.
        frames = []
        for market in ['dce', 'cffex', 'shfe', 'czce']:
            market_df = get_future_daily(start=date, end=date, market=market)
            if market_df is not None:
                frames.append(market_df)
        if not frames:
            return pd.DataFrame(columns=['ry'])
        df = pd.concat(frames)
        varList = list(set(df['variety']))
        # A distinct loop name keeps the ``var`` parameter intact.
        ryList = [get_rollYield(date, variety, df=df) for variety in varList]
        df = pd.DataFrame(ryList, index=varList, columns=['ry'])
        df = df.sort_values('ry')
        if plot:
            _plot_bar(df['ry'].tolist(), df.index)
        return df

    if type == 'date':
        rows = []
        while start <= end:
            try:
                ry = get_rollYield(start, var)
                rows.append(pd.DataFrame([ry], index=[start], columns=['ry']))
            except Exception:
                # Best effort: a day whose roll yield cannot be computed is
                # left out of the series instead of aborting the whole range.
                pass
            start += datetime.timedelta(days=1)
        dfL = pd.concat(rows) if rows else pd.DataFrame()
        if plot:
            _plot(pd.to_datetime(dfL.index), dfL['ry'].tolist())
        return dfL
def get_future_daily(start=None, end=None, market='CFFEX', indexBar=False):
    """
        Collect the daily trading data of one exchange over a date range.
    Parameters
    ------
        start: first day, format YYYY-MM-DD or YYYYMMDD or a datetime.date; today when omitted
        end: last day, same formats; the latest data date when omitted
        market: one of 'CFFEX', 'CZCE', 'SHFE', 'DCE' (case-insensitive); default 'CFFEX'
        indexBar: bool, when True the index bar built from each day's data is appended as well
    Return
    -------
        DataFrame
            Daily bars with the columns produced by the per-exchange fetchers:
                symbol, date, open, high, low, close, volume, open_interest,
                turnover, settle, pre_settle, variety
        or None (unknown market, or no day in the range produced data)
    """
    exchange = market.upper()
    if exchange not in ('CFFEX', 'CZCE', 'SHFE', 'DCE'):
        print('Invalid market.')
        return None

    # Built only after validation so an invalid market never touches the fetchers.
    fetch_one_day = {
        'CFFEX': get_cffex_daily,
        'CZCE': get_czce_daily,
        'SHFE': get_shfe_daily,
        'DCE': get_dce_daily,
    }[exchange]

    if start is not None:
        current = cons.convert_date(start)
    else:
        current = datetime.date.today()
    if end is not None:
        last = cons.convert_date(end)
    else:
        last = cons.convert_date(cons.get_latestDataDate(datetime.datetime.now()))

    one_day = datetime.timedelta(days=1)
    frames = []
    while current <= last:
        day_frame = fetch_one_day(current)
        if day_frame is not None:
            frames.append(day_frame)
            if indexBar:
                frames.append(get_futureIndex(day_frame))
        current += one_day

    if not frames:
        return None
    return pd.concat(frames).reset_index(drop=True)
def get_reciept(start=None, end=None, vars=cons.vars):
    """
        Collect registered warehouse-receipt counts for commodities over a date range.
    Parameters
    ------
        start: first day, format YYYY-MM-DD or YYYYMMDD or a datetime.date; today when omitted
        end: last day, same formats; the latest data date when omitted
        vars: list of variety codes such as RB, AL; defaults to cons.vars
    Return
    -------
        DataFrame (empty when nothing was collected)
            var      variety code              string
            reciept  number of receipts        int
            date     day                       string YYYYMMDD
    """
    start = cons.convert_date(start) if start is not None else datetime.date.today()
    end = cons.convert_date(end) if end is not None else cons.convert_date(
        cons.get_latestDataDate(datetime.datetime.now()))

    # Collect per-exchange frames and concatenate once at the end;
    # DataFrame.append was removed in pandas 2.0 and was quadratic here.
    frames = []
    while start <= end:
        if start.strftime('%Y%m%d') not in calendar:
            print('%s非交易日' % start.strftime('%Y%m%d'))
        else:
            print(start)
            for market, marketVars in cons.market_var.items():
                get_vars = [var for var in vars if var in marketVars]
                if market == 'cffex' or not get_vars:
                    continue
                # Each exchange changed its page format over time, hence the
                # date-dependent choice of scraper.
                if market == 'dce':
                    f = get_dce_reciept
                elif market == 'shfe':
                    if start <= datetime.date(2014, 5, 16):
                        f = get_shfe_reciept_1
                    else:
                        f = get_shfe_reciept_2
                elif market == 'czce':
                    if start <= datetime.date(2010, 8, 24):
                        f = get_czce_reciept_1
                    elif start <= datetime.date(2015, 11, 11):
                        f = get_czce_reciept_2
                    else:
                        f = get_czce_reciept_3
                else:
                    # No scraper for this exchange; never reuse the previous one.
                    continue
                day_records = f(start, get_vars)
                if day_records is not None:
                    frames.append(day_records)
        start += datetime.timedelta(days=1)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames).reset_index(drop=True)
def get_shfe_reciept_1(date=None, vars=cons.vars):
    """
        Scrape registered warehouse receipts from the Shanghai Futures Exchange (old page format).
        Per the original notes this format applies from 20081006 to 20140518 (inclusive).
        20100126 and 20101029: the exchange page is malformed, so the frames
        hard-coded in this module are returned instead.
        20100416 and 20130821: the exchange has no data, None is returned.
    Parameters
    ------
        date: day, format YYYY-MM-DD or YYYYMMDD or a datetime.date; today when omitted
        vars: list of variety codes such as RB, AL; defaults to cons.vars
    Return
    -------
        DataFrame
            var      variety code        string
            reciept  number of receipts  int
            date     day                 string YYYYMMDD
        or None (non-trading day, or a day with missing exchange data)
    """
    # Normalise to a YYYYMMDD string on every path. The original only
    # formatted an explicit date, so the default (today) stayed a
    # datetime.date and never matched the string calendar.
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    date = day.strftime('%Y%m%d')
    if date not in calendar:
        print('%s非交易日' % date)
        return None
    if date == '20100126':
        shfe_20100126['date'] = date
        return shfe_20100126
    if date == '20101029':
        shfe_20101029['date'] = date
        return shfe_20101029
    if date in ['20100416', '20130821']:
        return None

    varList = [
        '天然橡胶', '沥青仓库', '沥青厂库', '热轧卷板', '燃料油', '白银', '线材', '螺纹钢', '铅',
        '铜', '铝', '锌', '黄金', '锡', '镍'
    ]
    url = cons.SHFE_RECIEPT_URL_1 % date
    data = pandas_readHtml_link(url)[0]
    # Materialise the first column once instead of once per row.
    first_col = data[0].tolist()
    # Rows whose first cell is a commodity name start a new section.
    indexs = [x for x in data.index if first_col[x] in varList]
    # The footnote row (contains '注') marks the end of the last section.
    lastIndex = [x for x in data.index if '注' in str(first_col[x])][0] - 1

    frames = []
    for i, begin in enumerate(indexs):
        if i != len(indexs) - 1:
            stop = indexs[i + 1] - 1
        else:
            stop = lastIndex
        # Forward-fill so the last row of the section carries a value; only
        # the first cell of column 0 and the last cell of column 1 are read.
        dataCut = data.loc[begin:stop, :].ffill()
        D = {
            'var': chinese_to_english(dataCut[0].tolist()[0]),
            'reciept': int(dataCut[1].tolist()[-1]),
            'date': date,
        }
        frames.append(pd.DataFrame(D, index=[0]))

    records = pd.concat(frames) if frames else pd.DataFrame()
    if len(records.index) != 0:
        records.index = records['var']
        vars_inMarket = [i for i in vars if i in records.index]
        records = records.loc[vars_inMarket, :]
    return records.reset_index(drop=True)
def get_shfe_daily(date=None):
    """
        Fetch one day of trading data from the Shanghai Futures Exchange.
    Parameters
    ------
        date: day, format YYYY-MM-DD or YYYYMMDD or a datetime.date; today when omitted
    Return
    -------
        DataFrame restricted to cons.OUTPUT_COLUMNS; per the original notes:
            symbol, date, open, high, low, close, volume, open_interest,
            turnover, settle, pre_settle, variety
        or None (non-trading day, HTTP error, or no data for the day)
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    day_str = day.strftime('%Y%m%d')
    if day_str not in calendar:
        # Use the normalised ``day``: ``date`` may be None or a plain string,
        # neither of which has strftime.
        print('%s非交易日' % day_str)
        return None

    url = cons.SHFE_DAILY_URL % day_str
    try:
        json_data = json.loads(
            urlopen(Request(url, headers=cons.shfe_headers)).read().decode('utf8'))
    except HTTPError as reason:
        # 404 just means the exchange has nothing for that day; stay quiet.
        if reason.code != 404:
            print(url, reason)
        return None

    if len(json_data['o_curinstrument']) == 0:
        return None

    # Drop subtotal rows ('小计') and rows without a delivery month.
    df = pd.DataFrame([
        row for row in json_data['o_curinstrument']
        if row['DELIVERYMONTH'] != u'小计' and row['DELIVERYMONTH'] != ''
    ])
    df['variety'] = df.PRODUCTID.str.slice(0, -6).str.upper()
    df['symbol'] = df['variety'] + df['DELIVERYMONTH']
    df['date'] = day_str

    vwap_df = get_shfe_vwap(day)
    if vwap_df is not None:
        # Turnover from the full-session VWAP when it is available ...
        df = pd.merge(df,
                      vwap_df[vwap_df.time_range == '9:00-15:00'],
                      on=['date', 'symbol'],
                      how='left')
        df['turnover'] = df.vwap * df.VOLUME
    else:
        # ... otherwise approximate it with the settlement price.
        df['turnover'] = df['VOLUME'] * df['SETTLEMENTPRICE']
    df.rename(columns=cons.SHFE_COLUMNS, inplace=True)
    return df[cons.OUTPUT_COLUMNS]
def get_czce_reciept_3(date=None, vars=cons.vars):
    """
        Scrape registered warehouse receipts from the Zhengzhou Commodity Exchange (newest page format).
        Per the original notes this format applies from 20151112 (inclusive) onwards.
    Parameters
    ------
        date: day, format YYYY-MM-DD or YYYYMMDD or a datetime.date; today when omitted
        vars: list of variety codes such as CF, TA; defaults to cons.vars
    Return
    -------
        DataFrame (empty when the page holds fewer than four tables)
            var      variety code        string
            reciept  number of receipts  int
            date     day                 string YYYYMMDD
        or None (non-trading day)
    """
    # Normalise to a YYYYMMDD string on every path; the original left the
    # default (today) as a datetime.date, which never matched the calendar.
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    date = day.strftime('%Y%m%d')
    if date not in calendar:
        print('%s非交易日' % date)
        return None

    url = cons.CZCE_RECIEPT_URL_3 % (date[:4], date)
    r = requests_link(url, encoding='utf-8')
    r.encoding = 'utf-8'
    data = pd.read_html(r.text, encoding='gb2312')
    records = pd.DataFrame()
    if len(data) < 4:
        return records
    if int(date) <= 20171227:
        # Up to this date the first table on the page is skipped.
        data = data[1:]

    frames = []
    for dataCut in data:
        if len(dataCut.columns) <= 3:
            continue
        first_col = dataCut[0].tolist()
        # Cut the table just above the footnote row, when there is one.
        lastIndexs = [x for x in dataCut.index if '注:' in str(first_col[x])]
        if len(lastIndexs) > 0:
            lastIndex = lastIndexs[0] - 1
            dataCut = dataCut.loc[:lastIndex, :]
        header = dataCut[0].tolist()[0]
        if 'PTA' in header:
            var = 'TA'
        else:
            string = header.split(' ')[0][3:]
            var = chinese_to_english(re.sub('[A-Z]+', '', string))
        # Row 1 carries the column captions.
        dataCut.columns = dataCut.loc[1, :]
        dataCut = dataCut.ffill()
        try:
            reciept = dataCut.loc[:, '仓单数量'].tolist()[-1]
        except KeyError:
            # Some tables label the column with a '(保税)' suffix instead.
            reciept = dataCut.loc[:, '仓单数量(保税)'].tolist()[-1]
        D = {'var': var, 'reciept': int(reciept), 'date': date}
        frames.append(pd.DataFrame(D, index=[0]))

    if frames:
        # One concat instead of DataFrame.append (removed in pandas 2.0).
        records = pd.concat(frames)
    if len(records.index) != 0:
        records.index = records['var']
        vars_inMarket = [i for i in vars if i in records.index]
        records = records.loc[vars_inMarket, :]
    return records.reset_index(drop=True)
def get_spotPrice(date=None, vars=cons.vars):
    """
        Fetch commodity spot prices and the matching basis for one day.
    Parameters
    ------
        date: day, format YYYY-MM-DD or YYYYMMDD or a datetime.date; today when omitted
        vars: list of variety codes such as RB, AL; defaults to cons.vars
    Return
    -------
        DataFrame, as produced by _check_information and filtered to ``vars``;
        per the original notes its columns are:
            var            variety code                                   string
            SP             spot price                                     float
            nearSymbol     nearest-delivery contract                      string
            nearPrice      settlement price of the nearest contract       float
            domSymbol      dominant contract                              string
            domPrice       settlement price of the dominant contract      float
            nearBasis      basis of the nearest contract vs. spot         float
            domBasis       basis of the dominant contract vs. spot        float
            nearBasisRate  basis rate of the nearest contract vs. spot    float
            domBasisRate   basis rate of the dominant contract vs. spot   float
            date           day                                            string YYYYMMDD
        None on a non-trading day
        False after five failed attempts
    Raises
    ------
        Exception when ``date`` is before 2011-01-04
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2011, 1, 4):
        raise Exception("数据源开始日期为20110104,请修改获取数据时段检查")
    if date.strftime('%Y%m%d') not in calendar:
        print('%s非交易日' % date.strftime('%Y%m%d'))
        return None

    u1 = cons.SYS_SPOTPRICE_LATEST_URL
    u2 = cons.SYS_SPOTPRICE_URL % date.strftime('%Y-%m-%d')
    i = 1  # number of the current failed-attempt message (1-based)
    # NOTE(review): only exceptions count towards the five-attempt limit.
    # A page that loads but shows another date just sleeps and is polled
    # again, so this loop is unbounded in that case - confirm it is intended.
    while True:
        # Try the dated page first, then the "latest" page.
        for url in [u2, u1]:
            try:
                r = requests.get(url, timeout=2)
                # Parse the page once and reuse both tables.
                tables = pd.read_html(r.text)
                string = tables[0].loc[1, 1]
                news = ''.join(re.findall(r'[0-9]', string))
                # Digits 3..10 of the headline are compared with the requested day.
                if news[3:11] == date.strftime('%Y%m%d'):
                    records = _check_information(tables[1], date)
                    records.index = records['var']
                    vars_inMarket = [v for v in vars if v in records.index]
                    return records.loc[vars_inMarket, :].reset_index(drop=True)
                else:
                    time.sleep(3)
            except Exception:
                print('%s日生意社数据连接失败,第%s次尝试,最多5次' % (date.strftime('%Y-%m-%d'), str(i)))
                i += 1
                if i > 5:
                    print('%s日生意社数据连接失败,已超过5次,您的地址被网站墙了,请保存好返回数据,稍后从该日期起重试' % date.strftime('%Y-%m-%d'))
                    return False
def get_rank_sum_daily(start=None, end=None, vars=cons.vars):
    """
        Collect the summed top-5/10/15/20 member position rankings over a date range.
        Note 1 (from the original notes): SHFE and CFFEX only publish rankings per
        contract, so the per-variety figures are sums over the contracts and not
        an official variety ranking.
        Note 2 (from the original notes): DCE only publishes per-variety rankings.
    Parameters
    ------
        start: first day, format YYYY-MM-DD or YYYYMMDD or a datetime.date; today when omitted
        end: last day, same formats; the latest data date when omitted
        vars: list of variety codes such as RB, AL; defaults to cons.vars
    Return
    -------
        DataFrame (empty when nothing was collected), the per-day frames of
        get_rank_sum stacked; per the original notes its columns are:
            symbol                   contract                                       string
            var                      variety code                                   string
            vol_top5                 volume of the top 5 members                    int
            vol_chg_top5             volume change of the top 5 members             int
            long_openIntr_top5       long positions of the top 5 members            int
            long_openIntr_chg_top5   long position change of the top 5 members      int
            short_openIntr_top5      short positions of the top 5 members           int
            short_openIntr_chg_top5  short position change of the top 5 members     int
            vol_top10                volume of the top 10 members                   int
            ...
            date                     day                                            string YYYYMMDD
    """
    start = cons.convert_date(start) if start is not None else datetime.date.today()
    end = cons.convert_date(end) if end is not None else cons.convert_date(
        cons.get_latestDataDate(datetime.datetime.now()))

    # Collect the per-day frames and concatenate once; DataFrame.append was
    # removed in pandas 2.0 and re-copied the accumulated frame every day.
    frames = []
    while start <= end:
        print(start)
        if start.strftime('%Y%m%d') in calendar:
            day_records = get_rank_sum(start, vars)
            if day_records is not None:
                frames.append(day_records)
        else:
            print('%s非交易日' % start.strftime('%Y%m%d'))
        start += datetime.timedelta(days=1)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames).reset_index(drop=True)
def get_spotPrice_daily(start=None, end=None, vars=cons.vars):
    """
        Collect commodity spot prices and basis over a date range.
    Parameters
    ------
        start: first day, format YYYY-MM-DD or YYYYMMDD or a datetime.date; today when omitted
        end: last day, same formats; the latest data date when omitted
        vars: list of variety codes such as RB, AL; defaults to cons.vars
    Return
    -------
        DataFrame, the per-day frames of get_spotPrice stacked (see that
        function for the columns). When get_spotPrice gives up on a day
        (returns False) the days gathered so far are returned.
        or None when no day produced data
    """
    start = cons.convert_date(start) if start is not None else datetime.date.today()
    end = cons.convert_date(end) if end is not None else cons.convert_date(
        cons.get_latestDataDate(datetime.datetime.now()))

    df_list = []
    while start <= end:
        print(start)
        df = get_spotPrice(start, vars)
        if df is False:
            # The source stopped answering: keep what we have. Falling through
            # to the guarded concat below avoids pd.concat([]) raising
            # ValueError when the very first day already failed.
            break
        if df is not None:
            df_list.append(df)
        start += datetime.timedelta(days=1)

    if len(df_list) > 0:
        return pd.concat(df_list).reset_index(drop=True)
def get_czce_reciept_1(date=None, vars=cons.vars):
    """
        Scrape registered warehouse receipts from the Zhengzhou Commodity Exchange (oldest page format).
        Per the original notes this format applies from 20080222 to 20100824 (inclusive).
    Parameters
    ------
        date: day, format YYYY-MM-DD or YYYYMMDD or a datetime.date; today when omitted
        vars: list of variety codes such as CF, TA; defaults to cons.vars
    Return
    -------
        DataFrame (empty for 20090820, which is special-cased)
            var      variety code        string
            reciept  number of receipts  int
            date     day                 string YYYYMMDD
        or None (non-trading day)
    """
    # Normalise to a YYYYMMDD string on every path; the original left the
    # default (today) as a datetime.date, which never matched the calendar.
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    date = day.strftime('%Y%m%d')
    if date not in calendar:
        print('%s非交易日' % date)
        return None
    if date == '20090820':
        return pd.DataFrame()

    url = cons.CZCE_RECIEPT_URL_1 % date
    r = requests_link(url, encoding='utf-8')
    r.encoding = 'utf-8'
    context = r.text
    data = pd.read_html(context)[1]
    first_col = data[0].tolist()
    # Rows containing '品种:' open a new per-variety section.
    indexs = [x for x in data.index if '品种:' in str(first_col[x])]

    frames = []
    for i, begin in enumerate(indexs):
        if i != len(indexs) - 1:
            dataCut = data.loc[begin:indexs[i + 1] - 1, :]
        else:
            dataCut = data.loc[begin:, :]
        dataCut = dataCut.ffill()
        header = dataCut[0].tolist()[0]
        if 'PTA' in header:
            var = 'TA'
        else:
            var = chinese_to_english(re.sub('[A-Z]+', '', header[3:]))
        # The CF table keeps its total one column further right.
        if var == 'CF':
            reciept = dataCut[6].tolist()[-1]
        else:
            reciept = dataCut[5].tolist()[-1]
        D = {'var': var, 'reciept': int(reciept), 'date': date}
        frames.append(pd.DataFrame(D, index=[0]))

    # One concat instead of DataFrame.append (removed in pandas 2.0).
    records = pd.concat(frames) if frames else pd.DataFrame()
    if len(records.index) != 0:
        records.index = records['var']
        vars_inMarket = [i for i in vars if i in records.index]
        records = records.loc[vars_inMarket, :]
    return records.reset_index(drop=True)
def get_cffex_rank_table(date=None, vars=cons.vars):
    """
        Scrape the member position ranking details published by CFFEX
        (the URL and variety list used are the CFFEX ones).
        Per the original notes: data starts on 20100416 and is refreshed
        around 16:30 every trading day.
    Parameters
    ------
        date: day, format YYYY-MM-DD or YYYYMMDD or a datetime.date; today when omitted
        vars: list of variety codes; only those listed for 'cffex' in
              cons.market_var are queried; defaults to cons.vars
    Return
    -------
        dict mapping each contract symbol to a DataFrame (empty dict on a
        non-trading day); per the original notes each frame holds:
            rank, vol_party_name, vol, vol_chg,
            long_party_name, long_openIntr, long_openIntr_chg,
            short_party_name, short_openIntr, short_openIntr_chg,
            symbol, var, date
    """
    def _strip_text(cell):
        # Only genuine str cells are trimmed; everything else passes through.
        if type(cell) is str:
            return cell.strip()
        return cell

    cffex_vars = cons.market_var['cffex']
    vars = [name for name in vars if name in cffex_vars]
    if date is not None:
        date = cons.convert_date(date)
    else:
        date = datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        print('%s非交易日' % date.strftime('%Y%m%d'))
        return {}

    result = {}
    for var in vars:
        url = cons.CFFEX_VOLRANK_URL % (date.strftime('%Y%m'),
                                        date.strftime('%d'), var)
        r = requests_link(url, encoding='gbk')
        if '网页错误' in r.text:
            continue
        # Everything after the '交易日,' header line is the CSV payload.
        csv_body = r.text.split('\n交易日,')[1]
        table = pd.read_csv(StringIO(csv_body))
        table = table.dropna(how='any')
        table = table.applymap(_strip_text)
        for symbol in set(table['合约']):
            per_symbol = table[table['合约'] == symbol]
            per_symbol.columns = ['symbol', 'rank'] + rank_columns
            per_symbol = _tableCut_cal(per_symbol, symbol)
            result[symbol] = per_symbol.reset_index(drop=True)
    return result
def get_shfe_reciept_2(date=None, vars=None):
    """
        Scrape registered warehouse receipts from the Shanghai Futures Exchange (JSON format).
        Per the original notes this format applies from 20140519 (inclusive) onwards.
    Parameters
    ------
        date: day, format YYYY-MM-DD or YYYYMMDD or a datetime.date; today when omitted
        vars: list of variety codes such as RB, AL; cons.vars when omitted
    Return
    -------
        DataFrame (empty when the response is not JSON or holds no rows)
            var      variety code        string
            reciept  number of receipts  int
            date     day                 string YYYYMMDD
        or None (non-trading day)
    """
    if vars is None:
        # The signature defaults to None; iterating it used to raise
        # TypeError, so fall back to the default the sibling scrapers use.
        vars = cons.vars
    # Normalise to a YYYYMMDD string on every path; the original left the
    # default (today) as a datetime.date, which never matched the calendar.
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    date = day.strftime('%Y%m%d')
    if date not in calendar:
        print('%s非交易日' % date)
        return None

    url = cons.SHFE_RECIEPT_URL_2 % date
    r = requests_link(url, encoding='utf-8')
    r.encoding = 'utf-8'
    try:
        context = json.loads(r.text)
    except ValueError:
        # Not JSON (json.JSONDecodeError is a ValueError): treat as "no data".
        return pd.DataFrame()
    data = pd.DataFrame(context['o_cursor'])
    if len(data.columns) < 1:
        return pd.DataFrame()

    frames = []
    for var in set(data['VARNAME'].tolist()):
        dataCut = data[data['VARNAME'] == var]
        D = {
            # Strip non-word characters and Latin letters from the name
            # before translating it to the variety code.
            'var': chinese_to_english(re.sub(r"\W|[a-zA-Z]", "", var)),
            'reciept': int(dataCut['WRTWGHTS'].tolist()[-1]),
            'date': date
        }
        frames.append(pd.DataFrame(D, index=[0]))

    # One concat instead of DataFrame.append (removed in pandas 2.0).
    records = pd.concat(frames) if frames else pd.DataFrame()
    if len(records.index) != 0:
        records.index = records['var']
        vars_inMarket = [i for i in vars if i in records.index]
        records = records.loc[vars_inMarket, :]
    return records.reset_index(drop=True)
def get_czce_reciept_2(date=None, vars=cons.vars):
    """
        Scrape registered warehouse receipts from the Zhengzhou Commodity Exchange (middle page format).
        Per the original notes this format applies from 20100825 to 20151111 (both inclusive).
        Unlike its siblings this function does not consult the trading calendar.
    Parameters
    ------
        date: day, format YYYY-MM-DD or YYYYMMDD or a datetime.date; today when omitted
        vars: list of variety codes such as CF, TA; defaults to cons.vars
    Return
    -------
        DataFrame
            var      variety code                                    string
            reciept  number of receipts, as read from the page (not cast)
            date     day                                             string YYYYMMDD
    """
    # Normalise to a YYYYMMDD string on every path; with the default the
    # original kept a datetime.date and failed on ``date[:4]`` below.
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    date = day.strftime('%Y%m%d')

    url = cons.CZCE_RECIEPT_URL_2 % (date[:4], date)
    r = requests.get(url)
    r.encoding = 'utf-8'
    # The first three tables on the page are skipped.
    data = pd.read_html(r.text)[3:]

    frames = []
    for dataCut in data:
        if len(dataCut.columns) <= 3:
            continue
        first_col = dataCut[0].tolist()
        # Cut the table just above the footnote row, when there is one.
        lastIndexs = [x for x in dataCut.index if '注:' in str(first_col[x])]
        if len(lastIndexs) > 0:
            lastIndex = lastIndexs[0] - 1
            dataCut = dataCut.loc[:lastIndex, :]
        header = dataCut[0].tolist()[0]
        if 'PTA' in header:
            var = 'TA'
        else:
            string = header.split(' ')[0][3:]
            var = chinese_to_english(re.sub('[A-Z]+', '', string))
        # The figure is the last cell of the third column from the right.
        reciept = dataCut.iloc[:, -3].tolist()[-1]
        D = {'var': var, 'reciept': reciept, 'date': date}
        frames.append(pd.DataFrame(D, index=[0]))

    # One concat instead of DataFrame.append (removed in pandas 2.0).
    records = pd.concat(frames) if frames else pd.DataFrame()
    if len(records.index) != 0:
        records.index = records['var']
        vars_inMarket = [i for i in vars if i in records.index]
        records = records.loc[vars_inMarket, :]
    return records.reset_index(drop=True)
def get_shfe_vwap(date=None):
    """
        Fetch the daily volume-weighted average prices from the Shanghai Futures Exchange.
    Parameters
    ------
        date: day, format YYYY-MM-DD or YYYYMMDD or a datetime.date; today when omitted
    Return
    -------
        DataFrame with the columns named by cons.SHFE_VWAP_COLUMNS; per the
        original notes:
            symbol      contract code
            date        day
            time_range  VWAP window, either 09:00-10:15 or 09:00-15:00
            vwap        volume-weighted average price
        or None (non-trading day, HTTP error, empty or malformed payload)
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    day_str = day.strftime('%Y%m%d')
    if day_str not in calendar:
        warnings.warn('%s非交易日' % day_str)
        return None

    url = cons.SHFE_VWAP_URL % day_str
    try:
        json_data = json.loads(
            urlopen(Request(url, headers=cons.headers)).read().decode('utf8'))
    except HTTPError as reason:
        if reason.code not in [404, 403]:
            # Report the URL that was actually requested (the original
            # printed the daily-bar URL here).
            print(url, reason)
        return None

    if len(json_data['o_currefprice']) == 0:
        return None
    try:
        df = pd.DataFrame(json_data['o_currefprice'])
        df['INSTRUMENTID'] = df['INSTRUMENTID'].str.strip()
        # Result is discarded on purpose: the cast only raises when the
        # ':B1' column is missing or not integer-like, which sends us to the
        # except branch below.
        df[':B1'].astype('int16')
        return df.rename(columns=cons.SHFE_VWAP_COLUMNS)[list(
            cons.SHFE_VWAP_COLUMNS.values())]
    except Exception:
        # Any malformed payload is reported as "no data".
        return None
def get_rollYield(date=None, var='IF', symbol1=None, symbol2=None, df=None):
    """
        Compute the roll yield for one day, either between the two contracts
        of a variety with the largest open interest or between two given contracts.
    Parameters
    ------
        date: day, format YYYYMMDD (string or datetime.date); today when omitted
        var: variety code such as RB or AL; replaced by the variety of
             ``symbol1`` when ``symbol1`` is given
        symbol1: first contract, e.g. rb1810
        symbol2: second contract, e.g. rb1812
            When BOTH symbols are given they are used as-is; otherwise the two
            contracts of ``var`` with the largest open interest are used.
        df: DataFrame of daily bars for ``date``; fetched internally when it is
            not a DataFrame. Passing it avoids a download per call.
    Return
    -------
        float: (close2 / close1 - 1) divided by the month difference returned
               by _monthChange(symbol2, symbol1)
        or None on a non-trading day
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        print('%s非交易日' % date.strftime('%Y%m%d'))
        return None

    # Decide up front whether the caller pinned both legs. The original
    # tested ``var`` afterwards, which is always truthy once it has been
    # derived from symbol1, so the explicit pair was silently replaced.
    explicit_pair = bool(symbol1 and symbol2)
    if symbol1:
        var = symbol2varietie(symbol1)
    if not isinstance(df, pd.DataFrame):
        market = symbolMarket(var)
        df = get_future_daily(start=date, end=date, market=market)

    if explicit_pair:
        close1 = float(df['close'][df['symbol'] == symbol1.upper()].tolist()[0])
        close2 = float(df['close'][df['symbol'] == symbol2.upper()].tolist()[0])
        priceRate = close2 / close1 - 1
    else:
        # Copy before assigning so the caller's frame is never touched.
        df = df[df['variety'] == var].sort_values(
            'open_interest', ascending=False).copy()
        df['close'] = df['close'].astype('float')
        # Second row relative to first row: close2 / close1 - 1.
        priceRate = df['close'].pct_change().tolist()[1]
        symbol1 = df['symbol'].tolist()[0]
        symbol2 = df['symbol'].tolist()[1]

    mc = _monthChange(symbol2, symbol1)
    ry = priceRate / mc
    return ry
def get_dce_reciept(date=None, vars=cons.vars):
    """
        Scrape registered warehouse receipts from the Dalian Commodity Exchange.
        Per the original notes: data starts on 20060106 and was updated every
        Friday until 20090407, from which date it is updated every trading day.
    Parameters
    ------
        date: day, format YYYY-MM-DD or YYYYMMDD or a datetime.date; today when omitted
        vars: list of variety codes such as RB, AL; defaults to cons.vars
    Return
    -------
        DataFrame
            var      variety code        string
            reciept  number of receipts  int
            date     day                 string YYYYMMDD
        or None (non-trading day)
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        print('%s非交易日' % date.strftime('%Y%m%d'))
        return None

    # The URL is filled with ``month - 1`` (0-based month).
    url = cons.DCE_RECIEPT_URL % (date.year, date.month - 1, date.day)
    data = pandas_readHtml_link(url, encoding='utf-8')[0]

    frames = []
    for x in data.to_dict(orient='records'):
        label = x[0]
        # Only the per-variety subtotal rows ('<name>小计') are of interest.
        if isinstance(label, str) and label[-2:] == '小计':
            D = {
                'var': chinese_to_english(label[:-2]),
                'reciept': int(x[3]),
                'date': date.strftime('%Y%m%d')
            }
            frames.append(pd.DataFrame(D, index=[0]))

    # One concat instead of DataFrame.append (removed in pandas 2.0).
    records = pd.concat(frames) if frames else pd.DataFrame()
    if len(records.index) != 0:
        records.index = records['var']
        vars_inMarket = [i for i in vars if i in records.index]
        records = records.loc[vars_inMarket, :]
    return records.reset_index(drop=True)
def get_czce_rank_table(date=None, vars=cons.vars):
    """
        Scrape the member position ranking details published by the
        Zhengzhou Commodity Exchange.
        Note (from the original notes): this exchange publishes both
        per-variety and per-contract rankings.
        Per the original notes: data starts on 20050509 and is refreshed
        around 16:30 every trading day.
    Parameters
    ------
        date: day, format YYYY-MM-DD or YYYYMMDD or a datetime.date; today when omitted
        vars: list of variety codes such as RB, AL; defaults to cons.vars.
              NOTE: the filter is only applied on the newer page formats; the
              branch for dates up to 2010-08-25 returns every table it finds.
    Return
    -------
        dict mapping a symbol (contract or variety) to a DataFrame; an empty
        dict on a non-trading day or when the page has fewer than 6 columns.
        Per the original notes each frame holds:
            rank                排名                        int
            vol_party_name      member ranked by volume     string
            vol                 volume of that member       int
            vol_chg             change in volume            int
            long_party_name     member ranked by longs      string
            long_openIntr       long positions              int
            long_openIntr_chg   change in long positions    int
            short_party_name    member ranked by shorts     string
            short_openIntr      short positions             int
            short_openIntr_chg  change in short positions   int
            symbol              contract                    string
            var                 variety                     string
            date                day                         string YYYYMMDD
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        print('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    if date <= datetime.date(2010, 8, 25):
        # Oldest page format: one HTML table per symbol; the symbol names are
        # taken from the <b> captions of the page, in document order.
        url = cons.CZCE_VOLRANK_URL_1 % (date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skiprow=0)
        r = requests_link(url, 'utf-8')
        r.encoding = 'utf-8'
        soup = BeautifulSoup(r.text, 'lxml', from_encoding="gb2312")
        symbols = []
        for link in soup.find_all('b'):
            strings = (str(link).split(' '))
            # Only captions with more than five space-separated parts are
            # used; the fifth part is the name.
            if len(strings) > 5:
                try:
                    symbol = chinese_to_english(strings[4])
                except:
                    # Keep the raw text when it cannot be translated.
                    symbol = strings[4]
                symbols.append(symbol)
        D = {}
        for i in range(len(symbols)):
            symbol = symbols[i]
            # data[0] is skipped: the i-th caption is paired with table i + 1.
            tableCut = data[i + 1]
            tableCut.columns = rank_columns
            # Drop the last row of the table.
            tableCut = tableCut.iloc[:-1, :]
            tableCut.loc[:, 'rank'] = tableCut.index
            # The '合计' (total) row gets the sentinel rank 999 and no member names.
            tableCut.loc['合计', 'rank'] = 999
            tableCut.loc[
                '合计',
                ['vol_party_name', 'long_party_name', 'short_party_name'
                 ]] = None
            tableCut.loc[:, 'symbol'] = symbol
            tableCut.loc[:, 'var'] = symbol2varietie(symbol)
            D[symbol] = tableCut.reset_index(drop=True)
        return D
    # Newer formats: a single large table; which table to take and how many
    # rows to skip depends on the date.
    elif date <= datetime.date(2015, 11, 11):
        url = cons.CZCE_VOLRANK_URL_2 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skiprow=1)[1]
    elif date < datetime.date(2017, 12, 28):
        url = cons.CZCE_VOLRANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skiprow=1)[0]
    else:
        url = cons.CZCE_VOLRANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skiprow=0)[0]
    if len(data.columns) < 6:
        return {}
    table = data.iloc[:, :9]
    table.columns = rank_columns
    table.loc[:, 'rank'] = table.index
    # Normalise the numeric columns: as text, without thousands separators,
    # and with '-' placeholders turned into 0.
    table[intColumns] = table[intColumns].astype(str)
    table[intColumns] = table[intColumns].applymap(
        lambda x: x.replace(',', ''))
    table = table.applymap(lambda x: 0 if x == '-' else x)
    # Index labels containing '合约' (contract) or '品种' (variety) are the
    # header rows that open each section; 0 is prepended for the first
    # section, whose header is read from the index name below.
    # presumably the index labels are all strings here - verify against _czce_df_read
    indexs = [i for i in table.index if '合约' in i or '品种' in i]
    indexs.insert(0, 0)
    D = {}
    for i in range(len(indexs)):
        if indexs[i] == 0:
            tableCut = table.loc[:indexs[i + 1], :]
            string = tableCut.index.name
        elif i < len(indexs) - 1:
            tableCut = table.loc[indexs[i]:indexs[i + 1], :]
            string = tableCut.index[0]
        else:
            tableCut = table.loc[indexs[i]:, :]
            string = tableCut.index[0]
        if 'PTA' in string:
            symbol = 'TA'
        else:
            try:
                # Take the text between ':' and the last space of the header
                # and translate its Chinese part.
                symbol = chinese_to_english(
                    find_chinese(re.compile(':(.*) ').findall(string)[0]))
            except:
                # Fall back to the raw captured text.
                symbol = re.compile(':(.*) ').findall(string)[0]
        var = symbol2varietie(symbol)
        if var in vars:
            # Drop incomplete rows and the first remaining row, then keep only
            # rows whose label is one of the strings '0' .. '20'.
            tableCut = tableCut.dropna(how='any').iloc[1:, :]
            tableCut = tableCut.loc[[
                x for x in tableCut.index
                if x in [str(i) for i in range(21)]
            ], :]
            tableCut = _tableCut_cal(tableCut, symbol)
            D[symbol] = tableCut.reset_index(drop=True)
    return D
def get_dce_rank_table(date=None, vars=cons.vars):
    """
        Scrape the top-20 member position ranking details published by the
        Dalian Commodity Exchange.
        Per the original notes: data starts on 20060104 and is refreshed
        around 16:30 every trading day.
    Parameters
    ------
        date: day, format YYYY-MM-DD or YYYYMMDD or a datetime.date; today when omitted
        vars: list of variety codes; only those listed for 'dce' in
              cons.market_var are queried; defaults to cons.vars
    Return
    -------
        dict mapping each variety code to a DataFrame (empty dict on a
        non-trading day); per the original notes each frame holds:
            rank, vol_party_name, vol, vol_chg,
            long_party_name, long_openIntr, long_openIntr_chg,
            short_party_name, short_openIntr, short_openIntr_chg,
            symbol, var, date
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        print('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    vars = [i for i in vars if i in cons.market_var['dce']]

    D = {}
    for var in vars:
        # The URL is filled with ``month - 1`` (0-based month).
        url = cons.DCE_VOLRANK_URL % (var.lower(), var.lower(), date.year,
                                      date.month - 1, date.day)
        # Close the HTTP response deterministically.
        with urllib.request.urlopen(url) as response:
            texts = response.readlines()
        if len(texts) <= 30:
            # Too short to hold a ranking table.
            continue

        # The file lists three consecutive top-20 blocks (volume, long, short);
        # rows are collected in order and split by position afterwards.
        list_60_name = []
        list_60 = []
        list_60_chg = []
        rank = []
        for text in texts:
            stringlist = text.decode('utf8').split()
            # A data row needs rank, member, amount and change. Checking the
            # length first keeps the four lists aligned; previously a short
            # row could append to some lists and not to others.
            if len(stringlist) < 4:
                continue
            try:
                position = int(stringlist[0])
            except ValueError:
                continue  # header / free-text line
            if position <= 20:
                list_60_name.append(stringlist[1])
                list_60.append(stringlist[2])
                list_60_chg.append(stringlist[3])
                rank.append(stringlist[0])

        tableCut = pd.DataFrame({
            'rank': rank[0:20],
            'vol_party_name': list_60_name[0:20],
            'vol': list_60[0:20],
            'vol_chg': list_60_chg[0:20],
            'long_party_name': list_60_name[20:40],
            'long_openIntr': list_60[20:40],
            'long_openIntr_chg': list_60_chg[20:40],
            'short_party_name': list_60_name[40:60],
            'short_openIntr': list_60[40:60],
            'short_openIntr_chg': list_60_chg[40:60]
        })
        tableCut = tableCut.applymap(lambda x: x.replace(',', ''))
        tableCut = _tableCut_cal(tableCut, var)
        D[var] = tableCut
    return D
def get_czce_daily(date=None):
    """
        Fetch one day of futures trading data from the Zhengzhou Commodity Exchange.
        Three source formats are handled, selected by date:
            up to 2010-08-24        HTML table
            2010-08-25 .. 2015-09-18  comma-separated text
            after 2015-09-19        pipe-separated text
    Parameters
    ------
        date: day, format YYYY-MM-DD or YYYYMMDD or a datetime.date; today when omitted
    Return
    -------
        DataFrame restricted to cons.OUTPUT_COLUMNS; per the original notes:
            symbol, date, open, high, low, close, volume, open_interest,
            turnover, settle, pre_settle, variety
        (``date`` is stored as an int YYYYMMDD here)
        or None (non-trading day, error page, unexpected header, HTTP error)
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    if day.strftime('%Y%m%d') not in calendar:
        # Use the normalised ``day``: ``date`` may be None or a plain string,
        # neither of which has strftime.
        print('%s非交易日' % day.strftime('%Y%m%d'))
        return None

    if day > datetime.date(2010, 8, 24):
        # NOTE(review): 2015-09-19 itself matches neither branch, so ``url``
        # would be unbound for that day - confirm it can never be in the calendar.
        if day > datetime.date(2015, 9, 19):
            u = cons.CZCE_DAILY_URL_3
            url = u % (day.strftime('%Y'), day.strftime('%Y%m%d'))
        elif day < datetime.date(2015, 9, 19):
            u = cons.CZCE_DAILY_URL_2
            url = u % (day.strftime('%Y'), day.strftime('%Y%m%d'))
        listed_columns = cons.CZCE_COLUMNS
        output_columns = cons.OUTPUT_COLUMNS
        try:
            r = requests.get(url)
            html = r.text
        except HTTPError as reason:
            if reason.code != 404:
                # Report the URL that was actually requested.
                print(url, reason)
            return
        # The exchange answers with an error / "no records" page instead of
        # an HTTP error when the day has no data.
        if html.find(u'您的访问出错了') >= 0 or html.find(u'无期权每日行情交易记录') >= 0:
            return
        # Drop the four trailing lines and the subtotal lines (start with '小').
        html = [i.replace(' ', '').split('|') for i in html.split('\n')[:-4] if i[0][0] != u'小']

        if day > datetime.date(2015, 9, 19):
            # Pipe-separated format: the second line is the column header.
            if html[1][0] not in [u'品种月份', u'品种代码']:
                return
            dict_data = list()
            day_const = int(day.strftime('%Y%m%d'))
            for row in html[2:]:
                m = cons.FUTURE_SYMBOL_PATTERN.match(row[0])
                if not m:
                    continue
                row_dict = {'date': day_const, 'symbol': row[0], 'variety': m.group(1)}
                for i, field in enumerate(listed_columns):
                    if row[i + 1] == "\r":
                        # Empty trailing cell.
                        row_dict[field] = 0.0
                    elif field in ['volume', 'open_interest', 'oi_chg', 'exercise_volume']:
                        row[i + 1] = row[i + 1].replace(',', '')
                        row_dict[field] = int(row[i + 1])
                    else:
                        row[i + 1] = row[i + 1].replace(',', '')
                        row_dict[field] = float(row[i + 1])
                dict_data.append(row_dict)
            return pd.DataFrame(dict_data)[output_columns]
        elif day < datetime.date(2015, 9, 19):
            # Comma-separated format: each line arrives as a single cell.
            dict_data = list()
            day_const = int(day.strftime('%Y%m%d'))
            for row in html[1:]:
                row = row[0].split(',')
                m = cons.FUTURE_SYMBOL_PATTERN.match(row[0])
                if not m:
                    continue
                row_dict = {'date': day_const, 'symbol': row[0], 'variety': m.group(1)}
                for i, field in enumerate(listed_columns):
                    if row[i + 1] == "\r":
                        row_dict[field] = 0.0
                    elif field in ['volume', 'open_interest', 'oi_chg', 'exercise_volume']:
                        # Counts may be written with a decimal point here.
                        row_dict[field] = int(float(row[i + 1]))
                    else:
                        row_dict[field] = float(row[i + 1])
                dict_data.append(row_dict)
            return pd.DataFrame(dict_data)[output_columns]

    if day <= datetime.date(2010, 8, 24):
        # Oldest format: an HTML table, read directly by pandas.
        u = cons.CZCE_DAILY_URL_1
        url = u % day.strftime('%Y%m%d')
        listed_columns = cons.CZCE_COLUMNS_2
        output_columns = cons.OUTPUT_COLUMNS
        df = pd.read_html(url)[1].dropna(how='any')

        dict_data = list()
        day_const = int(day.strftime('%Y%m%d'))
        # The first record is skipped (header row).
        for row in df.to_dict(orient='records')[1:]:
            m = cons.FUTURE_SYMBOL_PATTERN.match(row[0])
            if not m:
                continue
            row_dict = {'date': day_const, 'symbol': row[0], 'variety': m.group(1)}
            for i, field in enumerate(listed_columns):
                if row[i + 1] == "\r":
                    row_dict[field] = 0.0
                elif field in ['volume', 'open_interest', 'oi_chg', 'exercise_volume']:
                    row_dict[field] = int(row[i + 1])
                else:
                    row_dict[field] = float(row[i + 1])
            dict_data.append(row_dict)
        return pd.DataFrame(dict_data)[output_columns]
def get_dce_daily(date=None, type="future", retries=0):
    """
        Fetch one day of trading data from the Dalian Commodity Exchange.
    Parameters
    ------
        date: day, format YYYY-MM-DD or YYYYMMDD or a datetime.date; today when omitted
        type: 'future' or 'option'
        retries: int, number of retries already made; the call gives up once
                 this exceeds 3
    Return
    -------
        DataFrame
            type 'future': restricted to cons.OUTPUT_COLUMNS; per the original notes
                symbol, date, open, high, low, close, volume, open_interest,
                turnover, settle, pre_settle, variety
            type 'option': restricted to cons.OPTION_OUTPUT_COLUMNS; per the original notes
                symbol, date, open, high, low, close, pre_settle, settle, delta,
                volume, open_interest, oi_change, turnover, implied_volatility,
                exercise_volume, variety
        or None (non-trading day, invalid ``type``, too many retries, no data)
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    if day.strftime('%Y%m%d') not in calendar:
        # Use the normalised ``day``: ``date`` may be None or a plain string,
        # neither of which has strftime.
        print('%s非交易日' % day.strftime('%Y%m%d'))
        return None
    if retries > 3:
        print("maximum retires for DCE market data: ", day.strftime("%Y%m%d"))
        return

    # The query string carries ``month - 1`` (0-based month).
    if type == 'future':
        url = cons.DCE_DAILY_URL + '?' + urlencode({"currDate": day.strftime('%Y%m%d'),
                                                    "year": day.strftime('%Y'),
                                                    "month": str(int(day.strftime('%m')) - 1),
                                                    "day": day.strftime('%d')})
        listed_columns = cons.DCE_COLUMNS
        output_columns = cons.OUTPUT_COLUMNS
    elif type == 'option':
        url = cons.DCE_DAILY_URL + '?' + urlencode({"currDate": day.strftime('%Y%m%d'),
                                                    "year": day.strftime('%Y'),
                                                    "month": str(int(day.strftime('%m')) - 1),
                                                    "day": day.strftime('%d'),
                                                    "dayQuotes.trade_type": "1"})
        listed_columns = cons.DCE_OPTION_COLUMNS
        output_columns = cons.OPTION_OUTPUT_COLUMNS
    else:
        print('invalid type :' + type + ', should be one of "future" or "option"')
        return

    # Every retry forwards ``type``; the original dropped it, so an 'option'
    # request silently retried as 'future'.
    try:
        response = urlopen(Request(url, method='POST', headers=cons.headers)).read().decode('utf8')
    except IncompleteRead:
        return get_dce_daily(day, type=type, retries=retries + 1)
    except HTTPError as reason:
        if reason.code == 504:
            return get_dce_daily(day, type=type, retries=retries + 1)
        elif reason.code != 404:
            print(cons.DCE_DAILY_URL, reason)
        return

    if u'错误:您所请求的网址(URL)无法获取' in response:
        # The site's own error page for an unreachable URL: retry.
        return get_dce_daily(day, type=type, retries=retries + 1)
    elif u'暂无数据' in response:
        # "No data yet" page.
        return

    data = BeautifulSoup(response, 'html.parser').find_all('tr')
    if len(data) == 0:
        return

    dict_data = list()
    implied_data = list()
    for idata in data[1:]:
        # Skip subtotal ('小计') and total ('总计') rows.
        if u'小计' in idata.text or u'总计' in idata.text:
            continue
        x = idata.find_all('td')
        if type == 'future':
            row_dict = {'variety': cons.DCE_MAP[x[0].text.strip()]}
            row_dict['symbol'] = row_dict['variety'] + x[1].text.strip()
            for i, field in enumerate(listed_columns):
                field_content = x[i + 2].text.strip()
                # Any cell containing '-' is stored as 0.
                if '-' in field_content:
                    row_dict[field] = 0
                elif field in ['volume', 'open_interest']:
                    row_dict[field] = int(field_content.replace(',', ''))
                else:
                    row_dict[field] = float(field_content.replace(',', ''))
            dict_data.append(row_dict)
        elif len(x) == 16:
            # Option quote row.
            m = cons.FUTURE_SYMBOL_PATTERN.match(x[1].text.strip())
            if not m:
                continue
            row_dict = {'symbol': x[1].text.strip(), 'variety': m.group(1).upper(), 'contract_id': m.group(0)}
            for i, field in enumerate(listed_columns):
                field_content = x[i + 2].text.strip()
                if '-' in field_content:
                    row_dict[field] = 0
                elif field in ['volume', 'open_interest']:
                    row_dict[field] = int(field_content.replace(',', ''))
                else:
                    row_dict[field] = float(field_content.replace(',', ''))
            dict_data.append(row_dict)
        elif len(x) == 2:
            # Two-cell rows carry the implied volatility per contract.
            implied_data.append({'contract_id': x[0].text.strip(), 'implied_volatility': float(x[1].text.strip())})

    df = pd.DataFrame(dict_data)
    df['date'] = day.strftime('%Y%m%d')
    if type == 'future':
        return df[output_columns]
    else:
        return pd.merge(df, pd.DataFrame(implied_data), on='contract_id', how='left', indicator=False)[output_columns]
def get_cffex_daily(date=None):
    """
        Fetch the daily trading data published by CFFEX for a single day.
    Parameters
    ------
        date: trading day, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; defaults to today
    Return
    -------
        DataFrame
            CFFEX daily trading data (DataFrame):
                symbol        contract code
                date          date
                open          open price
                high          high price
                low           low price
                close         close price
                volume        volume
                open_interest open interest
                turnover      turnover
                settle        settlement price
                pre_settle    previous settlement price
                variety       contract variety
        or None (no trading data available for the given date)
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    day_const = day.strftime('%Y%m%d')
    if day_const not in calendar:
        # Format the normalised ``day``: the raw ``date`` argument may be None
        # or a string, neither of which has ``strftime``.
        print('%s非交易日' % day_const)
        return None

    url = cons.CFFEX_DAILY_URL % (day.strftime('%Y%m'), day.strftime('%d'), day_const)
    try:
        html = urlopen(Request(url, headers=cons.headers)).read().decode('gbk', 'ignore')
    except HTTPError as reason:
        # A 404 is treated as "no data for that day" and is not reported.
        if reason.code != 404:
            print(url, reason)
        return None

    if html.find(u'网页错误') >= 0:
        return None

    # The last two lines of the payload are dropped; lines starting with the
    # character below are skipped as well. Blank lines are ignored so they
    # cannot raise IndexError.
    rows = [
        line.replace(' ', '').split(',')
        for line in html.split('\n')[:-2]
        if line and line[0] != u'小'
    ]
    # The first remaining line must be the header row, otherwise the payload
    # is not the expected CSV.
    if not rows or rows[0][0] != u'合约代码':
        return None

    int_fields = ('volume', 'open_interest', 'oi_chg')
    dict_data = []
    for row in rows[1:]:
        m = cons.FUTURE_SYMBOL_PATTERN.match(row[0])
        if not m:
            continue
        row_dict = {'date': day_const, 'symbol': row[0], 'variety': m.group(1)}
        for i, field in enumerate(cons.CFFEX_COLUMNS, 1):
            cell = row[i]
            if cell == u"":
                row_dict[field] = 0.0
            elif field in int_fields:
                row_dict[field] = int(cell)
            else:
                row_dict[field] = float(cell)
        # The previous settlement price is not read directly; it is derived
        # from the close price and the 'change1' column.
        row_dict['pre_settle'] = row_dict['close'] - row_dict['change1']
        dict_data.append(row_dict)

    if not dict_data:
        # No contract rows matched: honour the documented "None when there is
        # no data" contract instead of failing on the column selection.
        return None
    return pd.DataFrame(dict_data)[cons.OUTPUT_COLUMNS]
def get_shfe_rank_table(date=None, vars=cons.vars):
    """
        Fetch the SHFE top-20 member position ranking detail tables.
        Note: the exchange only publishes rankings per contract inside each
        variety; it does not publish an overall ranking per variety.
    Parameters
    ------
        date: trading day, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; defaults to today
        vars: list of varieties such as RB, AL; defaults to all commodities
        Data starts at 20020107 and is updated around 16:30 every trading day.
    Return
    -------
        dict mapping each contract symbol to a DataFrame with columns:
                rank                        rank                                        int
                vol_party_name              member at this rank by volume               string (Chinese)
                vol                         volume of that member                       int
                vol_chg                     change in volume of that member             int
                long_party_name             member at this rank by long positions       string (Chinese)
                long_openIntr               long open interest of that member           int
                long_openIntr_chg           change in long open interest                int
                short_party_name            member at this rank by short positions      string (Chinese)
                short_openIntr              short open interest of that member          int
                short_openIntr_chg          change in short open interest               int
                symbol                      contract symbol                             string
                var                         variety                                     string
                date                        date                                        string YYYYMMDD
        An empty dict is returned for a non-trading day or an unusable response.
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    day_str = date.strftime('%Y%m%d')
    if day_str not in calendar:
        print('%s非交易日' % day_str)
        return {}

    url = cons.SHFE_VOLRANK_URL % (day_str)
    r = requests_link(url, 'utf-8')
    try:
        context = json.loads(r.text)
    except (AttributeError, TypeError, ValueError):
        # No response object, no text body, or a body that is not valid JSON.
        return {}

    # A payload without the ranking rows is treated like any other unusable
    # response instead of raising KeyError/TypeError.
    rows = context.get('o_cursor') if isinstance(context, dict) else None
    if not rows:
        return {}

    df = pd.DataFrame(rows).rename(
        columns={
            'CJ1': 'vol',
            'CJ1_CHG': 'vol_chg',
            'CJ2': 'long_openIntr',
            'CJ2_CHG': 'long_openIntr_chg',
            'CJ3': 'short_openIntr',
            'CJ3_CHG': 'short_openIntr_chg',
            'PARTICIPANTABBR1': 'vol_party_name',
            'PARTICIPANTABBR2': 'long_party_name',
            'PARTICIPANTABBR3': 'short_party_name',
            'PRODUCTNAME': 'product1',
            'RANK': 'rank',
            'INSTRUMENTID': 'symbol',
            'PRODUCTSORTNO': 'product2'
        })
    if len(df.columns) < 3:
        return {}

    def _clean(value):
        # Strip surrounding whitespace from strings, then map '' to None.
        if isinstance(value, str):
            value = value.strip()
        return None if value == '' else value

    df = df.applymap(_clean)
    df['var'] = df['symbol'].apply(symbol2varietie)
    # Only rows with a positive rank are kept.
    df = df[df['rank'] > 0]
    # Drop helper columns when present; missing ones are simply ignored.
    df = df.drop(
        ['PARTICIPANTID1', 'PARTICIPANTID2', 'PARTICIPANTID3', 'product1', 'product2'],
        axis=1,
        errors='ignore')

    # Build the membership set once instead of once per requested variety.
    available = set(df['var'])
    get_vars = [var for var in vars if var in available]

    D = {}
    for var in get_vars:
        df_var = df[df['var'] == var]
        for symbol in set(df_var['symbol']):
            D[symbol] = df_var[df_var['symbol'] == symbol].reset_index(drop=True)
    return D
def get_rank_sum(date=None, vars=cons.vars):
    """
        Aggregate the top-5/10/15/20 member position ranking data of the four
        futures exchanges.
        Note 1: SHFE and CFFEX only publish rankings per contract inside each
                variety, not an overall ranking per variety, so the
                variety-level rows returned here are the sum of the contract
                rows of that variety and are not a real variety ranking.
        Note 2: DCE only publishes variety rankings, not contract rankings.
    Parameters
    ------
        date: trading day, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; defaults to today
        vars: list of varieties such as RB, AL; defaults to all commodities
    Return
    -------
        DataFrame (None when the date is not a trading day):
                symbol                      contract symbol                                     string
                var                         variety                                             string
                vol_top5                    total volume of the top-5 members by volume         int
                vol_chg_top5                total volume change of the top-5 members            int
                long_openIntr_top5          total long open interest of the top-5 members       int
                long_openIntr_chg_top5      total long open interest change of the top-5        int
                short_openIntr_top5         total short open interest of the top-5 members      int
                short_openIntr_chg_top5     total short open interest change of the top-5       int
                vol_top10                   total volume of the top-10 members by volume        int
                ...
                date                        date                                                string YYYYMMDD
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    day_str = date.strftime('%Y%m%d')
    if day_str not in calendar:
        return None

    dce_var = [i for i in vars if i in cons.market_var['dce']]
    shfe_var = [i for i in vars if i in cons.market_var['shfe']]
    czce_var = [i for i in vars if i in cons.market_var['czce']]
    cffex_var = [i for i in vars if i in cons.market_var['cffex']]

    # Ranking tables collected from every exchange that has a requested
    # variety; the update order is kept so later exchanges win on key clashes.
    tables = {}
    if dce_var:
        tables.update(get_dce_rank_table(date, dce_var))
    if shfe_var:
        tables.update(get_shfe_rank_table(date, shfe_var))
    if czce_var:
        tables.update(get_czce_rank_table(date, czce_var))
    if cffex_var:
        tables.update(get_cffex_rank_table(date, cffex_var))

    rank_fields = ('vol', 'vol_chg', 'long_openIntr', 'long_openIntr_chg',
                   'short_openIntr', 'short_openIntr_chg')

    # One single-row frame per contract symbol. They are gathered in a list
    # and concatenated once, avoiding repeated DataFrame.append calls.
    frames = []
    for table in tables.values():
        table = table.applymap(lambda x: 0 if x == '' else x)
        for symbol in set(table['symbol']):
            symbol_table = table[table['symbol'] == symbol]
            record = {'symbol': symbol, 'var': symbol2varietie(symbol)}
            for top in (5, 10, 15, 20):
                top_cut = symbol_table[symbol_table['rank'] <= top]
                for field in rank_fields:
                    record['%s_top%d' % (field, top)] = top_cut[field].sum()
            record['date'] = day_str
            frames.append(pd.DataFrame(record, index=[0]))

    records = pd.concat(frames) if frames else pd.DataFrame()

    if frames:
        # For SHFE and CFFEX varieties add one extra row per variety holding
        # the column-wise sum of that variety's contract rows.
        present = set(records['var'])
        var_rows = []
        for var in cons.market_var['shfe'] + cons.market_var['cffex']:
            if var not in present:
                continue
            var_record = pd.DataFrame(records[records['var'] == var].sum()).T
            # Summing also combined the text columns, so reset them.
            var_record['date'] = day_str
            var_record.loc[:, ['var', 'symbol']] = var
            var_rows.append(var_record)
        records = pd.concat([records] + var_rows)

    return records.reset_index(drop=True)