def fetch_kdata(exchange_str='bitstamp'):
    """Fetch daily OHLCV k-lines from one ccxt exchange and save them.

    For every cryptocurrency security listed for ``exchange_str`` whose name
    is in ``CRYPTOCURRENCY_PAIR``, the daily candles are requested through
    ccxt, candles dated today are dropped, the rest are appended to the frame
    returned by ``get_latest_download_trading_date`` and the result is written
    with ``kdata_df_save``.

    A failure on one security is logged and does not stop the remaining
    securities from being processed.

    :param exchange_str: name of the exchange class attribute on the ``ccxt``
        module, e.g. ``'bitstamp'``.
    :raises AttributeError: if ``ccxt`` has no exchange with that name.
    """
    # Resolve the exchange class by attribute lookup rather than eval()-ing a
    # formatted string, so ``exchange_str`` can never be executed as code.
    ccxt_exchange = getattr(ccxt, exchange_str)()

    if not ccxt_exchange.has['fetchOHLCV']:
        logger.warning("exchange:{} not support fetchOHLCV".format(exchange_str))
        return

    securities = get_security_list(security_type='cryptocurrency', exchanges=[exchange_str])
    for _, security_item in securities.iterrows():
        try:
            if security_item['name'] not in CRYPTOCURRENCY_PAIR:
                continue

            start_date, df = get_latest_download_trading_date(security_item)

            today = pd.Timestamp.today()
            # Daily k-lines are only fetched up to yesterday.
            end_date = today - pd.DateOffset(1)

            if start_date and (start_date > end_date):
                logger.info("{} kdata is ok".format(security_item['code']))
                continue

            try:
                kdatas = ccxt_exchange.fetch_ohlcv(security_item['name'], timeframe='1d')
                # Pause between requests to respect the exchange rate limit.
                time.sleep(5)
            except Exception:
                # logger.exception already attaches the active exception and
                # its traceback; passing it as an extra %-argument without a
                # placeholder makes the logging call itself fail to format.
                logger.exception("fetch_kdata for {} {} failed".format(exchange_str, security_item['name']))
                continue

            # NOTE(review): every fetched candle is added regardless of
            # start_date; presumably kdata_df_save de-duplicates rows that
            # were already stored -- confirm.
            rows = []
            for kdata in kdatas:
                # kdata[0] is divided by 1000 before conversion, i.e. it is
                # treated as a millisecond epoch timestamp.
                timestamp = pd.Timestamp.fromtimestamp(int(kdata[0] / 1000))
                # Skip the candle dated today.
                if is_same_date(timestamp, today):
                    continue
                rows.append({
                    'timestamp': to_time_str(timestamp),
                    'code': security_item['code'],
                    'name': security_item['name'],
                    'open': kdata[1],
                    'high': kdata[2],
                    'low': kdata[3],
                    'close': kdata[4],
                    'volume': kdata[5],
                    'securityId': security_item['id'],
                    'preClose': None,
                    'change': None,
                    'changePct': None,
                })

            # One concat instead of DataFrame.append per row: appending in a
            # loop copies the whole frame on every iteration.
            if rows:
                df = pd.concat([df, pd.DataFrame(rows)], ignore_index=True)

            if not df.empty:
                df = df.loc[:, KDATA_COMMON_COL]
                kdata_df_save(df, get_kdata_path(security_item), calculate_change=True)
                logger.info(
                    "fetch_kdata for exchange:{} security:{} success".format(exchange_str, security_item['name']))
        except Exception:
            # Keep going with the next security, but record why this one
            # failed (the original message dropped the exception entirely).
            logger.exception(
                "fetch_kdata for exchange:{} security:{} failed".format(exchange_str, security_item['name']))
def _extract_shfe_archives(the_dir):
    """Unzip each ``.zip`` in *the_dir* that has no sibling ``.xls`` yet.

    Every archive is extracted into a directory named after the archive
    (without the extension); if exactly one ``.xls`` is found there it is
    moved next to the archive as ``<archive name>.xls``.

    :param the_dir: directory holding the downloaded ``.zip`` files.
    :return: list of the ``.xls`` paths created by this call.
    """
    extracted = []
    zip_files = [os.path.join(the_dir, f) for f in os.listdir(the_dir) if f.endswith('.zip')]
    for the_zip_file in zip_files:
        # Strip only the trailing extension; str.replace('.zip', ...) would
        # also rewrite a '.zip' occurring earlier in the path.
        base_path = the_zip_file[:-len('.zip')]
        dst_file = base_path + '.xls'
        if os.path.exists(dst_file):
            continue

        dst_dir = base_path
        # The directory may be left over from an earlier, interrupted run.
        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir)
        unzip(the_zip_file, dst_dir)

        files = [os.path.join(dst_dir, f) for f in os.listdir(dst_dir) if f.endswith('.xls')]
        if len(files) != 1:
            # Without exactly one sheet there is no dst_file to parse;
            # queueing it anyway would make read_excel fail on a missing file.
            logger.warning("expected exactly one .xls in {}, found {}".format(dst_dir, len(files)))
            continue

        os.rename(files[0], dst_file)
        extracted.append(dst_file)
    return extracted


def parse_shfe_data(force_parse=False):
    """Parse the cached SHFE future sheets into per-contract k-data files.

    Newly downloaded archives in the SHFE cache directory are extracted
    first. Each sheet to parse is read with pandas, its columns are renamed
    to the project's field names, and for every contract in the sheet:

    * the contract is added to the SHFE future security list (and the list
      is written to disk) when it is not there yet;
    * the contract rows are appended to the contract's existing k-data CSV
      (if any) and stored with ``kdata_df_save``.

    :param force_parse: when true, parse every ``.xls`` in the cache
        directory instead of only the ones extracted by this call.
    """
    the_dir = get_exchange_cache_dir(security_type='future', exchange='shfe')

    need_parse_files = _extract_shfe_archives(the_dir)
    if force_parse:
        need_parse_files = [os.path.join(the_dir, f) for f in os.listdir(the_dir) if f.endswith('.xls')]

    for the_file in need_parse_files:
        logger.info("parse {}".format(the_file))

        # NOTE(review): skip_footer, fillna(method=...) and DataFrame.append
        # below tie this function to an older pandas release -- confirm the
        # pinned version before modernising them.
        df = pd.read_excel(the_file, skiprows=2, skip_footer=4, index_col='合约', converters={'日期': str})
        # Forward-fill the contract index; presumably the sheet only names
        # the contract on the first row of each group -- confirm.
        df.index = pd.Series(df.index).fillna(method='ffill')
        df = df.loc[:, [
            '日期', '前收盘', '前结算', '开盘价', '最高价', '最低价', '收盘价',
            '结算价', '涨跌1', '涨跌2', '成交量', '成交金额', '持仓量'
        ]]
        df.columns = [
            'timestamp', 'preClose', 'preSettlement', 'open', 'high', 'low', 'close',
            'settlement', 'change', 'change1', 'volume', 'turnover', 'openInterest'
        ]

        # Normalising the date format (to ease importing into ES) is
        # currently disabled:
        # df.timestamp = df.timestamp.apply(lambda x: to_time_str(x))

        unique_index = df.index.drop_duplicates()

        security_list = get_security_list(security_type='future', exchanges=['shfe'])

        for the_contract in unique_index:
            logger.info("start handling {} in {}".format(the_contract, the_file))
            security_item = {
                'code': the_contract,
                'name': get_future_name(the_contract),
                'id': 'future_{}_{}'.format('shfe', the_contract),
                'exchange': 'shfe',
                'type': 'future'
            }

            # Check whether the contract meta needs to be saved.
            if (not security_list.empty) and ('code' in security_list.columns):
                # Re-index by code on every pass: the append below resets the
                # index with ignore_index=True.
                security_list = security_list.set_index(security_list['code'], drop=False)
            if the_contract not in security_list.index:
                security_list = security_list.append(security_item, ignore_index=True)
                security_list = security_list.sort_index()
                security_list.to_csv(get_security_list_path('future', 'shfe'), index=False)

            # Copy the slice so the column assignments below do not write
            # into (or warn about) a view of ``df``.
            the_df = df.loc[the_contract, ].copy()
            the_df['code'] = the_contract
            the_df['name'] = security_item['name']
            the_df['securityId'] = security_item['id']
            the_df['changePct'] = the_df['change'] / the_df['preClose']
            the_df['changePct1'] = the_df['change1'] / the_df['preSettlement']

            kdata_path = get_kdata_path(item=security_item, source='exchange')
            # TODO: this logic should be handled in one common place.
            kdata_dir = get_kdata_dir(item=security_item)
            if not os.path.exists(kdata_dir):
                os.makedirs(kdata_dir)

            if os.path.exists(kdata_path):
                saved_df = pd.read_csv(kdata_path, dtype=str)
            else:
                saved_df = pd.DataFrame()

            saved_df = saved_df.append(the_df, ignore_index=True)
            saved_df = saved_df.loc[:, KDATA_FUTURE_COL]

            if not saved_df.empty:
                kdata_df_save(saved_df, kdata_path)

            logger.info("end handling {} in {}".format(the_contract, the_file))