def spider_closed(self, spider, reason):
    """Write the merged trading calendar to disk when the spider closes.

    When ``self.trading_dates`` is non-empty, the previously saved dates
    (``self.saved_trading_dates``) are merged into it, the result is
    de-duplicated with ``drop_duplicate``, sorted, and dumped as JSON to the
    path returned by ``get_exchange_trading_calendar_path('future', 'shfe')``.
    The close event is logged in every case.

    Args:
        spider: The spider being closed; its ``logger`` and ``name`` are used
            for the final log line.
        reason: The close reason, included in the log line.
    """
    if self.trading_dates:
        saved_dates = self.saved_trading_dates
        if saved_dates:
            if isinstance(saved_dates, (list, tuple, set, frozenset)):
                # Merge element-wise: appending the collection itself would
                # nest it as a single entry and break sorting / the JSON shape.
                self.trading_dates.extend(saved_dates)
            else:
                # A single scalar date is still added as one entry.
                self.trading_dates.append(saved_dates)

        result_list = sorted(drop_duplicate(self.trading_dates))

        the_path = get_exchange_trading_calendar_path('future', 'shfe')
        with open(the_path, 'w') as outfile:
            json.dump(result_list, outfile)

    spider.logger.info('Spider closed: %s,%s\n', spider.name, reason)
# Output column order of the per-contract day k-data CSV:
# date, code, name, low, open, close, high, volume (lots), turnover (yuan),
# unique id, previous close, change, change pct (%), open interest,
# settlement price, previous settlement, change (by settlement price),
# change pct (by settlement price)
_SHFE_KDATA_COLUMNS = [
    'timestamp', 'code', 'name', 'low', 'open', 'close', 'high', 'volume',
    'turnover', 'securityId', 'preClose', 'change', 'changePct',
    'openInterest', 'settlement', 'preSettlement', 'change1', 'changePct1',
]


def _append_row(frame, row):
    """Return ``frame`` with the dict ``row`` added as a new last row.

    ``pd.concat`` is used because ``DataFrame.append`` was deprecated in
    pandas 1.4 and removed in pandas 2.0.
    """
    return pd.concat([frame, pd.DataFrame([row])], ignore_index=True)


def _ratio_or_zero(numerator, denominator):
    """Return ``numerator / denominator``, or 0 when the denominator is unusable.

    A zero, blank, ``None`` or string denominator yields 0 instead of raising.
    """
    if not denominator or isinstance(denominator, str):
        return 0
    return numerator / denominator


def _register_security(security_item):
    """Persist ``security_item`` in the 'shfe' future list if its code is missing."""
    security_list = get_security_list(security_type='future', exchanges=['shfe'])
    if security_list is None:
        # Nothing stored yet: start from an empty frame rather than failing
        # on ``None.index`` below.
        security_list = pd.DataFrame()

    # Check whether the contract meta needs to be saved.
    if 'code' in security_list.columns:
        security_list = security_list.set_index(security_list['code'], drop=False)
    if security_item['code'] not in security_list.index:
        security_list = _append_row(security_list, security_item)
        security_list.to_csv(get_security_list_path('future', 'shfe'), index=False)


def _build_kdata_row(the_data, the_date, code, name, security_id):
    """Convert one raw exchange record into a day k-data row dict.

    A raw record looks like::

        {'CLOSEPRICE': 11480, 'DELIVERYMONTH': '1809', 'HIGHESTPRICE': 11555,
         'LOWESTPRICE': 11320, 'OPENINTEREST': 425692, 'OPENINTERESTCHG': 3918,
         'OPENPRICE': 11495, 'ORDERNO': 0, 'PRESETTLEMENTPRICE': 11545,
         'PRODUCTID': 'ru_f ', 'PRODUCTNAME': '...', 'PRODUCTSORTNO': 100,
         'SETTLEMENTPRICE': 11465, 'VOLUME': 456574, 'ZD1_CHG': -65,
         'ZD2_CHG': -80}
    """
    # Falsy price/volume fields (e.g. blank values) are treated as 0.
    low_price = the_data['LOWESTPRICE'] or 0
    open_price = the_data['OPENPRICE'] or 0
    close_price = the_data['CLOSEPRICE'] or 0
    high_price = the_data['HIGHESTPRICE'] or 0
    volume = the_data['VOLUME'] or 0

    # The change fields arrive as strings when no numeric value is available.
    change = 0 if isinstance(the_data['ZD1_CHG'], str) else the_data['ZD1_CHG']
    change1 = 0 if isinstance(the_data['ZD2_CHG'], str) else the_data['ZD2_CHG']

    pre_close = close_price - change
    pre_settlement = the_data['PRESETTLEMENTPRICE']

    # First trading day: there is no previous price, so the percentage is 0.
    change_pct = _ratio_or_zero(change, pre_close)
    change_pct1 = _ratio_or_zero(change1, pre_settlement)

    return {
        "timestamp": to_time_str(the_date),
        "code": code,
        "name": name,
        "low": low_price,
        "open": open_price,
        "close": close_price,
        "high": high_price,
        "volume": volume,
        # Turnover is an estimate: mean of the four prices times the volume.
        "turnover": (low_price + open_price + close_price + high_price) / 4 * volume,
        "securityId": security_id,
        "preClose": pre_close,
        "change": change,
        "changePct": change_pct,
        "openInterest": the_data['OPENINTEREST'],
        "settlement": the_data['SETTLEMENTPRICE'],
        "preSettlement": the_data['PRESETTLEMENTPRICE'],
        "change1": change1,
        "changePct1": change_pct1,
    }


def _save_day_kdata(the_data, the_date, security_item):
    """Add the record for ``the_date`` to the contract's k-data CSV if absent."""
    code = security_item['code']
    kdata_path = get_kdata_path(item=security_item, source='exchange')
    # TODO: this logic should be handled in one place.
    kdata_dir = get_kdata_dir(item=security_item)
    os.makedirs(kdata_dir, exist_ok=True)

    if os.path.exists(kdata_path):
        saved_df = pd.read_csv(kdata_path, dtype=str)
        saved_df = saved_df.set_index(saved_df['timestamp'], drop=False)
    else:
        saved_df = pd.DataFrame()

    # NOTE(review): the index holds the stored 'timestamp' strings while new
    # rows are written with to_time_str(the_date); this skip only triggers if
    # both spellings agree -- confirm. Duplicates are dropped below regardless.
    if not saved_df.empty and the_date in saved_df.index:
        return

    kdata_row = _build_kdata_row(the_data, the_date, code,
                                 security_item['name'], security_item['id'])
    saved_df = _append_row(saved_df, kdata_row)
    saved_df = saved_df.loc[:, _SHFE_KDATA_COLUMNS]
    saved_df = saved_df.drop_duplicates(subset='timestamp', keep='last')
    saved_df = saved_df.set_index(saved_df['timestamp'], drop=False)
    # Sort chronologically; the datetime index is not written to the CSV.
    saved_df.index = pd.to_datetime(saved_df.index)
    saved_df = saved_df.sort_index()
    saved_df.to_csv(kdata_path, index=False)

    logger.info("end handling {} for {}".format(code, the_date))


def parse_shfe_day_data(force_parse=False):
    """Parse cached 'shfe' future day k-data files into per-contract CSVs.

    Every file in the current year's cache directory (except the ``parsed``
    marker file) is named after a date and holds a JSON document whose
    ``o_curinstrument`` list contains one record per contract. Records whose
    ``DELIVERYMONTH`` does not start with four digits are skipped. For each
    remaining record the contract is registered in the security list if
    needed and its day row is written to the contract's k-data CSV.

    After a file has been processed its name is recorded in the ``parsed``
    marker file (rewritten after every file) so later runs can skip it.

    Args:
        force_parse: When true, re-process every cached file even if it is
            already listed in the ``parsed`` marker file.
    """
    cache_dir = get_exchange_cache_dir(security_type='future', exchange='shfe',
                                       the_year=datetime.datetime.today().year,
                                       data_type="day_kdata")
    the_parsed_path = os.path.join(cache_dir, 'parsed')
    the_parsed = []
    if os.path.exists(the_parsed_path):
        with open(the_parsed_path) as data_file:
            the_parsed = json.load(data_file)

    already_parsed = set(the_parsed)
    the_dates = [
        f for f in os.listdir(cache_dir)
        if f != 'parsed' and (force_parse or f not in already_parsed)
    ]

    for the_date in the_dates:
        the_path = os.path.join(cache_dir, the_date)
        logger.info("start handling {}".format(the_path))

        # Read the payload up front so the cache file is not held open while
        # the per-contract files are being written.
        with open(the_path, 'r', encoding='UTF8') as f:
            the_datas = json.load(f)['o_curinstrument']

        for the_data in the_datas:
            if not re.match(r"\d{4}", the_data['DELIVERYMONTH']):
                continue

            product_id = the_data['PRODUCTID']
            code = "{}{}".format(product_id[:product_id.index('_')],
                                 the_data['DELIVERYMONTH'])
            logger.info("start handling {} for {}".format(code, the_date))

            security_item = {
                'code': code,
                'name': get_future_name(code),
                'id': "future_shfe_{}".format(code),
                'exchange': 'shfe',
                'type': 'future',
            }
            _register_security(security_item)
            _save_day_kdata(the_data, the_date, security_item)

        # Record progress after each file so an interrupted run can resume.
        if the_date not in already_parsed:
            the_parsed.append(the_date)
            already_parsed.add(the_date)
        result_list = sorted(drop_duplicate(the_parsed))
        with open(the_parsed_path, 'w') as outfile:
            json.dump(result_list, outfile)

        logger.info("end handling {}".format(the_path))