def spider_closed(self, spider, reason):
    """Write the collected SHFE future trading dates to disk and log the close.

    When ``self.trading_dates`` is non-empty, the dates in
    ``self.saved_trading_dates`` (if any) are merged into it, the result is
    de-duplicated with ``drop_duplicate``, sorted, and dumped as JSON to the
    path returned by ``get_exchange_trading_calendar_path('future', 'shfe')``.
    A "Spider closed" message is always logged through ``spider.logger``.

    Args:
        spider: object exposing ``name`` and ``logger``.
        reason: value logged as the reason the spider closed.
    """
    if self.trading_dates:
        saved_dates = self.saved_trading_dates
        if saved_dates:
            # NOTE(review): saved_trading_dates is presumably the list read
            # back from the calendar file written below -- confirm. A
            # collection must be merged item by item: appending it as a single
            # element would nest it inside trading_dates and break the
            # de-duplication / sorting that follows.
            if isinstance(saved_dates, (list, tuple, set)):
                self.trading_dates.extend(saved_dates)
            else:
                self.trading_dates.append(saved_dates)
        result_list = sorted(drop_duplicate(self.trading_dates))

        the_path = get_exchange_trading_calendar_path('future', 'shfe')
        with open(the_path, 'w') as outfile:
            json.dump(result_list, outfile)

    spider.logger.info('Spider closed: %s,%s\n', spider.name, reason)
Esempio n. 2
0
def _shfe_append_row(frame, row):
    """Return ``frame`` with the dict ``row`` appended as a new last row."""
    row_frame = pd.DataFrame([row])
    # A missing or completely empty frame contributes nothing; returning the
    # new row directly also avoids concatenating an empty frame.
    if frame is None or (frame.empty and len(frame.columns) == 0):
        return row_frame
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    return pd.concat([frame, row_frame], ignore_index=True)


def _shfe_build_kdata_row(the_data, the_date, code, name, security_id):
    """Convert one raw SHFE instrument record into a day-kdata row dict."""
    # Falsy price / volume values are stored as 0.
    low_price = the_data['LOWESTPRICE'] or 0
    open_price = the_data['OPENPRICE'] or 0
    close_price = the_data['CLOSEPRICE'] or 0
    high_price = the_data['HIGHESTPRICE'] or 0
    volume = the_data['VOLUME'] or 0

    # String values in the change fields are treated as "no change".
    change = the_data['ZD1_CHG']
    if isinstance(change, str):
        change = 0
    change1 = the_data['ZD2_CHG']
    if isinstance(change1, str):
        change1 = 0

    pre_close = close_price - change
    pre_settlement = the_data['PRESETTLEMENTPRICE']

    # First trading day: there is no previous price to compare against, so
    # the percentage is 0 instead of a division by zero. An empty / missing
    # previous settlement price is handled the same way.
    change_pct = change / pre_close if pre_close != 0 else 0
    change_pct1 = change1 / pre_settlement if pre_settlement else 0

    return {
        "timestamp": to_time_str(the_date),
        "code": code,
        "name": name,
        "low": low_price,
        "open": open_price,
        "close": close_price,
        "high": high_price,
        "volume": volume,
        # Turnover is an estimate: mean of the four prices times the volume.
        "turnover":
        (low_price + open_price + close_price + high_price) / 4 * volume,
        "securityId": security_id,
        "preClose": pre_close,
        "change": change,
        "changePct": change_pct,
        "openInterest": the_data['OPENINTEREST'],
        "settlement": the_data['SETTLEMENTPRICE'],
        "preSettlement": the_data['PRESETTLEMENTPRICE'],
        "change1": change1,
        "changePct1": change_pct1
    }


def parse_shfe_day_data(force_parse=False):
    """Parse the cached SHFE daily future quotes into per-contract kdata CSVs.

    Every entry of the current year's ``day_kdata`` cache directory (except
    the ``parsed`` bookkeeping file) is read as JSON. For each record under
    ``o_curinstrument`` whose ``DELIVERYMONTH`` starts with four digits:

    * the contract is added to the SHFE future security list CSV when its
      code is not listed there yet;
    * a kdata row is appended to the contract's CSV unless the file already
      has an index entry equal to the cache file name.

    The names of handled cache files are recorded in the ``parsed`` file as a
    sorted JSON list, rewritten after each cache file.

    Args:
        force_parse: when true, every cache file is handled again; otherwise
            files already listed in ``parsed`` are skipped.
    """
    cache_dir = get_exchange_cache_dir(security_type='future',
                                       exchange='shfe',
                                       the_year=datetime.datetime.today().year,
                                       data_type="day_kdata")
    the_parsed_path = os.path.join(cache_dir, 'parsed')
    the_parsed = []
    if os.path.exists(the_parsed_path):
        with open(the_parsed_path) as data_file:
            the_parsed = json.load(data_file)

    the_dates = [
        f for f in os.listdir(cache_dir)
        if f != 'parsed' and (force_parse or f not in the_parsed)
    ]

    # date, code, name, low, open, close, high, volume (lots), turnover (yuan),
    # unique id, previous close, change, change pct (%), open interest,
    # settlement price, previous settlement, change (by settlement price),
    # change pct (by settlement price)
    KDATA_COLUMN_FUTURE = [
        'timestamp', 'code', 'name', 'low', 'open', 'close', 'high',
        'volume', 'turnover', 'securityId', 'preClose', 'change',
        'changePct', 'openInterest', 'settlement', 'preSettlement',
        'change1', 'changePct1'
    ]
    delivery_month_pattern = re.compile(r"\d{4}")

    for the_date in the_dates:
        the_path = os.path.join(cache_dir, the_date)
        logger.info("start handling %s", the_path)

        # Only the payload is needed, so the file is closed right after it
        # has been loaded.
        with open(the_path, 'r', encoding='UTF8') as f:
            the_datas = json.load(f)['o_curinstrument']

        for the_data in the_datas:
            # Example record:
            # {'CLOSEPRICE': 11480,
            #  'DELIVERYMONTH': '1809',
            #  'HIGHESTPRICE': 11555,
            #  'LOWESTPRICE': 11320,
            #  'OPENINTEREST': 425692,
            #  'OPENINTERESTCHG': 3918,
            #  'OPENPRICE': 11495,
            #  'ORDERNO': 0,
            #  'PRESETTLEMENTPRICE': 11545,
            #  'PRODUCTID': 'ru_f    ',
            #  'PRODUCTNAME': '天然橡胶            ',
            #  'PRODUCTSORTNO': 100,
            #  'SETTLEMENTPRICE': 11465,
            #  'VOLUME': 456574,
            #  'ZD1_CHG': -65,
            #  'ZD2_CHG': -80}

            # Records whose delivery month does not start with four digits
            # are skipped.
            if not delivery_month_pattern.match(the_data['DELIVERYMONTH']):
                continue

            product_id = the_data['PRODUCTID']
            code = "{}{}".format(product_id[:product_id.index('_')],
                                 the_data['DELIVERYMONTH'])
            logger.info("start handling %s for %s", code, the_date)

            name = get_future_name(code)
            security_id = "future_shfe_{}".format(code)
            security_item = {
                'code': code,
                'name': name,
                'id': security_id,
                'exchange': 'shfe',
                'type': 'future'
            }

            # Check whether the contract meta needs to be saved.
            security_list = get_security_list(security_type='future',
                                              exchanges=['shfe'])
            if security_list is None:
                security_list = pd.DataFrame()
            if 'code' in security_list.columns:
                security_list = security_list.set_index(
                    security_list['code'], drop=False)
            if code not in security_list.index:
                security_list = _shfe_append_row(security_list, security_item)
                security_list.to_csv(get_security_list_path('future', 'shfe'),
                                     index=False)

            kdata_path = get_kdata_path(item=security_item,
                                        source='exchange')
            # TODO: this logic should be handled in one place
            kdata_dir = get_kdata_dir(item=security_item)
            os.makedirs(kdata_dir, exist_ok=True)

            if os.path.exists(kdata_path):
                saved_df = pd.read_csv(kdata_path, dtype=str)
                saved_df = saved_df.set_index(saved_df['timestamp'],
                                              drop=False)
            else:
                saved_df = pd.DataFrame()

            # NOTE(review): the index holds the stored 'timestamp' strings
            # while the_date is the cache file name -- confirm both use the
            # same format, otherwise this check never matches.
            if not saved_df.empty and the_date in saved_df.index:
                continue

            kdata_row = _shfe_build_kdata_row(the_data, the_date, code, name,
                                              security_id)
            saved_df = _shfe_append_row(saved_df, kdata_row)
            saved_df = saved_df.loc[:, KDATA_COLUMN_FUTURE]
            saved_df = saved_df.drop_duplicates(subset='timestamp',
                                                keep='last')
            saved_df = saved_df.set_index(saved_df['timestamp'], drop=False)
            saved_df.index = pd.to_datetime(saved_df.index)
            saved_df = saved_df.sort_index()
            saved_df.to_csv(kdata_path, index=False)

            logger.info("end handling %s for %s", code, the_date)

            if the_date not in the_parsed:
                the_parsed.append(the_date)

        # Persist the bookkeeping after every cache file.
        if the_parsed:
            result_list = sorted(drop_duplicate(the_parsed))

            with open(the_parsed_path, 'w') as outfile:
                json.dump(result_list, outfile)
        logger.info("end handling %s", the_path)