コード例 #1
0
def fetch_kdata(exchange_str='bitstamp'):
    """Fetch daily OHLCV bars from a ccxt exchange and save them per security.

    For every cryptocurrency security listed for ``exchange_str`` whose name
    is in ``CRYPTOCURRENCY_PAIR``, the daily ('1d') bars are requested through
    ccxt, bars dated today are dropped, the remaining bars are appended to the
    frame returned by ``get_latest_download_trading_date`` and the result is
    written with ``kdata_df_save``.

    A failure on one security is logged (with traceback) and does not stop
    the remaining securities from being processed.

    :param exchange_str: name of the exchange class exposed by the ``ccxt``
        module, e.g. ``'bitstamp'``.
    """
    # Plain attribute lookup instead of eval(): same result for a valid
    # exchange name, without executing an arbitrary expression string.
    ccxt_exchange = getattr(ccxt, exchange_str)()

    if not ccxt_exchange.has['fetchOHLCV']:
        logger.warning("exchange:{} not support fetchOHLCV".format(exchange_str))
        return

    security_list = get_security_list(security_type='cryptocurrency', exchanges=[exchange_str])
    for _, security_item in security_list.iterrows():
        try:
            if security_item['name'] not in CRYPTOCURRENCY_PAIR:
                continue

            start_date, df = get_latest_download_trading_date(security_item)
            # Daily bars are only fetched up to yesterday.
            end_date = pd.Timestamp.today() - pd.DateOffset(1)

            if start_date and (start_date > end_date):
                logger.info("{} kdata is ok".format(security_item['code']))
                continue

            try:
                kdatas = ccxt_exchange.fetch_ohlcv(security_item['name'], timeframe='1d')
            except Exception:
                # logger.exception already records the active exception and
                # its traceback; no extra positional argument is needed.
                logger.exception("fetch_kdata for {} {} failed".format(exchange_str, security_item['name']))
                continue
            else:
                # for rateLimit
                time.sleep(5)

            today = pd.Timestamp.today()
            rows = []
            for kdata in kdatas:
                # kdata[0] is divided by 1000 before being used as a
                # timestamp in seconds.
                timestamp = pd.Timestamp.fromtimestamp(int(kdata[0] / 1000))
                # Skip the bar dated today.
                if is_same_date(timestamp, today):
                    continue
                rows.append({
                    'timestamp': to_time_str(timestamp),
                    'code': security_item['code'],
                    'name': security_item['name'],
                    'open': kdata[1],
                    'high': kdata[2],
                    'low': kdata[3],
                    'close': kdata[4],
                    'volume': kdata[5],
                    'securityId': security_item['id'],
                    'preClose': None,
                    'change': None,
                    'changePct': None,
                })

            # Concatenate once instead of growing the frame row by row.
            if rows:
                df = pd.concat([df, pd.DataFrame(rows)], ignore_index=True)

            if not df.empty:
                df = df.loc[:, KDATA_COMMON_COL]
                kdata_df_save(df, get_kdata_path(security_item), calculate_change=True)
                logger.info(
                    "fetch_kdata for exchange:{} security:{} success".format(exchange_str, security_item['name']))
        except Exception:
            # Keep going with the next security, but keep the failure reason
            # and traceback in the log.
            logger.exception(
                "fetch_kdata for exchange:{} security:{} failed".format(exchange_str, security_item['name']))
コード例 #2
0
def parse_shfe_data(force_parse=False):
    """Unpack cached SHFE archives and merge their rows into per-contract kdata.

    Every ``.zip`` in the SHFE future cache directory that has no sibling
    ``.xls`` yet is extracted into a directory named after the archive; when
    exactly one ``.xls`` is found there it is moved next to the archive and
    queued for parsing. Each queued workbook is read with pandas, split by
    contract, and the rows are appended to the contract's saved kdata CSV.
    Contracts missing from the security list are added to it and the list is
    rewritten.

    :param force_parse: when true, parse every ``.xls`` in the cache directory
        instead of only the ones extracted during this call.
    """
    the_dir = get_exchange_cache_dir(security_type='future', exchange='shfe')

    need_parse_files = []

    for zip_name in os.listdir(the_dir):
        if not zip_name.endswith('.zip'):
            continue

        the_zip_file = os.path.join(the_dir, zip_name)
        # Strip only the trailing extension; str.replace would also rewrite
        # any other '.zip' occurrence in the path.
        dst_dir = os.path.splitext(the_zip_file)[0]
        dst_file = dst_dir + '.xls'

        if os.path.exists(dst_file):
            continue

        # The directory may be left over from an earlier run.
        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir)

        unzip(the_zip_file, dst_dir)
        files = [
            os.path.join(dst_dir, f) for f in os.listdir(dst_dir)
            if f.endswith('.xls')
        ]
        if len(files) != 1:
            # Without exactly one workbook there is nothing to move to
            # dst_file, so queuing it would only fail later in read_excel.
            logger.warning("expected one .xls in {} but found {}, skip".format(
                dst_dir, len(files)))
            continue

        os.rename(files[0], dst_file)
        need_parse_files.append(dst_file)

    if force_parse:
        need_parse_files = [
            os.path.join(the_dir, f) for f in os.listdir(the_dir)
            if f.endswith('.xls')
        ]

    for the_file in need_parse_files:
        logger.info("parse {}".format(the_file))

        # NOTE(review): recent pandas releases name this keyword `skipfooter`;
        # confirm against the pandas version this project pins before renaming.
        df = pd.read_excel(the_file,
                           skiprows=2,
                           skip_footer=4,
                           index_col='合约',
                           converters={'日期': str})
        # Blank index cells inherit the last contract value above them.
        df.index = pd.Series(df.index).ffill()
        df = df.loc[:, [
            '日期', '前收盘', '前结算', '开盘价', '最高价', '最低价', '收盘价', '结算价', '涨跌1',
            '涨跌2', '成交量', '成交金额', '持仓量'
        ]]
        df.columns = [
            'timestamp', 'preClose', 'preSettlement', 'open', 'high', 'low',
            'close', 'settlement', 'change', 'change1', 'volume', 'turnover',
            'openInterest'
        ]

        # Unify the date format so it is easy to import into es
        # df.timestamp = df.timestamp.apply(lambda x: to_time_str(x))

        unique_index = df.index.drop_duplicates()

        security_list = get_security_list(security_type='future',
                                          exchanges=['shfe'])

        for the_contract in unique_index:
            logger.info("start handling {} in {}".format(
                the_contract, the_file))
            contract_name = get_future_name(the_contract)
            security_id = 'future_{}_{}'.format('shfe', the_contract)
            security_item = {
                'code': the_contract,
                'name': contract_name,
                'id': security_id,
                'exchange': 'shfe',
                'type': 'future'
            }
            # Check whether the contract meta needs to be saved
            if (not security_list.empty) and ('code' in security_list.columns):
                security_list = security_list.set_index(security_list['code'],
                                                        drop=False)
            if the_contract not in security_list.index:
                security_list = pd.concat(
                    [security_list, pd.DataFrame([security_item])],
                    ignore_index=True)
                security_list = security_list.sort_index()
                security_list.to_csv(get_security_list_path('future', 'shfe'),
                                     index=False)

            # A list selector always yields a DataFrame (even for a contract
            # with a single row); copy so the column assignments below do not
            # write into a view of df.
            the_df = df.loc[[the_contract]].copy()
            the_df['code'] = the_contract
            the_df['name'] = contract_name
            the_df['securityId'] = security_id
            the_df['changePct'] = the_df['change'] / the_df['preClose']
            the_df['changePct1'] = the_df['change1'] / the_df['preSettlement']

            kdata_path = get_kdata_path(item=security_item, source='exchange')
            # TODO: this logic should be handled in one place
            kdata_dir = get_kdata_dir(item=security_item)
            if not os.path.exists(kdata_dir):
                os.makedirs(kdata_dir)

            if os.path.exists(kdata_path):
                saved_df = pd.read_csv(kdata_path, dtype=str)
            else:
                saved_df = pd.DataFrame()

            saved_df = pd.concat([saved_df, the_df], ignore_index=True)
            saved_df = saved_df.loc[:, KDATA_FUTURE_COL]

            if not saved_df.empty:
                kdata_df_save(saved_df, kdata_path)

            logger.info("end handling {} in {}".format(the_contract, the_file))