Esempio n. 1
0
def init_env():
    """Create the directory layout below FOOLTRADER_STORE_PATH.

    When FOOLTRADER_STORE_PATH does not exist a hint is printed and nothing
    is created.  Otherwise this creates:

    * the directories of every security returned by
      ``get_security_list(exchanges=EXCHANGE_LIST_COL)`` (via mkdir_for_stock),
    * the kdata directory of every sh/sz/nasdaq index,
    * the cache and data directories of the shfe, dce and zce future
      exchanges, plus the shfe "day_kdata" cache directory of the current year.
    """
    if not os.path.exists(FOOLTRADER_STORE_PATH):
        # The placeholder used to be printed literally; show the actual path.
        print("{} is a wrong path".format(FOOLTRADER_STORE_PATH))
        print("please set env FOOLTRADER_STORE_PATH to working path or set it in settings.py")
        return

    # Initialise the stock directories.
    for _, item in get_security_list(exchanges=EXCHANGE_LIST_COL).iterrows():
        mkdir_for_stock(item)

    # Initialise the index directories.
    for _, item in get_security_list(security_type='index', exchanges=['sh', 'sz', 'nasdaq']).iterrows():
        kdata_dir = get_kdata_dir(item)
        if not os.path.exists(kdata_dir):
            os.makedirs(kdata_dir)

    # Initialise the future directories.
    for exchange in ['shfe', 'dce', 'zce']:
        exchange_cache_dir = get_exchange_cache_dir(security_type='future', exchange=exchange)
        if not os.path.exists(exchange_cache_dir):
            os.makedirs(exchange_cache_dir)

        exchange_dir = get_exchange_dir(security_type='future', exchange=exchange)
        if not os.path.exists(exchange_dir):
            os.makedirs(exchange_dir)

    # This directory does not depend on the loop variable, so it is created
    # once instead of once per exchange.
    # NOTE(review): only 'shfe' gets a day_kdata cache dir - confirm that
    # dce/zce are intentionally left out.
    day_kdata_cache_dir = get_exchange_cache_dir(security_type='future', exchange='shfe',
                                                 the_year=datetime.today().year,
                                                 data_type="day_kdata")
    if not os.path.exists(day_kdata_cache_dir):
        os.makedirs(day_kdata_cache_dir)
Esempio n. 2
0
def check_convert_result():
    """Check every merged kdata file against the files it was merged from.

    For each security and for both 'bfq' and 'hfq', the merged file at
    ``get_kdata_path`` is loaded (if present), the remaining csv files in the
    kdata directory whose name does not contain 'day' are concatenated, and
    the two frames are handed to ``assert_df``.
    """
    for _, security in get_security_list().iterrows():
        for fuquan in ('bfq', 'hfq'):
            merged_path = get_kdata_path(security, fuquan=fuquan)
            if not os.path.exists(merged_path):
                continue

            expected = pd.read_csv(merged_path)

            # The hfq files carry a different column set than the bfq ones.
            columns = (data_contract.KDATA_COLUMN_FQ if fuquan == 'hfq'
                       else data_contract.KDATA_COLUMN)
            combined = pd.DataFrame(columns=columns)

            kdata_dir = get_kdata_dir(security, fuquan=fuquan)
            if not os.path.exists(kdata_dir):
                continue

            # Collect the part files first, then read them.
            part_paths = []
            for entry in os.listdir(kdata_dir):
                if 'day' in entry or 'csv' not in entry:
                    continue
                candidate = os.path.join(kdata_dir, entry)
                if os.path.isfile(candidate):
                    part_paths.append(candidate)

            for part_path in part_paths:
                combined = combined.append(pd.read_csv(part_path), ignore_index=True)

            assert_df(combined, expected)
            logger.info("{} merge as one ok".format(security['code']))
Esempio n. 3
0
def init_markets(exchanges=CRYPTOCURRENCY_EXCHANGES):
    """Fetch and persist the market list of the requested ccxt exchanges.

    For every id present in both ``ccxt.exchanges`` and ``exchanges`` this
    creates the exchange directory, instantiates the ccxt exchange and calls
    ``fetch_markets()``.  For each market it creates the kdata directory and
    dumps the market's meta information as JSON; finally the security list of
    the exchange is written as CSV.

    A failure while handling one exchange is logged and the remaining
    exchanges are still processed.

    :param exchanges: iterable of ccxt exchange ids to initialise.
    """
    for exchange_str in set(ccxt.exchanges) & set(exchanges):
        exchange_dir = get_exchange_dir(security_type='cryptocurrency', exchange=exchange_str)

        # Create the exchange directory.
        if not os.path.exists(exchange_dir):
            os.makedirs(exchange_dir)

        # exchange_str comes from ccxt.exchanges, so a plain attribute lookup
        # replaces the former eval() of a formatted string.
        exchange = getattr(ccxt, exchange_str)()
        try:
            markets = exchange.fetch_markets()

            # Some exchanges return a dict keyed by symbol, others a list.
            if isinstance(markets, dict):
                market_entries = markets.items()
            elif isinstance(markets, list):
                market_entries = ((market['symbol'], market) for market in markets)
            else:
                # Not inside an exception handler, so use error() rather than
                # exception(); skip this exchange but keep handling the others.
                logger.error("unknown return markets type {}".format(type(markets)))
                continue

            df = pd.DataFrame()

            for name, security_info in market_entries:
                code = name.replace('/', "-")

                security_item = generate_security_item(security_type='cryptocurrency', exchange=exchange_str,
                                                       code=code,
                                                       name=name, list_date=None)

                kdata_dir = get_kdata_dir(security_item)

                if not os.path.exists(kdata_dir):
                    os.makedirs(kdata_dir)

                df = df.append(security_item, ignore_index=True)

                logger.info("init_markets,exchange:{} security:{}".format(exchange_str, security_item))

                # Store the meta information of the cryptocurrency.
                if security_info:
                    with open(get_security_meta_path(security_type='cryptocurrency', exchange=exchange_str,
                                                     code=code), "w") as f:
                        json.dump(security_info, f, ensure_ascii=False)

            # Store the cryptocurrency list of this exchange.
            if not df.empty:
                df.to_csv(get_security_list_path(security_type='cryptocurrency', exchange=exchange_str),
                          index=False)
            logger.info("init_markets for {} success".format(exchange_str))
        except Exception:
            # exception() already records the active traceback; passing the
            # exception as an extra positional argument broke log formatting.
            logger.exception("init_markets for {} failed".format(exchange_str))
Esempio n. 4
0
def mkdir_for_security(item):
    """Create the finance, tick, event and bfq/hfq kdata directories of *item*.

    Each directory is only created when nothing exists at its path yet.
    """
    # (resolver, extra positional args) pairs, in creation order.
    dir_specs = (
        (get_finance_dir, ()),
        (get_tick_dir, ()),
        (get_event_dir, ()),
        (get_kdata_dir, ('bfq',)),
        (get_kdata_dir, ('hfq',)),
    )
    for resolve_dir, extra_args in dir_specs:
        target = resolve_dir(item, *extra_args)
        if os.path.exists(target):
            continue
        os.makedirs(target)
Esempio n. 5
0
def mkdir_for_security(item):
    """Make sure all data directories of the security *item* exist.

    Covers the finance, tick and event directories and the kdata directory
    for both 'bfq' and 'hfq'.
    """
    def ensure_dir(path):
        # Leave anything that already exists at this path untouched.
        if not os.path.exists(path):
            os.makedirs(path)

    ensure_dir(get_finance_dir(item))
    ensure_dir(get_tick_dir(item))
    ensure_dir(get_event_dir(item))
    for fuquan in ('bfq', 'hfq'):
        ensure_dir(get_kdata_dir(item, fuquan))
Esempio n. 6
0
def init_env():
    """Create the file store root and the per-security directories.

    Creates ``settings.FILES_STORE`` when missing, runs ``mkdir_for_security``
    for every row of ``get_security_list()`` and creates the kdata directory
    of every index security.
    """
    store_root = settings.FILES_STORE
    if not os.path.exists(store_root):
        os.makedirs(store_root)

    # Stock directories.
    stocks = get_security_list()
    for _, stock in stocks.iterrows():
        mkdir_for_security(stock)

    # Index directories.
    indexes = get_security_list(security_type='index')
    for _, index_item in indexes.iterrows():
        index_kdata_dir = get_kdata_dir(index_item)
        if os.path.exists(index_kdata_dir):
            continue
        os.makedirs(index_kdata_dir)
Esempio n. 7
0
def init_env():
    """Create the file store root and the per-security directories.

    Creates ``settings.FILES_STORE`` when missing, runs ``mkdir_for_security``
    for every security of the exchanges in EXCHANGE_LIST_COL and creates the
    kdata directory of every sh/sz/nasdaq index.
    """
    if not os.path.exists(settings.FILES_STORE):
        os.makedirs(settings.FILES_STORE)
    # Initialise the stock directories.
    for _, item in get_security_list(exchanges=EXCHANGE_LIST_COL).iterrows():
        mkdir_for_security(item)

    # Initialise the index directories.
    for _, item in get_security_list(security_type='index', exchanges=['sh', 'sz', 'nasdaq']).iterrows():
        kdata_dir = get_kdata_dir(item)
        if not os.path.exists(kdata_dir):
            os.makedirs(kdata_dir)

    # NOTE(review): this helper is defined inside init_env but never called
    # here; it looks like it was meant to be a module-level function.
    def merge_kdata_to_one(security_item=None, replace=False, fuquan='bfq'):
        """Merge the partial sina kdata files of a security into one file.

        :param security_item: the security to merge; ``None`` merges every
            security returned by ``get_security_list()``.
        :param replace: when true, overwrite the day kdata file with the
            merged data; otherwise merge into the current kdata via
            ``StockKDataSinaSpider.merge_to_current_kdata``.
        :param fuquan: 'bfq' or 'hfq'; a falsy value handles both.
        """
        # Comparing type(...) with the string 'NoneType' was always true, so
        # the "all securities" branch could never be reached.
        if security_item is not None:
            items = pd.DataFrame().append(security_item).iterrows()
        else:
            items = get_security_list().iterrows()

        fuquans = [fuquan] if fuquan else ['bfq', 'hfq']

        for _, the_item in items:
            for the_fuquan in fuquans:
                dayk_path = get_kdata_path(the_item,
                                           source='sina',
                                           fuquan=the_fuquan)
                if the_fuquan == 'hfq':
                    df = pd.DataFrame(
                        columns=data_contract.KDATA_COLUMN_SINA_FQ)
                else:
                    df = pd.DataFrame(columns=data_contract.KDATA_COLUMN_SINA)

                the_dir = get_kdata_dir(the_item, fuquan=the_fuquan)

                # Reset on every pass: otherwise a missing directory left
                # `files` unbound or pointing at files that were already
                # removed in the previous pass.
                files = []
                if os.path.exists(the_dir):
                    files = [
                        os.path.join(the_dir, f) for f in os.listdir(the_dir)
                        if ('dayk.csv' not in f
                            and os.path.isfile(os.path.join(the_dir, f)))
                    ]
                    for f in files:
                        df = df.append(pd.read_csv(f, dtype=str),
                                       ignore_index=True)
                if df.size > 0:
                    df = df.set_index(df['timestamp'])
                    df.index = pd.to_datetime(df.index)
                    df = df.sort_index()
                    logger.info("{} to {}".format(the_item['code'],
                                                  dayk_path))
                    if replace:
                        df.to_csv(dayk_path, index=False)
                    else:
                        StockKDataSinaSpider.merge_to_current_kdata(
                            the_item, df, fuquan=the_fuquan)

                # The partial files are removed once they have been merged.
                for f in files:
                    logger.info("remove {}".format(f))
                    os.remove(f)

                if the_fuquan == 'hfq':
                    StockKDataSinaSpider.add_factor_to_163(the_item)
Esempio n. 9
0
def init_env():
    """Create the store root and the per-security directories.

    Creates ``settings.FOOLTRADER_STORE_PATH`` when missing, runs
    ``mkdir_for_security`` for every security of the exchanges in
    EXCHANGE_LIST_COL and creates the kdata directory of every sh/sz/nasdaq
    index.
    """
    store_root = settings.FOOLTRADER_STORE_PATH
    if not os.path.exists(store_root):
        os.makedirs(store_root)

    # Stock directories.
    stocks = get_security_list(exchanges=EXCHANGE_LIST_COL)
    for _, stock in stocks.iterrows():
        mkdir_for_security(stock)

    # Index directories.
    index_exchanges = ['sh', 'sz', 'nasdaq']
    indexes = get_security_list(security_type='index', exchanges=index_exchanges)
    for _, index_item in indexes.iterrows():
        index_kdata_dir = get_kdata_dir(index_item)
        if os.path.exists(index_kdata_dir):
            continue
        os.makedirs(index_kdata_dir)
Esempio n. 10
0
def merge_kdata_to_one(security_item=None, replace=False, fuquan='bfq'):
    """Merge the partial kdata files of a security into its day kdata file.

    Every file in the kdata directory whose name does not contain 'dayk.csv'
    is read, the rows are sorted by timestamp and either written to the sina
    day kdata path or merged into the current kdata.  The partial files are
    removed afterwards.

    :param security_item: the security to merge; ``None`` merges every
        security returned by ``get_security_list()``.
    :param replace: when true, overwrite the day kdata file with the merged
        data; otherwise call ``merge_to_current_kdata``.
    :param fuquan: 'bfq' or 'hfq'; a falsy value handles both.
    """
    # Comparing type(...) with the string 'NoneType' was always true, so the
    # "all securities" branch could never be reached.
    if security_item is not None:
        items = pd.DataFrame().append(security_item).iterrows()
    else:
        items = get_security_list().iterrows()

    fuquans = [fuquan] if fuquan else ['bfq', 'hfq']

    for _, the_item in items:
        for the_fuquan in fuquans:
            dayk_path = get_kdata_path(the_item, source='sina', fuquan=the_fuquan)
            if the_fuquan == 'hfq':
                df = pd.DataFrame(columns=data_contract.KDATA_COLUMN_FQ)
            else:
                df = pd.DataFrame(columns=data_contract.KDATA_COLUMN)

            the_dir = get_kdata_dir(the_item, fuquan=the_fuquan)

            # Reset on every pass: otherwise a missing directory left `files`
            # unbound or pointing at files already removed in the last pass.
            files = []
            if os.path.exists(the_dir):
                files = [os.path.join(the_dir, f) for f in os.listdir(the_dir) if
                         ('dayk.csv' not in f and os.path.isfile(os.path.join(the_dir, f)))]
                for f in files:
                    df = df.append(pd.read_csv(f, dtype=str), ignore_index=True)
            if df.size > 0:
                df = df.set_index(df['timestamp'])
                df.index = pd.to_datetime(df.index)
                df = df.sort_index()
                logger.info("{} to {}".format(the_item['code'], dayk_path))
                if replace:
                    df.to_csv(dayk_path, index=False)
                else:
                    merge_to_current_kdata(the_item, df, fuquan=the_fuquan)

            # The partial files are removed once they have been merged.
            for f in files:
                logger.info("remove {}".format(f))
                os.remove(f)

            if the_fuquan == 'hfq':
                add_factor_to_163(the_item)
Esempio n. 11
0
def init_env():
    """Create the per-security directories below FOOLTRADER_STORE_PATH.

    When FOOLTRADER_STORE_PATH does not exist a hint is printed and nothing
    is created.  Otherwise ``mkdir_for_security`` is run for every security
    of the exchanges in EXCHANGE_LIST_COL and the kdata directory of every
    sh/sz/nasdaq index is created.
    """
    if not os.path.exists(FOOLTRADER_STORE_PATH):
        # The placeholder used to be printed literally; show the actual path.
        print("{} is a wrong path".format(FOOLTRADER_STORE_PATH))
        print(
            "please set env FOOLTRADER_STORE_PATH to working path or set it in settings.py"
        )
        return

    # Initialise the stock directories.
    for _, item in get_security_list(
            exchanges=EXCHANGE_LIST_COL).iterrows():
        mkdir_for_security(item)

    # Initialise the index directories.
    for _, item in get_security_list(security_type='index',
                                     exchanges=['sh', 'sz',
                                                'nasdaq']).iterrows():
        kdata_dir = get_kdata_dir(item)
        if not os.path.exists(kdata_dir):
            os.makedirs(kdata_dir)
Esempio n. 12
0
def check_convert_result():
    """Compare each merged kdata file with the concatenation of its parts.

    Runs over every security for 'bfq' and 'hfq'.  When the merged file
    exists, the csv files of the kdata directory whose name does not contain
    'day' are appended to one frame, which is passed to ``assert_df`` together
    with the merged file's content.
    """
    for _, security in get_security_list().iterrows():
        for fuquan in ('bfq', 'hfq'):
            merged_path = get_kdata_path(security, fuquan=fuquan)
            if not os.path.exists(merged_path):
                continue
            merged = pd.read_csv(merged_path)

            if fuquan == 'hfq':
                columns = data_contract.KDATA_COLUMN_FQ
            else:
                columns = data_contract.KDATA_COLUMN
            rebuilt = pd.DataFrame(columns=columns)

            parts_dir = get_kdata_dir(security, fuquan=fuquan)
            if not os.path.exists(parts_dir):
                continue

            def is_part_file(file_name):
                # A part is a regular csv file that is not a merged "day" file.
                return ('day' not in file_name and 'csv' in file_name
                        and os.path.isfile(os.path.join(parts_dir, file_name)))

            part_names = [n for n in os.listdir(parts_dir) if is_part_file(n)]
            for part_name in part_names:
                part = pd.read_csv(os.path.join(parts_dir, part_name))
                rebuilt = rebuilt.append(part, ignore_index=True)

            assert_df(rebuilt, merged)
            logger.info("{} merge as one ok".format(security['code']))
Esempio n. 13
0
def parse_shfe_data(force_parse=False):
    """Parse the cached SHFE spreadsheets into per-contract kdata files.

    Every ``.zip`` in the shfe future cache directory that has no sibling
    ``.xls`` yet is unzipped and, when it holds exactly one ``.xls``, that
    file is moved next to the archive and queued for parsing.  Each queued
    spreadsheet is split by contract; the rows of a contract are appended to
    whatever is already stored at its kdata path and handed to
    ``kdata_df_save``.  Contracts missing from the shfe future security list
    are added to it.

    :param force_parse: when true, parse every ``.xls`` in the cache
        directory instead of only the ones extracted by this call.
    """
    the_dir = get_exchange_cache_dir(security_type='future', exchange='shfe')

    need_parse_files = []

    for the_zip_file in [
            os.path.join(the_dir, f) for f in os.listdir(the_dir)
            if f.endswith('.zip')
    ]:
        # Strip only the extension; str.replace would also rewrite a '.zip'
        # that happens to appear earlier in the path.
        dst_dir = os.path.splitext(the_zip_file)[0]
        dst_file = dst_dir + ".xls"

        if not os.path.exists(dst_file):
            # The directory may be left over from an earlier, unfinished run.
            if not os.path.exists(dst_dir):
                os.makedirs(dst_dir)

            unzip(the_zip_file, dst_dir)
            files = [
                os.path.join(dst_dir, f) for f in os.listdir(dst_dir)
                if f.endswith('.xls')
            ]
            if len(files) == 1:
                os.rename(files[0], dst_file)
                need_parse_files.append(dst_file)
            else:
                # dst_file was not created, so queueing it would only make
                # the parse step fail on a missing file.
                logger.warning("expected one .xls in {} but found {}, skip it".format(
                    the_zip_file, len(files)))

    if force_parse:
        need_parse_files = [
            os.path.join(the_dir, f) for f in os.listdir(the_dir)
            if f.endswith('.xls')
        ]
    for the_file in need_parse_files:
        logger.info("parse {}".format(the_file))

        df = pd.read_excel(the_file,
                           skiprows=2,
                           skip_footer=4,
                           index_col='合约',
                           converters={'日期': str})
        # Blank index cells are filled from the row above.
        df.index = pd.Series(df.index).ffill()
        df = df.loc[:, [
            '日期', '前收盘', '前结算', '开盘价', '最高价', '最低价', '收盘价', '结算价', '涨跌1',
            '涨跌2', '成交量', '成交金额', '持仓量'
        ]]
        df.columns = [
            'timestamp', 'preClose', 'preSettlement', 'open', 'high', 'low',
            'close', 'settlement', 'change', 'change1', 'volume', 'turnover',
            'openInterest'
        ]

        # Unify the date format so the data can be imported into es.
        # df.timestamp = df.timestamp.apply(lambda x: to_time_str(x))

        unique_index = df.index.drop_duplicates()

        security_list = get_security_list(security_type='future',
                                          exchanges=['shfe'])

        for the_contract in unique_index:
            logger.info("start handling {} in {}".format(
                the_contract, the_file))
            security_item = {
                'code': the_contract,
                'name': get_future_name(the_contract),
                'id': 'future_{}_{}'.format('shfe', the_contract),
                'exchange': 'shfe',
                'type': 'future'
            }
            # Check whether the contract meta needs to be saved.
            if (not security_list.empty) and ('code' in security_list.columns):
                security_list = security_list.set_index(security_list['code'],
                                                        drop=False)
            if the_contract not in security_list.index:
                security_list = security_list.append(security_item,
                                                     ignore_index=True)
                security_list = security_list.sort_index()
                security_list.to_csv(get_security_list_path('future', 'shfe'),
                                     index=False)

            # The list selector always yields a DataFrame (also for a single
            # row) and the copy keeps the column assignments below from
            # touching a slice of df.
            the_df = df.loc[[the_contract]].copy()
            the_df['code'] = the_contract
            the_df['name'] = get_future_name(the_contract)
            the_df['securityId'] = 'future_{}_{}'.format('shfe', the_contract)
            the_df['changePct'] = the_df['change'] / the_df['preClose']
            the_df['changePct1'] = the_df['change1'] / the_df['preSettlement']

            kdata_path = get_kdata_path(item=security_item, source='exchange')
            # TODO: this logic should be handled in one place.
            kdata_dir = get_kdata_dir(item=security_item)
            if not os.path.exists(kdata_dir):
                os.makedirs(kdata_dir)

            if os.path.exists(kdata_path):
                saved_df = pd.read_csv(kdata_path, dtype=str)
            else:
                saved_df = pd.DataFrame()

            saved_df = saved_df.append(the_df, ignore_index=True)
            saved_df = saved_df.loc[:, KDATA_FUTURE_COL]

            if not saved_df.empty:
                kdata_df_save(saved_df, kdata_path)

            logger.info("end handling {} in {}".format(the_contract, the_file))
Esempio n. 14
0
def parse_shfe_day_data(force_parse=False):
    """Parse the cached SHFE daily JSON files into per-contract kdata files.

    Each file in this year's shfe "day_kdata" cache directory (except the
    'parsed' bookkeeping file) holds the instruments of one day under the key
    'o_curinstrument'.  For every instrument with a four-digit delivery month
    a kdata row is built and written to the contract's kdata path, and the
    contract is added to the shfe future security list when missing.  The
    names of handled files are recorded in the 'parsed' file.

    :param force_parse: when true, also handle files already listed in the
        'parsed' file.
    """
    # date, code, name, low, open, close, high, volume (lots), turnover (yuan),
    # unique id, previous close, change, change pct, open interest,
    # settlement, previous settlement, change (by settlement),
    # change pct (by settlement)
    KDATA_COLUMN_FUTURE = [
        'timestamp', 'code', 'name', 'low', 'open', 'close', 'high',
        'volume', 'turnover', 'securityId', 'preClose', 'change',
        'changePct', 'openInterest', 'settlement', 'preSettlement',
        'change1', 'changePct1'
    ]

    cache_dir = get_exchange_cache_dir(security_type='future',
                                       exchange='shfe',
                                       the_year=datetime.datetime.today().year,
                                       data_type="day_kdata")
    the_parsed_path = os.path.join(cache_dir, 'parsed')
    the_parsed = []
    if os.path.exists(the_parsed_path):
        with open(the_parsed_path) as data_file:
            the_parsed = json.load(data_file)

    the_dates = [
        f for f in os.listdir(cache_dir)
        if f != 'parsed' and (force_parse or f not in the_parsed)
    ]

    for the_date in the_dates:
        the_path = os.path.join(cache_dir, the_date)
        logger.info("start handling {}".format(the_path))

        with open(the_path, 'r', encoding='UTF8') as f:
            the_datas = json.load(f)['o_curinstrument']

        for the_data in the_datas:
            # Sample entry:
            # {'CLOSEPRICE': 11480,
            #  'DELIVERYMONTH': '1809',
            #  'HIGHESTPRICE': 11555,
            #  'LOWESTPRICE': 11320,
            #  'OPENINTEREST': 425692,
            #  'OPENINTERESTCHG': 3918,
            #  'OPENPRICE': 11495,
            #  'ORDERNO': 0,
            #  'PRESETTLEMENTPRICE': 11545,
            #  'PRODUCTID': 'ru_f    ',
            #  'PRODUCTNAME': '<product name, space padded>',
            #  'PRODUCTSORTNO': 100,
            #  'SETTLEMENTPRICE': 11465,
            #  'VOLUME': 456574,
            #  'ZD1_CHG': -65,
            #  'ZD2_CHG': -80}

            # Entries whose delivery month does not start with four digits
            # are skipped.
            if not re.match(r"\d{4}", the_data['DELIVERYMONTH']):
                continue

            code = "{}{}".format(
                the_data['PRODUCTID'][:the_data['PRODUCTID'].index('_')],
                the_data['DELIVERYMONTH'])
            logger.info("start handling {} for {}".format(code, the_date))

            name = get_future_name(code)
            security_id = "future_shfe_{}".format(code)

            security_list = get_security_list(security_type='future',
                                              exchanges=['shfe'])

            security_item = {
                'code': code,
                'name': name,
                'id': security_id,
                'exchange': 'shfe',
                'type': 'future'
            }
            # Check whether the contract meta needs to be saved.
            if security_list is not None and 'code' in security_list.columns:
                security_list = security_list.set_index(
                    security_list['code'], drop=False)
            if code not in security_list.index:
                security_list = security_list.append(security_item,
                                                     ignore_index=True)
                security_list.to_csv(get_security_list_path(
                    'future', 'shfe'),
                                     index=False)

            kdata_path = get_kdata_path(item=security_item,
                                        source='exchange')
            # TODO: this logic should be handled in one place.
            kdata_dir = get_kdata_dir(item=security_item)
            if not os.path.exists(kdata_dir):
                os.makedirs(kdata_dir)

            if os.path.exists(kdata_path):
                saved_df = pd.read_csv(kdata_path, dtype=str)
                saved_df = saved_df.set_index(saved_df['timestamp'],
                                              drop=False)
            else:
                saved_df = pd.DataFrame()

            if saved_df.empty or the_date not in saved_df.index:
                # Missing/empty values are stored as 0.
                low_price = the_data['LOWESTPRICE'] or 0
                open_price = the_data['OPENPRICE'] or 0
                close_price = the_data['CLOSEPRICE'] or 0
                high_price = the_data['HIGHESTPRICE'] or 0
                volume = the_data['VOLUME'] or 0

                # A string in a change field is treated as "no change".
                change = the_data['ZD1_CHG']
                if isinstance(change, str):
                    change = 0
                change1 = the_data['ZD2_CHG']
                if isinstance(change1, str):
                    change1 = 0

                pre_close = close_price - change
                pre_settlement = the_data['PRESETTLEMENTPRICE']

                # First trading day: no previous price to relate to.
                change_pct = change / pre_close if pre_close != 0 else 0
                # The truthiness test also covers an empty/None previous
                # settlement, which would otherwise raise on division.
                change_pct1 = change1 / pre_settlement if pre_settlement else 0

                kdata_row = {
                    "timestamp": to_time_str(the_date),
                    "code": code,
                    "name": name,
                    "low": low_price,
                    "open": open_price,
                    "close": close_price,
                    "high": high_price,
                    "volume": volume,
                    # Turnover is an estimate: mean of the four prices times
                    # volume.  The division used to bind to high_price only.
                    "turnover":
                    (low_price + open_price + close_price + high_price) / 4
                    * volume,
                    "securityId": security_id,
                    "preClose": pre_close,
                    "change": change,
                    "changePct": change_pct,
                    "openInterest": the_data['OPENINTEREST'],
                    "settlement": the_data['SETTLEMENTPRICE'],
                    "preSettlement": the_data['PRESETTLEMENTPRICE'],
                    "change1": change1,
                    "changePct1": change_pct1
                }
                saved_df = saved_df.append(kdata_row, ignore_index=True)
                saved_df = saved_df.loc[:, KDATA_COLUMN_FUTURE]
                saved_df = saved_df.drop_duplicates(subset='timestamp',
                                                    keep='last')
                saved_df = saved_df.set_index(saved_df['timestamp'],
                                              drop=False)
                saved_df.index = pd.to_datetime(saved_df.index)
                saved_df = saved_df.sort_index()
                saved_df.to_csv(kdata_path, index=False)

                logger.info("end handling {} for {}".format(
                    code, the_date))

                if the_date not in the_parsed:
                    the_parsed.append(the_date)

        # Persist the bookkeeping after every file.
        if the_parsed:
            result_list = drop_duplicate(the_parsed)
            result_list = sorted(result_list)

            with open(the_parsed_path, 'w') as outfile:
                json.dump(result_list, outfile)
        logger.info("end handling {}".format(the_path))