예제 #1
0
def tdx_bundle(assets,
               ingest_minute,  # whether to ingest minute data, default False
               environ,
               asset_db_writer,
               minute_bar_writer,
               daily_bar_writer,
               adjustment_writer,
               calendar,
               start_session,
               end_session,
               cache,
               show_progress,
               output_dir):
    """Ingest daily (and optionally minute) bars plus asset metadata and adjustments.

    An ``Engine`` is connected, the symbol table is fetched for ``assets``,
    and for every symbol the bars returned by ``fetch_single_equity`` are
    reindexed onto ``calendar`` and streamed to the bar writers.  Per-symbol
    metadata collected from the daily bars is concatenated column-wise onto
    the symbol table before it is written with ``asset_db_writer``; splits
    and dividends go to ``adjustment_writer``.

    Parameters used by this function: ``assets``, ``ingest_minute``,
    ``asset_db_writer``, ``minute_bar_writer``, ``daily_bar_writer``,
    ``adjustment_writer``, ``calendar`` and ``show_progress``.  The remaining
    parameters (``environ``, ``start_session``, ``end_session``, ``cache``,
    ``output_dir``) are accepted but not read here.

    The engine is always shut down with ``eg.exit()``, including when
    ingestion raises part-way through.
    """
    eg = Engine(auto_retry=True, multithread=True, best_ip=True, thread_num=8)
    eg.connect()

    try:
        symbols = fetch_symbols(eg, assets)
        metas = []

        def gen_symbols_data(symbol_map, freq='1d'):
            """Yield ``(int(symbol), bars)`` for every symbol at ``freq``.

            For the daily frequency the metadata derived from each frame is
            appended to ``metas`` as a side effect, in iteration order.
            """
            # Iterating the Series yields its values; the index was never
            # used, and this avoids Series.iteritems(), which newer pandas
            # releases no longer provide.
            for symbol in symbol_map:
                data = reindex_to_calendar(
                    calendar,
                    fetch_single_equity(eg, symbol, freq),
                    freq=freq,
                )
                if freq == '1d':
                    metas.append(get_meta_from_bars(data))
                yield int(symbol), data

        symbol_map = symbols.symbol

        assets = {int(s) for s in symbol_map}
        daily_bar_writer.write(gen_symbols_data(symbol_map, freq="1d"), assets=assets, show_progress=show_progress)

        if ingest_minute:
            with click.progressbar(gen_symbols_data(symbol_map, freq="1m"),
                                   label="Merging minute equity files:",
                                   length=len(assets),
                                   item_show_func=lambda e: e if e is None else str(e[0]),
                                   ) as bar:
                minute_bar_writer.write(bar, show_progress=False)

        # metas is only populated once the daily generator above has been
        # consumed by daily_bar_writer.write.
        symbols = pd.concat([symbols, pd.DataFrame(data=metas)], axis=1)
        splits, dividends = fetch_splits_and_dividends(eg, symbols)
        symbols.set_index('symbol', drop=False, inplace=True)
        asset_db_writer.write(symbols)
        adjustment_writer.write(
            splits=splits,
            dividends=dividends
        )
    finally:
        # Release the engine even if any step above failed.
        eg.exit()
예제 #2
0
파일: tdx_bundle.py 프로젝트: xujun05/nzl
def tdx_bundle(
        assets,
        ingest_minute,  # whether to ingest minute data, default False
        fundamental,  # whether to ingest fundamental data, default False
        environ,
        asset_db_writer,
        minute_bar_writer,
        daily_bar_writer,
        adjustment_writer,
        fundamental_writer,
        calendar,
        start_session,
        end_session,
        cache,
        show_progress,
        output_dir):
    """Incrementally ingest bars, asset metadata, adjustments and fundamentals.

    State kept under ``output_dir``:

    * ``DATE_DIR`` - a JSON file mapping frequency (``'1d'``/``'1m'``) to
      ``{symbol: 'YYYYMMDD'}``, the end date recorded for the last ingest of
      that symbol.  It is rewritten after each yielded symbol.
    * ``SESSION_BAR_DB`` - a SQLite database whose ``SESSION_BAR_TABLE``
      accumulates the daily bars; asset start/end dates are later derived
      from it.

    ``environ`` and ``cache`` are accepted but not read here.

    The engine is always shut down with ``eg.exit()``, including on error.
    A failure while writing fundamental data is logged and does not abort
    the ingest.
    """
    eg = Engine(auto_retry=True, multithread=True, best_ip=True, thread_num=8)
    eg.connect()

    try:
        symbols = fetch_symbols(eg, assets)

        today = pd.to_datetime('today', utc=True)
        distance = calendar.session_distance(start_session, today)

        dates_path = join(output_dir, DATE_DIR)
        if os.path.isfile(dates_path):
            with open(dates_path, 'r') as f:
                dates_json = json.load(f)
        else:
            dates_json = {'1d': {}, '1m': {}}

        session_bars = create_engine('sqlite:///' +
                                     join(output_dir, SESSION_BAR_DB))

        def gen_symbols_data(symbol_map, freq='1d'):
            """Yield ``(int(symbol), bars)`` for symbols that have new data.

            Symbols whose recorded date plus one day is not before ``end``
            are skipped without yielding anything.
            """
            if not session_bars.has_table(SESSION_BAR_TABLE):
                Base.metadata.create_all(
                    session_bars.connect(),
                    checkfirst=True,
                    tables=[Base.metadata.tables[SESSION_BAR_TABLE]])

            func = partial(fetch_single_equity, eg)
            now = pd.to_datetime('now', utc=True)
            if end_session >= now.normalize():
                end = now.normalize()
                # Before 15:05 Shanghai time fall back to the previous day
                # (presumably because the current day's bars are incomplete).
                if now.tz_convert('Asia/Shanghai').time() < datetime.time(15, 5):
                    end = end - pd.Timedelta('1 D')
            else:
                end = end_session

            if freq == '1m':
                if distance >= 100:
                    func = eg.get_k_data

            # Iterating the Series yields its values; the index was never
            # used, and this avoids Series.iteritems(), which newer pandas
            # releases no longer provide.
            for symbol in symbol_map:
                try:
                    start = pd.to_datetime(dates_json[freq][symbol],
                                           utc=True) + pd.Timedelta('1 D')
                    # NOTE(review): with ``>=`` a run where start == end is
                    # skipped; if ``end`` is inclusive for the fetchers this
                    # delays that day by one run - confirm.
                    if start >= end:
                        continue
                except KeyError:
                    # Never ingested at this frequency: start from scratch.
                    start = start_session
                data = reindex_to_calendar(
                    calendar,
                    func(symbol, start, end, freq),
                    freq=freq,
                )
                if freq == '1d':
                    data.to_sql(SESSION_BAR_TABLE,
                                session_bars.connect(),
                                if_exists='append',
                                index_label='day')
                    # For a symbol ingested before, hand the writer the full
                    # stored history rather than only the new rows.
                    if symbol in dates_json[freq]:
                        data = pd.read_sql(
                            "select * from {} where id = {} order by day ASC ".
                            format(SESSION_BAR_TABLE, int(symbol)),
                            session_bars,
                            index_col='day')
                        data.index = pd.to_datetime(data.index)
                dates_json[freq][symbol] = end.strftime('%Y%m%d')
                yield int(symbol), data

                # Runs when the consumer asks for the next item, i.e. after
                # it has finished with the frame just yielded.
                with open(dates_path, 'w') as f:
                    json.dump(dates_json, f)

        symbol_map = symbols.symbol

        assets = {int(s) for s in symbol_map}
        daily_bar_writer.write(gen_symbols_data(symbol_map, freq="1d"),
                               assets=assets,
                               show_progress=show_progress)

        if ingest_minute:
            with click.progressbar(
                    gen_symbols_data(symbol_map, freq="1m"),
                    label="Merging minute equity files:",
                    length=len(assets),
                    item_show_func=lambda e: e if e is None else str(e[0]),
            ) as bar:
                minute_bar_writer.write(bar, show_progress=False)

        splits, dividends, shares = fetch_splits_and_dividends(
            eg, symbols, start_session, end_session)
        # First/last stored day per asset id, taken from the daily-bar table.
        metas = pd.read_sql(
            "select id as symbol,min(day) as start_date,max(day) as end_date from bars group by id;",
            session_bars,
            parse_dates=['start_date', 'end_date'])
        # Integer ids back to zero-padded six-character symbols for the join.
        metas['symbol'] = metas['symbol'].apply(lambda x: format(x, '06'))
        metas['first_traded'] = metas['start_date']
        metas['auto_close_date'] = metas['end_date']

        symbols = symbols.set_index('symbol',
                                    drop=False).join(metas.set_index('symbol'),
                                                     how='inner')
        asset_db_writer.write(symbols)
        adjustment_writer.write(splits=splits, dividends=dividends, shares=shares)

        if fundamental:
            logger.info("writing fundamental data:")
            try:
                fundamental_writer.write(start_session, end_session)
            except Exception:
                # Best effort: keep the rest of the bundle, but leave a trace
                # instead of discarding the error silently.
                logger.exception("failed to write fundamental data")
    finally:
        # Release the engine even if any step above failed.
        eg.exit()
예제 #3
0
def tdx_bundle(assets,
               ingest_minute,  # whether to ingest minute data, default False
               overwrite,
               environ,
               asset_db_writer,
               minute_bar_writer,
               daily_bar_writer,
               adjustment_writer,
               calendar,
               start_session,
               end_session,
               cache,
               show_progress,
               output_dir):
    """Ingest daily (and optionally minute) bars plus asset metadata and adjustments.

    Daily bars are fetched for ``start_session``..``end_session``.  When
    minute data is requested without ``overwrite`` and ``start_session`` is
    more than three years before today, a later ``minute_start`` (looked up
    in ``calendar.all_sessions``) is computed and a warning is logged; that
    start is applied to minute bars only when the session distance between
    ``start_session`` and today is at least 100, in which case
    ``eg.get_k_data`` is also used as the fetch function.

    ``environ``, ``cache`` and ``output_dir`` are accepted but not read here.

    The engine is always shut down with ``eg.exit()``, including when
    ingestion raises part-way through.
    """
    eg = Engine(auto_retry=True, multithread=True, best_ip=True, thread_num=8)
    eg.connect()

    try:
        symbols = fetch_symbols(eg, assets)
        metas = []

        today = pd.to_datetime('today',utc=True)
        distance = calendar.session_distance(start_session, today)
        if ingest_minute and not overwrite and (start_session < today - pd.DateOffset(years=3)):
            minute_start = calendar.all_sessions[searchsorted(calendar.all_sessions, today - pd.DateOffset(years=3))]
            logger.warning(
                "overwrite start_session for minute bars to {}(3 years),"
                " to fetch minute data before that, please add '--overwrite True'".format(minute_start))
        else:
            minute_start = start_session

        def gen_symbols_data(symbol_map, freq='1d'):
            """Yield ``(int(symbol), bars)`` for every symbol at ``freq``.

            For the daily frequency the metadata derived from each frame is
            appended to ``metas`` as a side effect, in iteration order.
            """
            func = partial(fetch_single_equity, eg)
            start = start_session
            end = end_session

            if freq == '1m':
                if distance >= 100:
                    func = eg.get_k_data
                    start = minute_start

            # Iterating the Series yields its values; the index was never
            # used, and this avoids Series.iteritems(), which newer pandas
            # releases no longer provide.
            for symbol in symbol_map:
                data = reindex_to_calendar(
                    calendar,
                    func(symbol, start, end, freq),
                    freq=freq,
                )
                if freq == '1d':
                    metas.append(get_meta_from_bars(data))
                yield int(symbol), data

        symbol_map = symbols.symbol

        assets = {int(s) for s in symbol_map}
        daily_bar_writer.write(gen_symbols_data(symbol_map, freq="1d"), assets=assets, show_progress=show_progress)

        if ingest_minute:
            with click.progressbar(gen_symbols_data(symbol_map, freq="1m"),
                                   label="Merging minute equity files:",
                                   length=len(assets),
                                   item_show_func=lambda e: e if e is None else str(e[0]),
                                   ) as bar:
                minute_bar_writer.write(bar, show_progress=False)

        # metas is only populated once the daily generator above has been
        # consumed by daily_bar_writer.write.
        symbols = pd.concat([symbols, pd.DataFrame(data=metas)], axis=1)
        splits, dividends = fetch_splits_and_dividends(eg, symbols)
        symbols.set_index('symbol', drop=False, inplace=True)
        asset_db_writer.write(symbols)
        adjustment_writer.write(
            splits=splits,
            dividends=dividends
        )
    finally:
        # Release the engine even if any step above failed.
        eg.exit()
예제 #4
0
def tdx_bundle(assets,
               ingest_minute,  # whether to ingest minute data, default False
               fundamental,  # whether to ingest fundamental data, default False
               environ,
               asset_db_writer,
               minute_bar_writer,
               daily_bar_writer,
               adjustment_writer,
               fundamental_writer,
               calendar,
               start_session,
               end_session,
               cache,
               show_progress,
               output_dir):
    """Incrementally ingest bars, asset metadata, adjustments and fundamentals.

    State kept under ``output_dir``:

    * ``DATE_DIR`` - a JSON file mapping frequency (``'1d'``/``'1m'``) to
      ``{symbol: 'YYYYMMDD'}``, the last bar date ingested for that symbol.
      It is rewritten after each symbol for which new data was fetched.
    * ``SESSION_BAR_DB`` - a SQLite database whose ``SESSION_BAR_TABLE``
      accumulates the daily bars; asset start/end dates are later derived
      from it.

    Order of work: daily bars, splits/dividends/shares, asset metadata,
    optional fundamentals, and finally optional minute bars.

    ``environ`` and ``cache`` are accepted but not read here.

    The engine is always shut down with ``eg.exit()``, including on error.
    A failure while writing fundamental data is logged and does not abort
    the ingest.
    """
    eg = Engine(auto_retry=True, multithread=True, best_ip=True, thread_num=1)
    eg.connect()

    try:
        symbols = fetch_symbols(eg, assets)

        dates_path = join(output_dir, DATE_DIR)
        if os.path.isfile(dates_path):
            with open(dates_path, 'r') as f:
                dates_json = json.load(f)
        else:
            dates_json = {
                '1d': {},
                '1m': {}
            }

        session_bars = create_engine('sqlite:///' + join(output_dir, SESSION_BAR_DB))

        def load_stored_bars(symbol):
            """Return every stored daily bar of ``symbol``, oldest first."""
            stored = pd.read_sql(
                "select * from {} where id = {} order by day ASC ".format(SESSION_BAR_TABLE, int(symbol)),
                session_bars, index_col='day')
            stored.index = pd.to_datetime(stored.index)
            return stored

        def gen_symbols_data(symbol_map, freq='1d'):
            """Yield ``(int(symbol), bars)`` for each symbol at ``freq``.

            Symbols that are already up to date yield their stored daily
            history (``'1d'``) or an empty frame (other frequencies).
            """
            if not session_bars.has_table(SESSION_BAR_TABLE):
                Base.metadata.create_all(session_bars.connect(), checkfirst=True,
                                         tables=[Base.metadata.tables[SESSION_BAR_TABLE]])

            func = partial(fetch_single_equity, eg)
            now = pd.to_datetime('now', utc=True)
            if end_session >= now.normalize():
                end = now.normalize()
                # Before 15:05 Shanghai time fall back to the previous day
                # (presumably because the current day's bars are incomplete).
                if now.tz_convert('Asia/Shanghai').time() < datetime.time(15, 5):
                    end = end - pd.Timedelta('1 D')
            else:
                end = end_session
            # Snap ``end`` back to the latest calendar session on or before
            # it.  searchsorted returns len(sessions) when ``end`` lies past
            # the last session, so that case is checked before indexing.
            sessions = calendar.all_sessions
            end_idx = sessions.searchsorted(end)
            if end_idx >= len(sessions) or sessions[end_idx] > end:
                end = sessions[end_idx - 1]

            # Iterating the Series yields its values; the index was never
            # used, and this avoids Series.iteritems(), which newer pandas
            # releases no longer provide.
            for symbol in symbol_map:
                try:
                    start = pd.to_datetime(dates_json[freq][symbol], utc=True) + pd.Timedelta('1 D')
                    # First session on or after the day following the last
                    # ingested bar.
                    start = sessions[sessions.searchsorted(start)]
                    if start > end:
                        # Nothing new to fetch for this symbol.
                        if freq == '1d' and symbol in dates_json[freq]:
                            yield int(symbol), load_stored_bars(symbol)
                        else:
                            yield int(symbol), pd.DataFrame()
                        continue
                except KeyError:
                    # Never ingested at this frequency: start from scratch.
                    start = start_session
                if freq == '1m':
                    single_distance = calendar.session_distance(start, end)
                    if single_distance >= 100:
                        # NOTE(review): func stays switched for all later
                        # symbols as well - confirm that is intended.
                        func = eg.get_k_data
                data = reindex_to_calendar(
                    calendar,
                    func(symbol, start, end, freq),
                    start_session=start, end_session=end,
                    freq=freq,
                )
                if data is None or data.empty:
                    if freq == '1d' and symbol in dates_json[freq]:
                        yield int(symbol), load_stored_bars(symbol)
                    continue
                if freq == '1d':
                    if data.close.isnull().iloc[0]:  # padding fill error if the first is NaN
                        data2 = pd.read_sql(
                            "select * from {} where id = {} order by day desc limit 1 ".format(SESSION_BAR_TABLE, int(symbol)),
                            session_bars, index_col='day')
                        if data2.empty:
                            data = data[data.close.notnull()]
                            if data.empty:
                                # No usable rows at all: nothing to store and
                                # no last date to record for this symbol.
                                continue
                        else:
                            # Seed the first close from the latest stored bar
                            # with one positional write; the chained form
                            # data["close"][0] = ... may assign to a copy.
                            data.iloc[0, data.columns.get_loc("close")] = data2["close"].iloc[0]
                            fillna(data)
                    data.to_sql(SESSION_BAR_TABLE, session_bars.connect(), if_exists='append', index_label='day')
                    # For a symbol ingested before, hand the writer the full
                    # stored history rather than only the new rows.
                    if symbol in dates_json[freq]:
                        data = load_stored_bars(symbol)
                dates_json[freq][symbol] = data.index[-1].strftime('%Y%m%d')
                yield int(symbol), data

                # Runs when the consumer asks for the next item, i.e. after
                # it has finished with the frame just yielded.
                with open(dates_path, 'w') as f:
                    json.dump(dates_json, f)

        symbol_map = symbols.symbol

        assets = {int(s) for s in symbol_map}
        daily_bar_writer.write(gen_symbols_data(symbol_map, freq="1d"), assets=assets, show_progress=show_progress)

        splits, dividends, shares = fetch_splits_and_dividends(eg, symbols, start_session, end_session)
        # First/last stored day per asset id, taken from the daily-bar table.
        metas = pd.read_sql("select id as symbol,min(day) as start_date,max(day) as end_date from bars group by id;",
                            session_bars,
                            parse_dates=['start_date','end_date']
                            )
        # Integer ids back to zero-padded six-character symbols for the join.
        metas['symbol'] = metas['symbol'].apply(lambda x:format(x,'06'))
        metas['first_traded'] = metas['start_date']
        metas['auto_close_date'] = metas['end_date']

        symbols = symbols.set_index('symbol', drop=False).join(metas.set_index('symbol'), how='inner')
        asset_db_writer.write(symbols)
        adjustment_writer.write(
            splits=splits,
            dividends=dividends,
            shares=shares
        )

        if fundamental:
            logger.info("writing fundamental data:")
            try:
                fundamental_writer.write(start_session, end_session)
            except Exception:
                # Best effort: keep the rest of the bundle, but leave a trace
                # instead of discarding the error silently.
                logger.exception("failed to write fundamental data")

        if ingest_minute:
            with click.progressbar(gen_symbols_data(symbol_map, freq="1m"),
                                   label="Merging minute equity files:",
                                   length=len(assets),
                                   item_show_func=lambda e: e if e is None else str(e[0]),
                                   ) as bar:
                minute_bar_writer.write(bar, show_progress=False)
    finally:
        # Release the engine even if any step above failed.
        eg.exit()