def reindex_to_calendar(calendar, data, freq='1d'):
    """Reindex ``data`` onto the trading calendar and fill the gaps.

    The range is taken from the first and last index labels of ``data``
    (normalized to midnight).  Labels that are not ``pd.Timestamp`` are
    converted with ``pd.Timestamp(value, unit='m')``.

    Parameters
    ----------
    calendar
        Object providing ``sessions_in_range(start, end)`` and
        ``minutes_for_sessions_in_range(start, end)``.
    data : pandas.DataFrame or None
        Bars to align.  For ``freq == '1d'`` an ``id`` column is required.
        For any other ``freq`` the index is interpreted as naive
        Asia/Shanghai time and converted to naive UTC before reindexing.
    freq : str
        ``'1d'`` selects session alignment; anything else selects minute
        alignment.

    Returns
    -------
    pandas.DataFrame or None
        The reindexed frame after it has been passed through ``fillna``,
        or ``None`` when ``data`` is ``None`` or empty.
    """
    # Nothing to align; indexing ``data.index[[0, -1]]`` below would raise.
    if data is None or data.empty:
        return None

    start_session, end_session = data.index[[0, -1]]
    if not isinstance(start_session, pd.Timestamp):
        start_session = pd.Timestamp(start_session, unit='m')
        end_session = pd.Timestamp(end_session, unit='m')
    start_session = start_session.normalize()
    end_session = end_session.normalize()

    if freq == '1d':
        all_sessions = calendar.sessions_in_range(
            start_session, end_session).tz_localize(None)
        df = data.reindex(all_sessions, copy=False)
        df = fillna(df)
        # Assign the filled column back instead of calling an in-place
        # method on a column selection, which is not guaranteed to write
        # through to ``df``.
        df['id'] = df['id'].ffill()
        # NOTE(review): attribute-style assignment only updates an existing
        # ``day`` column; it does not create one.  Kept as in the original.
        df.day = df.index.values.astype('datetime64[m]').astype(np.int64)
    else:
        all_sessions = calendar.minutes_for_sessions_in_range(
            start_session, end_session).tz_localize(None)
        utc_index = data.index.tz_localize(
            pytz.timezone('Asia/Shanghai')).tz_convert('UTC').tz_localize(None)
        # Work on a shallow copy so the caller's frame keeps its own index.
        data = data.copy(deep=False)
        data.index = utc_index
        df = data.reindex(all_sessions, copy=False)
        df = fillna(df)
    return df
def reindex_to_calendar(calendar, data, freq='1d', start_session=None, end_session=None):
    """Reindex ``data`` onto the trading calendar and fill the gaps.

    Parameters
    ----------
    calendar
        Object providing ``sessions_in_range(start, end)`` and
        ``minutes_for_sessions_in_range(start, end)``.
    data : pandas.DataFrame or None
        Bars to align.  For ``freq == '1d'`` an ``id`` column is required.
        For any other ``freq`` the index is interpreted as naive
        Asia/Shanghai time and converted to naive UTC before reindexing.
    freq : str
        ``'1d'`` selects session alignment; anything else selects minute
        alignment.
    start_session, end_session : optional
        Bounds of the calendar range.  When ``start_session`` is ``None``
        both bounds are taken from the first and last index labels of
        ``data``.  When only ``end_session`` is ``None`` it falls back to
        the last index label.  Bounds that are not ``pd.Timestamp`` are
        converted with ``pd.Timestamp(value, unit='m')``; both are
        normalized to midnight.

    Returns
    -------
    pandas.DataFrame or None
        The reindexed frame after it has been passed through ``fillna``,
        or ``None`` when ``data`` is ``None`` or empty.
    """
    if data is None or data.empty:
        return None

    if start_session is None:
        start_session, end_session = data.index[[0, -1]]
    elif end_session is None:
        # Previously a lone ``start_session`` left ``end_session`` as None
        # and failed further down.
        end_session = data.index[-1]

    if not isinstance(start_session, pd.Timestamp):
        start_session = pd.Timestamp(start_session, unit='m')
    if not isinstance(end_session, pd.Timestamp):
        end_session = pd.Timestamp(end_session, unit='m')
    start_session = start_session.normalize()
    end_session = end_session.normalize()

    if freq == '1d':
        all_sessions = calendar.sessions_in_range(
            start_session, end_session).tz_localize(None)
        df = data.reindex(all_sessions, copy=False)
        df = fillna(df)
        # Forward-fill, then back-fill rows before the first known id.
        # Assigned back rather than filled in place on a column selection,
        # which is not guaranteed to write through to ``df``.
        df['id'] = df['id'].ffill().bfill()
        # NOTE(review): attribute-style assignment only updates an existing
        # ``day`` column; it does not create one.  Kept as in the original.
        df.day = df.index.values.astype('datetime64[m]').astype(np.int64)
    else:
        all_sessions = calendar.minutes_for_sessions_in_range(
            start_session, end_session).tz_localize(None)
        utc_index = data.index.tz_localize(
            pytz.timezone('Asia/Shanghai')).tz_convert('UTC').tz_localize(None)
        # Work on a shallow copy so the caller's frame keeps its own index.
        data = data.copy(deep=False)
        data.index = utc_index
        df = data.reindex(all_sessions, copy=False)
        df = fillna(df)
    return df
def gen_symbols_data(symbol_map, freq='1d'):
    """Yield ``(int(symbol), bars)`` for each symbol in ``symbol_map``.

    For every symbol the generator works out the first session that still
    has to be fetched (the day after the date recorded in
    ``dates_json[freq]``, or ``start_session`` when nothing is recorded),
    fetches the missing bars, aligns them with ``reindex_to_calendar`` and
    yields the result.  For ``freq == '1d'`` new bars are appended to
    ``SESSION_BAR_TABLE`` and, for symbols that were already recorded, the
    full cached history is yielded instead of only the new rows.

    ``dates_json`` is written to ``dates_path`` when the generator
    finishes, including when it is closed early or fails part-way, so the
    recorded dates stay in step with rows already appended to the table.

    NOTE(review): this function reads ``session_bars``, ``Base``,
    ``SESSION_BAR_TABLE``, ``eg``, ``calendar``, ``start_session``,
    ``end_session``, ``dates_json``, ``dates_path``, ``fetch_single_equity``
    and ``fillna`` from an enclosing scope that is not visible here.

    Parameters
    ----------
    symbol_map
        Mapping-like object whose ``items()`` yields ``(key, symbol)``
        pairs; only the symbol is used.
    freq : str
        ``'1d'`` or ``'1m'``.
    """

    def load_cached_bars(symbol):
        # Full cached history for one symbol, oldest row first.
        bars = pd.read_sql(
            "select * from {} where id = {} order by day ASC ".format(
                SESSION_BAR_TABLE, int(symbol)),
            session_bars, index_col='day')
        bars.index = pd.to_datetime(bars.index)
        return bars

    if not session_bars.has_table(SESSION_BAR_TABLE):
        # Close the connection once the table exists instead of leaking it.
        with session_bars.connect() as conn:
            Base.metadata.create_all(
                conn,
                checkfirst=True,
                tables=[Base.metadata.tables[SESSION_BAR_TABLE]])

    default_fetch = partial(fetch_single_equity, eg)
    sessions = calendar.all_sessions

    now = pd.to_datetime('now', utc=True)
    if end_session >= now.normalize():
        end = now.normalize()
        # Before 15:05 Asia/Shanghai the current day is not fetched yet.
        if now.tz_convert('Asia/Shanghai').time() < datetime.time(15, 5):
            end = end - pd.Timedelta('1 D')
    else:
        end = end_session

    # Snap ``end`` back to the latest session that is not after it.  The
    # length check covers an ``end`` beyond the last calendar session,
    # where indexing with the insertion point would raise IndexError.
    end_idx = sessions.searchsorted(end)
    if end_idx >= len(sessions) or sessions[end_idx] > end:
        end = sessions[end_idx - 1]

    try:
        for _, symbol in symbol_map.items():
            # Only the lookup may signal "never ingested"; keeping the try
            # narrow stops unrelated KeyErrors from being swallowed.
            try:
                last_ingested = dates_json[freq][symbol]
            except KeyError:
                start = start_session
            else:
                resume_from = (pd.to_datetime(last_ingested, utc=True)
                               + pd.Timedelta('1 D'))
                start_idx = sessions.searchsorted(resume_from)
                if start_idx >= len(sessions) or sessions[start_idx] > end:
                    # Already up to date: serve the cache for daily bars,
                    # an empty frame otherwise.
                    if freq == '1d':
                        yield int(symbol), load_cached_bars(symbol)
                    else:
                        yield int(symbol), pd.DataFrame()
                    continue
                start = sessions[start_idx]

            # Choose the fetcher per symbol.  Rebinding a shared variable
            # would make ``eg.get_k_data`` stick for every later symbol.
            fetch = default_fetch
            if freq == '1m' and calendar.session_distance(start, end) >= 100:
                fetch = eg.get_k_data

            data = reindex_to_calendar(
                calendar,
                fetch(symbol, start, end, freq),
                start_session=start,
                end_session=end,
                freq=freq,
            )
            if data is None or data.empty:
                if freq == '1d' and symbol in dates_json[freq]:
                    yield int(symbol), load_cached_bars(symbol)
                continue

            if freq == '1d':
                # Padding cannot fill a leading NaN close, so seed it from
                # the newest cached row, or drop the NaN rows if none exists.
                if data['close'].isnull().iloc[0]:
                    latest = pd.read_sql(
                        "select * from {} where id = {} order by day desc limit 1 "
                        .format(SESSION_BAR_TABLE, int(symbol)),
                        session_bars, index_col='day')
                    if latest.empty:
                        data = data[data['close'].notnull()]
                    else:
                        close_col = data.columns.get_loc('close')
                        data.iloc[0, close_col] = latest['close'].iloc[0]
                        data = fillna(data)
                with session_bars.connect() as conn:
                    data.to_sql(SESSION_BAR_TABLE, conn,
                                if_exists='append', index_label='day')
                if symbol in dates_json[freq]:
                    data = load_cached_bars(symbol)

            if data.empty:
                # Every row was dropped above and nothing is cached.
                continue

            dates_json[freq][symbol] = data.index[-1].strftime('%Y%m%d')
            yield int(symbol), data
    finally:
        with open(dates_path, 'w') as f:
            json.dump(dates_json, f)
def gen_symbols_data(symbol_map, freq='1d'):
    """Generate ``(int(symbol), DataFrame)`` pairs for ``symbol_map``.

    Per symbol:

    1. Resume from the day after ``dates_json[freq][symbol]``, snapped
       forward to a calendar session, or from ``start_session`` when the
       symbol has no recorded date.
    2. If that session is after the last session to fetch, yield the
       cached history (``'1d'``) or an empty frame (other frequencies).
    3. Otherwise fetch the bars, align them with ``reindex_to_calendar``,
       and for ``'1d'`` append them to ``SESSION_BAR_TABLE``; symbols that
       already had a recorded date yield their full cached history.
    4. Record the last yielded index date in ``dates_json[freq]``.

    ``dates_json`` is dumped to ``dates_path`` in a ``finally`` clause, so
    it is also saved when iteration stops early or raises.

    NOTE(review): ``session_bars``, ``Base``, ``SESSION_BAR_TABLE``, ``eg``,
    ``calendar``, ``start_session``, ``end_session``, ``dates_json``,
    ``dates_path``, ``fetch_single_equity`` and ``fillna`` are free names
    resolved outside this block.

    Parameters
    ----------
    symbol_map
        Object whose ``items()`` yields ``(key, symbol)`` pairs.
    freq : str
        ``'1d'`` or ``'1m'``.
    """
    sessions = calendar.all_sessions

    def _cached_history(symbol):
        """Read every cached row for ``symbol`` ordered by ``day`` ascending."""
        query = "select * from {} where id = {} order by day ASC ".format(
            SESSION_BAR_TABLE, int(symbol))
        frame = pd.read_sql(query, session_bars, index_col='day')
        frame.index = pd.to_datetime(frame.index)
        return frame

    def _last_session_to_fetch():
        """Return the latest calendar session that should be fetched."""
        now = pd.to_datetime('now', utc=True)
        today = now.normalize()
        if end_session >= today:
            last = today
            # Before 15:05 Asia/Shanghai, step back one day.
            if now.tz_convert('Asia/Shanghai').time() < datetime.time(15, 5):
                last = last - pd.Timedelta('1 D')
        else:
            last = end_session
        pos = sessions.searchsorted(last)
        # ``pos == len(sessions)`` means ``last`` is past the calendar end;
        # indexing there would raise IndexError.
        if pos >= len(sessions) or sessions[pos] > last:
            last = sessions[pos - 1]
        return last

    def _patch_leading_gap(symbol, frame):
        """Repair a NaN first ``close``, which padding cannot fill."""
        if not frame['close'].isnull().iloc[0]:
            return frame
        query = "select * from {} where id = {} order by day desc limit 1 ".format(
            SESSION_BAR_TABLE, int(symbol))
        previous = pd.read_sql(query, session_bars, index_col='day')
        if previous.empty:
            return frame[frame['close'].notnull()]
        # Positional write on the frame itself; chained ``frame["close"][0]``
        # may assign into a temporary copy.
        frame.iloc[0, frame.columns.get_loc('close')] = previous['close'].iloc[0]
        return fillna(frame)

    if not session_bars.has_table(SESSION_BAR_TABLE):
        with session_bars.connect() as conn:
            Base.metadata.create_all(
                conn,
                checkfirst=True,
                tables=[Base.metadata.tables[SESSION_BAR_TABLE]])

    single_fetch = partial(fetch_single_equity, eg)
    end = _last_session_to_fetch()

    try:
        for _, symbol in symbol_map.items():
            sym_id = int(symbol)

            # Only a missing record means "start from scratch".
            try:
                recorded = dates_json[freq][symbol]
            except KeyError:
                recorded = None

            if recorded is None:
                start = start_session
            else:
                wanted = pd.to_datetime(recorded, utc=True) + pd.Timedelta('1 D')
                pos = sessions.searchsorted(wanted)
                if pos >= len(sessions) or sessions[pos] > end:
                    # Nothing new to fetch for this symbol.
                    if freq == '1d':
                        yield sym_id, _cached_history(symbol)
                    else:
                        yield sym_id, pd.DataFrame()
                    continue
                start = sessions[pos]

            # Decide per symbol.  The bulk fetcher must not leak into later
            # iterations once one symbol has needed it.
            use_bulk = (freq == '1m'
                        and calendar.session_distance(start, end) >= 100)
            fetch = eg.get_k_data if use_bulk else single_fetch

            data = reindex_to_calendar(
                calendar,
                fetch(symbol, start, end, freq),
                start_session=start,
                end_session=end,
                freq=freq,
            )
            if data is None or data.empty:
                if freq == '1d' and symbol in dates_json[freq]:
                    yield sym_id, _cached_history(symbol)
                continue

            if freq == '1d':
                data = _patch_leading_gap(symbol, data)
                # Scoped connection so it is closed after the append.
                with session_bars.connect() as conn:
                    data.to_sql(SESSION_BAR_TABLE, conn,
                                if_exists='append', index_label='day')
                if symbol in dates_json[freq]:
                    data = _cached_history(symbol)

            if data.empty:
                # All rows were dropped and there is no cached history.
                continue

            dates_json[freq][symbol] = data.index[-1].strftime('%Y%m%d')
            yield sym_id, data
    finally:
        with open(dates_path, 'w') as f:
            json.dump(dates_json, f)