def import_csv(filename: str) -> pd.DataFrame:
    """
    Import a historical tick file created from the export_to_csv() function.

    :param filename: Full file path including filename
    :return: (pandas.DataFrame) historical limit order book data
    """
    start_time = dt.now(tz=TIMEZONE)

    if 'xz' in filename:
        data = pd.read_csv(filepath_or_buffer=filename, index_col=0,
                           compression='xz', engine='c')
    elif 'csv' in filename:
        data = pd.read_csv(filepath_or_buffer=filename, index_col=0, engine='c')
    else:
        LOGGER.warn('Error: file must be a csv or xz')
        data = None

    elapsed = (dt.now(tz=TIMEZONE) - start_time).seconds
    LOGGER.info('Imported %s from a csv in %i seconds' % (filename[-25:], elapsed))
    return data
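# A minimal usage sketch for import_csv(). The file path below is hypothetical
# and assumes the file was previously written by export_to_csv().
def _example_import_csv() -> None:
    lob_history = import_csv('./data/BTC-USD_2019-01-01.csv.xz')
    if lob_history is not None:
        print(lob_history.shape)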
def match(self, msg: dict) -> None:
    """
    Change volume of book.

    :param msg: incoming order message
    """
    msg_order_id = msg.get('maker_order_id', None)
    if msg_order_id in self.order_map:
        old_order = self.order_map[msg_order_id]
        order = {
            'order_id': msg_order_id,
            'price': float(msg['price']),
            'size': float(msg['size']),
            'side': msg['side'],
            'time': msg['time'],
            'type': msg['type'],
            'product_id': msg['product_id']
        }
        price = order['price']
        if price in self.price_dict:
            remove_size = order['size']
            remaining_size = old_order['size'] - remove_size
            order['size'] = remaining_size
            self.order_map[old_order['order_id']] = order
            old_order_price = old_order.get('price', None)
            self.price_dict[price].add_market(quantity=remove_size,
                                              price=old_order_price)
            self.price_dict[price].remove_quantity(quantity=remove_size,
                                                   price=old_order_price)
        else:
            LOGGER.info('\nmatch: price not in tree already [%s]\n' % msg)
    elif RECORD_DATA:
        LOGGER.warn('\n%s match: order id cannot be found for %s\n' % (self.sym, msg))
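# Illustrative 'match' message in the shape match() expects (all values are
# made up). A match against a resting maker order reduces that order's size
# at its price level instead of deleting the order outright.
EXAMPLE_MATCH_MSG = {
    'type': 'match',
    'maker_order_id': 'd50ec984-77a8-460a-b958-66f114b0de9b',
    'side': 'buy',
    'price': '9000.00',
    'size': '0.25',
    'time': '2019-01-01T00:00:00.000000Z',
    'product_id': 'BTC-USD',
}
# e.g. bids.match(EXAMPLE_MATCH_MSG) leaves the maker order resting with
# size = old_size - 0.25 at the 9000.00 price level.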
def _process_book(self, msg):
    """
    Internal method to process FULL BOOK market data.

    :param msg: incoming tick
    :return: False if re-subscribe is required
    """
    # check for a heartbeat
    if msg[1] == 'hb':
        # render_book('heart beat %s' % msg)
        return True

    # order book message (initial snapshot)
    elif np.shape(msg[1])[0] > 3:
        LOGGER.info('%s loading book...' % self.sym)
        self.clear_book()
        self._load_book(msg)
        return True

    else:
        # else, the incoming message is an order update
        order = {
            "order_id": int(msg[1][0]),
            "price": float(msg[1][1]),
            "size": float(abs(msg[1][2])),
            "side": 'sell' if float(msg[1][2]) < float(0) else 'buy',
            "product_id": self.sym,
            "type": 'update'
        }
        self.db.new_tick(order)

        # order should be removed from the book
        if order['price'] == 0.:
            if order['side'] == 'buy':
                self.bids.remove_order(order)
            elif order['side'] == 'sell':
                self.asks.remove_order(order)

        # order is a new order or size update for bids
        elif order['side'] == 'buy':
            if order['order_id'] in self.bids.order_map:
                self.bids.change(order)
            else:
                self.bids.insert_order(order)

        # order is a new order or size update for asks
        elif order['side'] == 'sell':
            if order['order_id'] in self.asks.order_map:
                self.asks.change(order)
            else:
                self.asks.insert_order(order)

        # unhandled msg
        else:
            LOGGER.warn('\nUnhandled list msg %s' % msg)

        return True
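# The Bitfinex raw-book messages handled above look roughly like the samples
# below (channel id and values are illustrative). A negative amount denotes a
# sell order, and price == 0 signals that the order should be removed.
EXAMPLE_HEARTBEAT = [169727, 'hb']
EXAMPLE_SNAPSHOT = [169727, [[1234567890, 9000.0, 0.5],
                             [1234567891, 9001.0, -0.25],
                             [1234567892, 8999.0, 1.0],
                             [1234567893, 9002.0, -2.0]]]
EXAMPLE_UPDATE = [169727, [1234567890, 9000.0, -0.5]]  # sell 0.5 @ 9000.0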
def _process_book_replay(self, order):
    """
    Internal method to process FULL BOOK market data.

    :param order: incoming tick
    :return: False if resubscription is required
    """
    # clean up the datatypes
    order['price'] = float(order['price'])
    order['size'] = float(order['size'])

    if order['type'] == 'update':
        # order should be removed from the book
        if order['price'] == float(0):
            if order['side'] == 'buy':
                self.bids.remove_order(order)
            elif order['side'] == 'sell':
                self.asks.remove_order(order)

        # order is a new order or size update for bids
        elif order['side'] == 'buy':
            if order['order_id'] in self.bids.order_map:
                self.bids.change(order)
            else:
                self.bids.insert_order(order)

        # order is a new order or size update for asks
        elif order['side'] == 'sell':
            if order['order_id'] in self.asks.order_map:
                self.asks.change(order)
            else:
                self.asks.insert_order(order)

        # unhandled tick message
        else:
            LOGGER.warn('_process_book_replay: unhandled message\n%s' % str(order))

    elif order['type'] == 'preload':
        if order['side'] == 'buy':
            self.bids.insert_order(order)
        else:
            self.asks.insert_order(order)

    elif order['type'] == 'te':
        trade_notional = order['price'] * order['size']
        if order['side'] == 'upticks':
            self.buy_tracker.add(notional=trade_notional)
            self.asks.match(order)
        else:
            self.sell_tracker.add(notional=trade_notional)
            self.bids.match(order)

    else:
        LOGGER.warn('\n_process_book_replay() Unhandled list msg %s' % order)

    return True
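# Replayed ticks arrive as flat dicts. Two illustrative examples (values are
# made up): a 'preload' tick inserts a resting order while warming up, and a
# 'te' (trade execution) tick with side == 'upticks' is an aggressive buy
# that matches against the asks.
EXAMPLE_PRELOAD_TICK = {
    'type': 'preload', 'order_id': 1234567890, 'side': 'buy',
    'price': '9000.0', 'size': '0.5',
}
EXAMPLE_TRADE_TICK = {
    'type': 'te', 'side': 'upticks', 'price': '9001.0', 'size': '0.25',
}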
def init_db_connection(self) -> None:
    """
    Initiate database connection to Arctic.

    :return: (void)
    """
    LOGGER.info("init_db_connection for {}...".format(self.sym))
    try:
        self.db = Arctic(MONGO_ENDPOINT)
        self.db.initialize_library(ARCTIC_NAME, lib_type=TICK_STORE)
        self.collection = self.db[ARCTIC_NAME]
    except PyMongoError as e:
        LOGGER.warn("Database.PyMongoError() --> {}".format(e))
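# A standalone sketch of the Arctic objects wired up above, assuming a local
# MongoDB instance; the endpoint, library name, and symbol below are
# placeholders for MONGO_ENDPOINT, ARCTIC_NAME, and self.sym.
def _example_arctic_connection() -> None:
    from datetime import datetime

    from arctic import Arctic, TICK_STORE
    from arctic.date import DateRange

    store = Arctic('localhost')
    store.initialize_library('crypto.tickstore', lib_type=TICK_STORE)
    library = store['crypto.tickstore']
    ticks = library.read('BTC-USD',
                         date_range=DateRange(datetime(2019, 1, 1),
                                              datetime(2019, 1, 2)))
    print(ticks.shape)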
async def subscribe(self) -> None:
    """
    Subscribe to full order book.

    :return: (void)
    """
    try:
        self.ws = await websockets.connect(self.ws_endpoint)

        if self.request is not None:
            LOGGER.info('Requesting Book: {}'.format(self.request))
            await self.ws.send(self.request)
            LOGGER.info('BOOK %s: %s subscription request sent.' %
                        (self.exchange.upper(), self.sym))

        if self.trades_request is not None:
            LOGGER.info('Requesting Trades: {}'.format(self.trades_request))
            await self.ws.send(self.trades_request)
            LOGGER.info('TRADES %s: %s subscription request sent.' %
                        (self.exchange.upper(), self.sym))

        self.last_subscribe_time = dt.now()

        # add incoming messages to a queue, which is consumed and processed
        # in the run() method.
        while True:
            self.queue.put(json.loads(await self.ws.recv()))

    except websockets.ConnectionClosed as exception:
        LOGGER.warn('%s: subscription exception %s' % (self.exchange, exception))
        self.retry_counter += 1
        elapsed = (dt.now() - self.last_subscribe_time).seconds

        if elapsed < 10:
            sleep_time = max(10 - elapsed, 1)
            time.sleep(sleep_time)
            LOGGER.info('%s - %s is sleeping %i seconds...' %
                        (self.exchange, self.sym, sleep_time))

        if self.retry_counter < self.max_retries:
            LOGGER.info('%s: Retrying to connect... attempt #%i' %
                        (self.exchange, self.retry_counter))
            await self.subscribe()  # recursion
        else:
            LOGGER.warn('%s: %s Ran out of reconnection attempts. '
                        'Have already tried %i times.' %
                        (self.exchange, self.sym, self.retry_counter))
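# Hypothetical driver for subscribe(): the coroutine only returns once the
# retry budget is exhausted, so it is typically scheduled on an event loop
# alongside the run() method that drains self.queue.
import asyncio

def _example_start_client(client) -> None:
    # `client` is assumed to be an instance of the connector class above.
    asyncio.run(client.subscribe())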
def _query_arctic(self, ccy: str, start_date: int, end_date: int) \
        -> Union[pd.DataFrame, None]:
    """
    Query the database and return LOB messages starting from the latest LOB
    reconstruction point.

    :param ccy: currency symbol
    :param start_date: YYYYMMDD start date
    :param end_date: YYYYMMDD end date
    :return: (pd.DataFrame) results found in database
    """
    assert self.collection is not None, \
        "Arctic.Collection() must not be null."

    start_time = dt.now(tz=self.tz)

    try:
        LOGGER.info('\nGetting {} data from Arctic Tick Store...'.format(ccy))
        cursor = self.collection.read(symbol=ccy,
                                      date_range=DateRange(start_date, end_date))

        # filter ticks for the first LOAD_BOOK message
        # (starting point for order book reconstruction)
        # min_datetime = cursor.loc[cursor.type == 'load_book'].index[0]
        dates = np.unique(cursor.loc[cursor.type == 'load_book'].index.date)
        start_index = cursor.loc[((cursor.index.date == dates[0]) &
                                  (cursor.type == 'load_book'))].index[-1]
        # cursor = cursor.loc[cursor.index >= min_datetime]
        cursor = cursor.loc[cursor.index >= start_index]

        elapsed = (dt.now(tz=self.tz) - start_time).seconds
        LOGGER.info('Completed querying %i %s records in %i seconds' %
                    (cursor.shape[0], ccy, elapsed))
    except Exception as ex:
        cursor = None
        LOGGER.warn('Simulator._query_arctic() threw an exception: \n%s' % str(ex))

    return cursor
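# Usage sketch (symbol and dates are illustrative). Note that _query_arctic()
# trims everything before the last 'load_book' message on the first date, so
# the replay starts from a complete order book snapshot.
def _example_query_arctic(sim) -> None:
    # `sim` is assumed to be an instance of the Simulator class above.
    ticks = sim._query_arctic(ccy='BTC-USD', start_date=20190101, end_date=20190102)
    if ticks is not None:
        print(ticks.shape)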
def new_tick(self, msg: dict):
    """
    Method to process incoming ticks.

    :param msg: incoming tick
    :return: False if there is an exception (or need to reconnect the WebSocket)
    """
    # check for data messages, which only come in lists
    if isinstance(msg, list):
        if msg[0] == self.channel_id['book']:
            return self._process_book(msg)
        elif msg[0] == self.channel_id['trades']:
            return self._process_trades(msg)

    # non-data messages
    elif isinstance(msg, dict):
        if 'event' in msg:
            return self._process_events(msg)
        elif msg['type'] == 'te':
            self.last_tick_time = msg.get('system_time', None)
            return self._process_trades_replay(msg)
        elif msg['type'] in ['update', 'preload']:
            self.last_tick_time = msg.get('system_time', None)
            return self._process_book_replay(msg)
        elif msg['type'] == 'load_book':
            self.clear_book()
            return True
        elif msg['type'] == 'book_loaded':
            self.bids.warming_up = False
            self.asks.warming_up = False
            return True
        else:
            LOGGER.info('new_tick() does not know how to process message = %s'
                        % str(msg))

    # unhandled exception
    else:
        LOGGER.warn('unhandled exception\n%s\n' % msg)

    return True
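# The dispatcher above receives two message families (samples are
# illustrative): live Bitfinex data arrives as lists keyed by channel id,
# while replayed or administrative messages arrive as dicts with a 'type' key.
EXAMPLE_LIVE_MSG = [169727, [1234567890, 9000.0, 0.5]]  # routed by channel_id
EXAMPLE_REPLAY_MSG = {'type': 'load_book', 'product_id': 'tBTCUSD'}  # clears the book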
def new_tick(self, msg: dict) -> bool:
    """
    Method to process incoming ticks.

    :param msg: incoming tick
    :return: False if there is an exception
    """
    message_type = msg['type']

    if 'sequence' not in msg:
        if message_type == 'subscriptions':
            # request an order book snapshot after the
            # websocket feed is established
            LOGGER.info('Coinbase Subscriptions successful for : %s' % self.sym)
            self.load_book()
        return True
    elif np.isnan(msg['sequence']):
        # this situation appears during data replays
        # (and not in live data feeds)
        LOGGER.warn('\n%s found a nan in the sequence' % self.sym)
        return True

    # check the incoming message sequence to verify if there
    # is a dropped/missed message.
    # If so, request a new orderbook snapshot from Coinbase Pro.
    new_sequence = int(msg['sequence'])
    self.diff = new_sequence - self.sequence

    if self.diff == 1:
        # tick sequences increase by an increment of one
        self.sequence = new_sequence
    elif message_type in ['load_book', 'book_loaded', 'preload']:
        # message types used for data replays
        self.sequence = new_sequence
    elif self.diff <= 0:
        if message_type in ['received', 'open', 'done', 'match', 'change']:
            LOGGER.info('%s [%s] has a stale tick: current %i | incoming %i' % (
                self.sym, message_type, self.sequence, new_sequence))
            return True
        else:
            LOGGER.warn('UNKNOWN-%s %s has a stale tick: current %i | incoming %i' % (
                self.sym, message_type, self.sequence, new_sequence))
            return True
    else:
        # when the tick sequence difference is greater than 1
        LOGGER.info('sequence gap: %s missing %i messages. new_sequence: %i [%s]\n' %
                    (self.sym, self.diff, new_sequence, message_type))
        self.sequence = new_sequence
        return False

    # persist data to Arctic Tick Store
    self.db.new_tick(msg)
    self.last_tick_time = msg.get('time', None)
    # make sure CONFIGS.RECORDING is false when replaying data

    side = msg['side']
    if message_type == 'received':
        return True
    elif message_type == 'open':
        if side == 'buy':
            self.bids.insert_order(msg)
            return True
        else:
            self.asks.insert_order(msg)
            return True
    elif message_type == 'done':
        if side == 'buy':
            self.bids.remove_order(msg)
            return True
        else:
            self.asks.remove_order(msg)
            return True
    elif message_type == 'match':
        trade_notional = float(msg['price']) * float(msg['size'])
        if side == 'buy':
            # trades matched on the bids book are considered sells
            self.sell_tracker.add(notional=trade_notional)
            self.bids.match(msg)
            return True
        else:
            # trades matched on the asks book are considered buys
            self.buy_tracker.add(notional=trade_notional)
            self.asks.match(msg)
            return True
    elif message_type == 'change':
        if side == 'buy':
            self.bids.change(msg)
            return True
        else:
            self.asks.change(msg)
            return True
    elif message_type == 'preload':
        if side == 'buy':
            self.bids.insert_order(msg)
            return True
        else:
            self.asks.insert_order(msg)
            return True
    elif message_type == 'load_book':
        self.clear_book()
        return True
    elif message_type == 'book_loaded':
        self.bids.warming_up = self.asks.warming_up = False
        LOGGER.info("Book finished loading at {}".format(self.last_tick_time))
        return True
    else:
        LOGGER.warn('\n\n\nunhandled message type\n%s\n\n' % str(msg))
        return False
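# Illustrative Coinbase 'open' message for the sequencing logic above (values
# are made up). Sequence numbers must advance by exactly one; a larger gap
# makes new_tick() return False so the caller can request a fresh snapshot.
EXAMPLE_OPEN_MSG = {
    'type': 'open', 'sequence': 1001, 'side': 'buy',
    'price': '9000.00', 'remaining_size': '0.50',
    'order_id': 'd50ec984-77a8-460a-b958-66f114b0de9b',
    'time': '2019-01-01T00:00:00.000000Z', 'product_id': 'BTC-USD',
}
# With self.sequence == 1000 the tick is applied; with self.sequence == 998
# a sequence gap is logged and new_tick() returns False.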
def get_orderbook_snapshot_history(self, query: dict) -> Union[pd.DataFrame, None]:
    """
    Function to replay historical market data and generate the features used
    for reinforcement learning & training.

    NOTE: The query can either be a single Coinbase CCY, or both Coinbase and
    Bitfinex, but it cannot be only a Bitfinex CCY. Later releases of this repo
    will support Bitfinex-only order book reconstruction.

    :param query: (dict) query for finding tick history in Arctic TickStore
    :return: (pd.DataFrame) snapshots of limit order books using a stationary
        feature set
    """
    self.db.init_db_connection()

    tick_history = self.db.get_tick_history(query=query)
    if tick_history is None:
        LOGGER.warn("Query returned no data: {}".format(query))
        return None

    loop_length = tick_history.shape[0]

    # number of milliseconds between LOB snapshots
    snapshot_interval_milliseconds = SNAPSHOT_RATE_IN_MICROSECONDS // 1000

    snapshot_list = list()
    last_snapshot_time = None
    tick_types_for_warm_up = {'load_book', 'book_loaded', 'preload'}

    instrument_name = query['ccy'][0]
    assert isinstance(instrument_name, str), \
        "Error: instrument_name must be a string, not -> {}".format(
            type(instrument_name))
    LOGGER.info('querying {}'.format(instrument_name))

    order_book = get_orderbook_from_symbol(symbol=instrument_name)(
        sym=instrument_name)

    start_time = dt.now(TIMEZONE)
    LOGGER.info('Starting get_orderbook_snapshot_history() loop with %i ticks for %s'
                % (loop_length, query['ccy']))

    # loop through all ticks returned from the Arctic Tick Store query.
    for count, tx in enumerate(tick_history.itertuples()):
        # periodically print the number of steps completed
        if count % 250000 == 0:
            elapsed = (dt.now(TIMEZONE) - start_time).seconds
            LOGGER.info('...completed %i loops in %i seconds' % (count, elapsed))

        # convert to dictionary for processing
        tick = tx._asdict()

        # filter out bad ticks
        if 'type' not in tick:
            continue

        # flags for an order book reset
        if tick['type'] in tick_types_for_warm_up:
            order_book.new_tick(msg=tick)
            continue

        # check if the LOB is pre-loaded; if not, skip the message and
        # do NOT process it.
        if order_book.done_warming_up is False:
            LOGGER.info("{} order book is not done warming up: {}".format(
                instrument_name, tick))
            continue

        # timestamp for incoming tick
        new_tick_time = parse(tick.get('system_time'))

        # remove ticks without timestamps (should not exist/happen)
        if new_tick_time is None:
            LOGGER.info('No tick time: {}'.format(tick))
            continue

        # initialize the LOB snapshot timer
        if last_snapshot_time is None:
            # process first ticks and check if they're stale ticks; if so,
            # skip to the next loop.
            order_book.new_tick(tick)

            last_tick_time = order_book.last_tick_time
            if last_tick_time is None:
                continue

            last_tick_time_dt = parse(last_tick_time)
            last_snapshot_time = last_tick_time_dt
            LOGGER.info('{} first tick: {} '.format(order_book.sym, new_tick_time))
            # skip to next loop
            continue

        # calculate the amount of time between the incoming
        # tick and the tick received before that
        diff = self._get_microsecond_delta(new_tick_time, last_snapshot_time)

        # update the LOB, but do not take a LOB snapshot if the tick time is
        # out of sequence. This occurs when pre-loading a LOB with stale tick
        # times in general.
        if diff == -1:
            order_book.new_tick(msg=tick)
            continue

        # derive the number of LOB snapshot insertions for the data buffer.
        multiple = diff // SNAPSHOT_RATE_IN_MICROSECONDS  # 1000000 is 1 second

        # proceed if we have one or more insertions to make
        if multiple <= 0:
            order_book.new_tick(msg=tick)
            continue

        order_book_snapshot = order_book.render_book()
        for i in range(multiple):
            last_snapshot_time += timedelta(
                milliseconds=snapshot_interval_milliseconds)
            snapshot_list.append(np.hstack((last_snapshot_time,
                                            order_book_snapshot)))

        # update the order book with the most recent tick now, so the snapshots
        # are up to date for the next iteration of the loop.
        order_book.new_tick(msg=tick)
        continue

    elapsed = max((dt.now(TIMEZONE) - start_time).seconds, 1)
    LOGGER.info('Completed run_simulation() with %i ticks in %i seconds '
                'at %i ticks/second' % (loop_length, elapsed, loop_length // elapsed))

    orderbook_snapshot_history = pd.DataFrame(
        data=snapshot_list,
        columns=['system_time'] + order_book.render_lob_feature_names())

    # remove NAs from the data set (and log how many rows were dropped;
    # dropna() can only ever remove rows, never add them)
    before_shape = orderbook_snapshot_history.shape[0]
    orderbook_snapshot_history = orderbook_snapshot_history.dropna(axis=0)
    difference_in_records = orderbook_snapshot_history.shape[0] - before_shape
    LOGGER.info("Dropping {} rows due to NA values".format(
        abs(difference_in_records)))

    return orderbook_snapshot_history
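# Usage sketch for the replay entry point; the query keys mirror what
# get_tick_history() is assumed to expect, and all values are illustrative.
def _example_snapshot_history(sim) -> None:
    # `sim` is assumed to be an instance of the Simulator class above.
    query = {
        'ccy': ['BTC-USD'],
        'start_date': 20190101,
        'end_date': 20190102,
    }
    snapshots = sim.get_orderbook_snapshot_history(query=query)
    if snapshots is not None:
        print(snapshots.shape)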