    def import_csv(filename: str) -> pd.DataFrame:
        """
        Import an historical tick file created from the export_to_csv() function.

        :param filename: Full file path including filename
        :return: (panda.DataFrame) historical limit order book data
        """
        start_time = dt.now(tz=TIMEZONE)

        if filename.endswith('.xz'):
            data = pd.read_csv(filepath_or_buffer=filename,
                               index_col=0,
                               compression='xz',
                               engine='c')
        elif filename.endswith('.csv'):
            data = pd.read_csv(filepath_or_buffer=filename,
                               index_col=0,
                               engine='c')
        else:
            LOGGER.warning('Error: file must be a csv or xz')
            data = None

        elapsed = (dt.now(tz=TIMEZONE) - start_time).seconds
        LOGGER.info('Imported %s from a csv in %i seconds' %
                    (filename[-25:], elapsed))
        return data
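
A minimal usage sketch; the file path is hypothetical, and import_csv is assumed
to be callable directly (e.g., as a module-level helper or a static method),
with pd, dt, TIMEZONE, and LOGGER configured as in the surrounding module:

    # hypothetical file produced by export_to_csv()
    lob_history = import_csv('/tmp/BTC-USD_2019-01-01.csv.xz')
    if lob_history is not None:
        print(lob_history.shape)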
Example #2
    def match(self, msg: dict) -> None:
        """
        Change volume of book.

        :param msg: incoming order message
        """
        msg_order_id = msg.get('maker_order_id', None)
        if msg_order_id in self.order_map:
            old_order = self.order_map[msg_order_id]
            order = {
                'order_id': msg_order_id,
                'price': float(msg['price']),
                'size': float(msg['size']),
                'side': msg['side'],
                'time': msg['time'],
                'type': msg['type'],
                'product_id': msg['product_id']
            }
            price = order['price']
            if price in self.price_dict:
                remove_size = order['size']
                remaining_size = old_order['size'] - remove_size
                order['size'] = remaining_size
                self.order_map[old_order['order_id']] = order
                old_order_price = old_order.get('price', None)
                self.price_dict[price].add_market(quantity=remove_size,
                                                  price=old_order_price)
                self.price_dict[price].remove_quantity(quantity=remove_size,
                                                       price=old_order_price)
            else:
                LOGGER.info('\nmatch: price not in tree already [%s]\n' % msg)
        elif RECORD_DATA:
            LOGGER.warning('\n%s match: order id cannot be found for %s\n' %
                           (self.sym, msg))
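
For reference, a sketch of the message shape this handler consumes; the values
below are hypothetical, but the keys mirror the ones read above:

    msg = {
        'maker_order_id': 'a1b2c3',                # hypothetical order id
        'price': '3500.00',
        'size': '0.25',
        'side': 'buy',
        'time': '2019-01-01T00:00:00.000Z',
        'type': 'match',
        'product_id': 'BTC-USD',
    }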
Example #3
    def _process_book(self, msg):
        """
        Internal method to process FULL BOOK market data
        :param msg: incoming tick
        :return: False if re-subscribe is required
        """
        # check for a heartbeat
        if msg[1] == 'hb':
            # render_book('heart beat %s' % msg)
            return True

        # order book message (initial snapshot)
        elif np.shape(msg[1])[0] > 3:
            LOGGER.info('%s loading book...' % self.sym)
            self.clear_book()
            self._load_book(msg)
            return True

        else:
            # otherwise, the incoming message is an order update
            order = {
                "order_id": int(msg[1][0]),
                "price": float(msg[1][1]),
                "size": float(abs(msg[1][2])),
                "side": 'sell' if float(msg[1][2]) < 0.0 else 'buy',
                "product_id": self.sym,
                "type": 'update'
            }

            self.db.new_tick(order)

            # order should be removed from the book
            if order['price'] == 0.0:
                if order['side'] == 'buy':
                    self.bids.remove_order(order)
                elif order['side'] == 'sell':
                    self.asks.remove_order(order)

            # order is a new order or size update for bids
            elif order['side'] == 'buy':
                if order['order_id'] in self.bids.order_map:
                    self.bids.change(order)
                else:
                    self.bids.insert_order(order)

            # order is a new order or size update for asks
            elif order['side'] == 'sell':
                if order['order_id'] in self.asks.order_map:
                    self.asks.change(order)
                else:
                    self.asks.insert_order(order)

            # unhandled msg
            else:
                LOGGER.warning('\nUnhandled list msg %s' % msg)

            return True
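
A sketch of the raw list messages handled above; the channel id and values are
hypothetical. Per the indexing in the code, msg[1] is either the 'hb' heartbeat
token or an [order_id, price, amount] triple, where a negative amount denotes
a sell:

    heartbeat = [17082, 'hb']
    update = [17082, [1234567890, 3500.0, -0.25]]  # sell 0.25 @ 3500.0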
Example #4
    def _process_book_replay(self, order):
        """
        Internal method to process FULL BOOK market data
        :param order: incoming tick
        :return: False if resubscription in required
        """
        # clean up the datatypes
        order['price'] = float(order['price'])
        order['size'] = float(order['size'])

        if order['type'] == 'update':
            # order should be removed from the book
            if order['price'] == 0.0:
                if order['side'] == 'buy':
                    self.bids.remove_order(order)
                elif order['side'] == 'sell':
                    self.asks.remove_order(order)
            # order is a new order or size update for bids
            elif order['side'] == 'buy':
                if order['order_id'] in self.bids.order_map:
                    self.bids.change(order)
                else:
                    self.bids.insert_order(order)
            # order is a new order or size update for asks
            elif order['side'] == 'sell':
                if order['order_id'] in self.asks.order_map:
                    self.asks.change(order)
                else:
                    self.asks.insert_order(order)
            # unhandled tick message
            else:
                LOGGER.warning('_process_book_replay: unhandled message\n%s' % str(order))

        elif order['type'] == 'preload':
            if order['side'] == 'buy':
                self.bids.insert_order(order)
            else:
                self.asks.insert_order(order)

        elif order['type'] == 'te':  # 'te' = trade executed
            trade_notional = order['price'] * order['size']
            if order['side'] == 'upticks':
                self.buy_tracker.add(notional=trade_notional)
                self.asks.match(order)
            else:
                self.sell_tracker.add(notional=trade_notional)
                self.bids.match(order)

        else:
            LOGGER.warning('\n_process_book_replay() Unhandled list msg %s' % order)

        return True
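
As a worked example of the 'te' branch above (hypothetical numbers): a trade
with price 3500.0 and size 0.5 yields a notional of 1750.0, which is added to
buy_tracker when the side is 'upticks' and to sell_tracker otherwise.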
Example #5
    def init_db_connection(self) -> None:
        """
        Initiate database connection to Arctic.

        :return: (void)
        """
        LOGGER.info("init_db_connection for {}...".format(self.sym))
        try:
            self.db = Arctic(MONGO_ENDPOINT)
            self.db.initialize_library(ARCTIC_NAME, lib_type=TICK_STORE)
            self.collection = self.db[ARCTIC_NAME]
        except PyMongoError as e:
            LOGGER.warning("Database.PyMongoError() --> {}".format(e))
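
A minimal sketch of the Arctic calls involved, assuming a local MongoDB and the
arctic package; the string literals below are stand-ins for the MONGO_ENDPOINT
and ARCTIC_NAME constants used above:

    from arctic import Arctic, TICK_STORE

    store = Arctic('localhost')  # MONGO_ENDPOINT stand-in
    store.initialize_library('crypto.tickstore', lib_type=TICK_STORE)
    library = store['crypto.tickstore']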
Example #6
    async def subscribe(self) -> None:
        """
        Subscribe to full order book.

        :return: (void)
        """
        try:
            self.ws = await websockets.connect(self.ws_endpoint)

            if self.request is not None:
                LOGGER.info('Requesting Book: {}'.format(self.request))
                await self.ws.send(self.request)
                LOGGER.info('BOOK %s: %s subscription request sent.' %
                            (self.exchange.upper(), self.sym))

            if self.trades_request is not None:
                LOGGER.info('Requesting Trades: {}'.format(
                    self.trades_request))
                await self.ws.send(self.trades_request)
                LOGGER.info('TRADES %s: %s subscription request sent.' %
                            (self.exchange.upper(), self.sym))

            self.last_subscribe_time = dt.now()

            # add incoming messages to a queue, which is consumed and processed
            # in the run() method.
            while True:
                self.queue.put(json.loads(await self.ws.recv()))

        except websockets.ConnectionClosed as exception:
            LOGGER.warning('%s: subscription exception %s' %
                           (self.exchange, exception))
            self.retry_counter += 1
            elapsed = (dt.now() - self.last_subscribe_time).seconds

            if elapsed < 10:
                sleep_time = max(10 - elapsed, 1)
                LOGGER.info('%s - %s is sleeping %i seconds...' %
                            (self.exchange, self.sym, sleep_time))
                time.sleep(sleep_time)

            if self.retry_counter < self.max_retries:
                LOGGER.info('%s: retrying connection... attempt #%i' %
                            (self.exchange, self.retry_counter))
                await self.subscribe()  # recursion
            else:
                LOGGER.warning('%s: %s ran out of reconnection attempts. '
                               'Already tried %i times.' %
                               (self.exchange, self.sym, self.retry_counter))
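
A minimal sketch of driving this coroutine; 'client' is hypothetical and is
assumed to be wired with the ws_endpoint, request, and queue attributes used
above:

    import asyncio

    loop = asyncio.get_event_loop()
    loop.run_until_complete(client.subscribe())  # blocks while streaming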
Example #7
    def _query_arctic(self, ccy: str, start_date: int,
                      end_date: int) -> Union[pd.DataFrame, None]:
        """
        Query database and return LOB messages starting from LOB reconstruction.

        :param ccy: currency symbol
        :param start_date: YYYYMMDD start date
        :param end_date: YYYYMMDD end date
        :return: (pd.DataFrame) results found in database
        """
        assert self.collection is not None, \
            "Arctic.Collection() must not be null."

        start_time = dt.now(tz=self.tz)

        try:
            LOGGER.info(
                '\nGetting {} data from Arctic Tick Store...'.format(ccy))
            cursor = self.collection.read(symbol=ccy,
                                          date_range=DateRange(
                                              start_date, end_date))

            # filter ticks for the first LOAD_BOOK message
            #   (starting point for order book reconstruction)
            # min_datetime = cursor.loc[cursor.type == 'load_book'].index[0]
            dates = np.unique(
                cursor.loc[cursor.type == 'load_book'].index.date)
            start_index = cursor.loc[((cursor.index.date == dates[0]) &
                                      (cursor.type == 'load_book'))].index[-1]
            # cursor = cursor.loc[cursor.index >= min_datetime]
            cursor = cursor.loc[cursor.index >= start_index]

            elapsed = (dt.now(tz=self.tz) - start_time).seconds
            LOGGER.info('Completed querying %i %s records in %i seconds' %
                        (cursor.shape[0], ccy, elapsed))

        except Exception as ex:
            cursor = None
            LOGGER.warning('Simulator._query_arctic() threw an exception: \n%s' %
                           str(ex))

        return cursor
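
A minimal usage sketch; 'sim' is hypothetical and assumed to hold an
initialized Arctic collection, with dates given as YYYYMMDD integers per the
docstring:

    ticks = sim._query_arctic(ccy='BTC-USD', start_date=20190101,
                              end_date=20190102)
    if ticks is not None:
        print(ticks.shape)
Example #8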
    def new_tick(self, msg: dict):
        """
        Method to process incoming ticks.

        :param msg: incoming tick
        :return: False if there is an exception (or need to reconnect the WebSocket)
        """
        # check for data messages, which only come in lists
        if isinstance(msg, list):
            if msg[0] == self.channel_id['book']:
                return self._process_book(msg)
            elif msg[0] == self.channel_id['trades']:
                return self._process_trades(msg)

        # non-data messages
        elif isinstance(msg, dict):
            msg_type = msg.get('type')
            if 'event' in msg:
                return self._process_events(msg)
            elif msg_type == 'te':
                self.last_tick_time = msg.get('system_time', None)
                return self._process_trades_replay(msg)
            elif msg_type in ['update', 'preload']:
                self.last_tick_time = msg.get('system_time', None)
                return self._process_book_replay(msg)
            elif msg_type == 'load_book':
                self.clear_book()
                return True
            elif msg_type == 'book_loaded':
                self.bids.warming_up = False
                self.asks.warming_up = False
                return True
            else:
                LOGGER.info(
                    'new_tick() does not know how to process message = %s'
                    % str(msg))

        # unhandled message type
        else:
            LOGGER.warning('unhandled message\n%s\n' % msg)
            return True
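
For reference, a sketch of the dict-shaped replay messages routed above; the
values are hypothetical, and 'type' may also be 'te', 'preload', 'load_book',
or 'book_loaded':

    replay_msg = {
        'type': 'update',
        'system_time': '2019-01-01T00:00:00.000Z',
    }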
Example #9
    def new_tick(self, msg: dict) -> bool:
        """
        Method to process incoming ticks.

        :param msg: incoming tick
        :return: False if there is an exception
        """
        message_type = msg['type']
        if 'sequence' not in msg:
            if message_type == 'subscriptions':
                # request an order book snapshot after the
                #   websocket feed is established
                LOGGER.info('Coinbase subscriptions successful for: %s' % self.sym)
                self.load_book()
            return True
        elif np.isnan(msg['sequence']):
            # this situation appears during data replays
            #   (and not in live data feeds)
            LOGGER.warning('\n%s found a NaN in the sequence' % self.sym)
            return True

        # check the incoming message sequence to verify if there
        # is a dropped/missed message.
        # If so, request a new orderbook snapshot from Coinbase Pro.
        new_sequence = int(msg['sequence'])
        self.diff = new_sequence - self.sequence

        if self.diff == 1:
            # tick sequences increase by an increment of one
            self.sequence = new_sequence
        elif message_type in ['load_book', 'book_loaded', 'preload']:
            # message types used for data replays
            self.sequence = new_sequence
        elif self.diff <= 0:
            if message_type in ['received', 'open', 'done', 'match', 'change']:
                LOGGER.info('%s [%s] has a stale tick: current %i | incoming %i' % (
                    self.sym, message_type, self.sequence, new_sequence))
                return True
            else:
                LOGGER.warning('UNKNOWN-%s %s has a stale tick: current %i | incoming %i' % (
                    self.sym, message_type, self.sequence, new_sequence))
                return True
        else:  # when the tick sequence difference is greater than 1
            LOGGER.info('sequence gap: %s missing %i messages. new_sequence: %i [%s]\n' %
                        (self.sym, self.diff, new_sequence, message_type))
            self.sequence = new_sequence
            return False

        # persist data to Arctic Tick Store
        self.db.new_tick(msg)
        self.last_tick_time = msg.get('time', None)
        # make sure CONFIGS.RECORDING is false when replaying data

        side = msg['side']
        if message_type == 'received':
            return True

        elif message_type == 'open':
            if side == 'buy':
                self.bids.insert_order(msg)
                return True
            else:
                self.asks.insert_order(msg)
                return True

        elif message_type == 'done':
            if side == 'buy':
                self.bids.remove_order(msg)
                return True
            else:
                self.asks.remove_order(msg)
                return True

        elif message_type == 'match':
            trade_notional = float(msg['price']) * float(msg['size'])
            if side == 'buy':  # trades matched on the bids book are considered sells
                self.sell_tracker.add(notional=trade_notional)
                self.bids.match(msg)
                return True
            else:  # trades matched on the asks book are considered buys
                self.buy_tracker.add(notional=trade_notional)
                self.asks.match(msg)
                return True

        elif message_type == 'change':
            if side == 'buy':
                self.bids.change(msg)
                return True
            else:
                self.asks.change(msg)
                return True

        elif message_type == 'preload':
            if side == 'buy':
                self.bids.insert_order(msg)
                return True
            else:
                self.asks.insert_order(msg)
                return True

        elif message_type == 'load_book':
            self.clear_book()
            return True

        elif message_type == 'book_loaded':
            self.bids.warming_up = self.asks.warming_up = False
            LOGGER.info("Book finished loading at {}".format(self.last_tick_time))
            return True

        else:
            LOGGER.warning('\nunhandled message type\n%s\n' % str(msg))
            return False
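
A worked example of the sequence check above (hypothetical numbers): with
self.sequence == 100, an incoming 'open' tick with sequence 101 gives
diff == 1 and is applied to the book; sequence 99 gives diff <= 0 and is
ignored as stale; sequence 105 gives diff == 5, so new_tick() returns False
and the caller should request a fresh order book snapshot.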
Example #10
    def get_orderbook_snapshot_history(self,
                                       query: dict) -> Union[pd.DataFrame, None]:
        """
        Function to replay historical market data and generate the features used for
        reinforcement learning & training.

        NOTE:
            The query can either be a single Coinbase CCY, or both Coinbase and Bitfinex,
            but it cannot be only a Bitfinex CCY. Later releases of this repo will
            support Bitfinex only order book reconstruction.

        :param query: (dict) query for finding tick history in Arctic TickStore
        :return: (pd.DataFrame) snapshots of limit order books using a
                stationary feature set
        """
        self.db.init_db_connection()

        tick_history = self.db.get_tick_history(query=query)
        if tick_history is None:
            LOGGER.warn("Query returned no data: {}".format(query))
            return None

        loop_length = tick_history.shape[0]

        # number of milliseconds between LOB snapshots
        snapshot_interval_milliseconds = SNAPSHOT_RATE_IN_MICROSECONDS // 1000

        snapshot_list = list()
        last_snapshot_time = None
        tick_types_for_warm_up = {'load_book', 'book_loaded', 'preload'}

        instrument_name = query['ccy'][0]
        assert isinstance(instrument_name, str), \
            "Error: instrument_name must be a string, not -> {}".format(
                type(instrument_name))

        LOGGER.info('querying {}'.format(instrument_name))

        order_book = get_orderbook_from_symbol(symbol=instrument_name)(
            sym=instrument_name)

        start_time = dt.now(TIMEZONE)
        LOGGER.info(
            'Starting get_orderbook_snapshot_history() loop with %i ticks for %s'
            % (loop_length, query['ccy']))

        # loop through all ticks returned from the Arctic Tick Store query.
        for count, tx in enumerate(tick_history.itertuples()):

            # periodically log the number of steps completed
            if count % 250000 == 0:
                elapsed = (dt.now(TIMEZONE) - start_time).seconds
                LOGGER.info('...completed %i loops in %i seconds' %
                            (count, elapsed))

            # convert to dictionary for processing
            tick = tx._asdict()

            # filter out bad ticks
            if 'type' not in tick:
                continue

            # flags for an order book reset
            if tick['type'] in tick_types_for_warm_up:
                order_book.new_tick(msg=tick)
                continue

            # if the LOB is not pre-loaded yet, skip the message and do NOT process it
            if order_book.done_warming_up is False:
                LOGGER.info("{} order book is not done warming up: {}".format(
                    instrument_name, tick))
                continue

            # timestamp for the incoming tick
            raw_tick_time = tick.get('system_time')

            # remove ticks without timestamps (should not exist/happen)
            if raw_tick_time is None:
                LOGGER.info('No tick time: {}'.format(tick))
                continue

            new_tick_time = parse(raw_tick_time)

            # initialize the LOB snapshot timer
            if last_snapshot_time is None:
                # process first ticks and check if they're stale ticks; if so,
                # skip to the next loop.
                order_book.new_tick(tick)

                last_tick_time = order_book.last_tick_time
                if last_tick_time is None:
                    continue

                last_tick_time_dt = parse(last_tick_time)
                last_snapshot_time = last_tick_time_dt
                LOGGER.info('{} first tick: {} '.format(
                    order_book.sym, new_tick_time))
                # skip to next loop
                continue

            # calculate the amount of time between the incoming
            #   tick and tick received before that
            diff = self._get_microsecond_delta(new_tick_time,
                                               last_snapshot_time)

            # update the LOB, but do not take a LOB snapshot if the tick time is
            # out of sequence. This typically occurs when pre-loading a LOB with
            # stale tick times.
            if diff == -1:
                order_book.new_tick(msg=tick)
                continue

            # derive the number of LOB snapshot insertions for the data buffer.
            multiple = diff // SNAPSHOT_RATE_IN_MICROSECONDS  # 1000000 is 1 second
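            # worked example (hypothetical numbers): with diff = 2,500,000
            #   microseconds and SNAPSHOT_RATE_IN_MICROSECONDS = 1,000,000,
            #   multiple = 2, so two snapshots are buffered 1,000 ms apart below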

            # if there are no snapshot insertions to make, just update the book
            #   and continue to the next tick
            if multiple <= 0:
                order_book.new_tick(msg=tick)
                continue

            order_book_snapshot = order_book.render_book()
            for i in range(multiple):
                last_snapshot_time += timedelta(
                    milliseconds=snapshot_interval_milliseconds)
                snapshot_list.append(
                    np.hstack((last_snapshot_time, order_book_snapshot)))

            # update the order book with the most recent tick now, so the
            # snapshots are up to date for the next iteration of the loop.
            order_book.new_tick(msg=tick)

        elapsed = max((dt.now(TIMEZONE) - start_time).seconds, 1)
        LOGGER.info('Completed get_orderbook_snapshot_history() with %i ticks '
                    'in %i seconds at %i ticks/second' %
                    (loop_length, elapsed, loop_length // elapsed))

        orderbook_snapshot_history = pd.DataFrame(
            data=snapshot_list,
            columns=['system_time'] + order_book.render_lob_feature_names())

        # remove NAs from the data set (and log the amount)
        before_shape = orderbook_snapshot_history.shape[0]
        orderbook_snapshot_history = orderbook_snapshot_history.dropna(axis=0)
        difference_in_records = orderbook_snapshot_history.shape[0] - before_shape
        LOGGER.info("{} {} rows due to NA values".format(
            'Dropping' if difference_in_records <= 0 else 'Adding',
            abs(difference_in_records)))

        return orderbook_snapshot_history
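
A minimal usage sketch; 'sim' is hypothetical, and only the 'ccy' key is read
directly above, so the date keys are assumptions about what get_tick_history()
understands:

    query = {
        'ccy': ['BTC-USD'],      # list whose first entry is the instrument
        'start_date': 20190101,  # hypothetical key
        'end_date': 20190102,    # hypothetical key
    }
    snapshots = sim.get_orderbook_snapshot_history(query=query)
    if snapshots is not None:
        snapshots.to_csv('lob_snapshots.csv', index=False)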