Beispiel #1
0
    def __init__(self, transaction_fee=LIMIT_ORDER_FEE, **kwargs):
        """
        Environment designed for automated market making.
        :param kwargs: refer to BaseEnvironment.py
        """
        super(MarketMaker, self).__init__(**kwargs)

        # environment attributes to override in sub-class
        self.actions = np.eye(17, dtype=np.float32)

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=self.max_position,
                             transaction_fee=transaction_fee)

        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10,
                                            high=10,
                                            shape=self.observation.shape,
                                            dtype=np.float32)

        print(
            '{} MarketMaker #{} instantiated\nobservation_space: {}'.format(
                self.sym, self._seed, self.observation_space.shape),
            'reward_type = {}'.format(self.reward_type))
Beispiel #2
0
    def test_case_one(self):
        print('\nTest_Case_One')

        test_position = Broker()
        midpoint = 100.
        fee = .003

        order_open = Order(ccy='BTC-USD', side='long', price=midpoint, step=1)
        test_position.add(order=order_open)

        self.assertEqual(1, test_position.long_inventory.position_count)
        print('LONG Unrealized_pnl: %f' %
              test_position.long_inventory.get_unrealized_pnl())

        self.assertEqual(0, test_position.short_inventory.position_count)
        self.assertEqual(0.,
                         test_position.short_inventory.get_unrealized_pnl())

        order_close = Order(ccy='BTC-USD',
                            side='long',
                            price=midpoint + (midpoint * fee * 5),
                            step=100)

        test_position.remove(order=order_close)
        self.assertEqual(0, test_position.long_inventory.position_count)
        print('LONG Unrealized_pnl: %f' %
              test_position.long_inventory.get_unrealized_pnl())

        self.assertEqual(test_position.short_inventory.position_count, 0)
        self.assertEqual(test_position.short_inventory.get_unrealized_pnl(),
                         0.)
        print('LONG Realized_pnl: %f' % test_position.get_realized_pnl())
Beispiel #3
0
    def test_case_two(self):
        print('\nTest_Case_Two')

        test_position = Broker()
        midpoint = 100.
        fee = .003

        order_open = Order(ccy='BTC-USD', side='short', price=midpoint, step=1)
        test_position.add(order=order_open)
        self.assertEqual(1, test_position.short_inventory.position_count)
        self.assertEqual(0, test_position.long_inventory.position_count)
        self.assertEqual(0., test_position.long_inventory.get_unrealized_pnl())
        print('SHORT Unrealized_pnl: %f' %
              test_position.short_inventory.get_unrealized_pnl())

        order_close = Order(ccy='BTC-USD',
                            side='short',
                            price=midpoint - (midpoint * fee * 15),
                            step=100)
        test_position.remove(order=order_close)
        self.assertEqual(0, test_position.short_inventory.position_count)
        self.assertEqual(0, test_position.long_inventory.position_count)
        self.assertEqual(0., test_position.long_inventory.get_unrealized_pnl())
        print('SHORT Unrealized_pnl: %f' %
              test_position.short_inventory.get_unrealized_pnl())
        print('SHORT Realized_pnl: %f' % test_position.get_realized_pnl())
Beispiel #4
0
    def __init__(self,
                 *,
                 fitting_file='LTC-USD_2019-04-07.csv.xz',
                 testing_file='LTC-USD_2019-04-08.csv.xz',
                 step_size=1,
                 max_position=5,
                 window_size=10,
                 seed=1,
                 action_repeats=10,
                 training=True,
                 format_3d=False,
                 z_score=True,
                 reward_type='trade_completion',
                 scale_rewards=True):
        super(PriceJump, self).__init__(fitting_file=fitting_file,
                                        testing_file=testing_file,
                                        step_size=step_size,
                                        max_position=max_position,
                                        window_size=window_size,
                                        seed=seed,
                                        action_repeats=action_repeats,
                                        training=training,
                                        format_3d=format_3d,
                                        z_score=z_score,
                                        reward_type=reward_type,
                                        scale_rewards=scale_rewards)

        self.actions = np.eye(3, dtype=np.float32)

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position,
                             transaction_fee=MARKET_ORDER_FEE)

        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10,
                                            high=10,
                                            shape=self.observation.shape,
                                            dtype=np.float32)

        print(
            '{} PriceJump #{} instantiated.\nself.observation_space.shape : {}'
            .format(self.sym, self._seed, self.observation_space.shape))
Beispiel #5
0
    def test_case_three(self):
        print('\nTest_Case_Three')

        test_position = Broker(5)
        midpoint = 100.

        for i in range(10):
            order_open = MarketOrder(ccy='BTC-USD',
                                     side='long',
                                     price=midpoint - i,
                                     step=i)
            test_position.add(order=order_open)

        self.assertEqual(5, test_position.long_inventory.position_count)
        self.assertEqual(0, test_position.short_inventory.position_count)
        print('Confirm we have 5 positions: %i' %
              test_position.long_inventory.position_count)

        for i in range(10):
            order_open = MarketOrder(ccy='BTC-USD',
                                     side='long',
                                     price=midpoint + i,
                                     step=i)
            test_position.remove(order=order_open)

        self.assertEqual(0, test_position.long_inventory.position_count)
        self.assertEqual(0, test_position.short_inventory.position_count)
Beispiel #6
0
    def test_avg_exe(self):
        test_position = Broker()

        # perform a partial fill on the first order
        step = 0
        bid_price = 101.
        ask_price = 102.
        buy_volume = 500
        sell_volume = 500
        pnl = 0.

        test_position.add(order=LimitOrder(ccy='BTC-USD',
                                           side='long',
                                           price=bid_price,
                                           step=step,
                                           queue_ahead=0))

        print("taking first step...")
        step += 1
        pnl += test_position.step_limit_order_pnl(bid_price=bid_price,
                                                  ask_price=ask_price,
                                                  buy_volume=buy_volume,
                                                  sell_volume=sell_volume,
                                                  step=step)
        self.assertEqual(500, test_position.long_inventory.order.executed)
        self.assertEqual(0, test_position.long_inventory_count)

        # if order gets filled with a bid below the order's price, the order should NOT
        # receive any price improvement during the execution.
        bid_price = 99.
        ask_price = 100.
        test_position.add(order=LimitOrder(ccy='BTC-USD',
                                           side='long',
                                           price=bid_price,
                                           step=step,
                                           queue_ahead=0))

        print("taking second step...")
        step += 1
        pnl += test_position.step_limit_order_pnl(bid_price=bid_price,
                                                  ask_price=ask_price,
                                                  buy_volume=buy_volume,
                                                  sell_volume=sell_volume,
                                                  step=step)
        self.assertEqual(1, test_position.long_inventory_count)
        self.assertEqual(100., test_position.long_inventory.average_price)
        print("PnL: {}".format(pnl))
Beispiel #7
0
    def test_long_pnl(self):
        test_position = Broker()
        step = 0
        bid_price = 101.
        ask_price = 102.
        buy_volume = 100
        sell_volume = 100
        pnl = 0.

        def walk_forward(pnl, step, bid_price, ask_price, buy_volume, sell_volume,
                         down=True):
            for i in range(50):
                step += 1
                if down:
                    bid_price *= 0.99
                    ask_price *= 0.99
                else:
                    bid_price *= 1.01
                    ask_price *= 1.01

                pnl, is_long_order_filled, is_short_order_filled = \
                    test_position.step_limit_order_pnl(
                    bid_price=bid_price, ask_price=ask_price, buy_volume=buy_volume,
                    sell_volume=sell_volume, step=step)
                pnl += pnl
                if i % 10 == 0:
                    print('bid_price={:.2f} | ask_price={:.2f}'.format(bid_price,
                                                                       ask_price))
            return step, bid_price, ask_price, buy_volume, sell_volume, pnl

        test_position.add(
            order=LimitOrder(ccy='BTC-USD', side='long', price=100., step=step,
                             queue_ahead=1000))

        step, _, _, buy_volume, sell_volume, pnl = walk_forward(pnl, step, bid_price,
                                                                ask_price, buy_volume,
                                                                sell_volume, down=True)
        self.assertEqual(1, test_position.long_inventory_count)

        test_position.add(
            order=LimitOrder(ccy='BTC-USD', side='short', price=105., step=step,
                             queue_ahead=0))
        _, _, _, _, _, pnl = walk_forward(pnl, step, bid_price, ask_price, buy_volume,
                                          sell_volume, down=False)
        realized_pnl = test_position.realized_pnl

        self.assertEqual(0.05, realized_pnl,
                         "Expected Realized PnL of 0.5 and got {}".format(realized_pnl))
        self.assertEqual(0,
                         test_position.short_inventory_count +
                         test_position.long_inventory_count)
        print("PnL: {}".format(pnl))
Beispiel #8
0
class PriceJump(BaseEnvironment):
    id = 'long-short-v0'

    def __init__(self,
                 *,
                 fitting_file='LTC-USD_2019-04-07.csv.xz',
                 testing_file='LTC-USD_2019-04-08.csv.xz',
                 step_size=1,
                 max_position=5,
                 window_size=10,
                 seed=1,
                 action_repeats=10,
                 training=True,
                 format_3d=False,
                 z_score=True,
                 reward_type='trade_completion',
                 scale_rewards=True):
        super(PriceJump, self).__init__(fitting_file=fitting_file,
                                        testing_file=testing_file,
                                        step_size=step_size,
                                        max_position=max_position,
                                        window_size=window_size,
                                        seed=seed,
                                        action_repeats=action_repeats,
                                        training=training,
                                        format_3d=format_3d,
                                        z_score=z_score,
                                        reward_type=reward_type,
                                        scale_rewards=scale_rewards)

        self.actions = np.eye(3, dtype=np.float32)

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position,
                             transaction_fee=MARKET_ORDER_FEE)

        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10,
                                            high=10,
                                            shape=self.observation.shape,
                                            dtype=np.float32)

        print(
            '{} PriceJump #{} instantiated.\nself.observation_space.shape : {}'
            .format(self.sym, self._seed, self.observation_space.shape))

    def __str__(self):
        return '{} | {}-{}'.format(PriceJump.id, self.sym, self._seed)

    def map_action_to_broker(self, action: int):
        """
        Create or adjust orders per a specified action and adjust for penalties.
        :param action: (int) current step's action
        :return: (float) reward
        """
        reward = pnl = 0.0
        discouragement = 0.000000000001

        if action == 0:  # do nothing
            reward += discouragement

        elif action == 1:  # buy
            if self.broker.short_inventory_count > 0:
                order = MarketOrder(ccy=self.sym,
                                    side='short',
                                    price=self.midpoint,
                                    step=self.local_step_number)
                pnl += self.broker.remove(order=order)

            elif self.broker.long_inventory_count >= 0:
                order = MarketOrder(
                    ccy=self.sym,
                    side='long',
                    price=self.midpoint,
                    # price_fee_adjusted,
                    step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement

            else:
                print(
                    ('gym_trading.get_reward() ' + 'Error for action #{} - ' +
                     'unable to place an order with broker').format(action))

        elif action == 2:  # sell
            if self.broker.long_inventory_count > 0:
                order = MarketOrder(ccy=self.sym,
                                    side='long',
                                    price=self.midpoint,
                                    step=self.local_step_number)
                pnl += self.broker.remove(order=order)

            elif self.broker.short_inventory_count >= 0:
                order = MarketOrder(ccy=self.sym,
                                    side='short',
                                    price=self.midpoint,
                                    step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement

            else:
                print(
                    ('gym_trading.get_reward() ' + 'Error for action #{} - ' +
                     'unable to place an order with broker').format(action))

        else:
            print(('Unknown action to take in get_reward(): ' +
                   'action={} | midpoint={}').format(action, self.midpoint))

        return reward, pnl

    def _create_position_features(self):
        """
        Create an array with features related to the agent's inventory
        :return: (np.array) normalized position features
        """
        return np.array(
            (self.broker.long_inventory.position_count / self.max_position,
             self.broker.short_inventory.position_count / self.max_position,
             self.broker.realized_pnl / self.broker.reward_scale,
             self.broker.get_unrealized_pnl(self.best_bid, self.best_ask) /
             self.broker.reward_scale),
            dtype=np.float32)
Beispiel #9
0
class MarketMaker(BaseEnvironment):
    id = 'market-maker-v0'

    def __init__(self, transaction_fee=LIMIT_ORDER_FEE, **kwargs):
        """
        Environment designed for automated market making.
        :param kwargs: refer to BaseEnvironment.py
        """
        super(MarketMaker, self).__init__(**kwargs)

        # environment attributes to override in sub-class
        self.actions = np.eye(17, dtype=np.float32)

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=self.max_position,
                             transaction_fee=transaction_fee)

        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10,
                                            high=10,
                                            shape=self.observation.shape,
                                            dtype=np.float32)

        print(
            '{} MarketMaker #{} instantiated\nobservation_space: {}'.format(
                self.sym, self._seed, self.observation_space.shape),
            'reward_type = {}'.format(self.reward_type))

    def __str__(self):
        return '{} | {}-{}'.format(MarketMaker.id, self.sym, self._seed)

    def map_action_to_broker(self, action: int):
        """
        Create or adjust orders per a specified action and adjust for penalties.
        :param action: (int) current step's action
        :return: (float) reward
        """
        reward = pnl = 0.0
        discouragement = 0.000000000001

        if action == 0:  # do nothing
            reward += discouragement

        elif action == 1:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=0,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='short')

        elif action == 2:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=0,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='short')
        elif action == 3:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=0,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='short')

        elif action == 4:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=0,
                                                  side='short')

        elif action == 5:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='short')

        elif action == 6:

            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='short')
        elif action == 7:

            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='short')

        elif action == 8:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=0,
                                                  side='short')

        elif action == 9:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='short')

        elif action == 10:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='short')

        elif action == 11:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='short')

        elif action == 12:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=0,
                                                  side='short')

        elif action == 13:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='short')

        elif action == 14:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='short')

        elif action == 15:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='short')
        elif action == 16:
            reward += self.broker.flatten_inventory(self.best_bid,
                                                    self.best_ask)
        else:
            print("L'action n'exist pas ! Il faut faire attention !!!")

        return reward, pnl

    def _create_position_features(self):
        """
        Create an array with features related to the agent's inventory
        :return: (np.array) normalized position features
        """
        return np.array(
            (self.broker.long_inventory.position_count / self.max_position,
             self.broker.short_inventory.position_count / self.max_position,
             self.broker.get_total_pnl(self.best_bid, self.best_ask) *
             self.broker.reward_scale,
             self.broker.long_inventory.get_unrealized_pnl(self.best_bid) *
             self.broker.reward_scale,
             self.broker.short_inventory.get_unrealized_pnl(self.best_ask) *
             self.broker.reward_scale,
             self.broker.get_long_order_distance_to_midpoint(
                 midpoint=self.midpoint) * self.broker.reward_scale,
             self.broker.get_short_order_distance_to_midpoint(
                 midpoint=self.midpoint) * self.broker.reward_scale,
             *self.broker.get_queues_ahead_features()),
            dtype=np.float32)

    def _create_order_at_level(self,
                               reward: float,
                               discouragement: float,
                               level=0,
                               side='long'):
        """
        Create a new order at a specified LOB level
        :param reward: (float) current step reward
        :param discouragement: (float) penalty deducted from reward for erroneous actions
        :param level: (int) level in the limit order book
        :param side: (str) direction of trade e.g., 'long' or 'short'
        :return: (float) reward with penalties added
        """
        adjustment = 1 if level > 0 else 0

        if side == 'long':
            best = self._get_book_data(MarketMaker.best_bid_index - level)
            denormalized_best = round(self.midpoint * (best + 1), 2)
            inside_best = self._get_book_data(MarketMaker.best_bid_index -
                                              level + adjustment)
            denormalized_inside_best = round(self.midpoint * (inside_best + 1),
                                             2)
            plus_one = denormalized_best + 0.01

            if denormalized_inside_best == plus_one:
                # stick to best bid
                bid_price = denormalized_best
                # since LOB is rendered as cumulative notional, deduct the prior price
                # level to derive the notional value of orders ahead in the queue
                bid_queue_ahead = self._get_book_data(
                    MarketMaker.notional_bid_index -
                    level) - self._get_book_data(
                        MarketMaker.notional_bid_index - level + adjustment)
            else:
                # insert a cent ahead to jump a queue
                bid_price = plus_one
                bid_queue_ahead = 0.

            bid_order = LimitOrder(ccy=self.sym,
                                   side='long',
                                   price=bid_price,
                                   step=self.local_step_number,
                                   queue_ahead=bid_queue_ahead)

            if self.broker.add(order=bid_order) is False:
                reward -= discouragement
            else:
                reward += discouragement

        if side == 'short':
            best = self._get_book_data(MarketMaker.best_ask_index + level)
            denormalized_best = round(self.midpoint * (best + 1), 2)
            inside_best = self._get_book_data(MarketMaker.best_ask_index +
                                              level - adjustment)
            denormalized_inside_best = round(self.midpoint * (inside_best + 1),
                                             2)
            plus_one = denormalized_best - 0.01

            if denormalized_inside_best == plus_one:
                ask_price = denormalized_best
                # since LOB is rendered as cumulative notional, deduct the prior price
                # level to derive the notional value of orders ahead in the queue
                ask_queue_ahead = self._get_book_data(
                    MarketMaker.notional_ask_index +
                    level) - self._get_book_data(
                        MarketMaker.notional_ask_index + level - adjustment)
            else:
                ask_price = plus_one
                ask_queue_ahead = 0.

            ask_order = LimitOrder(ccy=self.sym,
                                   side='short',
                                   price=ask_price,
                                   step=self.local_step_number,
                                   queue_ahead=ask_queue_ahead)

            if self.broker.add(order=ask_order) is False:
                reward -= discouragement
            else:
                reward += discouragement

        return reward
Beispiel #10
0
    def test_queues_ahead_features(self):
        test_position = Broker()

        # perform a partial fill on the first order
        step = 0
        bid_price = 100.
        ask_price = 200.
        buy_volume = 0
        sell_volume = 0

        order_open_long = LimitOrder(ccy='BTC-USD',
                                     side='long',
                                     price=bid_price,
                                     step=step,
                                     queue_ahead=0)
        order_open_short = LimitOrder(ccy='BTC-USD',
                                      side='short',
                                      price=ask_price,
                                      step=step,
                                      queue_ahead=2000)
        print('opening long position = {}'.format(order_open_long))
        test_position.add(order=order_open_long)
        print('opening short position = {}'.format(order_open_short))
        test_position.add(order=order_open_short)

        print('\ntaking first step...')
        step += 1
        pnl, is_long_order_filled, is_short_order_filled = \
            test_position.step_limit_order_pnl(
                bid_price=bid_price, ask_price=ask_price, buy_volume=buy_volume,
                sell_volume=sell_volume, step=step)
        pnl += pnl

        print("#1 long_inventory.order = \n{}".format(
            test_position.long_inventory.order))
        print("#1 short_inventory.order = \n{}".format(
            test_position.short_inventory.order))
        bid_queue, ask_queue = test_position.get_queues_ahead_features()
        print("#1 get_queues_ahead_features:\nbid_queue={} || ask_queue={}".
              format(bid_queue, ask_queue))
        self.assertEqual(0., bid_queue)
        self.assertEqual(-0.67, round(ask_queue, 2))

        print('\ntaking second step...')
        buy_volume = 500
        sell_volume = 500
        step += 1
        pnl, is_long_order_filled, is_short_order_filled = \
            test_position.step_limit_order_pnl(
                bid_price=bid_price, ask_price=ask_price, buy_volume=buy_volume,
                sell_volume=sell_volume, step=step)
        pnl += pnl

        print("#2 long_inventory.order = \n{}".format(
            test_position.long_inventory.order))
        print("#2 short_inventory.order = \n{}".format(
            test_position.short_inventory.order))
        bid_queue, ask_queue = test_position.get_queues_ahead_features()
        print("#2 get_queues_ahead_features:\nbid_queue={} || ask_queue={}".
              format(bid_queue, ask_queue))
        self.assertEqual(0.5, bid_queue)
        self.assertEqual(-0.6, round(ask_queue, 2))

        print('\ntaking third step...')
        buy_volume = 500
        sell_volume = 499
        step += 1
        pnl, is_long_order_filled, is_short_order_filled = \
            test_position.step_limit_order_pnl(
                bid_price=bid_price, ask_price=ask_price, buy_volume=buy_volume,
                sell_volume=sell_volume, step=step)
        pnl += pnl

        print("#3 long_inventory.order = \n{}".format(
            test_position.long_inventory.order))
        print("#3 short_inventory.order = \n{}".format(
            test_position.short_inventory.order))
        bid_queue, ask_queue = test_position.get_queues_ahead_features()
        print("#3 get_queues_ahead_features:\nbid_queue={} || ask_queue={}".
              format(bid_queue, ask_queue))
        self.assertEqual(0.999, bid_queue)
        self.assertEqual(-0.5, round(ask_queue, 2))

        print('\ntaking fourth step...')
        buy_volume = 500
        sell_volume = 500
        step += 1
        pnl, is_long_order_filled, is_short_order_filled = \
            test_position.step_limit_order_pnl(
                bid_price=bid_price, ask_price=ask_price, buy_volume=buy_volume,
                sell_volume=sell_volume, step=step)
        pnl += pnl

        print("#4 long_inventory.order = \n{}".format(
            test_position.long_inventory.order))
        print("#4 short_inventory.order = \n{}".format(
            test_position.short_inventory.order))
        bid_queue, ask_queue = test_position.get_queues_ahead_features()
        print("#4 get_queues_ahead_features:\nbid_queue={} || ask_queue={}".
              format(bid_queue, ask_queue))
        self.assertEqual(0.0, bid_queue)
        self.assertEqual(-0.33, round(ask_queue, 2))
        print("PnL: {}".format(pnl))
Beispiel #11
0
class PriceJump(Env):

    metadata = {'render.modes': ['human']}
    id = 'long-short-v0'
    # Turn to true if Bitifinex is in the dataset (e.g., include_bitfinex=True)
    features = Sim.get_feature_labels(include_system_time=False,
                                      include_bitfinex=False)
    best_bid_index = features.index('coinbase_bid_distance_0')
    best_ask_index = features.index('coinbase_ask_distance_0')
    notional_bid_index = features.index('coinbase_bid_notional_0')
    notional_ask_index = features.index('coinbase_ask_notional_0')

    buy_trade_index = features.index('coinbase_buys')
    sell_trade_index = features.index('coinbase_sells')

    target_pnl = 0.03  # 3.0% gain per episode (i.e., day)
    fee = MARKET_ORDER_FEE

    def __init__(self,
                 *,
                 fitting_file='LTC-USD_2019-04-07.csv.xz',
                 testing_file='LTC-USD_2019-04-08.csv.xz',
                 step_size=1,
                 max_position=5,
                 window_size=10,
                 seed=1,
                 action_repeats=10,
                 training=True,
                 format_3d=False,
                 z_score=True):

        # properties required for instantiation
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.max_position = max_position
        self.window_size = window_size
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]

        self.action = 0
        # derive gym.env properties
        self.actions = np.eye(3, dtype=np.float32)

        self.sym = testing_file[:7]  # slice the CCY from the filename

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position)
        # get historical data for simulations
        self.sim = Sim(use_arctic=False, z_score=z_score)

        self.prices_, self.data, self.normalized_data = self.sim.load_environment_data(
            fitting_file, testing_file)

        self.max_steps = self.data.shape[0] - self.step_size * \
            self.action_repeats - 1

        # load indicators into the indicator manager
        self.tns = IndicatorManager()
        self.rsi = IndicatorManager()
        for window in INDICATOR_WINDOW:
            self.tns.add(('tns_{}'.format(window), TnS(window=window)))
            self.rsi.add(('rsi_{}'.format(window), RSI(window=window)))

        # rendering class
        self._render = TradingGraph(sym=self.sym)

        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])

        # buffer for appending lags
        self.data_buffer = list()

        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10,
                                            high=10,
                                            shape=self.observation.shape,
                                            dtype=np.float32)

        print(
            '{} PriceJump #{} instantiated.\nself.observation_space.shape : {}'
            .format(self.sym, self._seed, self.observation_space.shape))

    def __str__(self):
        return '{} | {}-{}'.format(PriceJump.id, self.sym, self._seed)

    def step(self, action: int):
        for current_step in range(self.action_repeats):

            if self.done:
                self.reset()
                return self.observation, self.reward, self.done

            # reset the reward if there ARE action repeats
            if current_step == 0:
                self.reward = 0.
                step_action = action
            else:
                step_action = 0

            # Get current step's midpoint
            self.midpoint = self.prices_[self.local_step_number]
            # Pass current time step midpoint to broker to calculate PnL,
            # or if any open orders are to be filled
            buy_volume = self._get_book_data(PriceJump.buy_trade_index)
            sell_volume = self._get_book_data(PriceJump.sell_trade_index)

            self.tns.step(buys=buy_volume, sells=sell_volume)
            self.rsi.step(price=self.midpoint)

            self.broker.step(midpoint=self.midpoint)

            self.reward += self._send_to_broker_and_get_reward(
                action=step_action)

            step_observation = self._get_step_observation(action=action)
            self.data_buffer.append(step_observation)

            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

            self.local_step_number += self.step_size

        self.observation = self._get_observation()

        if self.local_step_number > self.max_steps:
            self.done = True
            order = Order(ccy=self.sym,
                          side=None,
                          price=self.midpoint,
                          step=self.local_step_number)
            self.reward = self.broker.flatten_inventory(order=order)

        return self.observation, self.reward, self.done, {}

    def reset(self):
        if self.training:
            self.local_step_number = self._random_state.randint(
                low=1, high=self.data.shape[0] // 4)
        else:
            self.local_step_number = 0

        msg = ' {}-{} reset. Episode pnl: {:.4f} with {} trades. First step: {}'.format(
            self.sym, self._seed,
            self.broker.get_total_pnl(midpoint=self.midpoint),
            self.broker.get_total_trade_count(), self.local_step_number)
        logger.info(msg)

        self.reward = 0.0
        self.done = False
        self.broker.reset()
        self.data_buffer.clear()
        self.rsi.reset()
        self.tns.reset()

        for step in range(self.window_size + INDICATOR_WINDOW_MAX):
            self.midpoint = self.prices_[self.local_step_number]

            step_buy_volume = self._get_book_data(PriceJump.buy_trade_index)
            step_sell_volume = self._get_book_data(PriceJump.sell_trade_index)
            self.tns.step(buys=step_buy_volume, sells=step_sell_volume)
            self.rsi.step(price=self.midpoint)

            step_observation = self._get_step_observation(action=0)
            self.data_buffer.append(step_observation)

            self.local_step_number += self.step_size
            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

        self.observation = self._get_observation()

        return self.observation

    def render(self, mode='human'):
        self._render.render(midpoint=self.midpoint, mode=mode)

    def close(self):
        logger.info('{}-{} is being closed.'.format(self.id, self.sym))
        self.data = None
        self.normalized_data = None
        self.prices_ = None
        self.broker = None
        self.sim = None
        self.data_buffer = None
        self.tns = None
        self.rsi = None
        return

    def seed(self, seed=1):
        self._random_state = np.random.RandomState(seed=seed)
        self._seed = seed
        logger.info('Setting seed in PriceJump.seed({})'.format(seed))
        return [seed]

    @staticmethod
    def _process_data(_next_state):
        """
        Reshape observation and clip outliers (values +/- 10)
        :param _next_state: observation space
        :return: (np.array) clipped observation space
        """
        return np.clip(_next_state.reshape((1, -1)), -10., 10.)

    def _send_to_broker_and_get_reward(self, action: int):
        """
        Create or adjust orders per a specified action and adjust for penalties.
        :param action: (int) current step's action
        :return: (float) reward
        """
        reward = 0.0
        discouragement = 0.000000000001

        if action == 0:  # do nothing
            reward += discouragement

        elif action == 1:  # buy
            price_fee_adjusted = self.midpoint + (PriceJump.fee *
                                                  self.midpoint)
            if self.broker.short_inventory_count > 0:
                order = Order(ccy=self.sym,
                              side='short',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                self.broker.remove(order=order)
                reward += self.broker.get_reward(side=order.side) / \
                    self.broker.reward_scale  # scale realized PnL

            elif self.broker.long_inventory_count >= 0:
                order = Order(ccy=self.sym,
                              side='long',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement

            else:
                logger.info(
                    ('gym_trading.get_reward() ' + 'Error for action #{} - ' +
                     'unable to place an order with broker').format(action))

        elif action == 2:  # sell
            price_fee_adjusted = self.midpoint - (PriceJump.fee *
                                                  self.midpoint)
            if self.broker.long_inventory_count > 0:
                order = Order(ccy=self.sym,
                              side='long',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                self.broker.remove(order=order)
                reward += self.broker.get_reward(side=order.side) / \
                    self.broker.reward_scale  # scale realized PnL
            elif self.broker.short_inventory_count >= 0:
                order = Order(ccy=self.sym,
                              side='short',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement

            else:
                logger.info(
                    ('gym_trading.get_reward() ' + 'Error for action #{} - ' +
                     'unable to place an order with broker').format(action))

        else:
            logger.info(
                ('Unknown action to take in get_reward(): ' +
                 'action={} | midpoint={}').format(action, self.midpoint))

        return reward

    def _create_position_features(self):
        """
        Create an array with features related to the agent's inventory
        :return: (np.array) normalized position features
        """
        return np.array(
            (self.broker.long_inventory.position_count / self.max_position,
             self.broker.short_inventory.position_count / self.max_position,
             self.broker.get_total_pnl(midpoint=self.midpoint) /
             PriceJump.target_pnl,
             self.broker.long_inventory.get_unrealized_pnl(self.midpoint) /
             self.broker.reward_scale,
             self.broker.short_inventory.get_unrealized_pnl(self.midpoint) /
             self.broker.reward_scale),
            dtype=np.float32)

    def _create_action_features(self, action):
        """
        Create a features array for the current time step's action.
        :param action: (int) action number
        :return: (np.array) One-hot of current action
        """
        return self.actions[action]

    def _create_indicator_features(self):
        """
        Create features vector with environment indicators.
        :return: (np.array) Indicator values for current time step
        """
        return np.array((*self.tns.get_value(), *self.rsi.get_value()),
                        dtype=np.float32)

    def _get_nbbo(self):
        """
        Get best bid and offer
        :return: (tuple) best bid and offer
        """
        best_bid = round(
            self.midpoint - self._get_book_data(PriceJump.best_bid_index), 2)
        best_ask = round(
            self.midpoint + self._get_book_data(PriceJump.best_ask_index), 2)
        return best_bid, best_ask

    def _get_book_data(self, index=0):
        """
        Return step 'n' of order book snapshot data
        :param index: (int) step 'n' to look up in order book snapshot history
        :return: (np.array) order book snapshot vector
        """
        return self.data[self.local_step_number][index]

    def _get_step_observation(self, action=0):
        """
        Current step observation, NOT including historical data.
        :param action: (int) current step action
        :return: (np.array) Current step observation
        """
        step_position_features = self._create_position_features()
        step_action_features = self._create_action_features(action=action)
        step_indicator_features = self._create_indicator_features()
        return np.concatenate(
            (self._process_data(self.normalized_data[self.local_step_number]),
             step_indicator_features, step_position_features,
             step_action_features, np.array([self.reward])),
            axis=None)

    def _get_observation(self):
        """
        Current step observation, including historical data.

        If format_3d is TRUE: Expand the observation space from 2 to 3 dimensions.
        (note: This is necessary for conv nets in Baselines.)
        :return: (np.array) Observation state for current time step
        """
        observation = np.array(self.data_buffer, dtype=np.float32)
        if self.format_3d:
            observation = np.expand_dims(observation, axis=-1)
        return observation
Beispiel #12
0
    def __init__(self,
                 *,
                 fitting_file='ETH-USD_2018-12-31.xz',
                 testing_file='ETH-USD_2019-01-01.xz',
                 step_size=1,
                 max_position=5,
                 window_size=10,
                 seed=1,
                 action_repeats=10,
                 training=True,
                 format_3d=False,
                 z_score=True):

        # properties required for instantiation
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.max_position = max_position
        self.window_size = window_size
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]

        self.action = 0
        # derive gym.env properties
        self.actions = np.eye(3)

        self.sym = testing_file[:7]  # slice the CCY from the filename

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position)
        # get historical data for simulations
        self.sim = Sim(use_arctic=False)

        self.data = self._load_environment_data(fitting_file, testing_file)
        self.prices_ = self.data[
            'coinbase_midpoint'].values  # used to calculate PnL

        self.normalized_data = self.data.copy()
        self.data = self.data.values

        self.max_steps = self.data.shape[0] - self.step_size * \
                         self.action_repeats - 1

        # normalize midpoint data
        self.normalized_data['coinbase_midpoint'] = \
            np.log(self.normalized_data['coinbase_midpoint'].values)
        self.normalized_data['coinbase_midpoint'] = (
            self.normalized_data['coinbase_midpoint'] -
            self.normalized_data['coinbase_midpoint'].shift(1)).fillna(0.)

        # load indicators into the indicator manager
        self.tns = IndicatorManager()
        self.rsi = IndicatorManager()
        for window in INDICATOR_WINDOW:
            self.tns.add(('tns_{}'.format(window), TnS(window=window)))
            self.rsi.add(('rsi_{}'.format(window), RSI(window=window)))

        if z_score:
            logger.info("Pre-scaling {}-{} data...".format(
                self.sym, self._seed))
            self.normalized_data = self.normalized_data.apply(self.sim.z_score,
                                                              axis=1).values
            logger.info("...{}-{} pre-scaling complete.".format(
                self.sym, self._seed))
        else:
            self.normalized_data = self.normalized_data.values

        # rendering class
        self._render = TradingGraph(sym=self.sym)
        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])
        # buffer for appending lags
        self.data_buffer = list()

        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10,
                                            high=10,
                                            shape=self.observation.shape,
                                            dtype=np.float32)

        print(
            '{} PriceJump #{} instantiated.\nself.observation_space.shape : {}'
            .format(self.sym, self._seed, self.observation_space.shape))
Beispiel #13
0
class PriceJump(Env):

    metadata = {'render.modes': ['human']}
    id = 'long-short-v0'
    # Turn to true if Bitifinex is in the dataset (e.g., include_bitfinex=True)
    features = Sim.get_feature_labels(include_system_time=False,
                                      include_bitfinex=False)
    best_bid_index = features.index('coinbase-bid-distance-0')
    best_ask_index = features.index('coinbase-ask-distance-0')
    notional_bid_index = features.index('coinbase-bid-notional-0')
    notional_ask_index = features.index('coinbase-ask-notional-0')

    buy_trade_index = features.index('coinbase-buys')
    sell_trade_index = features.index('coinbase-sells')

    target_pnl = BROKER_FEE * 10 * 5  # e.g., 5 for max_positions
    fee = BROKER_FEE

    def __init__(self,
                 *,
                 fitting_file='ETH-USD_2018-12-31.xz',
                 testing_file='ETH-USD_2019-01-01.xz',
                 step_size=1,
                 max_position=5,
                 window_size=10,
                 seed=1,
                 action_repeats=10,
                 training=True,
                 format_3d=False,
                 z_score=True):

        # properties required for instantiation
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.max_position = max_position
        self.window_size = window_size
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]

        self.action = 0
        # derive gym.env properties
        self.actions = np.eye(3)

        self.sym = testing_file[:7]  # slice the CCY from the filename

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position)
        # get historical data for simulations
        self.sim = Sim(use_arctic=False)

        self.data = self._load_environment_data(fitting_file, testing_file)
        self.prices_ = self.data[
            'coinbase_midpoint'].values  # used to calculate PnL

        self.normalized_data = self.data.copy()
        self.data = self.data.values

        self.max_steps = self.data.shape[0] - self.step_size * \
                         self.action_repeats - 1

        # normalize midpoint data
        self.normalized_data['coinbase_midpoint'] = \
            np.log(self.normalized_data['coinbase_midpoint'].values)
        self.normalized_data['coinbase_midpoint'] = (
            self.normalized_data['coinbase_midpoint'] -
            self.normalized_data['coinbase_midpoint'].shift(1)).fillna(0.)

        # load indicators into the indicator manager
        self.tns = IndicatorManager()
        self.rsi = IndicatorManager()
        for window in INDICATOR_WINDOW:
            self.tns.add(('tns_{}'.format(window), TnS(window=window)))
            self.rsi.add(('rsi_{}'.format(window), RSI(window=window)))

        if z_score:
            logger.info("Pre-scaling {}-{} data...".format(
                self.sym, self._seed))
            self.normalized_data = self.normalized_data.apply(self.sim.z_score,
                                                              axis=1).values
            logger.info("...{}-{} pre-scaling complete.".format(
                self.sym, self._seed))
        else:
            self.normalized_data = self.normalized_data.values

        # rendering class
        self._render = TradingGraph(sym=self.sym)
        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])
        # buffer for appending lags
        self.data_buffer = list()

        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10,
                                            high=10,
                                            shape=self.observation.shape,
                                            dtype=np.float32)

        print(
            '{} PriceJump #{} instantiated.\nself.observation_space.shape : {}'
            .format(self.sym, self._seed, self.observation_space.shape))

    def __str__(self):
        return '{} | {}-{}'.format(PriceJump.id, self.sym, self._seed)

    def step(self, action: int):
        for current_step in range(self.action_repeats):

            if self.done:
                self.reset()
                return self.observation, self.reward, self.done

            # reset the reward if there ARE action repeats
            if current_step == 0:
                self.reward = 0.
                step_action = action
            else:
                step_action = 0

            # Get current step's midpoint
            self.midpoint = self.prices_[self.local_step_number]
            # Pass current time step midpoint to broker to calculate PnL,
            # or if any open orders are to be filled
            buy_volume = self._get_book_data(PriceJump.buy_trade_index)
            sell_volume = self._get_book_data(PriceJump.sell_trade_index)

            self.tns.step(buys=buy_volume, sells=sell_volume)
            self.rsi.step(price=self.midpoint)

            self.broker.step(midpoint=self.midpoint)

            self.reward += self._send_to_broker_and_get_reward(
                action=step_action)

            step_observation = self._get_step_observation(action=action)
            self.data_buffer.append(step_observation)

            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

            self.local_step_number += self.step_size

        self.observation = self._get_observation()

        if self.local_step_number > self.max_steps:
            self.done = True
            order = Order(ccy=self.sym,
                          side=None,
                          price=self.midpoint,
                          step=self.local_step_number)
            self.reward = self.broker.flatten_inventory(order=order)

        return self.observation, self.reward, self.done, {}

    def reset(self):
        if self.training:
            self.local_step_number = self._random_state.randint(
                low=1, high=self.data.shape[0] // 4)
        else:
            self.local_step_number = 0

        msg = ' {}-{} reset. Episode pnl: {:.4f} with {} trades | First step: {}'.format(
            self.sym, self._seed,
            self.broker.get_total_pnl(midpoint=self.midpoint),
            self.broker.get_total_trade_count(), self.local_step_number)
        logger.info(msg)

        self.reward = 0.0
        self.done = False
        self.broker.reset()
        self.data_buffer.clear()
        self.rsi.reset()
        self.tns.reset()

        for step in range(self.window_size + INDICATOR_WINDOW_MAX):
            self.midpoint = self.prices_[self.local_step_number]

            step_buy_volume = self._get_book_data(PriceJump.buy_trade_index)
            step_sell_volume = self._get_book_data(PriceJump.sell_trade_index)
            self.tns.step(buys=step_buy_volume, sells=step_sell_volume)
            self.rsi.step(price=self.midpoint)

            step_observation = self._get_step_observation(action=0)
            self.data_buffer.append(step_observation)

            self.local_step_number += self.step_size
            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

        self.observation = self._get_observation()

        return self.observation

    def render(self, mode='human'):
        self._render.render(midpoint=self.midpoint, mode=mode)

    def close(self):
        logger.info('{}-{} is being closed.'.format(self.id, self.sym))
        self.data = None
        self.normalized_data = None
        self.prices_ = None
        self.broker = None
        self.sim = None
        self.data_buffer = None
        self.tns = None
        self.rsi = None
        return

    def seed(self, seed=1):
        self._random_state = np.random.RandomState(seed=seed)
        self._seed = seed
        logger.info('Setting seed in PriceJump.seed({})'.format(seed))
        return [seed]

    @staticmethod
    def _process_data(_next_state):
        return np.clip(_next_state.reshape((1, -1)), -10., 10.)

    # def _process_data(self, _next_state):
    #     # return self.sim.scale_state(_next_state).values.reshape((1, -1))
    #     return np.reshape(_next_state, (1, -1))

    def _send_to_broker_and_get_reward(self, action):
        reward = 0.0
        discouragement = 0.000000000001

        if action == 0:  # do nothing
            reward += discouragement

        elif action == 1:  # buy
            price_fee_adjusted = self.midpoint + (PriceJump.fee *
                                                  self.midpoint)
            if self.broker.short_inventory_count > 0:
                order = Order(ccy=self.sym,
                              side='short',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                self.broker.remove(order=order)
                reward += self.broker.get_reward(side=order.side)

            elif self.broker.long_inventory_count >= 0:
                order = Order(ccy=self.sym,
                              side='long',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement

            else:
                logger.info(
                    ('gym_trading.get_reward() ' + 'Error for action #{} - ' +
                     'unable to place an order with broker').format(action))

        elif action == 2:  # sell
            price_fee_adjusted = self.midpoint - (PriceJump.fee *
                                                  self.midpoint)
            if self.broker.long_inventory_count > 0:
                order = Order(ccy=self.sym,
                              side='long',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                self.broker.remove(order=order)
                reward += self.broker.get_reward(side=order.side)
            elif self.broker.short_inventory_count >= 0:
                order = Order(ccy=self.sym,
                              side='short',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement

            else:
                logger.info(
                    'gym_trading.get_reward() ' + 'Error for action #{} - ' +
                    'unable to place an order with broker'.format(action))

        else:
            logger.info(
                ('Unknown action to take in get_reward(): ' +
                 'action={} | midpoint={}').format(action, self.midpoint))

        return reward

    def _create_position_features(self):
        return np.array(
            (self.broker.long_inventory.position_count / self.max_position,
             self.broker.short_inventory.position_count / self.max_position,
             self.broker.get_total_pnl(midpoint=self.midpoint) /
             PriceJump.target_pnl,
             self.broker.long_inventory.get_unrealized_pnl(self.midpoint) /
             self.broker.reward_scale,
             self.broker.short_inventory.get_unrealized_pnl(self.midpoint) /
             self.broker.reward_scale),
            dtype=np.float32)

    def _create_action_features(self, action):
        return self.actions[action]

    def _create_indicator_features(self):
        return np.array((*self.tns.get_value(), *self.rsi.get_value()),
                        dtype=np.float32)

    def _get_nbbo(self):
        best_bid = round(
            self.midpoint - self._get_book_data(PriceJump.best_bid_index), 2)
        best_ask = round(
            self.midpoint + self._get_book_data(PriceJump.best_ask_index), 2)
        return best_bid, best_ask

    def _get_book_data(self, index=0):
        return self.data[self.local_step_number][index]

    def _get_step_observation(self, action=0):
        step_position_features = self._create_position_features()
        step_action_features = self._create_action_features(action=action)
        step_indicator_features = self._create_indicator_features()
        return np.concatenate(
            (self._process_data(self.normalized_data[self.local_step_number]),
             step_indicator_features, step_position_features,
             step_action_features, np.array([self.reward])),
            axis=None)

    def _get_observation(self):
        observation = np.array(self.data_buffer, dtype=np.float32)
        # Expand the observation space from 2 to 3 dimensions.
        # This is necessary for conv nets in Baselines.
        if self.format_3d:
            observation = np.expand_dims(observation, axis=-1)
        return observation

    def _load_environment_data(self, fitting_file, testing_file):
        fitting_data_filepath = '{}/data_exports/{}'.format(
            self.sim.cwd, fitting_file)
        data_used_in_environment = '{}/data_exports/{}'.format(
            self.sim.cwd, testing_file)
        fitting_data = self.sim.import_csv(filename=fitting_data_filepath)
        fitting_data['coinbase_midpoint'] = np.log(
            fitting_data['coinbase_midpoint'].values)
        fitting_data['coinbase_midpoint'] = (
            fitting_data['coinbase_midpoint'] -
            fitting_data['coinbase_midpoint'].shift(1)).fillna(method='bfill')
        self.sim.fit_scaler(fitting_data)
        del fitting_data
        return self.sim.import_csv(filename=data_used_in_environment)
Beispiel #14
0
class PriceJump(BaseEnvironment):
    id = 'long-short-v0'

    def __init__(self, transaction_fee=MARKET_ORDER_FEE, **kwargs):
        """
        Environment designed to trade price jumps using market orders
        :param kwargs: refer to BaseEnvironment.py
        """
        super(PriceJump, self).__init__(**kwargs)

        # environment attributes to override in sub-class
        self.actions = np.eye(3, dtype=np.float32)

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=self.max_position,
                             transaction_fee=transaction_fee)

        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10,
                                            high=10,
                                            shape=self.observation.shape,
                                            dtype=np.float32)

        print(
            '{} PriceJump #{} instantiated.\nobservation_space : {}'.format(
                self.sym, self._seed, self.observation_space.shape),
            'reward_type = {}'.format(self.reward_type))

    def __str__(self):
        return '{} | {}-{}'.format(PriceJump.id, self.sym, self._seed)

    def map_action_to_broker(self, action: int):
        """
        Create or adjust orders per a specified action and adjust for penalties.
        :param action: (int) current step's action
        :return: (float) reward
        """
        reward = pnl = 0.0
        discouragement = 0.000000000001

        if action == 0:  # do nothing
            reward += discouragement

        elif action == 1:  # buy
            if self.broker.short_inventory_count > 0:
                order = MarketOrder(ccy=self.sym,
                                    side='short',
                                    price=self.midpoint,
                                    step=self.local_step_number)
                pnl += self.broker.remove(order=order)

            elif self.broker.long_inventory_count >= 0:
                order = MarketOrder(
                    ccy=self.sym,
                    side='long',
                    price=self.midpoint,
                    # price_fee_adjusted,
                    step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement

            else:
                print((
                    'gym_trading.get_reward() Error for action #{} - unable to place '
                    'an order with broker').format(action))

        elif action == 2:  # sell
            if self.broker.long_inventory_count > 0:
                order = MarketOrder(ccy=self.sym,
                                    side='long',
                                    price=self.midpoint,
                                    step=self.local_step_number)
                pnl += self.broker.remove(order=order)

            elif self.broker.short_inventory_count >= 0:
                order = MarketOrder(ccy=self.sym,
                                    side='short',
                                    price=self.midpoint,
                                    step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement

            else:
                print((
                    'gym_trading.get_reward() Error for action #{} - unable to place '
                    'an order with broker').format(action))

        else:
            print((
                'Unknown action to take in get_reward(): action={} | midpoint={}'
            ).format(action, self.midpoint))

        return reward, pnl

    def _create_position_features(self):
        """
        Create an array with features related to the agent's inventory
        :return: (np.array) normalized position features
        """
        return np.array(
            (self.broker.long_inventory.position_count / self.max_position,
             self.broker.short_inventory.position_count / self.max_position,
             self.broker.realized_pnl * self.broker.reward_scale,
             self.broker.get_unrealized_pnl(self.best_bid, self.best_ask) *
             self.broker.reward_scale),
            dtype=np.float32)
Beispiel #15
0
    def __init__(self,
                 *,
                 fitting_file='LTC-USD_2019-04-07.csv.xz',
                 testing_file='LTC-USD_2019-04-08.csv.xz',
                 step_size=1,
                 max_position=5,
                 window_size=10,
                 seed=1,
                 action_repeats=10,
                 training=True,
                 format_3d=False,
                 z_score=True):

        # properties required for instantiation
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.max_position = max_position
        self.window_size = window_size
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]

        self.action = 0
        # derive gym.env properties
        self.actions = np.eye(3, dtype=np.float32)

        self.sym = testing_file[:7]  # slice the CCY from the filename

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position)
        # get historical data for simulations
        self.sim = Sim(use_arctic=False, z_score=z_score)

        self.prices_, self.data, self.normalized_data = self.sim.load_environment_data(
            fitting_file, testing_file)

        self.max_steps = self.data.shape[0] - self.step_size * \
            self.action_repeats - 1

        # load indicators into the indicator manager
        self.tns = IndicatorManager()
        self.rsi = IndicatorManager()
        for window in INDICATOR_WINDOW:
            self.tns.add(('tns_{}'.format(window), TnS(window=window)))
            self.rsi.add(('rsi_{}'.format(window), RSI(window=window)))

        # rendering class
        self._render = TradingGraph(sym=self.sym)

        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])

        # buffer for appending lags
        self.data_buffer = list()

        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10,
                                            high=10,
                                            shape=self.observation.shape,
                                            dtype=np.float32)

        print(
            '{} PriceJump #{} instantiated.\nself.observation_space.shape : {}'
            .format(self.sym, self._seed, self.observation_space.shape))
class BaseEnvironment(Env, ABC):
    metadata = {'render.modes': ['human']}

    def __init__(self,
                 symbol: str,
                 fitting_file: str,
                 testing_file: str,
                 max_position: int = 10,
                 window_size: int = 100,
                 seed: int = 1,
                 action_repeats: int = 5,
                 training: bool = True,
                 format_3d: bool = False,
                 reward_type: str = 'default',
                 transaction_fee: bool = True,
                 ema_alpha: list or float or None = EMA_ALPHA):
        """
        Base class for creating environments extending OpenAI's GYM framework.

        :param symbol: currency pair to trade / experiment
        :param fitting_file: prior trading day (e.g., T-1)
        :param testing_file: current trading day (e.g., T)
        :param max_position: maximum number of positions able to hold in inventory
        :param window_size: number of lags to include in observation space
        :param seed: random seed number
        :param action_repeats: number of steps to take in environment after a given action
        :param training: if TRUE, then randomize starting point in environment
        :param format_3d: if TRUE, reshape observation space from matrix to tensor
        :param reward_type: method for calculating the environment's reward:
            1) 'default' --> inventory count * change in midpoint price returns
            2) 'default_with_fills' --> inventory count * change in midpoint price returns
                + closed trade PnL
            3) 'realized_pnl' --> change in realized pnl between time steps
            4) 'differential_sharpe_ratio' -->
        http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.1.7210&rep=rep1&type=pdf
            5) 'asymmetrical' --> extended version of *default* and enhanced with a
                    reward for being filled above or below midpoint, and returns only
                    negative rewards for Unrealized PnL to discourage long-term
                    speculation.
            6) 'trade_completion' --> reward is generated per trade's round trip

        :param ema_alpha: decay factor for EMA, usually between 0.9 and 0.9999; if NONE,
            raw values are returned in place of smoothed values
        """
        assert reward_type in VALID_REWARD_TYPES, \
            'Error: {} is not a valid reward type. Value must be in:\n{}'.format(
                reward_type, VALID_REWARD_TYPES)

        self.viz = Visualize(
            columns=['midpoint', 'buys', 'sells', 'inventory', 'realized_pnl'],
            store_historical_observations=True)

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position,
                             transaction_fee=transaction_fee)

        # properties required for instantiation
        self.symbol = symbol
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.max_position = max_position
        self.window_size = window_size
        self.reward_type = reward_type
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]
        self.testing_file = testing_file

        # properties that get reset()
        self.reward = np.array([0.0], dtype=np.float32)
        self.step_reward = np.array([0.0], dtype=np.float32)
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None
        self.action = 0
        self.last_pnl = 0.
        self.last_midpoint = None
        self.midpoint_change = None
        self.A_t, self.B_t = 0., 0.  # variables for Differential Sharpe Ratio
        self.episode_stats = ExperimentStatistics()
        self.best_bid = self.best_ask = None

        # properties to override in sub-classes
        self.actions = None
        self.action_space = None
        self.observation_space = None

        # get historical data for simulations
        self.data_pipeline = DataPipeline(alpha=ema_alpha)

        # three different data sets, for different purposes:
        #   1) midpoint_prices - midpoint prices that have not been transformed
        #   2) raw_data - raw limit order book data, not including imbalances
        #   3) normalized_data - z-scored limit order book and order flow imbalance
        #       data, also midpoint price feature is replace by midpoint log price change
        self._midpoint_prices, self._raw_data, self._normalized_data = \
            self.data_pipeline.load_environment_data(
                fitting_file=fitting_file,
                testing_file=testing_file,
                include_imbalances=True,
                as_pandas=True,
            )
        # derive best bid and offer
        self._best_bids = self._raw_data['midpoint'] - (
            self._raw_data['spread'] / 2)
        self._best_asks = self._raw_data['midpoint'] + (
            self._raw_data['spread'] / 2)

        self.max_steps = self._raw_data.shape[0] - self.action_repeats - 1

        # load indicators into the indicator manager
        self.tns = IndicatorManager()
        self.rsi = IndicatorManager()
        for window in INDICATOR_WINDOW:
            self.tns.add(
                ('tns_{}'.format(window), TnS(window=window, alpha=ema_alpha)))
            self.rsi.add(
                ('rsi_{}'.format(window), RSI(window=window, alpha=ema_alpha)))

        # buffer for appending lags
        self.data_buffer = deque(maxlen=self.window_size)

        # Index of specific data points used to generate the observation space
        features = self._raw_data.columns.tolist()
        self.best_bid_index = features.index('bids_distance_0')
        self.best_ask_index = features.index('asks_distance_0')
        self.notional_bid_index = features.index('bids_notional_0')
        self.notional_ask_index = features.index('asks_notional_0')
        self.buy_trade_index = features.index('buys')
        self.sell_trade_index = features.index('sells')

        # typecast all data sets to numpy
        self._raw_data = self._raw_data.to_numpy(dtype=np.float32)
        self._normalized_data = self._normalized_data.to_numpy(
            dtype=np.float32)
        self._midpoint_prices = self._midpoint_prices.to_numpy(
            dtype=np.float64)
        self._best_bids = self._best_bids.to_numpy(dtype=np.float32)
        self._best_asks = self._best_asks.to_numpy(dtype=np.float32)

        # rendering class
        self._render = TradingGraph(sym=self.symbol)

        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self._midpoint_prices[:np.shape(self._render.x_vec)[0]])

    @abstractmethod
    def map_action_to_broker(self, action: int) -> (float, float):
        """
        Translate agent's action into an order and submit order to broker.

        :param action: (int) agent's action for current step
        :return: (tuple) reward, pnl
        """
        return 0., 0.

    @abstractmethod
    def _create_position_features(self) -> np.ndarray:
        """
        Create agent space feature set reflecting the positions held in inventory.

        :return: (np.array) position features
        """
        return np.array([np.nan], dtype=np.float32)

    def _get_step_reward(self, step_pnl: float, step_penalty: float,
                         long_filled: bool, short_filled: bool) -> float:
        """
        Calculate current step reward using a reward function.

        :param step_pnl: PnL realized from an open position that's been closed in the
        current time step
        :param step_penalty: Penalty signal for agent to discourage erroneous actions
        :param long_filled: TRUE if open long limit order was filled in current time step
        :param short_filled: TRUE if open short limit order was filled in current time
        step
        :return: reward for current time step
        """
        reward = 0.

        if self.reward_type == 'default':
            reward += reward_types.default(
                inventory_count=self.broker.net_inventory_count,
                midpoint_change=self.midpoint_change) * 100. + step_penalty

        elif self.reward_type == 'default_with_fills':
            reward += reward_types.default_with_fills(
                inventory_count=self.broker.net_inventory_count,
                midpoint_change=self.midpoint_change,
                step_pnl=step_pnl) * 100. + step_penalty

        elif self.reward_type == 'asymmetrical':
            reward += reward_types.asymmetrical(
                inventory_count=self.broker.net_inventory_count,
                midpoint_change=self.midpoint_change,
                half_spread_pct=(self.midpoint / self.best_bid) - 1.,
                long_filled=long_filled,
                short_filled=short_filled,
                step_pnl=step_pnl,
                dampening=0.6) * 100. + step_penalty

        elif self.reward_type == 'realized_pnl':
            current_pnl = self.broker.realized_pnl
            reward += reward_types.realized_pnl(
                current_pnl=current_pnl,
                last_pnl=self.last_pnl) * 100. + step_penalty
            self.last_pnl = current_pnl

        elif self.reward_type == 'differential_sharpe_ratio':
            tmp_reward, self.A_t, self.B_t = reward_types.differential_sharpe_ratio(
                R_t=self.midpoint_change * self.broker.net_inventory_count,
                A_tm1=self.A_t,
                B_tm1=self.B_t)
            reward += tmp_reward + step_penalty

        elif self.reward_type == 'trade_completion':
            reward += reward_types.trade_completion(
                step_pnl=step_pnl,
                market_order_fee=MARKET_ORDER_FEE,
                profit_ratio=2.) + step_penalty

        else:  # Default implementation
            reward += reward_types.default(
                inventory_count=self.broker.net_inventory_count,
                midpoint_change=self.midpoint_change) * 100. + step_penalty

        return reward

    def step(self, action: int = 0) -> (np.ndarray, np.ndarray, bool, dict):
        """
        Step through environment with action.

        :param action: (int) action to take in environment
        :return: (tuple) observation, reward, is_done, and empty `dict`
        """
        for current_step in range(self.action_repeats):

            if self.done:
                self.reset()
                return self.observation, self.reward, self.done

            # reset the reward if there ARE action repeats
            if current_step == 0:
                self.reward = 0.
                step_action = action
            else:
                step_action = 0

            # Get current step's midpoint and change in midpoint price percentage
            self.midpoint = self._midpoint_prices[self.local_step_number]
            self.midpoint_change = (self.midpoint / self.last_midpoint) - 1.

            # Pass current time step bid/ask prices to broker to calculate PnL,
            # or if any open orders are to be filled
            self.best_bid, self.best_ask = self._get_nbbo()

            # verify the data integrity
            assert self.best_bid <= self.best_ask, (
                "Error: best bid is more expensive than the best Ask:"
                "\nBid = {}\nAsk = {}").format(self.best_bid, self.best_ask)

            # get buy and sell trade volume to use by indicators and 'broker' to
            # execute any open orders the agent has
            buy_volume = self._get_book_data(index=self.buy_trade_index)
            sell_volume = self._get_book_data(index=self.sell_trade_index)

            # Update indicators
            self.tns.step(buys=buy_volume, sells=sell_volume)
            self.rsi.step(price=self.midpoint)

            # Get PnL from any filled LIMIT orders, which is calculated by netting out
            # whatever open position the agent already has in FIFO order
            limit_pnl, long_filled, short_filled = self.broker.step_limit_order_pnl(
                bid_price=self.best_bid,
                ask_price=self.best_ask,
                buy_volume=buy_volume,
                sell_volume=sell_volume,
                step=self.local_step_number)

            # Get PnL from any filled MARKET orders AND action penalties for invalid
            # actions made by the agent for future discouragement
            action_penalty_reward, market_pnl = self.map_action_to_broker(
                action=step_action)
            step_pnl = limit_pnl + market_pnl
            self.step_reward = self._get_step_reward(
                step_pnl=step_pnl,
                step_penalty=action_penalty_reward,
                long_filled=long_filled,
                short_filled=short_filled)

            # Add current step's observation to the data buffer
            step_observation = self._get_step_observation(
                step_action=step_action)
            self.data_buffer.append(step_observation)

            # Store for visualization AFTER the episode
            self.viz.add_observation(obs=step_observation)
            self.viz.add(
                self.midpoint,  # arguments map to the column names in _init_
                int(long_filled),
                int(short_filled),
                self.broker.net_inventory_count,
                (self.broker.realized_pnl * 100) / self.max_position)

            self.reward += self.step_reward
            self.local_step_number += 1
            self.last_midpoint = self.midpoint

        self.observation = self._get_observation()

        if self.local_step_number > self.max_steps:
            self.done = True

            had_long_positions = 1 if self.broker.long_inventory_count > 0 else 0
            had_short_positions = 1 if self.broker.short_inventory_count > 0 else 0

            flatten_pnl = self.broker.flatten_inventory(
                bid_price=self.best_bid, ask_price=self.best_ask)
            self.reward += self._get_step_reward(step_pnl=flatten_pnl,
                                                 step_penalty=0.,
                                                 long_filled=False,
                                                 short_filled=False)

            # store for visualization after the episode
            self.viz.add(
                self.midpoint,  # arguments map to the column names in _init_
                had_long_positions,
                had_short_positions,
                self.broker.net_inventory_count,
                (self.broker.realized_pnl * 100) / self.max_position)

        # save rewards to derive cumulative reward
        self.episode_stats.reward += self.reward

        return self.observation, self.reward, self.done, {}

    def reset(self) -> np.ndarray:
        """
        Reset the environment.

        :return: (np.array) Observation at first step
        """
        if self.training:
            self.local_step_number = self._random_state.randint(
                low=0, high=self.max_steps // 5)
        else:
            self.local_step_number = 0

        # print out episode statistics if there was any activity by the agent
        if self.broker.total_trade_count > 0 or self.broker.realized_pnl != 0.:
            self.episode_stats.number_of_episodes += 1
            print(('-' * 25),
                  '{}-{} {} EPISODE RESET'.format(self.symbol, self._seed,
                                                  self.reward_type.upper()),
                  ('-' * 25))
            print('Episode Reward: {:.4f}'.format(self.episode_stats.reward))
            print('Episode PnL: {:.2f}%'.format(
                (self.broker.realized_pnl / self.max_position) * 100.))
            print('Trade Count: {}'.format(self.broker.total_trade_count))
            print('Average PnL per Trade: {:.4f}%'.format(
                self.broker.average_trade_pnl * 100.))
            print('Total # of episodes: {}'.format(
                self.episode_stats.number_of_episodes))
            print('\n'.join([
                '{}\t=\t{}'.format(k, v)
                for k, v in self.broker.get_statistics().items()
            ]))
            print('First step:\t{}'.format(self.local_step_number))
            print(('=' * 75))
        else:
            print('Resetting environment #{} on episode #{}.'.format(
                self._seed, self.episode_stats.number_of_episodes))

        self.A_t, self.B_t = 0., 0.
        self.reward = 0.0
        self.done = False
        self.broker.reset()
        self.data_buffer.clear()
        self.episode_stats.reset()
        self.rsi.reset()
        self.tns.reset()
        self.viz.reset()

        for step in range(self.window_size + INDICATOR_WINDOW_MAX + 1):
            self.midpoint = self._midpoint_prices[self.local_step_number]

            if self.last_midpoint is None:
                self.last_midpoint = self.midpoint

            self.midpoint_change = (self.midpoint / self.last_midpoint) - 1.
            self.best_bid, self.best_ask = self._get_nbbo()
            step_buy_volume = self._get_book_data(index=self.buy_trade_index)
            step_sell_volume = self._get_book_data(index=self.sell_trade_index)
            self.tns.step(buys=step_buy_volume, sells=step_sell_volume)
            self.rsi.step(price=self.midpoint)

            # Add current step's observation to the data buffer
            step_observation = self._get_step_observation(step_action=0)
            self.data_buffer.append(step_observation)

            self.local_step_number += 1
            self.last_midpoint = self.midpoint

        self.observation = self._get_observation()

        return self.observation

    def render(self, mode: str = 'human') -> None:
        """
        Render midpoint prices.

        :param mode: (str) flag for type of rendering. Only 'human' supported.
        :return: (void)
        """
        self._render.render(midpoint=self.midpoint, mode=mode)

    def close(self) -> None:
        """
        Free clear memory when closing environment.

        :return: (void)
        """
        self.broker.reset()
        self.data_buffer.clear()
        self.episode_stats = None
        self._raw_data = None
        self._normalized_data = None
        self._midpoint_prices = None
        self.tns = None
        self.rsi = None

    def seed(self, seed: int = 1) -> list:
        """
        Set random seed in environment.

        :param seed: (int) random seed number
        :return: (list) seed number in a list
        """
        self._random_state = np.random.RandomState(seed=seed)
        self._seed = seed
        return [seed]

    def _get_nbbo(self) -> (float, float):
        """
        Get best bid and offer.

        :return: (tuple) best bid and offer
        """
        best_bid = self._best_bids[self.local_step_number]
        best_ask = self._best_asks[self.local_step_number]
        return best_bid, best_ask

    def _get_book_data(self, index: int = 0) -> np.ndarray or float:
        """
        Return step 'n' of order book snapshot data.

        :param index: (int) step 'n' to look up in order book snapshot history
        :return: (np.array) order book snapshot vector
        """
        return self._raw_data[self.local_step_number][index]

    @staticmethod
    def _process_data(observation: np.ndarray) -> np.ndarray:
        """
        Reshape observation for function approximator.

        :param observation: observation space
        :return: (np.array) clipped observation space
        """
        return np.clip(observation, -10., 10.)

    def _create_action_features(self, action: int) -> np.ndarray:
        """
        Create a features array for the current time step's action.

        :param action: (int) action number
        :return: (np.array) One-hot of current action
        """
        return self.actions[action]

    def _create_indicator_features(self) -> np.ndarray:
        """
        Create features vector with environment indicators.

        :return: (np.array) Indicator values for current time step
        """
        return np.array((*self.tns.get_value(), *self.rsi.get_value()),
                        dtype=np.float32).reshape(1, -1)

    def _get_step_observation(self, step_action: int = 0) -> np.ndarray:
        """
        Current step observation, NOT including historical data.

        :param step_action: (int) current step action
        :return: (np.array) Current step observation
        """
        step_position_features = self._create_position_features()
        step_action_features = self._create_action_features(action=step_action)
        step_indicator_features = self._create_indicator_features()
        step_environment_observation = self._normalized_data[
            self.local_step_number]
        observation = np.concatenate(
            (step_environment_observation, step_indicator_features,
             step_position_features, step_action_features, self.step_reward),
            axis=None)
        return self._process_data(observation)

    def _get_observation(self) -> np.ndarray:
        """
        Current step observation, including historical data.

        If format_3d is TRUE: Expand the observation space from 2 to 3 dimensions.
        (note: This is necessary for conv nets in Baselines.)

        :return: (np.array) Observation state for current time step
        """
        # Note: reversing the data to chronological order is actually faster when
        # making an array in Python / Numpy, which is odd. #timeit
        observation = np.asarray(self.data_buffer, dtype=np.float32)
        if self.format_3d:
            observation = np.expand_dims(observation, axis=-1)
        return observation

    def get_trade_history(self) -> pd.DataFrame:
        """
        Get DataFrame with trades from most recent episode.

        :return: midpoint prices, and buy & sell trades
        """
        return self.viz.to_df()

    def plot_trade_history(self, save_filename: str or None = None) -> None:
        """
        Plot history from back-test with trade executions, total inventory, and PnL.

        :param save_filename: filename for saving the image
        """
        self.viz.plot(save_filename=save_filename)

    def plot_observation_history(self) -> None:
        """
        Plot observation space as an image.
        """
        return self.viz.plot_obs()
    def __init__(self,
                 symbol: str,
                 fitting_file: str,
                 testing_file: str,
                 max_position: int = 10,
                 window_size: int = 100,
                 seed: int = 1,
                 action_repeats: int = 5,
                 training: bool = True,
                 format_3d: bool = False,
                 reward_type: str = 'default',
                 transaction_fee: bool = True,
                 ema_alpha: list or float or None = EMA_ALPHA):
        """
        Base class for creating environments extending OpenAI's GYM framework.

        :param symbol: currency pair to trade / experiment
        :param fitting_file: prior trading day (e.g., T-1)
        :param testing_file: current trading day (e.g., T)
        :param max_position: maximum number of positions able to hold in inventory
        :param window_size: number of lags to include in observation space
        :param seed: random seed number
        :param action_repeats: number of steps to take in environment after a given action
        :param training: if TRUE, then randomize starting point in environment
        :param format_3d: if TRUE, reshape observation space from matrix to tensor
        :param reward_type: method for calculating the environment's reward:
            1) 'default' --> inventory count * change in midpoint price returns
            2) 'default_with_fills' --> inventory count * change in midpoint price returns
                + closed trade PnL
            3) 'realized_pnl' --> change in realized pnl between time steps
            4) 'differential_sharpe_ratio' -->
        http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.1.7210&rep=rep1&type=pdf
            5) 'asymmetrical' --> extended version of *default* and enhanced with a
                    reward for being filled above or below midpoint, and returns only
                    negative rewards for Unrealized PnL to discourage long-term
                    speculation.
            6) 'trade_completion' --> reward is generated per trade's round trip

        :param ema_alpha: decay factor for EMA, usually between 0.9 and 0.9999; if NONE,
            raw values are returned in place of smoothed values
        """
        assert reward_type in VALID_REWARD_TYPES, \
            'Error: {} is not a valid reward type. Value must be in:\n{}'.format(
                reward_type, VALID_REWARD_TYPES)

        self.viz = Visualize(
            columns=['midpoint', 'buys', 'sells', 'inventory', 'realized_pnl'],
            store_historical_observations=True)

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position,
                             transaction_fee=transaction_fee)

        # properties required for instantiation
        self.symbol = symbol
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.max_position = max_position
        self.window_size = window_size
        self.reward_type = reward_type
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]
        self.testing_file = testing_file

        # properties that get reset()
        self.reward = np.array([0.0], dtype=np.float32)
        self.step_reward = np.array([0.0], dtype=np.float32)
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None
        self.action = 0
        self.last_pnl = 0.
        self.last_midpoint = None
        self.midpoint_change = None
        self.A_t, self.B_t = 0., 0.  # variables for Differential Sharpe Ratio
        self.episode_stats = ExperimentStatistics()
        self.best_bid = self.best_ask = None

        # properties to override in sub-classes
        self.actions = None
        self.action_space = None
        self.observation_space = None

        # get historical data for simulations
        self.data_pipeline = DataPipeline(alpha=ema_alpha)

        # three different data sets, for different purposes:
        #   1) midpoint_prices - midpoint prices that have not been transformed
        #   2) raw_data - raw limit order book data, not including imbalances
        #   3) normalized_data - z-scored limit order book and order flow imbalance
        #       data, also midpoint price feature is replace by midpoint log price change
        self._midpoint_prices, self._raw_data, self._normalized_data = \
            self.data_pipeline.load_environment_data(
                fitting_file=fitting_file,
                testing_file=testing_file,
                include_imbalances=True,
                as_pandas=True,
            )
        # derive best bid and offer
        self._best_bids = self._raw_data['midpoint'] - (
            self._raw_data['spread'] / 2)
        self._best_asks = self._raw_data['midpoint'] + (
            self._raw_data['spread'] / 2)

        self.max_steps = self._raw_data.shape[0] - self.action_repeats - 1

        # load indicators into the indicator manager
        self.tns = IndicatorManager()
        self.rsi = IndicatorManager()
        for window in INDICATOR_WINDOW:
            self.tns.add(
                ('tns_{}'.format(window), TnS(window=window, alpha=ema_alpha)))
            self.rsi.add(
                ('rsi_{}'.format(window), RSI(window=window, alpha=ema_alpha)))

        # buffer for appending lags
        self.data_buffer = deque(maxlen=self.window_size)

        # Index of specific data points used to generate the observation space
        features = self._raw_data.columns.tolist()
        self.best_bid_index = features.index('bids_distance_0')
        self.best_ask_index = features.index('asks_distance_0')
        self.notional_bid_index = features.index('bids_notional_0')
        self.notional_ask_index = features.index('asks_notional_0')
        self.buy_trade_index = features.index('buys')
        self.sell_trade_index = features.index('sells')

        # typecast all data sets to numpy
        self._raw_data = self._raw_data.to_numpy(dtype=np.float32)
        self._normalized_data = self._normalized_data.to_numpy(
            dtype=np.float32)
        self._midpoint_prices = self._midpoint_prices.to_numpy(
            dtype=np.float64)
        self._best_bids = self._best_bids.to_numpy(dtype=np.float32)
        self._best_asks = self._best_asks.to_numpy(dtype=np.float32)

        # rendering class
        self._render = TradingGraph(sym=self.symbol)

        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self._midpoint_prices[:np.shape(self._render.x_vec)[0]])
Beispiel #18
0
    def test_lob_queuing(self):
        test_position = Broker()

        # perform a partial fill on the first order
        step = 0
        bid_price = 102.
        ask_price = 103.
        buy_volume = 500
        sell_volume = 500
        queue_ahead = 800

        order_open = LimitOrder(ccy='BTC-USD',
                                side='long',
                                price=bid_price,
                                step=step,
                                queue_ahead=queue_ahead)
        test_position.add(order=order_open)

        step += 1
        pnl, is_long_order_filled, is_short_order_filled = \
            test_position.step_limit_order_pnl(
                bid_price=bid_price, ask_price=ask_price, buy_volume=buy_volume,
                sell_volume=sell_volume, step=step)
        pnl += pnl

        print("#1 long_inventory.order = \n{}".format(
            test_position.long_inventory.order))
        self.assertEqual(300, test_position.long_inventory.order.queue_ahead)
        self.assertEqual(0, test_position.long_inventory.order.executed)
        self.assertEqual(0, test_position.long_inventory_count)

        step += 1
        pnl, is_long_order_filled, is_short_order_filled = \
            test_position.step_limit_order_pnl(
                bid_price=bid_price, ask_price=ask_price, buy_volume=buy_volume,
                sell_volume=sell_volume, step=step)
        pnl += pnl

        print("#2 long_inventory.order = \n{}".format(
            test_position.long_inventory.order))
        self.assertEqual(200, test_position.long_inventory.order.executed)
        self.assertEqual(0, test_position.long_inventory_count)

        # if order gets filled with a bid below the order's price, the order should NOT
        # receive any price improvement during the execution.
        bid_price = 100.
        ask_price = 102.
        order_open = LimitOrder(ccy='BTC-USD',
                                side='long',
                                price=bid_price,
                                step=step,
                                queue_ahead=queue_ahead)
        test_position.add(order=order_open)
        print("#3 long_inventory.order = \n{}".format(
            test_position.long_inventory.order))
        self.assertEqual(0, test_position.long_inventory_count)

        bid_price = 100.
        for i in range(5):
            step += 1
            pnl, is_long_order_filled, is_short_order_filled = \
                test_position.step_limit_order_pnl(
                    bid_price=bid_price, ask_price=ask_price, buy_volume=buy_volume,
                    sell_volume=sell_volume, step=step)
            pnl += pnl

        self.assertEqual(1, test_position.long_inventory_count)
        self.assertEqual(100.40,
                         round(test_position.long_inventory.average_price, 2))
        print("PnL: {}".format(pnl))