def test_case_one(self):
    print('\nTest_Case_One')
    test_position = Broker()
    midpoint = 100.
    fee = .003

    order_open = Order(ccy='BTC-USD', side='long', price=midpoint, step=1)
    test_position.add(order=order_open)

    self.assertEqual(1, test_position.long_inventory.position_count)
    print('LONG Unrealized_pnl: %f' % test_position.long_inventory.get_unrealized_pnl())
    self.assertEqual(0, test_position.short_inventory.position_count)
    self.assertEqual(0., test_position.short_inventory.get_unrealized_pnl())

    order_close = Order(ccy='BTC-USD', side='long', price=midpoint + (midpoint * fee * 5), step=100)
    test_position.remove(order=order_close)

    self.assertEqual(0, test_position.long_inventory.position_count)
    print('LONG Unrealized_pnl: %f' % test_position.long_inventory.get_unrealized_pnl())
    self.assertEqual(test_position.short_inventory.position_count, 0)
    self.assertEqual(test_position.short_inventory.get_unrealized_pnl(), 0.)
    print('LONG Realized_pnl: %f' % test_position.get_realized_pnl())
def test_case_two(self):
    print('\nTest_Case_Two')
    test_position = Broker()
    midpoint = 100.
    fee = .003

    order_open = Order(ccy='BTC-USD', side='short', price=midpoint, step=1)
    test_position.add(order=order_open)

    self.assertEqual(1, test_position.short_inventory.position_count)
    self.assertEqual(0, test_position.long_inventory.position_count)
    self.assertEqual(0., test_position.long_inventory.get_unrealized_pnl())
    print('SHORT Unrealized_pnl: %f' % test_position.short_inventory.get_unrealized_pnl())

    order_close = Order(ccy='BTC-USD', side='short', price=midpoint - (midpoint * fee * 15), step=100)
    test_position.remove(order=order_close)

    self.assertEqual(0, test_position.short_inventory.position_count)
    self.assertEqual(0, test_position.long_inventory.position_count)
    self.assertEqual(0., test_position.long_inventory.get_unrealized_pnl())
    print('SHORT Unrealized_pnl: %f' % test_position.short_inventory.get_unrealized_pnl())
    print('SHORT Realized_pnl: %f' % test_position.get_realized_pnl())
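As a sanity check on the two round trips above, the exit prices are defined as multiples of the 0.3% fee, so the expected gross returns can be worked out directly. This is only a sketch; how Broker nets out fees and position size is not shown in this listing:

midpoint, fee = 100., .003
long_exit = midpoint + (midpoint * fee * 5)    # 101.5 -> roughly +1.5% gross on the long round trip
short_exit = midpoint - (midpoint * fee * 15)  # 95.5  -> roughly +4.5% gross on the short round trip
print(long_exit, short_exit)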
def test_case_three(self):
    print('\nTest_Case_Three')
    test_position = Broker(5)
    midpoint = 100.

    for i in range(10):
        order_open = MarketOrder(ccy='BTC-USD', side='long', price=midpoint - i, step=i)
        test_position.add(order=order_open)

    self.assertEqual(5, test_position.long_inventory.position_count)
    self.assertEqual(0, test_position.short_inventory.position_count)
    print('Confirm we have 5 positions: %i' % test_position.long_inventory.position_count)

    for i in range(10):
        order_open = MarketOrder(ccy='BTC-USD', side='long', price=midpoint + i, step=i)
        test_position.remove(order=order_open)

    self.assertEqual(0, test_position.long_inventory.position_count)
    self.assertEqual(0, test_position.short_inventory.position_count)
def test_avg_exe(self):
    test_position = Broker()
    # perform a partial fill on the first order
    step = 0
    bid_price = 101.
    ask_price = 102.
    buy_volume = 500
    sell_volume = 500
    pnl = 0.

    test_position.add(order=LimitOrder(ccy='BTC-USD', side='long', price=bid_price,
                                       step=step, queue_ahead=0))

    print("taking first step...")
    step += 1
    step_pnl, _, _ = test_position.step_limit_order_pnl(
        bid_price=bid_price, ask_price=ask_price, buy_volume=buy_volume,
        sell_volume=sell_volume, step=step)
    pnl += step_pnl
    self.assertEqual(500, test_position.long_inventory.order.executed)
    self.assertEqual(0, test_position.long_inventory_count)

    # if order gets filled with a bid below the order's price, the order should NOT
    # receive any price improvement during the execution.
    bid_price = 99.
    ask_price = 100.
    test_position.add(order=LimitOrder(ccy='BTC-USD', side='long', price=bid_price,
                                       step=step, queue_ahead=0))

    print("taking second step...")
    step += 1
    step_pnl, _, _ = test_position.step_limit_order_pnl(
        bid_price=bid_price, ask_price=ask_price, buy_volume=buy_volume,
        sell_volume=sell_volume, step=step)
    pnl += step_pnl
    self.assertEqual(1, test_position.long_inventory_count)
    self.assertEqual(100., test_position.long_inventory.average_price)
    print("PnL: {}".format(pnl))
def test_long_pnl(self):
    test_position = Broker()
    step = 0
    bid_price = 101.
    ask_price = 102.
    buy_volume = 100
    sell_volume = 100
    pnl = 0.

    def walk_forward(pnl, step, bid_price, ask_price, buy_volume, sell_volume, down=True):
        for i in range(50):
            step += 1
            if down:
                bid_price *= 0.99
                ask_price *= 0.99
            else:
                bid_price *= 1.01
                ask_price *= 1.01

            step_pnl, is_long_order_filled, is_short_order_filled = \
                test_position.step_limit_order_pnl(
                    bid_price=bid_price, ask_price=ask_price, buy_volume=buy_volume,
                    sell_volume=sell_volume, step=step)
            pnl += step_pnl

            if i % 10 == 0:
                print('bid_price={:.2f} | ask_price={:.2f}'.format(bid_price, ask_price))

        return step, bid_price, ask_price, buy_volume, sell_volume, pnl

    test_position.add(order=LimitOrder(ccy='BTC-USD', side='long', price=100.,
                                       step=step, queue_ahead=1000))
    step, _, _, buy_volume, sell_volume, pnl = walk_forward(
        pnl, step, bid_price, ask_price, buy_volume, sell_volume, down=True)
    self.assertEqual(1, test_position.long_inventory_count)

    test_position.add(order=LimitOrder(ccy='BTC-USD', side='short', price=105.,
                                       step=step, queue_ahead=0))
    _, _, _, _, _, pnl = walk_forward(
        pnl, step, bid_price, ask_price, buy_volume, sell_volume, down=False)

    realized_pnl = test_position.realized_pnl
    self.assertEqual(0.05, realized_pnl,
                     "Expected Realized PnL of 0.05 and got {}".format(realized_pnl))
    self.assertEqual(0, test_position.short_inventory_count + test_position.long_inventory_count)
    print("PnL: {}".format(pnl))
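The 0.05 expectation above follows from the two limit prices: the long order fills at 100.00 on the way down and the short order fills at 105.00 on the way back up. Assuming realized PnL is reported as a fractional return on the entry price (an inference from the assertion, not something stated elsewhere in this listing), the arithmetic is:

entry_price = 100.   # long limit order fill
exit_price = 105.    # short limit order fill
round_trip_return = (exit_price - entry_price) / entry_price
print(round_trip_return)  # 0.05, matching the assertion (fees ignored in this sketch)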
class PriceJump(BaseEnvironment):
    id = 'long-short-v0'

    def __init__(self, *, fitting_file='LTC-USD_2019-04-07.csv.xz',
                 testing_file='LTC-USD_2019-04-08.csv.xz', step_size=1, max_position=5,
                 window_size=10, seed=1, action_repeats=10, training=True, format_3d=False,
                 z_score=True, reward_type='trade_completion', scale_rewards=True):
        super(PriceJump, self).__init__(fitting_file=fitting_file, testing_file=testing_file,
                                        step_size=step_size, max_position=max_position,
                                        window_size=window_size, seed=seed,
                                        action_repeats=action_repeats, training=training,
                                        format_3d=format_3d, z_score=z_score,
                                        reward_type=reward_type, scale_rewards=scale_rewards)
        self.actions = np.eye(3, dtype=np.float32)

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position, transaction_fee=MARKET_ORDER_FEE)
        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10, high=10, shape=self.observation.shape,
                                            dtype=np.float32)
        print('{} PriceJump #{} instantiated.\nself.observation_space.shape : {}'.format(
            self.sym, self._seed, self.observation_space.shape))

    def __str__(self):
        return '{} | {}-{}'.format(PriceJump.id, self.sym, self._seed)

    def map_action_to_broker(self, action: int):
        """
        Create or adjust orders per a specified action and adjust for penalties.

        :param action: (int) current step's action
        :return: (tuple) reward, pnl
        """
        reward = pnl = 0.0
        discouragement = 0.000000000001

        if action == 0:  # do nothing
            reward += discouragement

        elif action == 1:  # buy
            if self.broker.short_inventory_count > 0:
                order = MarketOrder(ccy=self.sym, side='short', price=self.midpoint,
                                    step=self.local_step_number)
                pnl += self.broker.remove(order=order)
            elif self.broker.long_inventory_count >= 0:
                order = MarketOrder(ccy=self.sym, side='long',
                                    price=self.midpoint,  # price_fee_adjusted
                                    step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement
            else:
                print(('gym_trading.get_reward() Error for action #{} - '
                       'unable to place an order with broker').format(action))

        elif action == 2:  # sell
            if self.broker.long_inventory_count > 0:
                order = MarketOrder(ccy=self.sym, side='long', price=self.midpoint,
                                    step=self.local_step_number)
                pnl += self.broker.remove(order=order)
            elif self.broker.short_inventory_count >= 0:
                order = MarketOrder(ccy=self.sym, side='short', price=self.midpoint,
                                    step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement
            else:
                print(('gym_trading.get_reward() Error for action #{} - '
                       'unable to place an order with broker').format(action))

        else:
            print(('Unknown action to take in get_reward(): '
                   'action={} | midpoint={}').format(action, self.midpoint))

        return reward, pnl

    def _create_position_features(self):
        """
        Create an array with features related to the agent's inventory.

        :return: (np.array) normalized position features
        """
        return np.array(
            (self.broker.long_inventory.position_count / self.max_position,
             self.broker.short_inventory.position_count / self.max_position,
             self.broker.realized_pnl / self.broker.reward_scale,
             self.broker.get_unrealized_pnl(self.best_bid, self.best_ask) / self.broker.reward_scale),
            dtype=np.float32)
class MarketMaker(BaseEnvironment):
    id = 'market-maker-v0'

    def __init__(self, transaction_fee=LIMIT_ORDER_FEE, **kwargs):
        """
        Environment designed for automated market making.

        :param kwargs: refer to BaseEnvironment.py
        """
        super(MarketMaker, self).__init__(**kwargs)

        # environment attributes to override in sub-class
        self.actions = np.eye(17, dtype=np.float32)

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=self.max_position, transaction_fee=transaction_fee)
        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10, high=10, shape=self.observation.shape,
                                            dtype=np.float32)
        print('{} MarketMaker #{} instantiated\nobservation_space: {}'.format(
            self.sym, self._seed, self.observation_space.shape),
            'reward_type = {}'.format(self.reward_type))

    def __str__(self):
        return '{} | {}-{}'.format(MarketMaker.id, self.sym, self._seed)

    def map_action_to_broker(self, action: int):
        """
        Create or adjust orders per a specified action and adjust for penalties.

        :param action: (int) current step's action
        :return: (tuple) reward, pnl
        """
        reward = pnl = 0.0
        discouragement = 0.000000000001

        if action == 0:  # do nothing
            reward += discouragement
        elif action == 1:
            reward += self._create_order_at_level(reward, discouragement, level=0, side='long')
            reward += self._create_order_at_level(reward, discouragement, level=4, side='short')
        elif action == 2:
            reward += self._create_order_at_level(reward, discouragement, level=0, side='long')
            reward += self._create_order_at_level(reward, discouragement, level=9, side='short')
        elif action == 3:
            reward += self._create_order_at_level(reward, discouragement, level=0, side='long')
            reward += self._create_order_at_level(reward, discouragement, level=14, side='short')
        elif action == 4:
            reward += self._create_order_at_level(reward, discouragement, level=4, side='long')
            reward += self._create_order_at_level(reward, discouragement, level=0, side='short')
        elif action == 5:
            reward += self._create_order_at_level(reward, discouragement, level=4, side='long')
            reward += self._create_order_at_level(reward, discouragement, level=4, side='short')
        elif action == 6:
            reward += self._create_order_at_level(reward, discouragement, level=4, side='long')
            reward += self._create_order_at_level(reward, discouragement, level=9, side='short')
        elif action == 7:
            reward += self._create_order_at_level(reward, discouragement, level=4, side='long')
            reward += self._create_order_at_level(reward, discouragement, level=14, side='short')
        elif action == 8:
            reward += self._create_order_at_level(reward, discouragement, level=9, side='long')
            reward += self._create_order_at_level(reward, discouragement, level=0, side='short')
        elif action == 9:
            reward += self._create_order_at_level(reward, discouragement, level=9, side='long')
            reward += self._create_order_at_level(reward, discouragement, level=4, side='short')
        elif action == 10:
            reward += self._create_order_at_level(reward, discouragement, level=9, side='long')
            reward += self._create_order_at_level(reward, discouragement, level=9, side='short')
        elif action == 11:
            reward += self._create_order_at_level(reward, discouragement, level=9, side='long')
            reward += self._create_order_at_level(reward, discouragement, level=14, side='short')
        elif action == 12:
            reward += self._create_order_at_level(reward, discouragement, level=14, side='long')
            reward += self._create_order_at_level(reward, discouragement, level=0, side='short')
        elif action == 13:
            reward += self._create_order_at_level(reward, discouragement, level=14, side='long')
            reward += self._create_order_at_level(reward, discouragement, level=4, side='short')
        elif action == 14:
            reward += self._create_order_at_level(reward, discouragement, level=14, side='long')
            reward += self._create_order_at_level(reward, discouragement, level=9, side='short')
        elif action == 15:
            reward += self._create_order_at_level(reward, discouragement, level=14, side='long')
            reward += self._create_order_at_level(reward, discouragement, level=14, side='short')
        elif action == 16:
            reward += self.broker.flatten_inventory(self.best_bid, self.best_ask)
        else:
            print("Action {} does not exist! Be careful which actions you pass in.".format(action))

        return reward, pnl

    def _create_position_features(self):
        """
        Create an array with features related to the agent's inventory.

        :return: (np.array) normalized position features
        """
        return np.array(
            (self.broker.long_inventory.position_count / self.max_position,
             self.broker.short_inventory.position_count / self.max_position,
             self.broker.get_total_pnl(self.best_bid, self.best_ask) * self.broker.reward_scale,
             self.broker.long_inventory.get_unrealized_pnl(self.best_bid) * self.broker.reward_scale,
             self.broker.short_inventory.get_unrealized_pnl(self.best_ask) * self.broker.reward_scale,
             self.broker.get_long_order_distance_to_midpoint(midpoint=self.midpoint) *
             self.broker.reward_scale,
             self.broker.get_short_order_distance_to_midpoint(midpoint=self.midpoint) *
             self.broker.reward_scale,
             *self.broker.get_queues_ahead_features()),
            dtype=np.float32)

    def _create_order_at_level(self, reward: float, discouragement: float, level=0, side='long'):
        """
        Create a new order at a specified LOB level.

        :param reward: (float) current step reward
        :param discouragement: (float) penalty deducted from reward for erroneous actions
        :param level: (int) level in the limit order book
        :param side: (str) direction of trade e.g., 'long' or 'short'
        :return: (float) reward with penalties added
        """
        adjustment = 1 if level > 0 else 0

        if side == 'long':
            best = self._get_book_data(self.best_bid_index - level)
            denormalized_best = round(self.midpoint * (best + 1), 2)
            inside_best = self._get_book_data(self.best_bid_index - level + adjustment)
            denormalized_inside_best = round(self.midpoint * (inside_best + 1), 2)
            plus_one = denormalized_best + 0.01

            if denormalized_inside_best == plus_one:  # stick to best bid
                bid_price = denormalized_best
                # since LOB is rendered as cumulative notional, deduct the prior price
                # level to derive the notional value of orders ahead in the queue
                bid_queue_ahead = self._get_book_data(self.notional_bid_index - level) - \
                    self._get_book_data(self.notional_bid_index - level + adjustment)
            else:  # insert a cent ahead to jump a queue
                bid_price = plus_one
                bid_queue_ahead = 0.

            bid_order = LimitOrder(ccy=self.sym, side='long', price=bid_price,
                                   step=self.local_step_number, queue_ahead=bid_queue_ahead)

            if self.broker.add(order=bid_order) is False:
                reward -= discouragement
            else:
                reward += discouragement

        if side == 'short':
            best = self._get_book_data(self.best_ask_index + level)
            denormalized_best = round(self.midpoint * (best + 1), 2)
            inside_best = self._get_book_data(self.best_ask_index + level - adjustment)
            denormalized_inside_best = round(self.midpoint * (inside_best + 1), 2)
            plus_one = denormalized_best - 0.01

            if denormalized_inside_best == plus_one:  # stick to best ask
                ask_price = denormalized_best
                # since LOB is rendered as cumulative notional, deduct the prior price
                # level to derive the notional value of orders ahead in the queue
                ask_queue_ahead = self._get_book_data(self.notional_ask_index + level) - \
                    self._get_book_data(self.notional_ask_index + level - adjustment)
            else:  # insert a cent ahead to jump the queue
                ask_price = plus_one
                ask_queue_ahead = 0.

            ask_order = LimitOrder(ccy=self.sym, side='short', price=ask_price,
                                   step=self.local_step_number, queue_ahead=ask_queue_ahead)

            if self.broker.add(order=ask_order) is False:
                reward -= discouragement
            else:
                reward += discouragement

        return reward
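A minimal instantiation sketch for MarketMaker; the keyword arguments are simply forwarded to BaseEnvironment (shown further below), and the file names here are placeholders for locally exported data rather than fixtures that ship with the project:

env = MarketMaker(symbol='LTC-USD',
                  fitting_file='LTC-USD_2019-04-07.csv.xz',   # T-1 data (placeholder)
                  testing_file='LTC-USD_2019-04-08.csv.xz',   # T data (placeholder)
                  max_position=5,
                  window_size=100,
                  reward_type='trade_completion')
observation = env.reset()
print(env.action_space.n, observation.shape)  # 17 discrete actions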
def test_queues_ahead_features(self):
    test_position = Broker()
    # perform a partial fill on the first order
    step = 0
    bid_price = 100.
    ask_price = 200.
    buy_volume = 0
    sell_volume = 0
    pnl = 0.

    order_open_long = LimitOrder(ccy='BTC-USD', side='long', price=bid_price,
                                 step=step, queue_ahead=0)
    order_open_short = LimitOrder(ccy='BTC-USD', side='short', price=ask_price,
                                  step=step, queue_ahead=2000)

    print('opening long position = {}'.format(order_open_long))
    test_position.add(order=order_open_long)
    print('opening short position = {}'.format(order_open_short))
    test_position.add(order=order_open_short)

    print('\ntaking first step...')
    step += 1
    step_pnl, is_long_order_filled, is_short_order_filled = \
        test_position.step_limit_order_pnl(bid_price=bid_price, ask_price=ask_price,
                                           buy_volume=buy_volume, sell_volume=sell_volume,
                                           step=step)
    pnl += step_pnl
    print("#1 long_inventory.order = \n{}".format(test_position.long_inventory.order))
    print("#1 short_inventory.order = \n{}".format(test_position.short_inventory.order))
    bid_queue, ask_queue = test_position.get_queues_ahead_features()
    print("#1 get_queues_ahead_features:\nbid_queue={} || ask_queue={}".format(bid_queue, ask_queue))
    self.assertEqual(0., bid_queue)
    self.assertEqual(-0.67, round(ask_queue, 2))

    print('\ntaking second step...')
    buy_volume = 500
    sell_volume = 500
    step += 1
    step_pnl, is_long_order_filled, is_short_order_filled = \
        test_position.step_limit_order_pnl(bid_price=bid_price, ask_price=ask_price,
                                           buy_volume=buy_volume, sell_volume=sell_volume,
                                           step=step)
    pnl += step_pnl
    print("#2 long_inventory.order = \n{}".format(test_position.long_inventory.order))
    print("#2 short_inventory.order = \n{}".format(test_position.short_inventory.order))
    bid_queue, ask_queue = test_position.get_queues_ahead_features()
    print("#2 get_queues_ahead_features:\nbid_queue={} || ask_queue={}".format(bid_queue, ask_queue))
    self.assertEqual(0.5, bid_queue)
    self.assertEqual(-0.6, round(ask_queue, 2))

    print('\ntaking third step...')
    buy_volume = 500
    sell_volume = 499
    step += 1
    step_pnl, is_long_order_filled, is_short_order_filled = \
        test_position.step_limit_order_pnl(bid_price=bid_price, ask_price=ask_price,
                                           buy_volume=buy_volume, sell_volume=sell_volume,
                                           step=step)
    pnl += step_pnl
    print("#3 long_inventory.order = \n{}".format(test_position.long_inventory.order))
    print("#3 short_inventory.order = \n{}".format(test_position.short_inventory.order))
    bid_queue, ask_queue = test_position.get_queues_ahead_features()
    print("#3 get_queues_ahead_features:\nbid_queue={} || ask_queue={}".format(bid_queue, ask_queue))
    self.assertEqual(0.999, bid_queue)
    self.assertEqual(-0.5, round(ask_queue, 2))

    print('\ntaking fourth step...')
    buy_volume = 500
    sell_volume = 500
    step += 1
    step_pnl, is_long_order_filled, is_short_order_filled = \
        test_position.step_limit_order_pnl(bid_price=bid_price, ask_price=ask_price,
                                           buy_volume=buy_volume, sell_volume=sell_volume,
                                           step=step)
    pnl += step_pnl
    print("#4 long_inventory.order = \n{}".format(test_position.long_inventory.order))
    print("#4 short_inventory.order = \n{}".format(test_position.short_inventory.order))
    bid_queue, ask_queue = test_position.get_queues_ahead_features()
    print("#4 get_queues_ahead_features:\nbid_queue={} || ask_queue={}".format(bid_queue, ask_queue))
    self.assertEqual(0.0, bid_queue)
    self.assertEqual(-0.33, round(ask_queue, 2))

    print("PnL: {}".format(pnl))
class PriceJump(Env):
    metadata = {'render.modes': ['human']}
    id = 'long-short-v0'

    # Set to True if Bitfinex is in the dataset (e.g., include_bitfinex=True)
    features = Sim.get_feature_labels(include_system_time=False, include_bitfinex=False)
    best_bid_index = features.index('coinbase_bid_distance_0')
    best_ask_index = features.index('coinbase_ask_distance_0')
    notional_bid_index = features.index('coinbase_bid_notional_0')
    notional_ask_index = features.index('coinbase_ask_notional_0')
    buy_trade_index = features.index('coinbase_buys')
    sell_trade_index = features.index('coinbase_sells')

    target_pnl = 0.03  # 3.0% gain per episode (i.e., day)
    fee = MARKET_ORDER_FEE

    def __init__(self, *, fitting_file='LTC-USD_2019-04-07.csv.xz',
                 testing_file='LTC-USD_2019-04-08.csv.xz', step_size=1, max_position=5,
                 window_size=10, seed=1, action_repeats=10, training=True, format_3d=False,
                 z_score=True):
        # properties required for instantiation
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.max_position = max_position
        self.window_size = window_size
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]
        self.action = 0
        # derive gym.env properties
        self.actions = np.eye(3, dtype=np.float32)
        self.sym = testing_file[:7]  # slice the CCY from the filename
        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None
        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position)
        # get historical data for simulations
        self.sim = Sim(use_arctic=False, z_score=z_score)
        self.prices_, self.data, self.normalized_data = self.sim.load_environment_data(
            fitting_file, testing_file)
        self.max_steps = self.data.shape[0] - self.step_size * self.action_repeats - 1
        # load indicators into the indicator manager
        self.tns = IndicatorManager()
        self.rsi = IndicatorManager()
        for window in INDICATOR_WINDOW:
            self.tns.add(('tns_{}'.format(window), TnS(window=window)))
            self.rsi.add(('rsi_{}'.format(window), RSI(window=window)))
        # rendering class
        self._render = TradingGraph(sym=self.sym)
        # graph midpoint prices
        self._render.reset_render_data(y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])
        # buffer for appending lags
        self.data_buffer = list()
        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10, high=10, shape=self.observation.shape,
                                            dtype=np.float32)
        print('{} PriceJump #{} instantiated.\nself.observation_space.shape : {}'.format(
            self.sym, self._seed, self.observation_space.shape))

    def __str__(self):
        return '{} | {}-{}'.format(PriceJump.id, self.sym, self._seed)

    def step(self, action: int):
        for current_step in range(self.action_repeats):

            if self.done:
                self.reset()
                return self.observation, self.reward, self.done, {}

            # reset the reward if there ARE action repeats
            if current_step == 0:
                self.reward = 0.
                step_action = action
            else:
                step_action = 0

            # Get current step's midpoint
            self.midpoint = self.prices_[self.local_step_number]
            # Pass current time step midpoint to broker to calculate PnL,
            # or if any open orders are to be filled
            buy_volume = self._get_book_data(PriceJump.buy_trade_index)
            sell_volume = self._get_book_data(PriceJump.sell_trade_index)

            self.tns.step(buys=buy_volume, sells=sell_volume)
            self.rsi.step(price=self.midpoint)
            self.broker.step(midpoint=self.midpoint)

            self.reward += self._send_to_broker_and_get_reward(action=step_action)

            step_observation = self._get_step_observation(action=action)
            self.data_buffer.append(step_observation)
            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

            self.local_step_number += self.step_size

        self.observation = self._get_observation()

        if self.local_step_number > self.max_steps:
            self.done = True
            order = Order(ccy=self.sym, side=None, price=self.midpoint,
                          step=self.local_step_number)
            self.reward = self.broker.flatten_inventory(order=order)

        return self.observation, self.reward, self.done, {}

    def reset(self):
        if self.training:
            self.local_step_number = self._random_state.randint(low=1,
                                                                high=self.data.shape[0] // 4)
        else:
            self.local_step_number = 0

        msg = ' {}-{} reset. Episode pnl: {:.4f} with {} trades. First step: {}'.format(
            self.sym, self._seed, self.broker.get_total_pnl(midpoint=self.midpoint),
            self.broker.get_total_trade_count(), self.local_step_number)
        logger.info(msg)

        self.reward = 0.0
        self.done = False
        self.broker.reset()
        self.data_buffer.clear()
        self.rsi.reset()
        self.tns.reset()

        for step in range(self.window_size + INDICATOR_WINDOW_MAX):
            self.midpoint = self.prices_[self.local_step_number]
            step_buy_volume = self._get_book_data(PriceJump.buy_trade_index)
            step_sell_volume = self._get_book_data(PriceJump.sell_trade_index)
            self.tns.step(buys=step_buy_volume, sells=step_sell_volume)
            self.rsi.step(price=self.midpoint)

            step_observation = self._get_step_observation(action=0)
            self.data_buffer.append(step_observation)

            self.local_step_number += self.step_size
            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

        self.observation = self._get_observation()

        return self.observation

    def render(self, mode='human'):
        self._render.render(midpoint=self.midpoint, mode=mode)

    def close(self):
        logger.info('{}-{} is being closed.'.format(self.id, self.sym))
        self.data = None
        self.normalized_data = None
        self.prices_ = None
        self.broker = None
        self.sim = None
        self.data_buffer = None
        self.tns = None
        self.rsi = None
        return

    def seed(self, seed=1):
        self._random_state = np.random.RandomState(seed=seed)
        self._seed = seed
        logger.info('Setting seed in PriceJump.seed({})'.format(seed))
        return [seed]

    @staticmethod
    def _process_data(_next_state):
        """
        Reshape observation and clip outliers (values +/- 10).

        :param _next_state: observation space
        :return: (np.array) clipped observation space
        """
        return np.clip(_next_state.reshape((1, -1)), -10., 10.)

    def _send_to_broker_and_get_reward(self, action: int):
        """
        Create or adjust orders per a specified action and adjust for penalties.

        :param action: (int) current step's action
        :return: (float) reward
        """
        reward = 0.0
        discouragement = 0.000000000001

        if action == 0:  # do nothing
            reward += discouragement

        elif action == 1:  # buy
            price_fee_adjusted = self.midpoint + (PriceJump.fee * self.midpoint)
            if self.broker.short_inventory_count > 0:
                order = Order(ccy=self.sym, side='short', price=price_fee_adjusted,
                              step=self.local_step_number)
                self.broker.remove(order=order)
                reward += self.broker.get_reward(side=order.side) / \
                    self.broker.reward_scale  # scale realized PnL
            elif self.broker.long_inventory_count >= 0:
                order = Order(ccy=self.sym, side='long', price=price_fee_adjusted,
                              step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement
            else:
                logger.info(('gym_trading.get_reward() Error for action #{} - '
                             'unable to place an order with broker').format(action))

        elif action == 2:  # sell
            price_fee_adjusted = self.midpoint - (PriceJump.fee * self.midpoint)
            if self.broker.long_inventory_count > 0:
                order = Order(ccy=self.sym, side='long', price=price_fee_adjusted,
                              step=self.local_step_number)
                self.broker.remove(order=order)
                reward += self.broker.get_reward(side=order.side) / \
                    self.broker.reward_scale  # scale realized PnL
            elif self.broker.short_inventory_count >= 0:
                order = Order(ccy=self.sym, side='short', price=price_fee_adjusted,
                              step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement
            else:
                logger.info(('gym_trading.get_reward() Error for action #{} - '
                             'unable to place an order with broker').format(action))

        else:
            logger.info(('Unknown action to take in get_reward(): '
                         'action={} | midpoint={}').format(action, self.midpoint))

        return reward

    def _create_position_features(self):
        """
        Create an array with features related to the agent's inventory.

        :return: (np.array) normalized position features
        """
        return np.array(
            (self.broker.long_inventory.position_count / self.max_position,
             self.broker.short_inventory.position_count / self.max_position,
             self.broker.get_total_pnl(midpoint=self.midpoint) / PriceJump.target_pnl,
             self.broker.long_inventory.get_unrealized_pnl(self.midpoint) / self.broker.reward_scale,
             self.broker.short_inventory.get_unrealized_pnl(self.midpoint) / self.broker.reward_scale),
            dtype=np.float32)

    def _create_action_features(self, action):
        """
        Create a features array for the current time step's action.

        :param action: (int) action number
        :return: (np.array) One-hot of current action
        """
        return self.actions[action]

    def _create_indicator_features(self):
        """
        Create features vector with environment indicators.

        :return: (np.array) Indicator values for current time step
        """
        return np.array((*self.tns.get_value(), *self.rsi.get_value()), dtype=np.float32)

    def _get_nbbo(self):
        """
        Get best bid and offer.

        :return: (tuple) best bid and offer
        """
        best_bid = round(self.midpoint - self._get_book_data(PriceJump.best_bid_index), 2)
        best_ask = round(self.midpoint + self._get_book_data(PriceJump.best_ask_index), 2)
        return best_bid, best_ask

    def _get_book_data(self, index=0):
        """
        Return step 'n' of order book snapshot data.

        :param index: (int) step 'n' to look up in order book snapshot history
        :return: (np.array) order book snapshot vector
        """
        return self.data[self.local_step_number][index]

    def _get_step_observation(self, action=0):
        """
        Current step observation, NOT including historical data.

        :param action: (int) current step action
        :return: (np.array) Current step observation
        """
        step_position_features = self._create_position_features()
        step_action_features = self._create_action_features(action=action)
        step_indicator_features = self._create_indicator_features()
        return np.concatenate((self._process_data(self.normalized_data[self.local_step_number]),
                               step_indicator_features,
                               step_position_features,
                               step_action_features,
                               np.array([self.reward])),
                              axis=None)

    def _get_observation(self):
        """
        Current step observation, including historical data.

        If format_3d is TRUE: Expand the observation space from 2 to 3 dimensions.
        (note: This is necessary for conv nets in Baselines.)

        :return: (np.array) Observation state for current time step
        """
        observation = np.array(self.data_buffer, dtype=np.float32)
        if self.format_3d:
            observation = np.expand_dims(observation, axis=-1)
        return observation
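A minimal random-agent rollout for this Env-based PriceJump, assuming the two default data files are available locally; it relies only on the reset()/step() signatures shown above:

env = PriceJump(fitting_file='LTC-USD_2019-04-07.csv.xz',
                testing_file='LTC-USD_2019-04-08.csv.xz',
                training=False)
observation = env.reset()
done = False
total_reward = 0.
while not done:
    action = env.action_space.sample()  # 0 = do nothing, 1 = buy, 2 = sell
    observation, reward, done, _ = env.step(action)
    total_reward += reward
print('episode reward: {:.4f}'.format(total_reward))
env.close()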
class PriceJump(Env):
    metadata = {'render.modes': ['human']}
    id = 'long-short-v0'

    # Set to True if Bitfinex is in the dataset (e.g., include_bitfinex=True)
    features = Sim.get_feature_labels(include_system_time=False, include_bitfinex=False)
    best_bid_index = features.index('coinbase-bid-distance-0')
    best_ask_index = features.index('coinbase-ask-distance-0')
    notional_bid_index = features.index('coinbase-bid-notional-0')
    notional_ask_index = features.index('coinbase-ask-notional-0')
    buy_trade_index = features.index('coinbase-buys')
    sell_trade_index = features.index('coinbase-sells')

    target_pnl = BROKER_FEE * 10 * 5  # e.g., 5 for max_positions
    fee = BROKER_FEE

    def __init__(self, *, fitting_file='ETH-USD_2018-12-31.xz',
                 testing_file='ETH-USD_2019-01-01.xz', step_size=1, max_position=5,
                 window_size=10, seed=1, action_repeats=10, training=True, format_3d=False,
                 z_score=True):
        # properties required for instantiation
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.max_position = max_position
        self.window_size = window_size
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]
        self.action = 0
        # derive gym.env properties
        self.actions = np.eye(3)
        self.sym = testing_file[:7]  # slice the CCY from the filename
        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None
        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position)
        # get historical data for simulations
        self.sim = Sim(use_arctic=False)
        self.data = self._load_environment_data(fitting_file, testing_file)
        self.prices_ = self.data['coinbase_midpoint'].values  # used to calculate PnL
        self.normalized_data = self.data.copy()
        self.data = self.data.values
        self.max_steps = self.data.shape[0] - self.step_size * self.action_repeats - 1
        # normalize midpoint data
        self.normalized_data['coinbase_midpoint'] = \
            np.log(self.normalized_data['coinbase_midpoint'].values)
        self.normalized_data['coinbase_midpoint'] = (
            self.normalized_data['coinbase_midpoint'] -
            self.normalized_data['coinbase_midpoint'].shift(1)).fillna(0.)
        # load indicators into the indicator manager
        self.tns = IndicatorManager()
        self.rsi = IndicatorManager()
        for window in INDICATOR_WINDOW:
            self.tns.add(('tns_{}'.format(window), TnS(window=window)))
            self.rsi.add(('rsi_{}'.format(window), RSI(window=window)))

        if z_score:
            logger.info("Pre-scaling {}-{} data...".format(self.sym, self._seed))
            self.normalized_data = self.normalized_data.apply(self.sim.z_score, axis=1).values
            logger.info("...{}-{} pre-scaling complete.".format(self.sym, self._seed))
        else:
            self.normalized_data = self.normalized_data.values

        # rendering class
        self._render = TradingGraph(sym=self.sym)
        # graph midpoint prices
        self._render.reset_render_data(y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])
        # buffer for appending lags
        self.data_buffer = list()
        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10, high=10, shape=self.observation.shape,
                                            dtype=np.float32)
        print('{} PriceJump #{} instantiated.\nself.observation_space.shape : {}'.format(
            self.sym, self._seed, self.observation_space.shape))

    def __str__(self):
        return '{} | {}-{}'.format(PriceJump.id, self.sym, self._seed)

    def step(self, action: int):
        for current_step in range(self.action_repeats):

            if self.done:
                self.reset()
                return self.observation, self.reward, self.done, {}

            # reset the reward if there ARE action repeats
            if current_step == 0:
                self.reward = 0.
                step_action = action
            else:
                step_action = 0

            # Get current step's midpoint
            self.midpoint = self.prices_[self.local_step_number]
            # Pass current time step midpoint to broker to calculate PnL,
            # or if any open orders are to be filled
            buy_volume = self._get_book_data(PriceJump.buy_trade_index)
            sell_volume = self._get_book_data(PriceJump.sell_trade_index)

            self.tns.step(buys=buy_volume, sells=sell_volume)
            self.rsi.step(price=self.midpoint)
            self.broker.step(midpoint=self.midpoint)

            self.reward += self._send_to_broker_and_get_reward(action=step_action)

            step_observation = self._get_step_observation(action=action)
            self.data_buffer.append(step_observation)
            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

            self.local_step_number += self.step_size

        self.observation = self._get_observation()

        if self.local_step_number > self.max_steps:
            self.done = True
            order = Order(ccy=self.sym, side=None, price=self.midpoint,
                          step=self.local_step_number)
            self.reward = self.broker.flatten_inventory(order=order)

        return self.observation, self.reward, self.done, {}

    def reset(self):
        if self.training:
            self.local_step_number = self._random_state.randint(low=1,
                                                                high=self.data.shape[0] // 4)
        else:
            self.local_step_number = 0

        msg = ' {}-{} reset. Episode pnl: {:.4f} with {} trades | First step: {}'.format(
            self.sym, self._seed, self.broker.get_total_pnl(midpoint=self.midpoint),
            self.broker.get_total_trade_count(), self.local_step_number)
        logger.info(msg)

        self.reward = 0.0
        self.done = False
        self.broker.reset()
        self.data_buffer.clear()
        self.rsi.reset()
        self.tns.reset()

        for step in range(self.window_size + INDICATOR_WINDOW_MAX):
            self.midpoint = self.prices_[self.local_step_number]
            step_buy_volume = self._get_book_data(PriceJump.buy_trade_index)
            step_sell_volume = self._get_book_data(PriceJump.sell_trade_index)
            self.tns.step(buys=step_buy_volume, sells=step_sell_volume)
            self.rsi.step(price=self.midpoint)

            step_observation = self._get_step_observation(action=0)
            self.data_buffer.append(step_observation)

            self.local_step_number += self.step_size
            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

        self.observation = self._get_observation()

        return self.observation

    def render(self, mode='human'):
        self._render.render(midpoint=self.midpoint, mode=mode)

    def close(self):
        logger.info('{}-{} is being closed.'.format(self.id, self.sym))
        self.data = None
        self.normalized_data = None
        self.prices_ = None
        self.broker = None
        self.sim = None
        self.data_buffer = None
        self.tns = None
        self.rsi = None
        return

    def seed(self, seed=1):
        self._random_state = np.random.RandomState(seed=seed)
        self._seed = seed
        logger.info('Setting seed in PriceJump.seed({})'.format(seed))
        return [seed]

    @staticmethod
    def _process_data(_next_state):
        return np.clip(_next_state.reshape((1, -1)), -10., 10.)

    # def _process_data(self, _next_state):
    #     # return self.sim.scale_state(_next_state).values.reshape((1, -1))
    #     return np.reshape(_next_state, (1, -1))

    def _send_to_broker_and_get_reward(self, action):
        reward = 0.0
        discouragement = 0.000000000001

        if action == 0:  # do nothing
            reward += discouragement

        elif action == 1:  # buy
            price_fee_adjusted = self.midpoint + (PriceJump.fee * self.midpoint)
            if self.broker.short_inventory_count > 0:
                order = Order(ccy=self.sym, side='short', price=price_fee_adjusted,
                              step=self.local_step_number)
                self.broker.remove(order=order)
                reward += self.broker.get_reward(side=order.side)
            elif self.broker.long_inventory_count >= 0:
                order = Order(ccy=self.sym, side='long', price=price_fee_adjusted,
                              step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement
            else:
                logger.info(('gym_trading.get_reward() Error for action #{} - '
                             'unable to place an order with broker').format(action))

        elif action == 2:  # sell
            price_fee_adjusted = self.midpoint - (PriceJump.fee * self.midpoint)
            if self.broker.long_inventory_count > 0:
                order = Order(ccy=self.sym, side='long', price=price_fee_adjusted,
                              step=self.local_step_number)
                self.broker.remove(order=order)
                reward += self.broker.get_reward(side=order.side)
            elif self.broker.short_inventory_count >= 0:
                order = Order(ccy=self.sym, side='short', price=price_fee_adjusted,
                              step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement
            else:
                logger.info(('gym_trading.get_reward() Error for action #{} - '
                             'unable to place an order with broker').format(action))

        else:
            logger.info(('Unknown action to take in get_reward(): '
                         'action={} | midpoint={}').format(action, self.midpoint))

        return reward

    def _create_position_features(self):
        return np.array(
            (self.broker.long_inventory.position_count / self.max_position,
             self.broker.short_inventory.position_count / self.max_position,
             self.broker.get_total_pnl(midpoint=self.midpoint) / PriceJump.target_pnl,
             self.broker.long_inventory.get_unrealized_pnl(self.midpoint) / self.broker.reward_scale,
             self.broker.short_inventory.get_unrealized_pnl(self.midpoint) / self.broker.reward_scale),
            dtype=np.float32)

    def _create_action_features(self, action):
        return self.actions[action]

    def _create_indicator_features(self):
        return np.array((*self.tns.get_value(), *self.rsi.get_value()), dtype=np.float32)

    def _get_nbbo(self):
        best_bid = round(self.midpoint - self._get_book_data(PriceJump.best_bid_index), 2)
        best_ask = round(self.midpoint + self._get_book_data(PriceJump.best_ask_index), 2)
        return best_bid, best_ask

    def _get_book_data(self, index=0):
        return self.data[self.local_step_number][index]

    def _get_step_observation(self, action=0):
        step_position_features = self._create_position_features()
        step_action_features = self._create_action_features(action=action)
        step_indicator_features = self._create_indicator_features()
        return np.concatenate((self._process_data(self.normalized_data[self.local_step_number]),
                               step_indicator_features,
                               step_position_features,
                               step_action_features,
                               np.array([self.reward])),
                              axis=None)

    def _get_observation(self):
        observation = np.array(self.data_buffer, dtype=np.float32)
        # Expand the observation space from 2 to 3 dimensions.
        # This is necessary for conv nets in Baselines.
        if self.format_3d:
            observation = np.expand_dims(observation, axis=-1)
        return observation

    def _load_environment_data(self, fitting_file, testing_file):
        fitting_data_filepath = '{}/data_exports/{}'.format(self.sim.cwd, fitting_file)
        data_used_in_environment = '{}/data_exports/{}'.format(self.sim.cwd, testing_file)

        fitting_data = self.sim.import_csv(filename=fitting_data_filepath)
        fitting_data['coinbase_midpoint'] = np.log(fitting_data['coinbase_midpoint'].values)
        fitting_data['coinbase_midpoint'] = (
            fitting_data['coinbase_midpoint'] -
            fitting_data['coinbase_midpoint'].shift(1)).fillna(method='bfill')
        self.sim.fit_scaler(fitting_data)
        del fitting_data

        return self.sim.import_csv(filename=data_used_in_environment)
class PriceJump(BaseEnvironment):
    id = 'long-short-v0'

    def __init__(self, transaction_fee=MARKET_ORDER_FEE, **kwargs):
        """
        Environment designed to trade price jumps using market orders.

        :param kwargs: refer to BaseEnvironment.py
        """
        super(PriceJump, self).__init__(**kwargs)

        # environment attributes to override in sub-class
        self.actions = np.eye(3, dtype=np.float32)

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=self.max_position, transaction_fee=transaction_fee)
        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10, high=10, shape=self.observation.shape,
                                            dtype=np.float32)
        print('{} PriceJump #{} instantiated.\nobservation_space : {}'.format(
            self.sym, self._seed, self.observation_space.shape),
            'reward_type = {}'.format(self.reward_type))

    def __str__(self):
        return '{} | {}-{}'.format(PriceJump.id, self.sym, self._seed)

    def map_action_to_broker(self, action: int):
        """
        Create or adjust orders per a specified action and adjust for penalties.

        :param action: (int) current step's action
        :return: (tuple) reward, pnl
        """
        reward = pnl = 0.0
        discouragement = 0.000000000001

        if action == 0:  # do nothing
            reward += discouragement

        elif action == 1:  # buy
            if self.broker.short_inventory_count > 0:
                order = MarketOrder(ccy=self.sym, side='short', price=self.midpoint,
                                    step=self.local_step_number)
                pnl += self.broker.remove(order=order)
            elif self.broker.long_inventory_count >= 0:
                order = MarketOrder(ccy=self.sym, side='long',
                                    price=self.midpoint,  # price_fee_adjusted
                                    step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement
            else:
                print(('gym_trading.get_reward() Error for action #{} - unable to place '
                       'an order with broker').format(action))

        elif action == 2:  # sell
            if self.broker.long_inventory_count > 0:
                order = MarketOrder(ccy=self.sym, side='long', price=self.midpoint,
                                    step=self.local_step_number)
                pnl += self.broker.remove(order=order)
            elif self.broker.short_inventory_count >= 0:
                order = MarketOrder(ccy=self.sym, side='short', price=self.midpoint,
                                    step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement
            else:
                print(('gym_trading.get_reward() Error for action #{} - unable to place '
                       'an order with broker').format(action))

        else:
            print(('Unknown action to take in get_reward(): '
                   'action={} | midpoint={}').format(action, self.midpoint))

        return reward, pnl

    def _create_position_features(self):
        """
        Create an array with features related to the agent's inventory.

        :return: (np.array) normalized position features
        """
        return np.array(
            (self.broker.long_inventory.position_count / self.max_position,
             self.broker.short_inventory.position_count / self.max_position,
             self.broker.realized_pnl * self.broker.reward_scale,
             self.broker.get_unrealized_pnl(self.best_bid, self.best_ask) * self.broker.reward_scale),
            dtype=np.float32)
class BaseEnvironment(Env, ABC):
    metadata = {'render.modes': ['human']}

    def __init__(self,
                 symbol: str,
                 fitting_file: str,
                 testing_file: str,
                 max_position: int = 10,
                 window_size: int = 100,
                 seed: int = 1,
                 action_repeats: int = 5,
                 training: bool = True,
                 format_3d: bool = False,
                 reward_type: str = 'default',
                 transaction_fee: bool = True,
                 ema_alpha: list or float or None = EMA_ALPHA):
        """
        Base class for creating environments extending OpenAI's GYM framework.

        :param symbol: currency pair to trade / experiment
        :param fitting_file: prior trading day (e.g., T-1)
        :param testing_file: current trading day (e.g., T)
        :param max_position: maximum number of positions able to hold in inventory
        :param window_size: number of lags to include in observation space
        :param seed: random seed number
        :param action_repeats: number of steps to take in environment after a given action
        :param training: if TRUE, then randomize starting point in environment
        :param format_3d: if TRUE, reshape observation space from matrix to tensor
        :param reward_type: method for calculating the environment's reward:
            1) 'default' --> inventory count * change in midpoint price returns
            2) 'default_with_fills' --> inventory count * change in midpoint price returns
                + closed trade PnL
            3) 'realized_pnl' --> change in realized pnl between time steps
            4) 'differential_sharpe_ratio' -->
                http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.1.7210&rep=rep1&type=pdf
            5) 'asymmetrical' --> extended version of *default* and enhanced with a reward
                for being filled above or below midpoint, and returns only negative rewards
                for Unrealized PnL to discourage long-term speculation.
            6) 'trade_completion' --> reward is generated per trade's round trip
        :param ema_alpha: decay factor for EMA, usually between 0.9 and 0.9999; if NONE,
            raw values are returned in place of smoothed values
        """
        assert reward_type in VALID_REWARD_TYPES, \
            'Error: {} is not a valid reward type. Value must be in:\n{}'.format(
                reward_type, VALID_REWARD_TYPES)

        self.viz = Visualize(
            columns=['midpoint', 'buys', 'sells', 'inventory', 'realized_pnl'],
            store_historical_observations=True)

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position, transaction_fee=transaction_fee)

        # properties required for instantiation
        self.symbol = symbol
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.max_position = max_position
        self.window_size = window_size
        self.reward_type = reward_type
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]
        self.testing_file = testing_file

        # properties that get reset()
        self.reward = np.array([0.0], dtype=np.float32)
        self.step_reward = np.array([0.0], dtype=np.float32)
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None
        self.action = 0
        self.last_pnl = 0.
        self.last_midpoint = None
        self.midpoint_change = None
        self.A_t, self.B_t = 0., 0.  # variables for Differential Sharpe Ratio
        self.episode_stats = ExperimentStatistics()
        self.best_bid = self.best_ask = None

        # properties to override in sub-classes
        self.actions = None
        self.action_space = None
        self.observation_space = None

        # get historical data for simulations
        self.data_pipeline = DataPipeline(alpha=ema_alpha)

        # three different data sets, for different purposes:
        #   1) midpoint_prices - midpoint prices that have not been transformed
        #   2) raw_data - raw limit order book data, not including imbalances
        #   3) normalized_data - z-scored limit order book and order flow imbalance data,
        #      also the midpoint price feature is replaced by midpoint log price change
        self._midpoint_prices, self._raw_data, self._normalized_data = \
            self.data_pipeline.load_environment_data(
                fitting_file=fitting_file,
                testing_file=testing_file,
                include_imbalances=True,
                as_pandas=True,
            )

        # derive best bid and offer
        self._best_bids = self._raw_data['midpoint'] - (self._raw_data['spread'] / 2)
        self._best_asks = self._raw_data['midpoint'] + (self._raw_data['spread'] / 2)

        self.max_steps = self._raw_data.shape[0] - self.action_repeats - 1

        # load indicators into the indicator manager
        self.tns = IndicatorManager()
        self.rsi = IndicatorManager()
        for window in INDICATOR_WINDOW:
            self.tns.add(('tns_{}'.format(window), TnS(window=window, alpha=ema_alpha)))
            self.rsi.add(('rsi_{}'.format(window), RSI(window=window, alpha=ema_alpha)))

        # buffer for appending lags
        self.data_buffer = deque(maxlen=self.window_size)

        # Index of specific data points used to generate the observation space
        features = self._raw_data.columns.tolist()
        self.best_bid_index = features.index('bids_distance_0')
        self.best_ask_index = features.index('asks_distance_0')
        self.notional_bid_index = features.index('bids_notional_0')
        self.notional_ask_index = features.index('asks_notional_0')
        self.buy_trade_index = features.index('buys')
        self.sell_trade_index = features.index('sells')

        # typecast all data sets to numpy
        self._raw_data = self._raw_data.to_numpy(dtype=np.float32)
        self._normalized_data = self._normalized_data.to_numpy(dtype=np.float32)
        self._midpoint_prices = self._midpoint_prices.to_numpy(dtype=np.float64)
        self._best_bids = self._best_bids.to_numpy(dtype=np.float32)
        self._best_asks = self._best_asks.to_numpy(dtype=np.float32)

        # rendering class
        self._render = TradingGraph(sym=self.symbol)

        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self._midpoint_prices[:np.shape(self._render.x_vec)[0]])

    @abstractmethod
    def map_action_to_broker(self, action: int) -> (float, float):
        """
        Translate agent's action into an order and submit order to broker.

        :param action: (int) agent's action for current step
        :return: (tuple) reward, pnl
        """
        return 0., 0.

    @abstractmethod
    def _create_position_features(self) -> np.ndarray:
        """
        Create agent space feature set reflecting the positions held in inventory.

        :return: (np.array) position features
        """
        return np.array([np.nan], dtype=np.float32)

    def _get_step_reward(self, step_pnl: float, step_penalty: float, long_filled: bool,
                         short_filled: bool) -> float:
        """
        Calculate current step reward using a reward function.

        :param step_pnl: PnL realized from an open position that's been closed in the
            current time step
        :param step_penalty: Penalty signal for agent to discourage erroneous actions
        :param long_filled: TRUE if open long limit order was filled in current time step
        :param short_filled: TRUE if open short limit order was filled in current time step
        :return: reward for current time step
        """
        reward = 0.

        if self.reward_type == 'default':
            reward += reward_types.default(
                inventory_count=self.broker.net_inventory_count,
                midpoint_change=self.midpoint_change) * 100. + step_penalty
        elif self.reward_type == 'default_with_fills':
            reward += reward_types.default_with_fills(
                inventory_count=self.broker.net_inventory_count,
                midpoint_change=self.midpoint_change,
                step_pnl=step_pnl) * 100. + step_penalty
        elif self.reward_type == 'asymmetrical':
            reward += reward_types.asymmetrical(
                inventory_count=self.broker.net_inventory_count,
                midpoint_change=self.midpoint_change,
                half_spread_pct=(self.midpoint / self.best_bid) - 1.,
                long_filled=long_filled,
                short_filled=short_filled,
                step_pnl=step_pnl,
                dampening=0.6) * 100. + step_penalty
        elif self.reward_type == 'realized_pnl':
            current_pnl = self.broker.realized_pnl
            reward += reward_types.realized_pnl(
                current_pnl=current_pnl,
                last_pnl=self.last_pnl) * 100. + step_penalty
            self.last_pnl = current_pnl
        elif self.reward_type == 'differential_sharpe_ratio':
            tmp_reward, self.A_t, self.B_t = reward_types.differential_sharpe_ratio(
                R_t=self.midpoint_change * self.broker.net_inventory_count,
                A_tm1=self.A_t,
                B_tm1=self.B_t)
            reward += tmp_reward + step_penalty
        elif self.reward_type == 'trade_completion':
            reward += reward_types.trade_completion(
                step_pnl=step_pnl,
                market_order_fee=MARKET_ORDER_FEE,
                profit_ratio=2.) + step_penalty
        else:  # Default implementation
            reward += reward_types.default(
                inventory_count=self.broker.net_inventory_count,
                midpoint_change=self.midpoint_change) * 100. + step_penalty

        return reward

    def step(self, action: int = 0) -> (np.ndarray, np.ndarray, bool, dict):
        """
        Step through environment with action.

        :param action: (int) action to take in environment
        :return: (tuple) observation, reward, is_done, and empty `dict`
        """
        for current_step in range(self.action_repeats):

            if self.done:
                self.reset()
                return self.observation, self.reward, self.done, {}

            # reset the reward if there ARE action repeats
            if current_step == 0:
                self.reward = 0.
                step_action = action
            else:
                step_action = 0

            # Get current step's midpoint and change in midpoint price percentage
            self.midpoint = self._midpoint_prices[self.local_step_number]
            self.midpoint_change = (self.midpoint / self.last_midpoint) - 1.
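            # Worked example of the 'default' reward for intuition: a net inventory of +3
            # combined with a midpoint_change of +0.001 (+0.1%) yields approximately
            # 3 * 0.001 * 100 = 0.3 before the step penalty is added, assuming
            # reward_types.default() returns inventory_count * midpoint_change as
            # described in the __init__ docstring above.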
        # Pass the current step's bid/ask prices to the broker to calculate PnL
        # and to check whether any open orders should be filled
        self.best_bid, self.best_ask = self._get_nbbo()

        # verify the data integrity
        assert self.best_bid <= self.best_ask, (
            "Error: best bid is more expensive than the best ask:"
            "\nBid = {}\nAsk = {}").format(self.best_bid, self.best_ask)

        # get buy and sell trade volume, used by the indicators and the broker
        # to execute any open orders the agent has
        buy_volume = self._get_book_data(index=self.buy_trade_index)
        sell_volume = self._get_book_data(index=self.sell_trade_index)

        # Update indicators
        self.tns.step(buys=buy_volume, sells=sell_volume)
        self.rsi.step(price=self.midpoint)

        # Get PnL from any filled LIMIT orders, which is calculated by netting
        # out whatever open position the agent already has in FIFO order
        limit_pnl, long_filled, short_filled = self.broker.step_limit_order_pnl(
            bid_price=self.best_bid,
            ask_price=self.best_ask,
            buy_volume=buy_volume,
            sell_volume=sell_volume,
            step=self.local_step_number)

        # Get PnL from any filled MARKET orders AND action penalties for
        # invalid actions made by the agent for future discouragement
        action_penalty_reward, market_pnl = self.map_action_to_broker(
            action=step_action)
        step_pnl = limit_pnl + market_pnl

        self.step_reward = self._get_step_reward(
            step_pnl=step_pnl,
            step_penalty=action_penalty_reward,
            long_filled=long_filled,
            short_filled=short_filled)

        # Add current step's observation to the data buffer
        step_observation = self._get_step_observation(step_action=step_action)
        self.data_buffer.append(step_observation)

        # Store for visualization AFTER the episode
        self.viz.add_observation(obs=step_observation)
        self.viz.add(
            self.midpoint,  # arguments map to the column names in __init__
            int(long_filled),
            int(short_filled),
            self.broker.net_inventory_count,
            (self.broker.realized_pnl * 100) / self.max_position)

        self.reward += self.step_reward
        self.local_step_number += 1
        self.last_midpoint = self.midpoint

    self.observation = self._get_observation()

    if self.local_step_number > self.max_steps:
        self.done = True
        had_long_positions = 1 if self.broker.long_inventory_count > 0 else 0
        had_short_positions = 1 if self.broker.short_inventory_count > 0 else 0

        # flatten any remaining inventory at the end of the episode
        flatten_pnl = self.broker.flatten_inventory(
            bid_price=self.best_bid, ask_price=self.best_ask)
        self.reward += self._get_step_reward(step_pnl=flatten_pnl,
                                             step_penalty=0.,
                                             long_filled=False,
                                             short_filled=False)

        # store for visualization after the episode
        self.viz.add(
            self.midpoint,  # arguments map to the column names in __init__
            had_long_positions,
            had_short_positions,
            self.broker.net_inventory_count,
            (self.broker.realized_pnl * 100) / self.max_position)

    # save rewards to derive cumulative reward
    self.episode_stats.reward += self.reward

    return self.observation, self.reward, self.done, {}


def reset(self) -> np.ndarray:
    """
    Reset the environment.
    :return: (np.array) observation at the first step
    """
    if self.training:
        self.local_step_number = self._random_state.randint(
            low=0, high=self.max_steps // 5)
    else:
        self.local_step_number = 0

    # print out episode statistics if there was any activity by the agent
    if self.broker.total_trade_count > 0 or self.broker.realized_pnl != 0.:
        self.episode_stats.number_of_episodes += 1
        print(('-' * 25),
              '{}-{} {} EPISODE RESET'.format(self.symbol, self._seed,
                                              self.reward_type.upper()),
              ('-' * 25))
        print('Episode Reward: {:.4f}'.format(self.episode_stats.reward))
        print('Episode PnL: {:.2f}%'.format(
            (self.broker.realized_pnl / self.max_position) * 100.))
        print('Trade Count: {}'.format(self.broker.total_trade_count))
        print('Average PnL per Trade: {:.4f}%'.format(
            self.broker.average_trade_pnl * 100.))
        print('Total # of episodes: {}'.format(
            self.episode_stats.number_of_episodes))
        print('\n'.join([
            '{}\t=\t{}'.format(k, v)
            for k, v in self.broker.get_statistics().items()
        ]))
        print('First step:\t{}'.format(self.local_step_number))
        print(('=' * 75))
    else:
        print('Resetting environment #{} on episode #{}.'.format(
            self._seed, self.episode_stats.number_of_episodes))

    self.A_t, self.B_t = 0., 0.
    self.reward = 0.0
    self.done = False

    self.broker.reset()
    self.data_buffer.clear()
    self.episode_stats.reset()
    self.rsi.reset()
    self.tns.reset()
    self.viz.reset()

    # warm up the indicators and the data buffer before returning an observation
    for step in range(self.window_size + INDICATOR_WINDOW_MAX + 1):
        self.midpoint = self._midpoint_prices[self.local_step_number]
        if self.last_midpoint is None:
            self.last_midpoint = self.midpoint
        self.midpoint_change = (self.midpoint / self.last_midpoint) - 1.
        self.best_bid, self.best_ask = self._get_nbbo()

        step_buy_volume = self._get_book_data(index=self.buy_trade_index)
        step_sell_volume = self._get_book_data(index=self.sell_trade_index)
        self.tns.step(buys=step_buy_volume, sells=step_sell_volume)
        self.rsi.step(price=self.midpoint)

        # Add current step's observation to the data buffer
        step_observation = self._get_step_observation(step_action=0)
        self.data_buffer.append(step_observation)

        self.local_step_number += 1
        self.last_midpoint = self.midpoint

    self.observation = self._get_observation()

    return self.observation


def render(self, mode: str = 'human') -> None:
    """
    Render midpoint prices.

    :param mode: (str) flag for type of rendering. Only 'human' is supported.
    :return: (void)
    """
    self._render.render(midpoint=self.midpoint, mode=mode)


def close(self) -> None:
    """
    Free up memory when closing the environment.

    :return: (void)
    """
    self.broker.reset()
    self.data_buffer.clear()
    self.episode_stats = None
    self._raw_data = None
    self._normalized_data = None
    self._midpoint_prices = None
    self.tns = None
    self.rsi = None


def seed(self, seed: int = 1) -> list:
    """
    Set the random seed in the environment.

    :param seed: (int) random seed number
    :return: (list) seed number in a list
    """
    self._random_state = np.random.RandomState(seed=seed)
    self._seed = seed
    return [seed]


def _get_nbbo(self) -> (float, float):
    """
    Get the best bid and offer.

    :return: (tuple) best bid and offer
    """
    best_bid = self._best_bids[self.local_step_number]
    best_ask = self._best_asks[self.local_step_number]
    return best_bid, best_ask


def _get_book_data(self, index: int = 0) -> np.ndarray or float:
    """
    Return step 'n' of order book snapshot data.

    :param index: (int) step 'n' to look up in order book snapshot history
    :return: (np.array) order book snapshot vector
    """
    return self._raw_data[self.local_step_number][index]


@staticmethod
def _process_data(observation: np.ndarray) -> np.ndarray:
    """
    Reshape observation for function approximator.
    :param observation: observation space
    :return: (np.array) clipped observation space
    """
    return np.clip(observation, -10., 10.)


def _create_action_features(self, action: int) -> np.ndarray:
    """
    Create a features array for the current time step's action.

    :param action: (int) action number
    :return: (np.array) one-hot encoding of the current action
    """
    return self.actions[action]


def _create_indicator_features(self) -> np.ndarray:
    """
    Create features vector with environment indicators.

    :return: (np.array) indicator values for the current time step
    """
    return np.array((*self.tns.get_value(), *self.rsi.get_value()),
                    dtype=np.float32).reshape(1, -1)


def _get_step_observation(self, step_action: int = 0) -> np.ndarray:
    """
    Current step observation, NOT including historical data.

    :param step_action: (int) current step action
    :return: (np.array) current step observation
    """
    step_position_features = self._create_position_features()
    step_action_features = self._create_action_features(action=step_action)
    step_indicator_features = self._create_indicator_features()
    step_environment_observation = self._normalized_data[self.local_step_number]
    observation = np.concatenate((step_environment_observation,
                                  step_indicator_features,
                                  step_position_features,
                                  step_action_features,
                                  self.step_reward),
                                 axis=None)
    return self._process_data(observation)


def _get_observation(self) -> np.ndarray:
    """
    Current step observation, including historical data.

    If format_3d is TRUE: expand the observation space from 2 to 3 dimensions.
    (Note: this is necessary for conv nets in Baselines.)

    :return: (np.array) observation state for the current time step
    """
    # Note: reversing the data to chronological order is actually faster when
    # making an array in Python / Numpy, which is odd. #timeit
    observation = np.asarray(self.data_buffer, dtype=np.float32)
    if self.format_3d:
        observation = np.expand_dims(observation, axis=-1)
    return observation


def get_trade_history(self) -> pd.DataFrame:
    """
    Get a DataFrame with the trades from the most recent episode.

    :return: midpoint prices, and buy & sell trades
    """
    return self.viz.to_df()


def plot_trade_history(self, save_filename: str or None = None) -> None:
    """
    Plot history from a back-test with trade executions, total inventory,
    and PnL.

    :param save_filename: filename for saving the image
    """
    self.viz.plot(save_filename=save_filename)


def plot_observation_history(self) -> None:
    """
    Plot the observation space as an image.
    """
    return self.viz.plot_obs()
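Taken together, the methods above give the environment the standard Gym interface (reset / step / render / close), so a back-test is just the usual interaction loop. The snippet below is a minimal sketch, not part of the repository: it assumes the MarketMaker sub-class defined elsewhere in this document, placeholder data file names, and a purely illustrative random policy.

import numpy as np

# minimal back-test sketch; the file names below are placeholders and the
# random policy is only for illustration
env = MarketMaker(symbol='BTC-USD',
                  fitting_file='fitting_day.csv.xz',
                  testing_file='testing_day.csv.xz',
                  training=False,
                  reward_type='trade_completion')

observation = env.reset()
done = False
total_reward = 0.

while not done:
    action = np.random.randint(env.action_space.n)  # random action each step
    observation, reward, done, _ = env.step(action=action)
    total_reward += reward

env.plot_trade_history()  # plot executions, inventory, and PnL for the episode
env.close()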
def __init__(self,
             symbol: str,
             fitting_file: str,
             testing_file: str,
             max_position: int = 10,
             window_size: int = 100,
             seed: int = 1,
             action_repeats: int = 5,
             training: bool = True,
             format_3d: bool = False,
             reward_type: str = 'default',
             transaction_fee: bool = True,
             ema_alpha: list or float or None = EMA_ALPHA):
    """
    Base class for creating environments extending OpenAI's GYM framework.

    :param symbol: currency pair to trade / experiment
    :param fitting_file: prior trading day (e.g., T-1)
    :param testing_file: current trading day (e.g., T)
    :param max_position: maximum number of positions that can be held in
        inventory
    :param window_size: number of lags to include in the observation space
    :param seed: random seed number
    :param action_repeats: number of steps to take in the environment after a
        given action
    :param training: if TRUE, randomize the starting point in the environment
    :param format_3d: if TRUE, reshape the observation space from a matrix to
        a tensor
    :param reward_type: method for calculating the environment's reward:
        1) 'default' --> inventory count * change in midpoint price returns
        2) 'default_with_fills' --> inventory count * change in midpoint price
            returns + closed trade PnL
        3) 'realized_pnl' --> change in realized pnl between time steps
        4) 'differential_sharpe_ratio' -->
            http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.1.7210&rep=rep1&type=pdf
        5) 'asymmetrical' --> extended version of *default*, enhanced with a
            reward for being filled above or below the midpoint, and returning
            only negative rewards for unrealized PnL to discourage long-term
            speculation
        6) 'trade_completion' --> reward is generated per trade's round trip
    :param ema_alpha: decay factor for EMA, usually between 0.9 and 0.9999; if
        NONE, raw values are returned in place of smoothed values
    """
    assert reward_type in VALID_REWARD_TYPES, \
        'Error: {} is not a valid reward type. Value must be in:\n{}'.format(
            reward_type, VALID_REWARD_TYPES)

    self.viz = Visualize(
        columns=['midpoint', 'buys', 'sells', 'inventory', 'realized_pnl'],
        store_historical_observations=True)

    # get Broker class to keep track of PnL and orders
    self.broker = Broker(max_position=max_position,
                         transaction_fee=transaction_fee)

    # properties required for instantiation
    self.symbol = symbol
    self.action_repeats = action_repeats
    self._seed = seed
    self._random_state = np.random.RandomState(seed=self._seed)
    self.training = training
    self.max_position = max_position
    self.window_size = window_size
    self.reward_type = reward_type
    self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]
    self.testing_file = testing_file

    # properties that get reset()
    self.reward = np.array([0.0], dtype=np.float32)
    self.step_reward = np.array([0.0], dtype=np.float32)
    self.done = False
    self.local_step_number = 0
    self.midpoint = 0.0
    self.observation = None
    self.action = 0
    self.last_pnl = 0.
    self.last_midpoint = None
    self.midpoint_change = None
    self.A_t, self.B_t = 0., 0.
    # variables for Differential Sharpe Ratio
    self.episode_stats = ExperimentStatistics()
    self.best_bid = self.best_ask = None

    # properties to override in sub-classes
    self.actions = None
    self.action_space = None
    self.observation_space = None

    # get historical data for simulations
    self.data_pipeline = DataPipeline(alpha=ema_alpha)

    # three different data sets, for different purposes:
    #   1) midpoint_prices - midpoint prices that have not been transformed
    #   2) raw_data - raw limit order book data, not including imbalances
    #   3) normalized_data - z-scored limit order book and order flow imbalance
    #       data; the midpoint price feature is also replaced by the midpoint
    #       log price change
    self._midpoint_prices, self._raw_data, self._normalized_data = \
        self.data_pipeline.load_environment_data(
            fitting_file=fitting_file,
            testing_file=testing_file,
            include_imbalances=True,
            as_pandas=True,
        )

    # derive best bid and offer
    self._best_bids = self._raw_data['midpoint'] - (
        self._raw_data['spread'] / 2)
    self._best_asks = self._raw_data['midpoint'] + (
        self._raw_data['spread'] / 2)

    self.max_steps = self._raw_data.shape[0] - self.action_repeats - 1

    # load indicators into the indicator manager
    self.tns = IndicatorManager()
    self.rsi = IndicatorManager()
    for window in INDICATOR_WINDOW:
        self.tns.add(
            ('tns_{}'.format(window), TnS(window=window, alpha=ema_alpha)))
        self.rsi.add(
            ('rsi_{}'.format(window), RSI(window=window, alpha=ema_alpha)))

    # buffer for appending lags
    self.data_buffer = deque(maxlen=self.window_size)

    # Index of specific data points used to generate the observation space
    features = self._raw_data.columns.tolist()
    self.best_bid_index = features.index('bids_distance_0')
    self.best_ask_index = features.index('asks_distance_0')
    self.notional_bid_index = features.index('bids_notional_0')
    self.notional_ask_index = features.index('asks_notional_0')
    self.buy_trade_index = features.index('buys')
    self.sell_trade_index = features.index('sells')

    # typecast all data sets to numpy
    self._raw_data = self._raw_data.to_numpy(dtype=np.float32)
    self._normalized_data = self._normalized_data.to_numpy(dtype=np.float32)
    self._midpoint_prices = self._midpoint_prices.to_numpy(dtype=np.float64)
    self._best_bids = self._best_bids.to_numpy(dtype=np.float32)
    self._best_asks = self._best_asks.to_numpy(dtype=np.float32)

    # rendering class
    self._render = TradingGraph(sym=self.symbol)

    # graph midpoint prices
    self._render.reset_render_data(
        y_vec=self._midpoint_prices[:np.shape(self._render.x_vec)[0]])
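The constructor above initializes the running moments A_t and B_t that the reward dispatcher feeds into reward_types.differential_sharpe_ratio. The repository's actual implementation of that function is not reproduced in this document; the following is only a minimal sketch of the standard Moody & Saffell formulation (the paper linked in the docstring), with eta as an assumed moving-average step size.

import numpy as np


def differential_sharpe_ratio(R_t: float,
                              A_tm1: float,
                              B_tm1: float,
                              eta: float = 0.01) -> (float, float, float):
    """Sketch of Moody & Saffell's differential Sharpe ratio update.

    R_t is the current step's return, A_tm1 / B_tm1 are the previous EMA
    estimates of the first and second moments of returns, and eta is the
    moving-average step size (an assumed default here).
    """
    delta_A = R_t - A_tm1
    delta_B = R_t ** 2 - B_tm1

    denominator = (B_tm1 - A_tm1 ** 2) ** 1.5
    if np.isclose(denominator, 0.0):
        D_t = 0.0  # undefined on the first steps; return a neutral reward
    else:
        D_t = (B_tm1 * delta_A - 0.5 * A_tm1 * delta_B) / denominator

    # update the moment estimates for the next step
    A_t = A_tm1 + eta * delta_A
    B_t = B_tm1 + eta * delta_B
    return D_t, A_t, B_t

The return signature (reward, A_t, B_t) mirrors how the dispatcher unpacks the call: tmp_reward, self.A_t, self.B_t = reward_types.differential_sharpe_ratio(...).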
def test_lob_queuing(self):
    test_position = Broker()

    # perform a partial fill on the first order
    step = 0
    bid_price = 102.
    ask_price = 103.
    buy_volume = 500
    sell_volume = 500
    queue_ahead = 800
    total_pnl = 0.

    order_open = LimitOrder(ccy='BTC-USD',
                            side='long',
                            price=bid_price,
                            step=step,
                            queue_ahead=queue_ahead)
    test_position.add(order=order_open)

    step += 1
    pnl, is_long_order_filled, is_short_order_filled = \
        test_position.step_limit_order_pnl(bid_price=bid_price,
                                           ask_price=ask_price,
                                           buy_volume=buy_volume,
                                           sell_volume=sell_volume,
                                           step=step)
    total_pnl += pnl
    print("#1 long_inventory.order = \n{}".format(
        test_position.long_inventory.order))
    self.assertEqual(300, test_position.long_inventory.order.queue_ahead)
    self.assertEqual(0, test_position.long_inventory.order.executed)
    self.assertEqual(0, test_position.long_inventory_count)

    step += 1
    pnl, is_long_order_filled, is_short_order_filled = \
        test_position.step_limit_order_pnl(bid_price=bid_price,
                                           ask_price=ask_price,
                                           buy_volume=buy_volume,
                                           sell_volume=sell_volume,
                                           step=step)
    total_pnl += pnl
    print("#2 long_inventory.order = \n{}".format(
        test_position.long_inventory.order))
    self.assertEqual(200, test_position.long_inventory.order.executed)
    self.assertEqual(0, test_position.long_inventory_count)

    # if the order gets filled with a bid below the order's price, the order
    # should NOT receive any price improvement during the execution
    bid_price = 100.
    ask_price = 102.
    order_open = LimitOrder(ccy='BTC-USD',
                            side='long',
                            price=bid_price,
                            step=step,
                            queue_ahead=queue_ahead)
    test_position.add(order=order_open)
    print("#3 long_inventory.order = \n{}".format(
        test_position.long_inventory.order))
    self.assertEqual(0, test_position.long_inventory_count)

    bid_price = 100.
    for i in range(5):
        step += 1
        pnl, is_long_order_filled, is_short_order_filled = \
            test_position.step_limit_order_pnl(bid_price=bid_price,
                                               ask_price=ask_price,
                                               buy_volume=buy_volume,
                                               sell_volume=sell_volume,
                                               step=step)
        total_pnl += pnl

    self.assertEqual(1, test_position.long_inventory_count)
    self.assertEqual(100.40,
                     round(test_position.long_inventory.average_price, 2))
    print("PnL: {}".format(total_pnl))
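The assertions above (queue_ahead dropping from 800 to 300, then 200 units executing on the next step) follow from a simple queue-position model: incoming contra volume first burns through the quantity queued ahead of the resting order, and only the remainder executes against the order itself. The repository's LimitOrder bookkeeping is not shown in this document; the snippet below is a minimal sketch of that logic under those assumptions, using a hypothetical helper name.

def process_fill_volume(queue_ahead: float, executed: float,
                        fill_volume: float) -> (float, float):
    """Sketch of queue-position bookkeeping for a resting limit order.

    Contra volume first consumes the queue ahead of the order; any remainder
    executes against the order itself (order size is not capped here).
    """
    remaining = fill_volume - queue_ahead
    queue_ahead = max(0.0, queue_ahead - fill_volume)
    if remaining > 0:
        executed += remaining
    return queue_ahead, executed


# mirrors the first two steps of test_lob_queuing:
queue, filled = process_fill_volume(queue_ahead=800, executed=0, fill_volume=500)
assert (queue, filled) == (300, 0)
queue, filled = process_fill_volume(queue_ahead=queue, executed=filled, fill_volume=500)
assert (queue, filled) == (0, 200)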