def __init__(self,
             inventory,
             target_inventory,
             trade_window,
             impact_param,
             data_path,
             limit_order_level=2,
             is_buy_agent=False,
             sampling_freq=5):
    """Store the configuration and boot the first order-book simulation.

    Args:
        inventory: Starting inventory of the agent.
        target_inventory: Inventory level the agent is asked to reach.
        trade_window: Time budget for the trade, in seconds.
        impact_param: Forwarded unchanged to ``OrderBookSimulator``.
        data_path: Passed to ``get_data_file_paths`` to list the samples.
        limit_order_level: Tick level stored for the agent's limit orders.
        is_buy_agent: True for a buying agent, False for a selling one.
        sampling_freq: Length of one period, in seconds.
    """
    self.metadata = None

    # --- Simulation clock and settings -------------------------------
    # `period` is an index (period=1 is t=sampling_freq seconds);
    # `time` is the same instant expressed in seconds.
    self.period = 0
    self.time = 0
    self.trade_window = trade_window
    self.impact_param = impact_param
    self.sampling_freq = sampling_freq
    self.data_path = data_path
    self.num_periods = int(trade_window / sampling_freq)
    self.simulation_ids = get_data_file_paths(data_path)

    # --- Agent settings ----------------------------------------------
    self.is_buy_agent = is_buy_agent
    self.initial_inventory = inventory
    self.current_inventory = inventory
    self.target_inventory = target_inventory
    self.limit_order_level = limit_order_level
    self.order_execution_history = []

    # --- Observation space: one (low, high) pair per feature ---------
    feature_bounds = [
        (0, np.inf),
        (0, np.inf),
        (0, 1),
        (0, np.inf),
        (-1, np.inf),
        (0, np.inf),
        (-np.inf, np.inf),
        (-np.inf, np.inf),
        (0, 1),
        (0, np.inf),
        (0, 1),
    ]
    lower_bounds = np.array([lo for lo, _ in feature_bounds])
    upper_bounds = np.array([hi for _, hi in feature_bounds])
    self.observation_space = gym.spaces.Box(low=lower_bounds,
                                            high=upper_bounds,
                                            dtype=np.float32)

    # --- Initial LOB simulator on a randomly drawn sample ------------
    self.current_sim_id = np.random.choice(self.simulation_ids, 1)[-1]
    book_path = os.path.join(self.current_sim_id, 'ob.feather')
    trades_path = os.path.join(self.current_sim_id, 'trades.feather')
    self.LOB_SIM = OrderBookSimulator(book_path, trades_path, impact_param)

    book, trades, fills, open_levels = self.LOB_SIM.iterate(force=True)
    best_bid = book.BID_PRICE.max()
    best_ask = book.ASK_PRICE.min()
    # Mid price of the first snapshot is the benchmark price.
    self.initial_price = (best_bid + best_ask) / 2
    self.state = self.calculate_state(book, trades, fills, open_levels)

    # --- Action space ------------------------------------------------
    # 0: do nothing, 1: LO at tick level 2, 2: MO of size 100,
    # 3: MO of size 200
    self.action_space = gym.spaces.Discrete(4)
def reset(self):
    """Begin a new episode on a randomly drawn sample and return its state.

    Rewinds the clock, restores the starting inventory, builds a fresh
    ``OrderBookSimulator`` and recomputes the benchmark price and the
    state from its first forced iteration.
    """
    # Rewind the clock (seconds and period index) and the inventory.
    self.time = 0
    self.period = 0
    self.current_inventory = self.initial_inventory

    # Draw a sample and build a brand-new simulator for it.
    self.current_sim_id = np.random.choice(self.simulation_ids, 1)[-1]
    sample_dir = self.current_sim_id
    self.LOB_SIM = OrderBookSimulator(
        os.path.join(sample_dir, 'ob.feather'),
        os.path.join(sample_dir, 'trades.feather'),
        self.impact_param,
    )

    book, trades, fills, open_levels = self.LOB_SIM.iterate(force=True)
    best_bid = book.BID_PRICE.max()
    best_ask = book.ASK_PRICE.min()
    self.initial_price = (best_bid + best_ask) / 2
    self.state = self.calculate_state(book, trades, fills, open_levels)

    # Forget the fills recorded during the previous episode.
    self.order_execution_history = []
    return self.state
class SpamTrader(gym.Env):
    """Order-execution environment driven by ``OrderBookSimulator``.

    The agent must move ``current_inventory`` to ``target_inventory`` within
    ``trade_window`` seconds, acting once every ``sampling_freq`` seconds.

    Actions (``Discrete(3)``):
        0: do nothing
        1: limit order of up to 100 units at ``limit_order_level``
        2: market order of up to 100 units

    Observation (5 values, in the order built by ``calculate_state``):
        remaining inventory fraction, percentage price move versus the
        initial mid price, fraction of the trade window elapsed, best-tick
        volume / 100, and recently executed volume / 100.
    """

    def __init__(self,
                 inventory,
                 target_inventory,
                 trade_window,
                 impact_param,
                 data_path,
                 htfu_penalty=0.001,
                 inventory_reduction_reward=0.001,
                 vol_penalty_window=12,
                 vol_penality_threshold=200,
                 vol_penalty=0.001,
                 limit_order_level=2,
                 is_buy_agent=False,
                 sampling_freq=5):
        """Store the configuration and boot the first simulation.

        Args:
            inventory: Starting inventory of the agent.
            target_inventory: Inventory level the agent has to reach.
            trade_window: Time budget for the trade, in seconds.
            impact_param: Forwarded unchanged to ``OrderBookSimulator``.
            data_path: Passed to ``get_data_file_paths`` to list samples.
            htfu_penalty: Scale of the "hurry up" penalty in the reward.
            inventory_reduction_reward: Bonus granted when a market order
                was placed during the step.
            vol_penalty_window: Number of periods kept in the rolling
                executed-volume window (12 periods of 5 seconds is one
                minute with the defaults).
            vol_penality_threshold: Rolling executed volume above which
                placing another market order is penalised.
            vol_penalty: Size of that fast-execution penalty.
            limit_order_level: Tick level used for limit orders.
            is_buy_agent: True for a buying agent, False for a selling one.
            sampling_freq: Length of one period, in seconds.
        """
        self.metadata = None

        # Simulation parameters
        self.period = 0  # index units: period=1 is t=sampling_freq seconds
        self.time = 0  # in seconds
        self.trade_window = trade_window
        self.impact_param = impact_param
        self.sampling_freq = sampling_freq
        self.data_path = data_path
        self.num_periods = int(trade_window / sampling_freq)
        self.simulation_ids = get_data_file_paths(data_path)

        # Agent parameters
        self.is_buy_agent = is_buy_agent
        self.initial_inventory = inventory
        self.current_inventory = inventory
        self.target_inventory = target_inventory
        self.limit_order_level = limit_order_level
        self.order_execution_history = []
        self.one_minute_vol_executed = np.zeros(vol_penalty_window)
        self.vol_penality_threshold = vol_penality_threshold
        self.vol_penalty = vol_penalty
        self.vol_penalty_window = vol_penalty_window
        self.inventory_reduction_reward = inventory_reduction_reward
        self.htfu_penalty = htfu_penalty

        self.observation_space = gym.spaces.Box(
            low=np.array([0, -np.inf, 0, 0, 0]),
            high=np.array([1, np.inf, 1, np.inf, np.inf]),
            dtype=np.float32)

        # Needs every attribute read by calculate_state to be set already.
        self._start_new_simulation()

        # 0: do nothing, 1: LO at `limit_order_level`, 2: MO of size 100
        self.action_space = gym.spaces.Discrete(3)

    def _start_new_simulation(self):
        """Load a random sample and derive the benchmark price and state."""
        self.current_sim_id = np.random.choice(self.simulation_ids, 1)[-1]
        lob_file = os.path.join(self.current_sim_id, 'ob.feather')
        trades_file = os.path.join(self.current_sim_id, 'trades.feather')
        self.LOB_SIM = OrderBookSimulator(lob_file, trades_file,
                                          self.impact_param)
        ob, trds, executed_orders, active_limit_order_levels = \
            self.LOB_SIM.iterate(force=True)
        # Mid price of the first snapshot is the shortfall benchmark.
        self.initial_price = (ob.BID_PRICE.max() + ob.ASK_PRICE.min()) / 2
        self.state = self.calculate_state(ob, trds, executed_orders,
                                          active_limit_order_levels)

    def _remaining_inventory(self):
        """Return the absolute distance between current and target inventory."""
        return abs(self.target_inventory - self.current_inventory)

    def _place_market_order(self, volume):
        """Send a market order of ``volume`` on the agent's side."""
        if self.is_buy_agent:
            self.LOB_SIM.place_market_buy_order(volume)
        else:
            self.LOB_SIM.place_market_sell_order(volume)

    def step(self, action):
        """Apply ``action``, advance the simulator one period and score it.

        Returns:
            ``(state, reward, is_done, info)``. If the simulator raises
            while iterating, the previous state is returned with reward 0
            and ``is_done=True``.
        """
        placed_order = False
        if action == 1:  # limit order at the configured tick level
            volume = min(self._remaining_inventory(), 100.0)
            if self.is_buy_agent:
                self.LOB_SIM.place_limit_buy_order_at_tick(
                    volume, self.limit_order_level)
            else:
                self.LOB_SIM.place_limit_sell_order_at_tick(
                    volume, self.limit_order_level)
        elif action == 2:  # market order of size 100
            volume = min(self._remaining_inventory(), 100.0)
            self._place_market_order(volume)
            placed_order = True

        # Update market environment. Any simulator failure ends the episode
        # (presumably the sample ran out of data -- TODO confirm), but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        try:
            ob, trds, executed_orders, active_limit_order_levels = \
                self.LOB_SIM.iterate()
        except Exception:
            return self.state, 0, True, {}

        self.order_execution_history += executed_orders

        # Slide the rolling window left and record this period's volume.
        self.one_minute_vol_executed[:-1] = self.one_minute_vol_executed[1:]
        self.one_minute_vol_executed[-1] = sum(
            order['volume'] for order in executed_orders)

        # Apply fills to the inventory, never overshooting the target.
        price_weighted_volume, total_executed_volume = 0, 0
        for order in executed_orders:
            price_weighted_volume += order['volume'] * order['price']
            total_executed_volume += order['volume']
            if order['is_buy']:
                self.current_inventory = min(
                    self.target_inventory,
                    self.current_inventory + order['volume'])
            else:
                self.current_inventory = max(
                    self.target_inventory,
                    self.current_inventory - order['volume'])

        # Implementation shortfall relative to the initial mid price,
        # signed so that a better-than-benchmark fill is positive.
        if total_executed_volume > 0:
            executed_vwap = price_weighted_volume / total_executed_volume
            if self.is_buy_agent:
                shortfall = (self.initial_price -
                             executed_vwap) / self.initial_price
            else:
                shortfall = (executed_vwap -
                             self.initial_price) / self.initial_price
        else:
            shortfall = 0

        # Time accounting
        self.period += 1
        self.time += self.sampling_freq

        reached_target_position = \
            self.current_inventory == self.target_inventory
        about_to_run_out_of_time = \
            (self.time + self.sampling_freq) >= self.trade_window
        ran_out_of_time = self.time >= self.trade_window
        is_done = reached_target_position or ran_out_of_time

        if about_to_run_out_of_time:
            # Forced liquidation: send whatever is left as one market order.
            self._place_market_order(self._remaining_inventory())
            placed_order = True

        reward = 1000 * self.calculate_reward(shortfall, placed_order)

        self.state = self.calculate_state(ob, trds, executed_orders,
                                          active_limit_order_levels)
        return self.state, reward, is_done, {}

    def reset(self):
        """Begin a new episode on a random sample and return its first state."""
        self.period = 0
        self.time = 0
        self.current_inventory = self.initial_inventory

        # Clear per-episode bookkeeping BEFORE the state is computed:
        # calculate_state reads the rolling volume window, so resetting it
        # afterwards would leak the previous episode into the first state.
        self.order_execution_history = []
        self.one_minute_vol_executed = np.zeros(self.vol_penalty_window)

        self._start_new_simulation()
        return self.state

    def calculate_reward(self, shortfall, placed_order, gamma=1):
        """Combine shortfall with the shaping terms into one reward.

        Args:
            shortfall: Signed implementation shortfall of this step.
            placed_order: Whether a market order was placed this step.
            gamma: Unused; kept so existing callers keep working.

        Returns:
            ``shortfall + fast_execution_penalty + reduce_inventory_reward
            + inventory_penalty``.
        """
        # "Hurry up" penalty: a sigmoid of how far the remaining lots of 100
        # exceed the remaining periods. Uses the distance to the target
        # (as step and calculate_state do), which equals the raw inventory
        # when the inventory is non-negative and the target is 0.
        remaining_periods = self.num_periods - self.period
        tau = (self._remaining_inventory() / 100) - remaining_periods
        inventory_penalty = -self.htfu_penalty * 1.0 / (1.0 + np.exp(-tau))

        # Bonus for attempting to reduce inventory with a market order.
        if placed_order:
            reduce_inventory_reward = self.inventory_reduction_reward
        else:
            reduce_inventory_reward = 0

        # Trading on top of heavy recent volume costs `vol_penalty` and
        # cancels the bonus above.
        last_minute_vol_executed = np.sum(self.one_minute_vol_executed)
        if (last_minute_vol_executed >
                self.vol_penality_threshold) and placed_order:
            fast_execution_penalty = \
                -self.vol_penalty - reduce_inventory_reward
        else:
            fast_execution_penalty = 0.0

        return (shortfall + fast_execution_penalty +
                reduce_inventory_reward + inventory_penalty)

    def calculate_state(self, ob, trds, executed_orders,
                        active_limit_order_levels):
        """Build the 5-feature observation from the latest order book.

        Only ``ob`` is read; the other arguments are accepted so the call
        matches the tuple returned by ``LOB_SIM.iterate``.
        """
        # A buyer reads the ask columns, a seller the bid columns.
        side = 'ASK' if self.is_buy_agent else 'BID'
        level_one = ob['LEVEL'] == 1

        best_tick_volume = ob.loc[level_one,
                                  [side + '_SIZE']].values[-1, -1] / 100

        inventory_delta = abs(
            self.current_inventory - self.target_inventory) / max(
                abs(self.initial_inventory), abs(self.target_inventory))

        pct_diff_from_initial_price = 100.0 * (
            ob.loc[level_one, [side + '_PRICE']].values[-1, -1] -
            self.initial_price) / self.initial_price

        pct_trade_window_progression = self.time / self.trade_window

        state = np.array([
            inventory_delta, pct_diff_from_initial_price,
            pct_trade_window_progression, best_tick_volume,
            np.sum(self.one_minute_vol_executed) / 100
        ])
        return state

    def render(self, mode='human'):
        """No rendering is implemented."""
        return

    def close(self):
        """Nothing to release."""
        return
class DiscreteTrader(gym.Env):
    """Order-execution environment with four discrete actions.

    The agent must move ``current_inventory`` to ``target_inventory`` within
    ``trade_window`` seconds, acting once every ``sampling_freq`` seconds
    against an ``OrderBookSimulator``.

    Actions (``Discrete(4)``):
        0: do nothing
        1: limit order of up to 100 units at ``limit_order_level``
        2: market order of up to 100 units
        3: market order of up to 200 units
    """

    def __init__(self,
                 inventory,
                 target_inventory,
                 trade_window,
                 impact_param,
                 data_path,
                 limit_order_level=2,
                 is_buy_agent=False,
                 sampling_freq=5):
        """Store the configuration and boot the first simulation.

        Args:
            inventory: Starting inventory of the agent.
            target_inventory: Inventory level the agent has to reach.
            trade_window: Time budget for the trade, in seconds.
            impact_param: Forwarded unchanged to ``OrderBookSimulator``.
            data_path: Passed to ``get_data_file_paths`` to list samples.
            limit_order_level: Tick level used for limit orders.
            is_buy_agent: True for a buying agent, False for a selling one.
            sampling_freq: Length of one period, in seconds.
        """
        self.metadata = None

        # Simulation parameters
        self.period = 0  # index units: period=1 is t=sampling_freq seconds
        self.time = 0  # in seconds
        self.trade_window = trade_window
        self.impact_param = impact_param
        self.sampling_freq = sampling_freq
        self.data_path = data_path
        self.num_periods = int(trade_window / sampling_freq)
        self.simulation_ids = get_data_file_paths(data_path)

        # Agent parameters
        self.is_buy_agent = is_buy_agent
        self.initial_inventory = inventory
        self.current_inventory = inventory
        self.target_inventory = target_inventory
        self.limit_order_level = limit_order_level
        self.order_execution_history = []

        # Bounds follow the feature order produced by calculate_state.
        self.observation_space = gym.spaces.Box(
            low=np.array([0, 0, 0, 0, -1, 0, -np.inf, -np.inf, 0, 0, 0]),
            high=np.array([
                np.inf, np.inf, 1, np.inf, np.inf, np.inf, np.inf, np.inf, 1,
                np.inf, 1
            ]),
            dtype=np.float32)

        # Needs every attribute read by calculate_state to be set already.
        self._start_new_simulation()

        # 0: do nothing, 1: LO at `limit_order_level`, 2: MO of size 100,
        # 3: MO of size 200
        self.action_space = gym.spaces.Discrete(4)

    def _start_new_simulation(self):
        """Load a random sample and derive the benchmark price and state."""
        self.current_sim_id = np.random.choice(self.simulation_ids, 1)[-1]
        lob_file = os.path.join(self.current_sim_id, 'ob.feather')
        trades_file = os.path.join(self.current_sim_id, 'trades.feather')
        self.LOB_SIM = OrderBookSimulator(lob_file, trades_file,
                                          self.impact_param)
        ob, trds, executed_orders, active_limit_order_levels = \
            self.LOB_SIM.iterate(force=True)
        # Mid price of the first snapshot is the shortfall benchmark.
        self.initial_price = (ob.BID_PRICE.max() + ob.ASK_PRICE.min()) / 2
        self.state = self.calculate_state(ob, trds, executed_orders,
                                          active_limit_order_levels)

    def _remaining_inventory(self):
        """Return the absolute distance between current and target inventory."""
        return abs(self.target_inventory - self.current_inventory)

    def _place_market_order(self, volume):
        """Send a market order of ``volume`` on the agent's side."""
        if self.is_buy_agent:
            self.LOB_SIM.place_market_buy_order(volume)
        else:
            self.LOB_SIM.place_market_sell_order(volume)

    def step(self, action):
        """Apply ``action``, advance the simulator one period and score it.

        Returns:
            ``(state, reward, is_done, info)``. If the simulator raises
            while iterating, the previous state is returned with reward 0
            and ``is_done=True``.
        """
        if action == 1:  # limit order at the configured tick level
            volume = min(self._remaining_inventory(), 100.0)
            if self.is_buy_agent:
                self.LOB_SIM.place_limit_buy_order_at_tick(
                    volume, self.limit_order_level)
            else:
                self.LOB_SIM.place_limit_sell_order_at_tick(
                    volume, self.limit_order_level)
        elif action == 2:  # market order of size 100
            self._place_market_order(min(self._remaining_inventory(), 100.0))
        elif action == 3:  # market order of size 200
            self._place_market_order(min(self._remaining_inventory(), 200.0))

        # Update market environment. Any simulator failure ends the episode
        # (presumably the sample ran out of data -- TODO confirm), but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        try:
            ob, trds, executed_orders, active_limit_order_levels = \
                self.LOB_SIM.iterate()
        except Exception:
            return self.state, 0, True, {}

        self.order_execution_history += executed_orders

        # Apply fills to the inventory, never overshooting the target.
        price_weighted_volume, total_executed_volume = 0, 0
        for order in executed_orders:
            price_weighted_volume += order['volume'] * order['price']
            total_executed_volume += order['volume']
            if order['is_buy']:
                self.current_inventory = min(
                    self.target_inventory,
                    self.current_inventory + order['volume'])
            else:
                self.current_inventory = max(
                    self.target_inventory,
                    self.current_inventory - order['volume'])

        # Implementation shortfall relative to the initial mid price,
        # signed so that a better-than-benchmark fill is positive.
        if total_executed_volume > 0:
            executed_vwap = price_weighted_volume / total_executed_volume
            if self.is_buy_agent:
                shortfall = (self.initial_price -
                             executed_vwap) / self.initial_price
            else:
                shortfall = (executed_vwap -
                             self.initial_price) / self.initial_price
        else:
            shortfall = 0

        # Time accounting
        self.period += 1
        self.time += self.sampling_freq

        reached_target_position = \
            self.current_inventory == self.target_inventory
        ran_out_of_time = self.time >= self.trade_window
        is_done = reached_target_position or ran_out_of_time

        reward = self.calculate_reward(shortfall, self.time)

        self.state = self.calculate_state(ob, trds, executed_orders,
                                          active_limit_order_levels)
        return self.state, reward, is_done, {}

    def reset(self):
        """Begin a new episode on a random sample and return its first state."""
        self.period = 0
        self.time = 0
        self.current_inventory = self.initial_inventory
        self.order_execution_history = []

        self._start_new_simulation()
        return self.state

    def calculate_reward(self, shortfall, time, gamma=1):
        """Combine shortfall with schedule and non-completion penalties.

        Both penalties are based on the distance to ``target_inventory``
        (the quantity ``step`` and ``calculate_state`` use), which equals the
        raw inventory when the inventory is non-negative and the target is 0.

        Args:
            shortfall: Signed implementation shortfall of this step.
            time: Elapsed episode time in seconds.
            gamma: Lots of 100 assumed tradable per remaining period.

        Returns:
            ``shortfall + non_completion_penalty + inventory_penalty``.
        """
        remaining_periods = self.num_periods - self.period
        remaining_lots = self._remaining_inventory() / 100

        # Behind schedule: more lots left than periods available.
        if remaining_lots > gamma * remaining_periods:
            inventory_penalty = (gamma * remaining_periods -
                                 remaining_lots) * 0.01
        else:
            inventory_penalty = 0

        # Out of time: pay for whatever has not been traded.
        if time >= self.trade_window:
            non_completion_penalty = -self._remaining_inventory() / 10
        else:
            non_completion_penalty = 0

        return shortfall + non_completion_penalty + inventory_penalty

    def calculate_state(self, ob, trds, executed_orders,
                        active_limit_order_levels):
        """Build the 11-feature observation for the current period.

        Args:
            ob: Order-book frame with ``LEVEL``, ``Time`` and per-side
                ``*_SIZE`` / ``*_PRICE`` columns.
            trds: Trades frame with ``SIZE`` and ``BUY_SELL_FLAG`` columns.
            executed_orders: Unused; accepted so the call matches the tuple
                returned by ``LOB_SIM.iterate``.
            active_limit_order_levels: Mapping with ``'ASK'`` and ``'BID'``
                collections of the agent's resting limit-order prices.
        """
        # A buyer reads the ask columns, a seller the bid columns; the
        # agent's own limit orders rest on the opposite side.
        side = 'ASK' if self.is_buy_agent else 'BID'
        opposite_side = 'BID' if (side == 'ASK') else 'ASK'

        best_tick_volume = ob.loc[ob['LEVEL'] == 1,
                                  [side + '_SIZE']].values[-1, -1] / 100
        second_best_tick_volume = ob.loc[ob['LEVEL'] == 2,
                                         [side + '_SIZE']].values[-1, -1] / 100

        # Seconds since 09:30 as a fraction of a 6.5 hour session.
        trading_day_progression = (1.0 / (6.5 * 60 * 60)) * (
            (datetime.combine(date.today(), ob.Time.dt.time.values[-1]) -
             datetime.combine(date.today(), time(9, 30, 0))) /
            timedelta(seconds=1))

        inventory_delta = abs(
            self.current_inventory - self.target_inventory) / max(
                abs(self.initial_inventory), abs(self.target_inventory))

        pct_diff_from_initial_price = (
            ob.loc[ob['LEVEL'] == 1, [side + '_PRICE']].values[-1, -1] -
            self.initial_price) / self.initial_price

        gross_last_period_trade_volume = trds.SIZE.sum() / 100
        net_last_period_trade_volume = (
            trds[trds['BUY_SELL_FLAG'] == 1].SIZE.sum() -
            trds[trds['BUY_SELL_FLAG'] == 0].SIZE.sum()) / 100

        spread = 10 * (ob.ASK_PRICE.min() - ob.BID_PRICE.max())
        pct_trade_window_progression = self.time / self.trade_window

        num_open_lob_levels = len(active_limit_order_levels['ASK']) + len(
            active_limit_order_levels['BID'])
        has_limit_order_at_tick_2 = ob.loc[
            ob['LEVEL'] == 2, [opposite_side + '_PRICE']].values[-1, -1] in \
            active_limit_order_levels[opposite_side]

        state = np.array([
            best_tick_volume, second_best_tick_volume,
            trading_day_progression, inventory_delta,
            pct_diff_from_initial_price, gross_last_period_trade_volume,
            net_last_period_trade_volume, spread,
            pct_trade_window_progression, num_open_lob_levels,
            has_limit_order_at_tick_2
        ])
        return state

    def render(self, mode='human'):
        """No rendering is implemented."""
        return

    def close(self):
        """Nothing to release."""
        return