def _reset(self, t, i):
    """Reset the per-episode bookkeeping and return the initial observation.

    An order book index is drawn via ``_get_random_orderbook_state`` and a
    feature is built for it with ``_makeFeature`` (using ``i`` as ``qty``).

    Args:
        t: Time value stored in the initial ``ActionState``.
        i: Inventory value stored in the initial ``ActionState``.

    Returns:
        The result of ``toArray()`` on the freshly created ``ActionState``.
    """
    # Only the index of the drawn order book state is used here.
    _, book_index = self._get_random_orderbook_state()

    market = {
        self.featureType.value: self._makeFeature(
            orderbookIndex=book_index, qty=i
        )
    }
    initial_state = ActionState(t, i, market)

    # No execution exists until the first step of the new episode.
    self.execution = None
    self.orderbookIndex = book_index
    self.actionState = initial_state
    return initial_state.toArray()
def backtest(self, q=None, episodes=10, average=False, fixed_a=None):
    """Run ``episodes`` forced executions and collect their outcome.

    Every episode starts from the last entries of ``self.env.T`` and
    ``self.env.I`` and keeps updating and re-running the same action until
    the remaining inventory reported by the environment is ``0``.

    Args:
        q: Accepted for interface compatibility; not used by this method.
        episodes: Number of episodes to run.
        average: If true, return ``self.averageBacktest(results)`` instead of
            the raw result list.
        fixed_a: If not ``None``, this action is used at every decision point
            instead of asking ``self.ai.getQAction``.

    Returns:
        A list with one ``[state, midPrice, actions, price, profit]`` entry
        per episode, or the value of ``self.averageBacktest`` when
        ``average`` is true.
    """
    results = []
    for _ in range(episodes):
        env = self.env
        t = env.T[-1]
        i = env.I[-1]
        state = ActionState(t, i, {})

        a = fixed_a if fixed_a is not None else self.ai.getQAction(state, 0)
        actions = [a]

        action = env.createAction(level=a, state=state, force_execution=True)
        midPrice = action.getReferencePrice()
        action.run(env.orderbook)
        i_next = env.determineNextInventory(action)
        t_next = env.determineNextTime(t)

        # Keep working the same action until no inventory remains.
        while i_next != 0:
            state_next = ActionState(t_next, i_next, {})
            if fixed_a is None:
                a_next = self.ai.getQAction(state_next, 0)
            else:
                a_next = fixed_a
            actions.append(a_next)

            runtime_next = env.determineRuntime(t_next)
            action.setState(state_next)
            action.update(a_next, runtime_next)
            action.run(env.orderbook)
            i_next = env.determineNextInventory(action)
            t_next = env.determineNextTime(t_next)

        price = action.getAvgPrice()
        # Profit is measured against the reference price, signed by side.
        is_buy = action.getOrder().getSide() == OrderSide.BUY
        profit = (midPrice - price) if is_buy else (price - midPrice)
        results.append([state, midPrice, actions, price, profit])

    return self.averageBacktest(results) if average else results
def _reset(self, t, i):
    """Reset both sides of the market maker and return the first observation.

    The buy and the sell side start from the same order book index and share
    the same initial ``ActionState`` object.

    Args:
        t: Time value stored in the initial ``ActionState``.
        i: Inventory value stored in the initial ``ActionState``.

    Returns:
        The result of ``toArray()`` on the initial ``ActionState``.
    """
    # TODO: restoring the book via copy.deepcopy(self.orderbookOriginal) is
    # slow but was noted as required after every episode because matching
    # changes order book states; it is currently disabled.
    _, start_index = self._get_random_orderbook_state()
    start_state = ActionState(
        t, i, {'bidask': self._makeFeature(orderbookIndex=start_index)}
    )

    # Executions are created lazily on the first step.
    self.executionBuy = self.executionSell = None
    self.orderbookIndexBuy = self.orderbookIndexSell = start_index
    self.actionStateBuy = self.actionStateSell = start_state
    return start_state.toArray()
def step(self, action):
    """Apply one action to the current execution and advance the state.

    The action index is translated through ``self.levels``; an execution is
    created on the first call after a reset and updated afterwards. The
    execution is then run against ``self.orderbook``.

    Args:
        action: Index into ``self.levels``.

    Returns:
        A ``(observation, reward, done, info)`` tuple where ``observation``
        is ``state_next.toArray()`` and ``info`` is an empty dict.
    """
    self.episode += 1
    action = self.levels[action]
    self.episodeActions.append(action)

    if self.execution is None:
        self.execution = self._create_execution(action)
    else:
        self.execution = self._update_execution(self.execution, action)

    logging.info(
        'Created/Updated execution.'
        '\nAction: %s (%s)'
        '\nt: %s'
        '\nruntime: %s'
        '\ni: %s',
        action,
        self.execution.getOrder().getType(),
        self.actionState.getT(),
        self.execution.getRuntime(),
        self.actionState.getI(),
    )

    self.execution, counterTrades = self.execution.run(self.orderbook)

    i_next = self._determine_next_inventory(self.execution)
    t_next = self._determine_next_time(self.execution.getState().getT())
    feature = self._makeFeature(
        orderbookIndex=self.execution.getOrderbookIndex(), qty=i_next
    )
    state_next = ActionState(
        t_next, i_next, {self.featureType.value: feature}
    )

    done = self.execution.isFilled() or state_next.getI() == 0
    if done:
        reward = self.execution.getReward()
        volumeRatio = 1.0
        # The original guard `self.callbacks is not []` compared identity
        # with a fresh list and was therefore always true; iterate over an
        # empty tuple instead when no callbacks are configured.
        for cb in self.callbacks or ():
            cb.on_episode_end(
                self.episode,
                {
                    'episode_reward': reward,
                    'episode_actions': self.episodeActions,
                },
            )
        self.episodeActions = []
    else:
        reward, volumeRatio = self.execution.calculateRewardWeighted(
            counterTrades, self.I[-1]
        )

    logging.info(
        'Run execution.'
        '\nTrades: %s'
        '\nReward: %s (Ratio: %s)'
        '\nDone: %s',
        len(counterTrades),
        reward,
        volumeRatio,
        done,
    )

    self.orderbookIndex = self.execution.getOrderbookIndex()
    self.actionState = state_next
    return state_next.toArray(), reward, done, {}
def update(self, t, i, force_execution=False):
    """Choose an action for ``(t, i)``, execute it and learn from the result.

    The chosen action and the obtained reward are appended to
    ``self.logActions`` and ``self.logRewards``.

    Args:
        t: Current time value.
        i: Current inventory value.
        force_execution: Forwarded to ``self.env.createAction``.

    Returns:
        The tuple ``(t_next, i_next)`` as determined by the environment.
    """
    env = self.env
    current_state = ActionState(t, i)

    chosen = self.ai.chooseAction(current_state)
    self.logActions.append(chosen)

    action = env.createAction(
        level=chosen,
        state=current_state,
        force_execution=force_execution,
        orderbookIndex=self.orderbookIndex,
    )
    action.run(env.orderbook)

    i_next = env.determineNextInventory(action)
    t_next = env.determineNextTime(t)

    reward = action.getReward()
    self.logRewards.append(reward)

    # Build the successor as a copy of the action's state, then move it to
    # the next time / inventory.
    executed_state = action.getState()
    state_next = ActionState(
        executed_state.getT(),
        executed_state.getI(),
        executed_state.getMarket(),
    )
    state_next.setT(t_next)
    state_next.setI(i_next)

    self.ai.learn(
        state1=action.getState(),
        action1=action.getA(),
        reward=reward,
        state2=state_next,
    )
    return (t_next, i_next)
def step(self, action):
    """Advance the buy and the sell execution by one step.

    ``self.levels[action]`` supplies the buy level at index 0 and the sell
    level at index 1. Executions are created when either one is missing and
    otherwise updated, but only while they are not yet filled. The buy side
    is run against ``self.orderbook`` before the sell side.

    Args:
        action: Index into ``self.levels``.

    Returns:
        A ``(observation, reward, done, info)`` tuple. ``reward`` is
        ``price_sell - price_buy`` or ``0.0`` when either price equals 0,
        ``done`` is true only when both sides are done, and ``info`` is an
        empty dict.
    """
    level_pair = self.levels[action]
    actionBuy = level_pair[0]
    actionSell = level_pair[1]

    if self.executionBuy is None or self.executionSell is None:
        # First step after a reset: set up both sides.
        self.executionBuy = self._create_execution(
            a=actionBuy,
            actionState=self.actionStateBuy,
            orderbookIndex=self.orderbookIndexBuy,
            side=OrderSide.BUY,
        )
        self.executionSell = self._create_execution(
            a=actionSell,
            actionState=self.actionStateSell,
            orderbookIndex=self.orderbookIndexSell,
            side=OrderSide.SELL,
        )
    else:
        # Later steps: only sides that still have open quantity are touched.
        if not self.executionBuy.isFilled():
            self.executionBuy = self._update_execution(
                execution=self.executionBuy,
                a=actionBuy,
                actionState=self.actionStateBuy,
                orderbookIndex=self.orderbookIndexBuy,
                side=OrderSide.BUY,
            )
        if not self.executionSell.isFilled():
            self.executionSell = self._update_execution(
                execution=self.executionSell,
                a=actionSell,
                actionState=self.actionStateSell,
                orderbookIndex=self.orderbookIndexSell,
                side=OrderSide.SELL,
            )

    # --- buy side -------------------------------------------------------
    if self.executionBuy.isFilled():
        price_buy = self.executionBuy.getAvgPrice()
    else:
        self.executionBuy, trades_buy = self.executionBuy.run(self.orderbook)
        i_next_buy = self._determine_next_inventory(self.executionBuy)
        t_next_buy = self._determine_next_time(
            self.executionBuy.getState().getT()
        )
        feature_buy = self._makeFeature(
            orderbookIndex=self.executionBuy.getOrderbookIndex()
        )
        self.actionStateBuy = ActionState(
            t_next_buy, i_next_buy, {'bidask': feature_buy}
        )
        self.orderbookIndexBuy = self.executionBuy.getOrderbookIndex()
        price_buy = self.executionBuy.calculateAvgPrice(trades_buy)

    # --- sell side ------------------------------------------------------
    if self.executionSell.isFilled():
        price_sell = self.executionSell.getAvgPrice()
    else:
        self.executionSell, trades_sell = self.executionSell.run(
            self.orderbook
        )
        i_next_sell = self._determine_next_inventory(self.executionSell)
        t_next_sell = self._determine_next_time(
            self.executionSell.getState().getT()
        )
        feature_sell = self._makeFeature(
            orderbookIndex=self.executionSell.getOrderbookIndex()
        )
        self.actionStateSell = ActionState(
            t_next_sell, i_next_sell, {'bidask': feature_sell}
        )
        self.orderbookIndexSell = self.executionSell.getOrderbookIndex()
        price_sell = self.executionSell.calculateAvgPrice(trades_sell)

    done_buy = self.executionBuy.isFilled() or self.actionStateBuy.getI() == 0
    done_sell = (
        self.executionSell.isFilled() or self.actionStateSell.getI() == 0
    )

    print('price buy: ' + str(price_buy))
    print('price sell: ' + str(price_sell))
    # A price of 0 on either side yields no reward.
    reward = (
        0.0 if (price_buy == 0 or price_sell == 0) else price_sell - price_buy
    )
    print('reward: ' + str(reward))

    # Report the state of the side whose order book index is further along
    # (the buy side wins ties).
    buy_is_ahead = self.orderbookIndexBuy >= self.orderbookIndexSell
    state_next = self.actionStateBuy if buy_is_ahead else self.actionStateSell
    return state_next.toArray(), reward, (done_buy and done_sell), {}
class MarketMakerEnv(execution_env.ExecutionEnv):
    """Environment that works one BUY and one SELL execution side by side.

    Every discrete action indexes a ``(buy level, sell level)`` pair taken
    from the Cartesian product of the configured level sequence with itself.
    """

    def __init__(self):
        """Clear the per-side bookkeeping and apply the default configuration."""
        self.executionBuy = self.executionSell = None
        self.actionStateBuy = self.actionStateSell = None
        self.orderbookIndexBuy = self.orderbookIndexSell = None
        self._configure()

    def _configure(self,
                   orderbook=None,
                   levels=(-50, 50, 1),
                   T=(0, 100, 10),
                   I=(0, 1, 0.1),
                   lookback=25,
                   bookSize=10
                   ):
        """Store the order book and derive the action/observation spaces.

        Args:
            orderbook: Order book the executions are run against.
            levels: ``(min, max, step)`` passed to ``_generate_Sequence`` to
                build the per-side level sequence.
            T: ``(min, max, step)`` passed to ``_generate_Sequence`` for the
                time sequence.
            I: ``(min, max, step)`` passed to ``_generate_Sequence`` for the
                inventory sequence.
            lookback: Stored as ``self.lookback``; the first observation
                dimension is ``2 * lookback``.
            bookSize: Stored as ``self.bookSize``; the second observation
                dimension.
        """
        self.orderbook = orderbook

        side_levels = self._generate_Sequence(
            min=levels[0], max=levels[1], step=levels[2]
        )
        # One action per (buy level, sell level) combination.
        self.levels = list(itertools.product(side_levels, side_levels))

        self.T = self._generate_Sequence(min=T[0], max=T[1], step=T[2])
        self.I = self._generate_Sequence(min=I[0], max=I[1], step=I[2])

        self.lookback = lookback
        # results in (bid|size, ask|size) -> 4*5
        self.bookSize = bookSize

        self.action_space = spaces.Discrete(len(self.levels))
        self.observation_space = spaces.Box(
            low=0.0,
            high=10.0,
            shape=(2 * self.lookback, self.bookSize, 2),
        )

    def setSide(self, side):
        """Do nothing; the argument is ignored."""

    def _create_execution(self, a, actionState, orderbookIndex, side):
        """Build a new ``Action`` with an order for one side.

        A MARKET order without price is used when the runtime derived from
        ``actionState`` is ``<= 0.0`` or ``a`` is ``None``; otherwise a LIMIT
        order priced at level ``a`` of the order book state at
        ``orderbookIndex``.

        Args:
            a: Price level for the order, or ``None``.
            actionState: State providing time (``getT``) and quantity
                (``getI``).
            orderbookIndex: Index of the order book state to start from.
            side: Order side for the new order.

        Returns:
            The configured ``Action``; its reference price is the best ask
            of the order book state.
        """
        runtime = self._determine_runtime(actionState.getT())
        orderbookState = self.orderbook.getState(orderbookIndex)

        as_market = runtime <= 0.0 or a is None
        if as_market:
            order_type, limit_price = OrderType.MARKET, None
        else:
            limit_price = orderbookState.getPriceAtLevel(side, a)
            order_type = OrderType.LIMIT

        order = Order(
            orderType=order_type,
            orderSide=side,
            cty=actionState.getI(),
            price=limit_price,
        )

        execution = Action(a=a, runtime=runtime)
        execution.setState(actionState)
        execution.setOrder(order)
        execution.setOrderbookState(orderbookState)
        execution.setOrderbookIndex(orderbookIndex)
        execution.setReferencePrice(orderbookState.getBestAsk())
        return execution

    def _update_execution(self, execution, a, actionState, orderbookIndex, side):
        """Replace the order of an existing execution and refresh its state.

        The MARKET/LIMIT decision follows the same rule as in
        ``_create_execution``. The limit price is looked up in the order
        book state the execution held *before* this update; the state at
        ``orderbookIndex`` is attached afterwards.

        Args:
            execution: The ``Action`` to update in place.
            a: Price level for the new order, or ``None``.
            actionState: State providing time (``getT``) and quantity
                (``getI``).
            orderbookIndex: Index of the order book state to attach.
            side: Order side for the new order.

        Returns:
            The same ``execution`` object.
        """
        runtime = self._determine_runtime(actionState.getT())
        orderbookState = self.orderbook.getState(orderbookIndex)

        as_market = runtime <= 0.0 or a is None
        if as_market:
            order_type, limit_price = OrderType.MARKET, None
        else:
            limit_price = execution.getOrderbookState().getPriceAtLevel(side, a)
            order_type = OrderType.LIMIT

        order = Order(
            orderType=order_type,
            orderSide=side,
            cty=actionState.getI(),
            price=limit_price,
        )

        execution.setRuntime(runtime)
        execution.setState(actionState)
        execution.setOrder(order)
        execution.setOrderbookState(orderbookState)
        execution.setOrderbookIndex(orderbookIndex)
        return execution

    def step(self, action):
        """Advance the buy and the sell execution by one step.

        ``self.levels[action]`` supplies the buy level at index 0 and the
        sell level at index 1. Executions are created when either one is
        missing and otherwise updated, but only while they are not yet
        filled. The buy side is run against ``self.orderbook`` before the
        sell side.

        Args:
            action: Index into ``self.levels``.

        Returns:
            A ``(observation, reward, done, info)`` tuple. ``reward`` is
            ``price_sell - price_buy`` or ``0.0`` when either price equals
            0, ``done`` is true only when both sides are done, and ``info``
            is an empty dict.
        """
        level_pair = self.levels[action]
        actionBuy = level_pair[0]
        actionSell = level_pair[1]

        if self.executionBuy is None or self.executionSell is None:
            # First step after a reset: set up both sides.
            self.executionBuy = self._create_execution(
                a=actionBuy,
                actionState=self.actionStateBuy,
                orderbookIndex=self.orderbookIndexBuy,
                side=OrderSide.BUY,
            )
            self.executionSell = self._create_execution(
                a=actionSell,
                actionState=self.actionStateSell,
                orderbookIndex=self.orderbookIndexSell,
                side=OrderSide.SELL,
            )
        else:
            # Later steps: only sides with open quantity are touched.
            if not self.executionBuy.isFilled():
                self.executionBuy = self._update_execution(
                    execution=self.executionBuy,
                    a=actionBuy,
                    actionState=self.actionStateBuy,
                    orderbookIndex=self.orderbookIndexBuy,
                    side=OrderSide.BUY,
                )
            if not self.executionSell.isFilled():
                self.executionSell = self._update_execution(
                    execution=self.executionSell,
                    a=actionSell,
                    actionState=self.actionStateSell,
                    orderbookIndex=self.orderbookIndexSell,
                    side=OrderSide.SELL,
                )

        # --- buy side ---------------------------------------------------
        if self.executionBuy.isFilled():
            price_buy = self.executionBuy.getAvgPrice()
        else:
            self.executionBuy, trades_buy = self.executionBuy.run(
                self.orderbook
            )
            i_next_buy = self._determine_next_inventory(self.executionBuy)
            t_next_buy = self._determine_next_time(
                self.executionBuy.getState().getT()
            )
            feature_buy = self._makeFeature(
                orderbookIndex=self.executionBuy.getOrderbookIndex()
            )
            self.actionStateBuy = ActionState(
                t_next_buy, i_next_buy, {'bidask': feature_buy}
            )
            self.orderbookIndexBuy = self.executionBuy.getOrderbookIndex()
            price_buy = self.executionBuy.calculateAvgPrice(trades_buy)

        # --- sell side --------------------------------------------------
        if self.executionSell.isFilled():
            price_sell = self.executionSell.getAvgPrice()
        else:
            self.executionSell, trades_sell = self.executionSell.run(
                self.orderbook
            )
            i_next_sell = self._determine_next_inventory(self.executionSell)
            t_next_sell = self._determine_next_time(
                self.executionSell.getState().getT()
            )
            feature_sell = self._makeFeature(
                orderbookIndex=self.executionSell.getOrderbookIndex()
            )
            self.actionStateSell = ActionState(
                t_next_sell, i_next_sell, {'bidask': feature_sell}
            )
            self.orderbookIndexSell = self.executionSell.getOrderbookIndex()
            price_sell = self.executionSell.calculateAvgPrice(trades_sell)

        done_buy = (
            self.executionBuy.isFilled() or self.actionStateBuy.getI() == 0
        )
        done_sell = (
            self.executionSell.isFilled() or self.actionStateSell.getI() == 0
        )

        print('price buy: ' + str(price_buy))
        print('price sell: ' + str(price_sell))
        # A price of 0 on either side yields no reward.
        reward = (
            0.0
            if (price_buy == 0 or price_sell == 0)
            else price_sell - price_buy
        )
        print('reward: ' + str(reward))

        # Report the state of the side whose order book index is further
        # along (the buy side wins ties).
        buy_is_ahead = self.orderbookIndexBuy >= self.orderbookIndexSell
        state_next = (
            self.actionStateBuy if buy_is_ahead else self.actionStateSell
        )
        return state_next.toArray(), reward, (done_buy and done_sell), {}

    def reset(self):
        """Reset using the last entries of ``self.T`` and ``self.I``."""
        return self._reset(t=self.T[-1], i=self.I[-1])

    def _reset(self, t, i):
        """Reset both sides and return the first observation.

        The buy and the sell side start from the same order book index and
        share the same initial ``ActionState`` object.

        Args:
            t: Time value stored in the initial ``ActionState``.
            i: Inventory value stored in the initial ``ActionState``.

        Returns:
            The result of ``toArray()`` on the initial ``ActionState``.
        """
        # TODO: restoring the book via copy.deepcopy(self.orderbookOriginal)
        # is slow but was noted as required after every episode because
        # matching changes order book states; it is currently disabled.
        _, start_index = self._get_random_orderbook_state()
        start_state = ActionState(
            t, i, {'bidask': self._makeFeature(orderbookIndex=start_index)}
        )

        # Executions are created lazily on the first step.
        self.executionBuy = self.executionSell = None
        self.orderbookIndexBuy = self.orderbookIndexSell = start_index
        self.actionStateBuy = self.actionStateSell = start_state
        return start_state.toArray()

    def render(self, mode='human', close=False):
        """Do nothing; rendering is not implemented."""

    def seed(self, seed):
        """Do nothing; the seed is ignored."""
def backtest(self, q=None, episodes=10, average=False, fixed_a=None):
    """Run one execution from the last entries of ``self.T`` and ``self.I``.

    Actions come from ``self.ai.getQAction`` unless ``fixed_a`` is given.
    The same action object is updated and re-run until the remaining
    inventory is ``0`` or a state lookup fails.

    Args:
        q: Q-table to use. When ``None`` the table already held by
            ``self.ai`` is used; otherwise it is stored as ``self.ai.q``.
        episodes: Not used by this implementation.
        average: If true, return ``self.averageBacktest(Ms)`` instead of the
            raw result list.
        fixed_a: If not ``None``, this action is used at every decision
            point instead of querying the Q-table.

    Returns:
        A list of ``[state, midPrice, actions, price, profit]`` entries
        (empty when the initial state lookup fails), or the value of
        ``self.averageBacktest`` when ``average`` is true.

    Raises:
        Exception: If the Q-table in use is empty.
    """
    if q is None:
        q = self.ai.q
    else:
        self.ai.q = q

    if not q:
        raise Exception('Q-Table is empty, please train first.')

    Ms = []
    for t in [self.T[-1]]:
        logging.info("\n" + "t==" + str(t))
        for i in [self.I[-1]]:
            logging.info(" i==" + str(i))
            actions = []
            state = ActionState(t, i, {})
            if fixed_a is not None:
                a = fixed_a
            else:
                try:
                    a = self.ai.getQAction(state, 0)
                except Exception:
                    # State might not be in the Q-table yet; more training
                    # required. `Exception` (rather than a bare except)
                    # lets KeyboardInterrupt / SystemExit propagate.
                    logging.info("State " + str(state) + " not in Q-Table.")
                    break
            actions.append(a)

            action = self.createAction(
                level=a, state=state, force_execution=False
            )
            midPrice = action.getReferencePrice()
            action.run(self.orderbook)
            i_next = self.determineNextInventory(action)
            t_next = self.determineNextTime(t)

            while i_next != 0:
                state_next = ActionState(t_next, i_next, {})
                if fixed_a is not None:
                    a_next = fixed_a
                else:
                    try:
                        a_next = self.ai.getQAction(state_next, 0)
                    except Exception:
                        # Unknown follow-up state: stop here and report the
                        # result obtained so far.
                        break
                actions.append(a_next)

                runtime_next = self.determineRuntime(t_next)
                action.setState(state_next)
                action.update(a_next, runtime_next)
                action.run(self.orderbook)
                i_next = self.determineNextInventory(action)
                t_next = self.determineNextTime(t_next)

            price = action.getAvgPrice()
            # TODO: last column is for the BUY scenario only
            if action.getOrder().getSide() == OrderSide.BUY:
                profit = midPrice - price
            else:
                profit = price - midPrice
            Ms.append([state, midPrice, actions, price, profit])

    if not average:
        return Ms
    return self.averageBacktest(Ms)