def _reset(self, t, i):
    """Start a new episode at time ``t`` with inventory ``i``.

    Replaces the working order book with a fresh deep copy of
    ``self.orderbookOriginal``, picks a random order book state, builds the
    bid/ask feature for it and stores the resulting ``ActionState`` on the
    instance. Any previous execution is dropped.

    Args:
        t: Time value handed to ``ActionState``.
        i: Inventory value handed to ``ActionState``.

    Returns:
        The array form of the initial state (``ActionState.toArray()``).
    """
    # TODO: Slow but currently required to reset after every episode due to
    # change of order book states during matching.
    self.orderbook = copy.deepcopy(self.orderbookOriginal)

    # Only the index of the sampled state is used here; the state object
    # itself is not needed.
    _, start_index = self._get_random_orderbook_state()
    features = {'bidask': self._makeFeature(orderbookIndex=start_index)}
    initial_state = ActionState(t, i, features)

    self.execution = None
    self.orderbookIndex = start_index
    self.actionState = initial_state
    return initial_state.toArray()
def step(self, action):
    """Apply one action and advance the environment by one step.

    ``action`` is an index into ``self.levels``. The selected level is used
    to create a new execution (when none is active) or to update the active
    one. The execution is then run against ``self.orderbook`` and the next
    state, reward and termination flag are derived from its result.

    Args:
        action: Index into ``self.levels``.

    Returns:
        A four-tuple ``(state_array, reward, done, info)`` where ``info`` is
        always an empty dict.
    """
    level = self.levels[action]

    if self.execution is not None:
        self.execution = self._update_execution(self.execution, level)
    else:
        self.execution = self._create_execution(level)

    setup_message = (
        'Created/Updated execution.' +
        '\nAction: ' + str(level) +
        ' (' + str(self.execution.getOrder().getType()) + ')' +
        '\nt: ' + str(self.actionState.getT()) +
        '\nruntime: ' + str(self.execution.getRuntime()) +
        '\ni: ' + str(self.actionState.getI())
    )
    logging.info(setup_message)

    # Running the execution yields the updated execution together with the
    # counter trades it produced.
    self.execution, trades = self.execution.run(self.orderbook)

    next_inventory = self._determine_next_inventory(self.execution)
    next_time = self._determine_next_time(self.execution.getState().getT())
    features = {
        'bidask': self._makeFeature(
            orderbookIndex=self.execution.getOrderbookIndex()
        )
    }
    following_state = ActionState(next_time, next_inventory, features)

    # The episode ends once the execution is filled or no inventory is left.
    episode_over = self.execution.isFilled() or following_state.getI() == 0

    # The weighted reward is computed on every step, whether or not the
    # episode has finished.
    # NOTE(review): self.I[-1] is presumably the full inventory -- confirm.
    reward, volume_ratio = self.execution.calculateRewardWeighted(
        trades, self.I[-1]
    )

    result_message = (
        'Run execution.' +
        '\nTrades: ' + str(len(trades)) +
        '\nReward: ' + str(reward) + ' (Ratio: ' + str(volume_ratio) + ')' +
        '\nDone: ' + str(episode_over)
    )
    logging.info(result_message)

    self.orderbookIndex = self.execution.getOrderbookIndex()
    self.actionState = following_state
    return following_state.toArray(), reward, episode_over, {}