def _reset(self, t, i):
     """Restore the order book and set up the initial state of a new episode.

     A fresh copy of ``self.orderbookOriginal`` replaces ``self.orderbook``,
     an order book index is obtained from
     ``self._get_random_orderbook_state()``, and an ``ActionState`` carrying
     the bid/ask feature for that index is stored as the current state.
     Any previous execution is discarded.

     Args:
         t: First component of the new ``ActionState``
             (presumably the remaining time -- confirm with callers).
         i: Second component of the new ``ActionState``
             (presumably the remaining inventory -- confirm with callers).

     Returns:
         The array representation of the initial state
         (``ActionState.toArray()``).
     """
     # TODO: The deep copy is slow, but it is currently required on every
     # reset because matching changes the order book states.
     self.orderbook = copy.deepcopy(self.orderbookOriginal)

     # Only the index of the selected order book state is needed here.
     _, start_index = self._get_random_orderbook_state()
     features = {'bidask': self._makeFeature(orderbookIndex=start_index)}
     initial_state = ActionState(t, i, features)

     self.actionState = initial_state
     self.orderbookIndex = start_index
     self.execution = None
     return initial_state.toArray()
    def step(self, action):
        """Apply one action, run the execution and return the transition.

        ``action`` is translated into a level via ``self.levels``. If no
        execution exists yet, one is created with that level; otherwise the
        existing execution is updated. The execution is then run against
        ``self.orderbook`` and the resulting state is stored on the instance
        (``self.execution``, ``self.orderbookIndex``, ``self.actionState``).

        Args:
            action: Index into ``self.levels``.

        Returns:
            A 4-tuple ``(observation, reward, done, info)``:
            the next ``ActionState`` as an array, the reward reported by
            ``calculateRewardWeighted``, a flag that is truthy once the
            execution is filled or the next state's ``I`` equals 0, and an
            empty dict.
        """
        level = self.levels[action]
        if self.execution is not None:
            self.execution = self._update_execution(self.execution, level)
        else:
            self.execution = self._create_execution(level)

        setup_report = [
            'Created/Updated execution.',
            'Action: ' + str(level) + ' (' + str(self.execution.getOrder().getType()) + ')',
            't: ' + str(self.actionState.getT()),
            'runtime: ' + str(self.execution.getRuntime()),
            'i: ' + str(self.actionState.getI()),
        ]
        logging.info('\n'.join(setup_report))

        # run() hands back the execution to continue with, plus the trades
        # produced by this run.
        self.execution, trades = self.execution.run(self.orderbook)

        next_inventory = self._determine_next_inventory(self.execution)
        next_time = self._determine_next_time(self.execution.getState().getT())
        features = {
            'bidask': self._makeFeature(
                orderbookIndex=self.execution.getOrderbookIndex()
            )
        }
        upcoming_state = ActionState(next_time, next_inventory, features)

        done = self.execution.isFilled() or upcoming_state.getI() == 0
        # The weighted reward is computed the same way regardless of `done`.
        reward, volume_ratio = self.execution.calculateRewardWeighted(
            trades, self.I[-1]
        )

        run_report = [
            'Run execution.',
            'Trades: ' + str(len(trades)),
            'Reward: ' + str(reward) + ' (Ratio: ' + str(volume_ratio) + ')',
            'Done: ' + str(done),
        ]
        logging.info('\n'.join(run_report))

        self.orderbookIndex = self.execution.getOrderbookIndex()
        self.actionState = upcoming_state
        return upcoming_state.toArray(), reward, done, {}