def _reset(self, t, i):
    # TODO: Slow but currently required to reset after every episode due to
    # change of order book states during matching.
    self.orderbook = copy.deepcopy(self.orderbookOriginal)
    orderbookState, orderbookIndex = self._get_random_orderbook_state()
    bidAskFeature = self._makeFeature(orderbookIndex=orderbookIndex)
    state = ActionState(t, i, {'bidask': bidAskFeature})
    self.execution = None
    self.orderbookIndex = orderbookIndex
    self.actionState = state
    return state.toArray()
def update(self, t, i, force_execution=False):
    aiState = ActionState(t, i)
    a = self.ai.chooseAction(aiState)
    action = self.env.createAction(level=a, state=aiState, force_execution=force_execution)
    action.run(self.env.orderbook)
    i_next = self.env.determineNextInventory(action)
    t_next = self.env.determineNextTime(t)
    reward = action.getReward()
    state_next = ActionState(action.getState().getT(),
                             action.getState().getI(),
                             action.getState().getMarket())
    state_next.setT(t_next)
    state_next.setI(i_next)
    # print("Reward " + str(reward) + ": " + str(action.getState()) +
    #       " with " + str(action.getA()) + " -> " + str(state_next))
    self.ai.learn(
        state1=action.getState(),
        action1=action.getA(),
        reward=reward,
        state2=state_next
    )
    return (t_next, i_next)
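# A minimal sketch of how update() could drive one training episode, assuming
# an `agent` that exposes update() as above together with grids agent.T (time
# steps) and agent.I (inventory levels). The name run_episode and the stopping
# condition are illustrative assumptions, not part of the codebase.
def run_episode(agent):
    t = agent.T[-1]  # start with the full time horizon
    i = agent.I[-1]  # start with the full inventory
    # One Q-learning step per iteration: choose an action, execute it against
    # the order book, and learn from the resulting reward.
    while i != 0 and t > 0:
        t, i = agent.update(t, i)
    return t, i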
def step(self, action):
    action = self.levels[action]
    if self.execution is None:
        self.execution = self._create_execution(action)
    else:
        self.execution = self._update_execution(self.execution, action)
    logging.info(
        'Created/Updated execution.' +
        '\nAction: ' + str(action) + ' (' + str(self.execution.getOrder().getType()) + ')' +
        '\nt: ' + str(self.actionState.getT()) +
        '\nruntime: ' + str(self.execution.getRuntime()) +
        '\ni: ' + str(self.actionState.getI())
    )
    self.execution, counterTrades = self.execution.run(self.orderbook)
    i_next = self._determine_next_inventory(self.execution)
    t_next = self._determine_next_time(self.execution.getState().getT())
    bidAskFeature = self._makeFeature(orderbookIndex=self.execution.getOrderbookIndex())
    state_next = ActionState(t_next, i_next, {'bidask': bidAskFeature})
    done = self.execution.isFilled() or state_next.getI() == 0
    # if done:
    #     reward = self.execution.getReward()
    #     volumeRatio = 1.0
    # else:
    reward, volumeRatio = self.execution.calculateRewardWeighted(counterTrades, self.I[-1])
    logging.info(
        'Run execution.' +
        '\nTrades: ' + str(len(counterTrades)) +
        '\nReward: ' + str(reward) + ' (Ratio: ' + str(volumeRatio) + ')' +
        '\nDone: ' + str(done)
    )
    self.orderbookIndex = self.execution.getOrderbookIndex()
    self.actionState = state_next
    return state_next.toArray(), reward, done, {}
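# A hedged, gym-style interaction sketch for the environment above, assuming
# `env` provides _reset() and step() as defined here and exposes env.levels.
# The random policy and the name run_env_episode are illustrative only.
import random

def run_env_episode(env, t, i):
    state = env._reset(t, i)
    done = False
    total_reward = 0.0
    while not done:
        a = random.randrange(len(env.levels))  # random index into env.levels
        state, reward, done, info = env.step(a)
        total_reward += reward
    return total_reward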
import unittest

import numpy as np

from qlearn import QLearn
from action_state import ActionState


class QlearnTest(unittest.TestCase):

    def testStateEquality(self):
        # Two ActionStates with identical attributes must compare equal, so a
        # Q-value learned for one is retrievable through the other.
        ai = QLearn([-1, 0, 1])
        a1 = ActionState(1.0, 1.0, {'vol60': 1})
        a2 = ActionState(1.0, 1.0, {'vol60': 1})
        ai.learn(a1, 1, 1.0, a2)
        self.assertEqual(ai.getQAction(a2), 1)

    # def testQTableLookup(self):
    #     actions = [5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -7, -10, -15, -20]
    #     ai = QLearn(actions)
    #     ai.q = np.load('test_q.npy').item()
    #     state = ActionState(30, 0.9, {})
    #     ai.q.get((state, -10))
    #     print(ai.getQAction(state))


if __name__ == '__main__':
    unittest.main()
def backtest(self, q=None, episodes=10, average=False, fixed_a=None):
    if q is None:
        q = self.ai.q
    else:
        self.ai.q = q
    if not q:
        raise Exception('Q-Table is empty, please train first.')

    Ms = []
    # T = self.T[1:len(self.T)]
    for t in [self.T[-1]]:
        logging.info("\n" + "t==" + str(t))
        for i in [self.I[-1]]:
            logging.info(" i==" + str(i))
            actions = []
            state = ActionState(t, i, {})
            if fixed_a is not None:
                a = fixed_a
            else:
                try:
                    a = self.ai.getQAction(state, 0)
                except Exception:
                    # State might not be in Q-Table yet, more training required.
                    logging.info("State " + str(state) + " not in Q-Table.")
                    break
            actions.append(a)
            action = self.createAction(level=a, state=state, force_execution=False)
            midPrice = action.getReferencePrice()
            action.run(self.orderbook)
            i_next = self.determineNextInventory(action)
            t_next = self.determineNextTime(t)
            while i_next != 0:
                state_next = ActionState(t_next, i_next, {})
                if fixed_a is not None:
                    a_next = fixed_a
                else:
                    try:
                        a_next = self.ai.getQAction(state_next, 0)
                        print("t: " + str(t_next))
                        print("i: " + str(i_next))
                        print("Action: " + str(a_next))
                    except Exception:
                        # State might not be in Q-Table yet, more training required.
                        break
                actions.append(a_next)
                runtime_next = self.determineRuntime(t_next)
                action.setState(state_next)
                action.update(a_next, runtime_next)
                action.run(self.orderbook)
                i_next = self.determineNextInventory(action)
                t_next = self.determineNextTime(t_next)
            price = action.getAvgPrice()
            # TODO: last column is for the BUY scenario only
            if action.getOrder().getSide() == OrderSide.BUY:
                profit = midPrice - price
            else:
                profit = price - midPrice
            Ms.append([state, midPrice, actions, price, profit])

    if not average:
        return Ms
    return self.averageBacktest(Ms)
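# Illustrative usage sketch, assuming `env` is an instance of the class above
# whose Q-table (self.ai.q) has already been populated by training. The row
# layout follows Ms.append([state, midPrice, actions, price, profit]) above.
results = env.backtest(average=False)
for state, mid_price, actions, price, profit in results:
    print(str(state) + ': actions=' + str(actions) +
          ', avg price=' + str(price) + ', profit=' + str(profit))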
def add_pair(self, state, prob, count):
    new_as = ActionState(state, prob, count)
    self.action_states.append(new_as)