Esempio n. 1
0
 def _reset(self, t, i):
     """Begin a fresh episode and return its initial observation.

     Args:
         t: Time value stored in the initial ``ActionState``.
         i: Inventory value stored in the initial ``ActionState``; it is
             also forwarded to ``_makeFeature`` as ``qty``.

     Returns:
         ``toArray()`` of the newly created initial state.
     """
     # Only the index from `_get_random_orderbook_state()` is used here.
     _, start_index = self._get_random_orderbook_state()
     start_feature = self._makeFeature(orderbookIndex=start_index, qty=i)
     market = {self.featureType.value: start_feature}
     initial_state = ActionState(t, i, market)

     # No execution exists until the first step of the episode.
     self.execution = None
     self.orderbookIndex = start_index
     self.actionState = initial_state
     return initial_state.toArray()
Esempio n. 2
0
    def backtest(self, q=None, episodes=10, average=False, fixed_a=None):
        """Replay the policy for a number of episodes and collect the results.

        Every episode starts at ``self.env.T[-1]`` / ``self.env.I[-1]`` and
        keeps updating and re-running the same action until the next
        inventory reported by the environment is 0.

        Args:
            q: Not used by this implementation.
            episodes: Number of episodes to run.
            average: When truthy, the collected rows are passed through
                ``self.averageBacktest`` before being returned.
            fixed_a: If not ``None``, this level is used for every decision
                instead of asking ``self.ai.getQAction``.

        Returns:
            A list with one ``[state, midPrice, actions, price, profit]`` row
            per episode, or ``self.averageBacktest(rows)`` when ``average``
            is truthy.
        """

        def choose_level(for_state):
            # A fixed level, when supplied, overrides the learned policy.
            if fixed_a is None:
                return self.ai.getQAction(for_state, 0)
            return fixed_a

        rows = []
        for _ in range(episodes):
            t = self.env.T[-1]
            i = self.env.I[-1]
            state = ActionState(t, i, {})

            first_level = choose_level(state)
            taken = [first_level]
            action = self.env.createAction(
                level=first_level,
                state=state,
                force_execution=True,
            )
            # Captured before the first run; used as the profit baseline.
            reference_price = action.getReferencePrice()

            action.run(self.env.orderbook)
            remaining = self.env.determineNextInventory(action)
            next_t = self.env.determineNextTime(t)

            while remaining != 0:
                following_state = ActionState(next_t, remaining, {})
                following_level = choose_level(following_state)
                taken.append(following_level)

                following_runtime = self.env.determineRuntime(next_t)
                action.setState(following_state)
                action.update(following_level, following_runtime)
                action.run(self.env.orderbook)

                remaining = self.env.determineNextInventory(action)
                next_t = self.env.determineNextTime(next_t)

            avg_price = action.getAvgPrice()
            is_buy = action.getOrder().getSide() == OrderSide.BUY
            # Profit is measured against the reference price, sign by side.
            profit = (
                reference_price - avg_price
                if is_buy
                else avg_price - reference_price
            )
            rows.append([state, reference_price, taken, avg_price, profit])

        return self.averageBacktest(rows) if average else rows
Esempio n. 3
0
    def _reset(self, t, i):
        """Begin a fresh episode with identical buy and sell starting points.

        Args:
            t: Time value stored in the initial ``ActionState``.
            i: Inventory value stored in the initial ``ActionState``.

        Returns:
            ``toArray()`` of the initial state shared by both sides.
        """
        # TODO: the order book is not restored between episodes; a deep copy
        # of the original book was considered but noted as slow.
        _, start_index = self._get_random_orderbook_state()
        start_feature = self._makeFeature(orderbookIndex=start_index)
        initial_state = ActionState(t, i, {'bidask': start_feature})

        # Neither side has an execution until the first step.
        self.executionBuy = self.executionSell = None
        # Both sides start from the same order book index and state object.
        self.orderbookIndexBuy = self.orderbookIndexSell = start_index
        self.actionStateBuy = self.actionStateSell = initial_state

        return initial_state.toArray()
Esempio n. 4
0
    def step(self, action):
        """Apply one action to the current execution and report the transition.

        Args:
            action: Index into ``self.levels``. The selected level is passed
                to the execution and appended to ``self.episodeActions``.

        Returns:
            A 4-tuple ``(observation, reward, done, info)`` where
            ``observation`` is ``toArray()`` of the next ``ActionState`` and
            ``info`` is an empty dict.
        """
        # NOTE(review): this counter is incremented on every step, not once
        # per episode, despite its name -- confirm that is intended.
        self.episode += 1
        action = self.levels[action]
        self.episodeActions.append(action)
        if self.execution is None:
            self.execution = self._create_execution(action)
        else:
            self.execution = self._update_execution(self.execution, action)

        logging.info(
            'Created/Updated execution.\nAction: %s (%s)\nt: %s\nruntime: %s\ni: %s',
            action,
            self.execution.getOrder().getType(),
            self.actionState.getT(),
            self.execution.getRuntime(),
            self.actionState.getI(),
        )
        self.execution, counterTrades = self.execution.run(self.orderbook)

        i_next = self._determine_next_inventory(self.execution)
        t_next = self._determine_next_time(self.execution.getState().getT())

        feature = self._makeFeature(
            orderbookIndex=self.execution.getOrderbookIndex(), qty=i_next)
        state_next = ActionState(t_next, i_next,
                                 {self.featureType.value: feature})
        done = self.execution.isFilled() or state_next.getI() == 0
        if done:
            reward = self.execution.getReward()
            volumeRatio = 1.0
            # The previous guard (`is not []`) compared identity against a
            # brand-new list and was therefore always True. Iterating over
            # `callbacks or ()` handles both an empty list and None.
            for cb in self.callbacks or ():
                cb.on_episode_end(
                    self.episode, {
                        'episode_reward': reward,
                        'episode_actions': self.episodeActions
                    })
            # Start a new list so the one handed to callbacks is not mutated.
            self.episodeActions = []
        else:
            reward, volumeRatio = self.execution.calculateRewardWeighted(
                counterTrades, self.I[-1])

        logging.info(
            'Run execution.\nTrades: %s\nReward: %s (Ratio: %s)\nDone: %s',
            len(counterTrades),
            reward,
            volumeRatio,
            done,
        )
        self.orderbookIndex = self.execution.getOrderbookIndex()
        self.actionState = state_next
        return state_next.toArray(), reward, done, {}
Esempio n. 5
0
 def update(self, t, i, force_execution=False):
     """Run one learning step from state ``(t, i)``.

     Args:
         t: Current time value of the state.
         i: Current inventory value of the state.
         force_execution: Forwarded unchanged to ``self.env.createAction``.

     Returns:
         ``(t_next, i_next)`` as determined by the environment.
     """
     current_state = ActionState(t, i)
     level = self.ai.chooseAction(current_state)
     self.logActions.append(level)

     action = self.env.createAction(
         level=level,
         state=current_state,
         force_execution=force_execution,
         orderbookIndex=self.orderbookIndex,
     )
     action.run(self.env.orderbook)

     i_next = self.env.determineNextInventory(action)
     t_next = self.env.determineNextTime(t)
     reward = action.getReward()
     self.logRewards.append(reward)

     # Build the successor from the action's own state, then move it to the
     # next time and inventory.
     state_next = ActionState(
         action.getState().getT(),
         action.getState().getI(),
         action.getState().getMarket(),
     )
     state_next.setT(t_next)
     state_next.setI(i_next)

     self.ai.learn(
         state1=action.getState(),
         action1=action.getA(),
         reward=reward,
         state2=state_next,
     )
     return (t_next, i_next)
Esempio n. 6
0
    def step(self, action):
        """Work the buy execution and the sell execution for one step each.

        Args:
            action: Index into ``self.levels``. Element 0 of the selected
                entry is used for the buy side, element 1 for the sell side.

        Returns:
            A 4-tuple ``(observation, reward, done, info)``. ``reward`` is
            ``price_sell - price_buy``, or ``0.0`` when either price equals
            0. ``done`` requires both sides to be done, and ``info`` is an
            empty dict.
        """
        chosen = self.levels[action]
        buy_level = chosen[0]
        sell_level = chosen[1]

        if self.executionBuy is None or self.executionSell is None:
            # First step of the episode: create both executions.
            self.executionBuy = self._create_execution(
                a=buy_level,
                actionState=self.actionStateBuy,
                orderbookIndex=self.orderbookIndexBuy,
                side=OrderSide.BUY,
            )
            self.executionSell = self._create_execution(
                a=sell_level,
                actionState=self.actionStateSell,
                orderbookIndex=self.orderbookIndexSell,
                side=OrderSide.SELL,
            )
        else:
            # Later steps: only a side that is not yet filled is updated.
            if not self.executionBuy.isFilled():
                self.executionBuy = self._update_execution(
                    execution=self.executionBuy,
                    a=buy_level,
                    actionState=self.actionStateBuy,
                    orderbookIndex=self.orderbookIndexBuy,
                    side=OrderSide.BUY,
                )
            if not self.executionSell.isFilled():
                self.executionSell = self._update_execution(
                    execution=self.executionSell,
                    a=sell_level,
                    actionState=self.actionStateSell,
                    orderbookIndex=self.orderbookIndexSell,
                    side=OrderSide.SELL,
                )

        # Buy side: run if still open, otherwise reuse its average price.
        if self.executionBuy.isFilled():
            price_buy = self.executionBuy.getAvgPrice()
        else:
            self.executionBuy, buy_trades = self.executionBuy.run(self.orderbook)
            buy_remaining = self._determine_next_inventory(self.executionBuy)
            buy_next_t = self._determine_next_time(
                self.executionBuy.getState().getT())
            buy_feature = self._makeFeature(
                orderbookIndex=self.executionBuy.getOrderbookIndex())
            self.actionStateBuy = ActionState(
                buy_next_t, buy_remaining, {'bidask': buy_feature})
            self.orderbookIndexBuy = self.executionBuy.getOrderbookIndex()
            price_buy = self.executionBuy.calculateAvgPrice(buy_trades)

        # Sell side: same procedure.
        if self.executionSell.isFilled():
            price_sell = self.executionSell.getAvgPrice()
        else:
            self.executionSell, sell_trades = self.executionSell.run(self.orderbook)
            sell_remaining = self._determine_next_inventory(self.executionSell)
            sell_next_t = self._determine_next_time(
                self.executionSell.getState().getT())
            sell_feature = self._makeFeature(
                orderbookIndex=self.executionSell.getOrderbookIndex())
            self.actionStateSell = ActionState(
                sell_next_t, sell_remaining, {'bidask': sell_feature})
            self.orderbookIndexSell = self.executionSell.getOrderbookIndex()
            price_sell = self.executionSell.calculateAvgPrice(sell_trades)

        done_buy = self.executionBuy.isFilled() or self.actionStateBuy.getI() == 0
        done_sell = self.executionSell.isFilled() or self.actionStateSell.getI() == 0

        print(f'price buy: {price_buy!s}')
        print(f'price sell: {price_sell!s}')
        # A price of 0 on either side yields no reward for this step.
        either_zero = price_buy == 0 or price_sell == 0
        reward = 0.0 if either_zero else price_sell - price_buy
        print(f'reward: {reward!s}')

        # Observe from the side with the larger order book index
        # (buy side on ties).
        buy_is_ahead = self.orderbookIndexBuy >= self.orderbookIndexSell
        state_next = self.actionStateBuy if buy_is_ahead else self.actionStateSell
        return state_next.toArray(), reward, (done_buy and done_sell), {}
Esempio n. 7
0
class MarketMakerEnv(execution_env.ExecutionEnv):
    """Execution environment that works one buy and one sell order together.

    Each side keeps its own execution, action state and order book index.
    One discrete action selects a (buy level, sell level) pair.
    """

    def __init__(self):
        """Clear all per-side episode state and apply the default configuration."""
        self.orderbookIndexBuy = self.orderbookIndexSell = None
        self.actionStateBuy = self.actionStateSell = None
        self.executionBuy = self.executionSell = None
        self._configure()

    def _configure(self,
                   orderbook=None,
                   levels=(-50, 50, 1),
                   T=(0, 100, 10),
                   I=(0, 1, 0.1),
                   lookback=25,
                   bookSize=10
                   ):
        """Set the order book, the discretisation grids and the gym spaces.

        Args:
            orderbook: Order book the executions are run against.
            levels: ``(min, max, step)`` passed to ``_generate_Sequence`` for
                the per-side levels.
            T: ``(min, max, step)`` passed to ``_generate_Sequence`` for time.
            I: ``(min, max, step)`` passed to ``_generate_Sequence`` for
                inventory.
            lookback: Stored as ``self.lookback``; the first observation
                dimension is ``2 * lookback``.
            bookSize: Stored as ``self.bookSize``; the second observation
                dimension.
        """
        self.orderbook = orderbook

        # An action is a pair of levels, so the action set is the cartesian
        # product of the per-side levels with themselves.
        per_side = self._generate_Sequence(
            min=levels[0], max=levels[1], step=levels[2])
        self.levels = list(itertools.product(per_side, per_side))

        self.T = self._generate_Sequence(min=T[0], max=T[1], step=T[2])
        self.I = self._generate_Sequence(min=I[0], max=I[1], step=I[2])

        self.lookback = lookback
        self.bookSize = bookSize

        self.action_space = spaces.Discrete(len(self.levels))
        observation_shape = (2 * self.lookback, self.bookSize, 2)
        self.observation_space = spaces.Box(
            low=0.0, high=10.0, shape=observation_shape)

    def setSide(self, side):
        """Do nothing; both sides are always handled by this environment."""

    def _create_execution(self, a, actionState, orderbookIndex, side):
        """Build a new execution for one side.

        A market order is used when the runtime is not positive or ``a`` is
        ``None``. Otherwise a limit order is priced at level ``a`` of the
        order book state at ``orderbookIndex``.

        Returns:
            The newly configured ``Action``.
        """
        runtime = self._determine_runtime(actionState.getT())
        book_state = self.orderbook.getState(orderbookIndex)

        as_market = runtime <= 0.0 or a is None
        if as_market:
            order_type = OrderType.MARKET
            limit_price = None
        else:
            order_type = OrderType.LIMIT
            limit_price = book_state.getPriceAtLevel(side, a)

        order = Order(
            orderType=order_type,
            orderSide=side,
            cty=actionState.getI(),
            price=limit_price,
        )

        new_execution = Action(a=a, runtime=runtime)
        new_execution.setState(actionState)
        new_execution.setOrder(order)
        new_execution.setOrderbookState(book_state)
        new_execution.setOrderbookIndex(orderbookIndex)
        # The best ask is the reference price for both sides.
        new_execution.setReferencePrice(book_state.getBestAsk())
        return new_execution

    def _update_execution(self, execution, a, actionState, orderbookIndex, side):
        """Re-arm an existing execution with a new order for the next step.

        The limit price is taken from the order book state the execution
        held *before* this update. The state at ``orderbookIndex`` is only
        attached afterwards.

        Returns:
            The same ``execution`` object, updated in place.
        """
        runtime = self._determine_runtime(actionState.getT())
        book_state = self.orderbook.getState(orderbookIndex)

        as_market = runtime <= 0.0 or a is None
        if as_market:
            order_type = OrderType.MARKET
            limit_price = None
        else:
            order_type = OrderType.LIMIT
            limit_price = execution.getOrderbookState().getPriceAtLevel(side, a)

        order = Order(
            orderType=order_type,
            orderSide=side,
            cty=actionState.getI(),
            price=limit_price,
        )

        execution.setRuntime(runtime)
        execution.setState(actionState)
        execution.setOrder(order)
        execution.setOrderbookState(book_state)
        execution.setOrderbookIndex(orderbookIndex)
        return execution

    def step(self, action):
        """Work the buy execution and the sell execution for one step each.

        Args:
            action: Index into ``self.levels``. Element 0 of the selected
                pair is used for the buy side, element 1 for the sell side.

        Returns:
            A 4-tuple ``(observation, reward, done, info)``. ``reward`` is
            ``price_sell - price_buy``, or ``0.0`` when either price equals
            0. ``done`` requires both sides to be done, and ``info`` is an
            empty dict.
        """
        chosen = self.levels[action]
        buy_level = chosen[0]
        sell_level = chosen[1]

        if self.executionBuy is None or self.executionSell is None:
            # First step of the episode: create both executions.
            self.executionBuy = self._create_execution(
                a=buy_level,
                actionState=self.actionStateBuy,
                orderbookIndex=self.orderbookIndexBuy,
                side=OrderSide.BUY,
            )
            self.executionSell = self._create_execution(
                a=sell_level,
                actionState=self.actionStateSell,
                orderbookIndex=self.orderbookIndexSell,
                side=OrderSide.SELL,
            )
        else:
            # Later steps: only a side that is not yet filled is updated.
            if not self.executionBuy.isFilled():
                self.executionBuy = self._update_execution(
                    execution=self.executionBuy,
                    a=buy_level,
                    actionState=self.actionStateBuy,
                    orderbookIndex=self.orderbookIndexBuy,
                    side=OrderSide.BUY,
                )
            if not self.executionSell.isFilled():
                self.executionSell = self._update_execution(
                    execution=self.executionSell,
                    a=sell_level,
                    actionState=self.actionStateSell,
                    orderbookIndex=self.orderbookIndexSell,
                    side=OrderSide.SELL,
                )

        # Buy side: run if still open, otherwise reuse its average price.
        if self.executionBuy.isFilled():
            price_buy = self.executionBuy.getAvgPrice()
        else:
            self.executionBuy, buy_trades = self.executionBuy.run(self.orderbook)
            buy_remaining = self._determine_next_inventory(self.executionBuy)
            buy_next_t = self._determine_next_time(
                self.executionBuy.getState().getT())
            buy_feature = self._makeFeature(
                orderbookIndex=self.executionBuy.getOrderbookIndex())
            self.actionStateBuy = ActionState(
                buy_next_t, buy_remaining, {'bidask': buy_feature})
            self.orderbookIndexBuy = self.executionBuy.getOrderbookIndex()
            price_buy = self.executionBuy.calculateAvgPrice(buy_trades)

        # Sell side: same procedure.
        if self.executionSell.isFilled():
            price_sell = self.executionSell.getAvgPrice()
        else:
            self.executionSell, sell_trades = self.executionSell.run(self.orderbook)
            sell_remaining = self._determine_next_inventory(self.executionSell)
            sell_next_t = self._determine_next_time(
                self.executionSell.getState().getT())
            sell_feature = self._makeFeature(
                orderbookIndex=self.executionSell.getOrderbookIndex())
            self.actionStateSell = ActionState(
                sell_next_t, sell_remaining, {'bidask': sell_feature})
            self.orderbookIndexSell = self.executionSell.getOrderbookIndex()
            price_sell = self.executionSell.calculateAvgPrice(sell_trades)

        done_buy = self.executionBuy.isFilled() or self.actionStateBuy.getI() == 0
        done_sell = self.executionSell.isFilled() or self.actionStateSell.getI() == 0

        print(f'price buy: {price_buy!s}')
        print(f'price sell: {price_sell!s}')
        # A price of 0 on either side yields no reward for this step.
        either_zero = price_buy == 0 or price_sell == 0
        reward = 0.0 if either_zero else price_sell - price_buy
        print(f'reward: {reward!s}')

        # Observe from the side with the larger order book index
        # (buy side on ties).
        buy_is_ahead = self.orderbookIndexBuy >= self.orderbookIndexSell
        state_next = self.actionStateBuy if buy_is_ahead else self.actionStateSell
        return state_next.toArray(), reward, (done_buy and done_sell), {}

    def reset(self):
        """Start a new episode at the last entries of ``self.T`` and ``self.I``."""
        return self._reset(t=self.T[-1], i=self.I[-1])

    def _reset(self, t, i):
        """Begin a fresh episode with identical buy and sell starting points.

        Args:
            t: Time value stored in the initial ``ActionState``.
            i: Inventory value stored in the initial ``ActionState``.

        Returns:
            ``toArray()`` of the initial state shared by both sides.
        """
        # TODO: the order book is not restored between episodes; a deep copy
        # of the original book was considered but noted as slow.
        _, start_index = self._get_random_orderbook_state()
        start_feature = self._makeFeature(orderbookIndex=start_index)
        initial_state = ActionState(t, i, {'bidask': start_feature})

        # Neither side has an execution until the first step.
        self.executionBuy = self.executionSell = None
        # Both sides start from the same order book index and state object.
        self.orderbookIndexBuy = self.orderbookIndexSell = start_index
        self.actionStateBuy = self.actionStateSell = initial_state

        return initial_state.toArray()

    def render(self, mode='human', close=False):
        """Do nothing; rendering is not implemented."""

    def seed(self, seed):
        """Do nothing; the seed is ignored."""
Esempio n. 8
0
    def backtest(self, q=None, episodes=10, average=False, fixed_a=None):
        """Replay the learned (or a fixed) policy once from the start state.

        The run starts at ``self.T[-1]`` / ``self.I[-1]`` and keeps updating
        and re-running the same action until the next inventory is 0 or a
        state cannot be resolved to an action.

        Args:
            q: Q-table to evaluate. When given it replaces ``self.ai.q``;
                when ``None`` the current ``self.ai.q`` is used.
            episodes: Not used by this implementation; kept so the signature
                stays compatible with callers.
            average: When truthy, the collected rows are passed through
                ``self.averageBacktest`` before being returned.
            fixed_a: If not ``None``, this level is used for every decision
                instead of asking ``self.ai.getQAction``.

        Returns:
            A list of ``[state, midPrice, actions, price, profit]`` rows, or
            ``self.averageBacktest(rows)`` when ``average`` is truthy. The
            list is empty if no action could be found for the start state.

        Raises:
            Exception: If the Q-table is empty.
        """
        if q is None:
            q = self.ai.q
        else:
            self.ai.q = q

        if not q:
            raise Exception('Q-Table is empty, please train first.')

        Ms = []
        # Both loops run over a single element; they are kept so that `break`
        # below skips straight to the return.
        for t in [self.T[-1]]:
            logging.info("\nt==%s", t)
            for i in [self.I[-1]]:
                logging.info("     i==%s", i)
                actions = []
                state = ActionState(t, i, {})
                if fixed_a is not None:
                    a = fixed_a
                else:
                    try:
                        a = self.ai.getQAction(state, 0)
                    except Exception:
                        # State might not be in the Q-table yet; more
                        # training required. `Exception` (not a bare except)
                        # lets KeyboardInterrupt/SystemExit propagate.
                        logging.info("State %s not in Q-Table.", state)
                        break
                actions.append(a)
                action = self.createAction(level=a,
                                           state=state,
                                           force_execution=False)
                # Captured before the first run; used as the profit baseline.
                midPrice = action.getReferencePrice()

                action.run(self.orderbook)
                i_next = self.determineNextInventory(action)
                t_next = self.determineNextTime(t)
                while i_next != 0:
                    state_next = ActionState(t_next, i_next, {})
                    if fixed_a is not None:
                        a_next = fixed_a
                    else:
                        try:
                            a_next = self.ai.getQAction(state_next, 0)
                        except Exception:
                            # Unknown follow-up state: stop here and report
                            # what has been executed so far.
                            logging.info("State %s not in Q-Table.",
                                         state_next)
                            break
                    actions.append(a_next)

                    runtime_next = self.determineRuntime(t_next)
                    action.setState(state_next)
                    action.update(a_next, runtime_next)
                    action.run(self.orderbook)
                    i_next = self.determineNextInventory(action)
                    t_next = self.determineNextTime(t_next)

                price = action.getAvgPrice()
                # Profit is measured against the reference price, with the
                # sign depending on the order side.
                if action.getOrder().getSide() == OrderSide.BUY:
                    profit = midPrice - price
                else:
                    profit = price - midPrice
                Ms.append([state, midPrice, actions, price, profit])
        if not average:
            return Ms
        return self.averageBacktest(Ms)