Beispiel #1
0
    def update_history(self, ttt: TicTacToe):
        """Record the current board state together with this player's reward.

        The reward is 0 when the game is over and drawn, 1 when the game is
        over and ``ttt.winner`` equals ``self.symbol``, and -1 in every other
        case (which includes a game that is not over yet).
        """
        if ttt.game_over() and ttt.is_draw():
            outcome = 0
        else:
            won = ttt.game_over() and ttt.winner == self.symbol
            outcome = 1 if won else -1

        # Store (state, reward) pairs in the order they are observed.
        snapshot = ttt.get_state()
        self.history.append((snapshot, outcome))
Beispiel #2
0
    def make_move(self, ttt: TicTacToe):
        """Place this player's symbol on the board, epsilon-greedily.

        With probability ``self.epsilon`` random cells are tried until
        ``ttt.make_move`` accepts one (or the game turns out to be over).
        Otherwise the model's prediction for the current state is used and
        the free field with the highest predicted value is played.

        Raises:
            Exception: if there is no free field to choose from, or if the
                board rejects the chosen move.
        """
        # Explore: keep drawing random cells until one is accepted.
        if np.random.rand() < self.epsilon:
            while True:
                x, y = np.random.randint(3, size=2)

                # Also stop when the game is over, since then no cell can
                # be accepted and the loop would otherwise never end.
                if ttt.make_move(x, y, self.symbol) or ttt.game_over():
                    return

        # Exploit: let the network score every field of the current state.
        state = ttt.get_state()
        one_hot = self.one_hot_encoded(state, ttt)
        values = self.model.predict(np.asarray([one_hot]))[0]

        # Only fields whose state entry is 0 are candidates.
        free_fields = [i for i, cell in enumerate(state) if cell == 0]
        if not free_fields:
            # Without this guard the "no field" marker -1 would be decoded
            # into the bogus coordinates (2, -1) below.
            raise Exception("dafuq?")

        # max() keeps the first of several equally rated fields and always
        # yields a free field, even for very low or NaN predictions.
        field = max(free_fields, key=lambda i: values[i])

        # Decode the flat index into board coordinates
        # (assumes a flat 3-wide board layout -- TODO confirm against TicTacToe).
        x = field % 3
        y = field // 3
        if not ttt.make_move(x, y, self.symbol):
            raise Exception("dafuq?")