Example #1
    def move(self, board: Board) -> (GameResult, bool):
        """
        Makes a move on the given input state
        :param board: The current state of the game
        :return: The GameResult after this move, Flag to indicate whether the move finished the game
        """
        self.board_position_log.append(board.state.copy())
        nn_input = self.board_state_to_nn_input(board.state)

        probs = self.get_valid_probs([nn_input], [board])
        probs = probs[0]

        # Most of the time our next move is the one with the highest probability after removing all illegal ones.
        # Occasionally, however, we choose a random move to encourage exploration.
        if (self.training is True) and \
                (self.game_counter < self.pre_training_games):
            move = board.random_empty_spot()
        else:
            if np.isnan(probs).any():
                # This can happen when all probabilities degenerate to 0; the best we can do is
                # make a random legal move.
                move = board.random_empty_spot()
            else:
                move = np.random.choice(np.arange(len(probs)), p=probs)
            if not board.is_legal(move):  # Debug case only, I hope
                print("Illegal move!")

        # We record the action we selected for later use when adjusting the NN weights.
        self.action_log.append(move)

        _, res, finished = board.move(move, self.side)

        return res, finished
Example #2
def play_game(board: Board, player1: Player, player2: Player):
    player1.new_game(CROSS)
    player2.new_game(NAUGHT)
    board.reset()

    finished = False
    while not finished:
        result, finished = player1.move(board)
        if finished:
            if result == GameResult.DRAW:
                final_result = GameResult.DRAW
            else:
                final_result = GameResult.CROSS_WIN
        else:
            result, finished = player2.move(board)
            if finished:
                if result == GameResult.DRAW:
                    final_result = GameResult.DRAW
                else:
                    final_result = GameResult.NAUGHT_WIN

    # noinspection PyUnboundLocalVariable
    player1.final_result(final_result)
    # noinspection PyUnboundLocalVariable
    player2.final_result(final_result)
    return final_result
Example #3
    def move(self, board: Board) -> (GameResult, bool):
        """
        Implements the Player interface and makes a move on Board `board`
        :param board: The Board to make a move on
        :return: A tuple of the GameResult and a flag indicating if the game is over after this move.
        """

        # We record all game positions to feed them into the NN for training with the corresponding updated Q
        # values.
        self.board_position_log.append(board.state.copy())

        nn_input = self.board_state_to_nn_input(board.state)

        probs, _ = self.get_valid_probs([nn_input], self.q_net, [board])
        probs = probs[0]

        # Most of the time our next move is the one with the highest probability after removing all illegal ones.
        # Occasionally, however, we choose a random move to encourage exploration.
        if (self.training is True) and \
                ((self.game_counter < self.pre_training_games) or (np.random.rand(1) < self.random_move_prob)):
            move = board.random_empty_spot()
        else:
            move = np.argmax(probs)

        # We record the action we selected for later use when adjusting the NN weights.
        self.action_log.append(move)

        # We execute the move and return the result
        _, res, finished = board.move(move, self.side)
        return res, finished
Example #4
 def get_move(self, board: Board) -> int:
     """
     Return the next move given the board `board` based on the current values of next states
     :param board: The current board state
     :return: The next move based on the current values of next states, starting from input state
     """
     if self.move_strategy == MoveStrategy.EXPLORATION:
         # exploratory random move
         m = board.random_empty_spot()
         _ = self.get_v(board)  # just to ensure we have values for our board state
         return m
     else:
         # greedy move: exploiting current knowledge
         vals = self.get_v(board)  # type: np.ndarray
         while True:
             maxv_idxs = np.argwhere(vals == np.amax(vals))  # positions of max values in array
             m = np.random.choice(maxv_idxs.flatten().tolist())  # type: int
             # m = np.argmax(vals)  # type: int  # this would instead return the first occurrence
             if board.is_legal(m):
                 #                    print("vals=", end='')
                 #                    print(vals)
                 #                    print("m={}".format(m))
                 return m
             else:
                 vals[m] = -1.0
Example #5
 def move(self, board: Board) -> (GameResult, bool):
     """
     Making a random move
     :param board: The board to make a move on
     :return: The result of the move
     """
     _, res, finished = board.move(board.random_empty_spot(), self.side)
     return res, finished
Example #6
 def move(self, board: Board):
     """
     Makes a move and returns the game result after this move and whether the move ended the game
     :param board: The board to make a move on
     :return: The GameResult after this move, Flag to indicate whether the move finished the game
     """
     m = self.get_move(board)
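      # Record (state hash, chosen move) in the move history so the table values can be updated later.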
     self.move_history.append((board.hash_value(), m))
     _, res, finished = board.move(m, self.side)
     return res, finished
Example #7
 def get_move(self, board: Board) -> int:
     """
     Return the next move given the board `board` based on the current Q values
     :param board: The current board state
     :return: The next move based on the current Q values for the input state
     """
     board_hash = board.hash_value()  # type: int
     qvals = self.get_q(board_hash)  # type: np.ndarray
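      # Greedy selection: repeatedly take the move with the highest Q value; if that square is occupied,
      # mask its Q value with -1.0 so the next argmax picks a different move.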
     while True:
         m = np.argmax(qvals)  # type: int
         if board.is_legal(m):
             return m
         else:
             qvals[m] = -1.0
Example #8
    def get_v(self, board: Board) -> np.ndarray:
        """
        Returns all values when moving from current state of 'board'
        :param board: The current board state
        :return: List of values of all possible next board states
        """
        # We build the value dictionary lazily, only adding a state when it is actually used for the first time.
        board_hash = board.hash_value()  # needed because the value dictionary maps *hashed* states to values
        if board_hash in self.v:
            vals = self.v[board_hash]
        else:
            vals = np.full(9, self.v_init)  # default initial value
            # set values for winning states to WIN_VALUE
            # (player cannot end up in a losing state after a move
            # so losing states need not be considered):
            for pos in range(vals.size):  # vals.size = BOARD_SIZE
                if board.is_legal(pos):
                    b = Board(board.state)
                    b.move(pos, self.side)
                    if b.check_win():
                        vals[pos] = self.v_win
                    elif b.num_empty() == 0:
                        # if it is not a win, and there are no other positions
                        # available, then it is a draw
                        vals[pos] = self.v_draw
            # Update dictionary:
            self.v[board_hash] = vals
#            print("v[{}]={}".format(board_hash, self.v[board_hash]))
        return vals
Example #9
def play_random_game():
    board = Board()
    finished = False
    last_play = NAUGHT
    next_play = CROSS
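    # The two sides alternate random moves until the game ends; after the loop, last_play is the side
    # that made the final move.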
    while not finished:
        _, result, finished = board.move(board.random_empty_spot(), next_play)
        print_board(board)
        last_play, next_play = next_play, last_play
    if result == GameResult.DRAW:
        print("Game is a draw")
    elif last_play == CROSS:
        print("Cross won!")
    else:
        print("Naught won!")
Example #10
def battle(player1: Player = RandomPlayer(),
           player2: Player = RandomPlayer(),
           num_games: int = 100000,
           silent: bool = False):
    board = Board()
    draw_count = 0
    cross_count = 0
    naught_count = 0
    for _ in range(num_games):
        result = play_game(board, player1, player2)
        if result == GameResult.CROSS_WIN:
            cross_count += 1
        elif result == GameResult.NAUGHT_WIN:
            naught_count += 1
        else:
            draw_count += 1

    if not silent:
        print(
            "After {} game we have draws: {}, Player 1 wins: {}, and Player 2 wins: {}."
            .format(num_games, draw_count, cross_count, naught_count))
        print(
            "Which gives percentages of draws: {:.2%}, Player 1 wins: {:.2%}, and Player 2 wins:  {:.2%}"
            .format(draw_count / num_games, cross_count / num_games,
                    naught_count / num_games))

    return cross_count, naught_count, draw_count
Example #11
def evaluate_players(p1: Player,
                     p2: Player,
                     games_per_battle=100,
                     num_battles=100):
    board = Board()

    p1_wins = []
    p2_wins = []
    draws = []
    game_number = []
    game_counter = 0

    TFSessionManager.set_session(tf.Session())
    TFSessionManager.get_session().run(tf.global_variables_initializer())

    for i in range(num_battles):
        p1win, p2win, draw = battle(p1, p2, games_per_battle, False)
        p1_wins.append(p1win)
        p2_wins.append(p2win)
        draws.append(draw)
        game_counter = game_counter + 1
        game_number.append(game_counter)

    TFSessionManager.set_session(None)
    return game_number, p1_wins, p2_wins, draws
Example #12
    def move(self, board: Board) -> (GameResult, bool):
        """
        Implements the Player interface and makes a move on Board `board`
        :param board: The Board to make a move on
        :return: A tuple of the GameResult and a flag indicating if the game is over after this move.
        """

        # We record all game positions to feed them into the NN for training with the corresponding updated Q
        # values.
        self.board_position_log.append(board.state.copy())

        nn_input = self.board_state_to_nn_input(board.state)
        probs, qvalues = self.get_probs(nn_input)
        qvalues = np.copy(qvalues)

        # We filter out all illegal moves by setting their probability to -1 (so the argmax below can never pick
        # them) and clip any remaining negative probabilities to 0. We don't change the Q values, as we don't
        # want the NN to waste any effort learning different Q values for moves that are illegal anyway.
        for index in range(len(probs)):
            if not board.is_legal(index):
                probs[index] = -1
            elif probs[index] < 0:
                probs[index] = 0.0

        # Most of the time our next move is the one with the highest probability after removing all illegal ones.
        # Occasionally, however, we choose a random move to encourage exploration.
        if (self.training is True) and (np.random.rand(1) <
                                        self.random_move_prob):
            move = board.random_empty_spot()
        else:
            move = np.argmax(probs)

        # Unless this is the very first move, the max Q value of this state is also the max Q value of
        # the move that got the game from the previous state to this one.
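        # (The values stored in next_max_log later supply the discounted "max Q of the next state" term when
        # the learning targets for the earlier moves are computed after the game.)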
        if len(self.action_log) > 0:
            self.next_max_log.append(qvalues[np.argmax(probs)])

        # We record the action we selected as well as the Q values of the current state for later use when
        # adjusting NN weights.
        self.action_log.append(move)
        self.values_log.append(qvalues)

        # We execute the move and return the result
        _, res, finished = board.move(move, self.side)
        return res, finished
Example #13
 def move(self, board: Board) -> (GameResult, bool):
     """
     Making a move according to the MinMax algorithm
     :param board: The board to make a move on
     :return: The result of the move
     """
     score, action = self._max(board)
     _, res, finished = board.move(action, self.side)
     return res, finished
Example #14
    def move(self, board: Board):
        """
        Makes a move and returns the game result after this move and whether the move ended the game
        :param board: The board to make a move on
        :return: The GameResult after this move, Flag to indicate whether the move finished the game
        """
        # Select strategy to choose next move: exploit known or explore unknown?
        if np.random.uniform(0, 1) <= self.epsilon:
            self.move_strategy = MoveStrategy.EXPLORATION
        else:
            self.move_strategy = MoveStrategy.EXPLOITATION

        m = self.get_move(board)
        self.move_history.append((board.hash_value(), m))
        self.backup_value()
        #        print("v={}".format(self.v))
        _, res, finished = board.move(m, self.side)
        return res, finished
Example #15
 def move(self, board: Board) -> (GameResult, bool):
     """
     Making a move according to the MinMax algorithm. If more than one best move exists, chooses among them
     randomly.
     :param board: The board to make a move on
     :return: The result of the move
     """
     score, action = self._max(board)
     _, res, finished = board.move(action, self.side)
     return res, finished
Example #16
    def _min(self, board: Board) -> (float, int):
        """
        Evaluate the board position `board` from the Minimizing player's point of view.

        :param board: The board position to evaluate
        :return: Tuple of (Best Result, Best Move in this situation). Returns -1 for best move if the game has already
        finished
        """

        #
        # First we check if we have seen this board position before, and if so we just return the cached value
        #
        board_hash = board.hash_value()
        if board_hash in self.cache:
            return self.cache[board_hash]

        #
        # Init the min value as well as action. Min value is set to DRAW as this value will pass through in case
        # of a draw
        #
        min_value = self.DRAW_VALUE
        action = -1

        #
        # If the game has already finished we return. Otherwise we look at possible continuations
        #
        winner = board.who_won()
        if winner == self.side:
            min_value = self.WIN_VALUE
            action = -1
        elif winner == board.other_side(self.side):
            min_value = self.LOSS_VALUE
            action = -1
        else:
            for index in [i for i, e in enumerate(board.state) if e == EMPTY]:
                b = Board(board.state)
                b.move(index, board.other_side(self.side))

                res, _ = self._max(b)
                if res < min_value or action == -1:
                    min_value = res
                    action = index

                    # Shortcut: Can't get better than that, so abort here and return this move
                    if min_value == self.LOSS_VALUE:
                        self.cache[board_hash] = (min_value, action)
                        return min_value, action

                self.cache[board_hash] = (min_value, action)
        return min_value, action
Example #17
def play_game(board: Board,
              player1: Player,
              player2: Player,
              silent: bool = True):
    player1.new_game(CROSS)
    player2.new_game(NAUGHT)
    board.reset()

    if not silent:
        print()
        board.print_board()
        time.sleep(1)

    finished = False
    while not finished:
        # player1 move
        result, finished = player1.move(board)
        if not silent:
            print()
            print("{} move:".format(player1.name))
            board.print_board()
            time.sleep(1)
        if finished:
            if result == GameResult.DRAW:
                final_result = GameResult.DRAW
            else:
                final_result = GameResult.CROSS_WIN
        else:
            # player 2 move
            result, finished = player2.move(board)
            if not silent:
                print()
                print("{} move:".format(player2.name))
                board.print_board()
                time.sleep(1)
            if finished:
                if result == GameResult.DRAW:
                    final_result = GameResult.DRAW
                else:
                    final_result = GameResult.NAUGHT_WIN

    player1.final_result(final_result)
    player2.final_result(final_result)

    if not silent:
        print()
        if final_result == GameResult.CROSS_WIN:
            print("{} wins!".format(player1.name))
        elif final_result == GameResult.NAUGHT_WIN:
            print("{} wins!".format(player2.name))
        else:
            print("Draw!")
    return final_result
Example #18
    def _min(self, board: Board) -> (float, int):
        """
        Evaluate the board position `board` from the Minimizing player's point of view.
        :param board: The board position to evaluate
        :return: Tuple of (Best Result, Best Move in this situation). Returns -1 for the best move if the game
        has already finished
        """

        #
        # First we check if we have seen this board position before, and if so we just return a random choice
        # from the cached values
        #
        board_hash = board.hash_value()
        if board_hash in self.cache:
            return random.choice(self.cache[board_hash])

        #
        # If the game has already finished we return. Otherwise we look at possible continuations
        #
        winner = board.who_won()
        if winner == self.side:
            best_moves = {(self.WIN_VALUE, -1)}
        elif winner == board.other_side(self.side):
            best_moves = {(self.LOSS_VALUE, -1)}
        else:
            #
            # Init the min value as well as action. Min value is set to DRAW as this value will pass through in case
            # of a draw
            #
            min_value = self.DRAW_VALUE
            action = -1
            best_moves = {(min_value, action)}
            for index in [i for i, e in enumerate(board.state) if e == EMPTY]:
                b = Board(board.state)
                b.move(index, board.other_side(self.side))

                res, _ = self._max(b)
                if res < min_value or action == -1:
                    min_value = res
                    action = index
                    best_moves = {(min_value, action)}
                elif res == min_value:
                    action = index
                    best_moves.add((min_value, action))

        best_moves = tuple(best_moves)
        self.cache[board_hash] = best_moves

        return random.choice(best_moves)
Example #19
 def board_state_to_nn_input(self, state: np.ndarray) -> np.ndarray:
     """
     Converts a Tic Tac Toe board state to an input feature vector for the Neural Network. The input feature vector
     is a bit array of size 27. The first 9 bits are set to 1 on positions containing the player's pieces, the second
     9 bits are set to 1 on positions with the opponent's pieces, and the final 9 bits are set on empty positions on
     the board.
     :param state: The board state that is to be converted to a feature vector.
     :return: The feature vector representing the input Tic Tac Toe board state.
     """
     res = np.array([(state == self.side).astype(int),
                     (state == Board.other_side(self.side)).astype(int),
                     (state == EMPTY).astype(int)])
     return res.reshape(-1)
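
For intuition, here is a minimal usage sketch of this encoding. The `player` instance, the sample board, and the assumption that CROSS, NAUGHT and EMPTY are the usual integer constants are illustrative only, not taken from the examples above:

import numpy as np

# Hypothetical board: player.side == CROSS, with a cross at index 0 and a naught at index 4.
state = np.array([CROSS, EMPTY, EMPTY,
                  EMPTY, NAUGHT, EMPTY,
                  EMPTY, EMPTY, EMPTY])
features = player.board_state_to_nn_input(state)

# features[0:9]   -> 1 where the player's own pieces are  (index 0)
# features[9:18]  -> 1 where the opponent's pieces are     (index 4)
# features[18:27] -> 1 on the seven remaining empty squares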
Example #20
    def move(self, board: Board) -> (GameResult, bool):
        """
        Make move corresponding to key pressed by user
        :param board: The board to make a move on
        :return: The result of the move
        """
        print()
        while True:
            key = input("Your move? ")
            if key in self.keys:
                break
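        # self.keys maps keyboard keys to board positions; the index of the pressed key is the move to make.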
        position = self.keys.index(key)

        _, res, finished = board.move(position, self.side)
        return res, finished
Example #21
def battle(player1: Player,
           player2: Player,
           num_games: int = 100000,
           silent: bool = False):
    board = Board()
    draw_count = 0
    cross_count = 0
    naught_count = 0
    if not silent:
        print("Battling", end="", flush=True)

    for _ in range(1, num_games + 1):
        result = play_game(board, player1, player2)
        if result == GameResult.CROSS_WIN:
            cross_count += 1
        elif result == GameResult.NAUGHT_WIN:
            naught_count += 1
        else:
            draw_count += 1
        if not silent and _ % 1000 == 0:
            print(".", end="", flush=True)

    if not silent:
        print()
        print("After {} game we have draws: {}, {} wins: {}, and {} wins: {}.".
              format(num_games, draw_count, player1.name, cross_count,
                     player2.name, naught_count))

        print(
            "Which gives percentages of draws: {:.2%}, {} wins: {:.2%}, and {} wins:  {:.2%}"
            .format(draw_count / num_games, player1.name,
                    cross_count / num_games, player2.name,
                    naught_count / num_games))
        print()

    return cross_count, naught_count, draw_count
Example #22
from tic_tac_toe.Board import Board, GameResult, CROSS, NAUGHT, EMPTY
from util import print_board, play_game, battle
from tic_tac_toe.RandomPlayer import RandomPlayer
from tic_tac_toe.MinMaxAgent import MinMaxAgent
from tic_tac_toe.RndMinMaxAgent import RndMinMaxAgent
from tic_tac_toe.TabularQPlayer import TQPlayer
from tic_tac_toe.SimpleNNQPlayer import NNQPlayer
from tic_tac_toe.TFSessionManager import TFSessionManager
import matplotlib.pyplot as plt
import tensorflow as tf
import random

board = Board()
#tf.reset_default_graph()

player1 = RandomPlayer()

player2 = RandomPlayer()

p1_wins = []
p1count = 0
p2_wins = []
p2count = 0
draws = []
drawcount = 0
count = []
num_battles = 100
games_per_battle = 10

TFSessionManager.set_session(tf.Session())
TFSessionManager.get_session().run(tf.global_variables_initializer())
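
The snippet ends right after the TensorFlow session is initialised. The actual continuation is not shown here; below is only a minimal sketch of the battle-and-plot loop that such a setup typically leads into, reusing the names defined above and the `battle` helper from the other examples:

for i in range(num_battles):
    p1win, p2win, draw = battle(player1, player2, games_per_battle, silent=True)
    p1count += p1win
    p2count += p2win
    drawcount += draw
    count.append((i + 1) * games_per_battle)
    p1_wins.append(p1win)
    p2_wins.append(p2win)
    draws.append(draw)

TFSessionManager.set_session(None)

plt.plot(count, draws, 'r-', count, p1_wins, 'g-', count, p2_wins, 'b-')
plt.show()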
Example #23
    def final_result(self, result: GameResult):
        """
        This method is called once the game is over. If `self.training` is True, we execute a training run for
        the Neural Network.
        :param result: The result of the game that just finished.
        """

        self.game_counter += 1

        # Compute the final reward based on the game outcome
        if (result == GameResult.NAUGHT_WIN
                and self.side == NAUGHT) or (result == GameResult.CROSS_WIN
                                             and self.side == CROSS):
            reward = self.win_value  # type: float
        elif (result == GameResult.NAUGHT_WIN
              and self.side == CROSS) or (result == GameResult.CROSS_WIN
                                          and self.side == NAUGHT):
            reward = self.loss_value  # type: float
        elif result == GameResult.DRAW:
            reward = self.draw_value  # type: float
        else:
            raise ValueError("Unexpected game result {}".format(result))

        self.add_game_to_replay_buffer(reward)

        # If we are in training mode we run the optimizer.
        if self.training and (self.game_counter > self.pre_training_games):

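            # Build a balanced training batch: one third each from the win, loss, and draw replay buffers.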
            batch_third = self.batch_size // 3
            train_batch = self.replay_buffer_win.sample(batch_third)
            train_batch.extend(self.replay_buffer_loss.sample(batch_third))
            train_batch.extend(self.replay_buffer_draw.sample(batch_third))
            train_batch = np.array(train_batch)

            #
            # Let's compute the target Q values for all non-terminal moves.
            # We extract the resulting state, run it through the target network, and
            # get the maximum Q value (of all valid moves).
            next_states = [s[2] for s in train_batch if s[2] is not None]
            target_qs = []

            if len(next_states) > 0:
                probs, qvals = self.get_valid_probs(
                    [self.board_state_to_nn_input(s) for s in next_states],
                    self.target_net, [Board(s) for s in next_states])

                i = 0
                for t in train_batch:
                    if t[2] is not None:
                        max_move = np.argmax(probs[i])
                        max_qval = qvals[i][max_move]
                        target_qs.append(max_qval * self.reward_discount)
                        i += 1
                    else:
                        target_qs.append(t[3])

                if i != len(next_states):
                    print("Something wrong here!!!")
            else:
                target_qs.extend(train_batch[:, 3])

            # We convert the input states we have recorded to feature vectors to feed into the training.
            nn_input = [
                self.board_state_to_nn_input(x[0]) for x in train_batch
            ]
            actions = train_batch[:, 1]

            # We run the training step with the recorded inputs and new Q value targets.
            summary, _ = TFSN.get_session().run(
                [self.q_net.merge, self.q_net.train_step],
                feed_dict={
                    self.q_net.input_positions: nn_input,
                    self.q_net.target_q: target_qs,
                    self.q_net.actions: actions
                })
            self.random_move_prob *= self.random_move_decrease

            if self.writer is not None:
                self.writer.add_summary(summary, self.game_counter)
                summary = tf.Summary(value=[
                    tf.Summary.Value(tag='Random_Move_Probability',
                                     simple_value=self.random_move_prob)
                ])
                self.writer.add_summary(summary, self.game_counter)

            TFSN.get_session().run(self.graph_copy_op)
Example #24
from tic_tac_toe.Board import Board, GameResult
from tic_tac_toe.RandomPlayer import RandomPlayer
from tic_tac_toe.MinMaxAgent import MinMaxAgent
from tic_tac_toe.RndMinMaxAgent import RndMinMaxAgent
from tic_tac_toe.HumanPlayer import HumanPlayer
from tic_tac_toe.TQPlayer import TQPlayer
from tic_tac_toe.VFPlayer import VFPlayer
from util import *

# battle(RandomPlayer("RandomPlayer1"), RandomPlayer("RandomPlayer2"), num_games=10000)
# battle(MinMaxAgent(), RandomPlayer(), num_games=10000)
# battle(RandomPlayer(), MinMaxAgent(), num_games=10000)
# battle(MinMaxAgent(), RndMinMaxAgent(), num_games=10000)
#play_game(Board(), RndMinMaxAgent(), HumanPlayer(), silent=False)

#play_game(Board(), VFPlayer(), MinMaxAgent(), silent=False)

player1 = VFPlayer("VFPlayer1",
                   learning_rate=0.1,
                   exploration_rate=0.01,
                   v_init=0.6)
#player1 = TQPlayer()
eval_players(player1, RndMinMaxAgent(), 50)
player1.set_exloration_rate(0.0)
eval_players(player1, RndMinMaxAgent(), 50)
while True:
    play_game(Board(), player1, HumanPlayer(), silent=False)