Example #1
def battle(player1: Player = RandomPlayer(),
           player2: Player = RandomPlayer(),
           num_games: int = 100000,
           silent: bool = False):
    """
    Plays `num_games` games between `player1` (cross) and `player2` (naught) and
    returns the counts of cross wins, naught wins, and draws.
    """
    board = Board()
    draw_count = 0
    cross_count = 0
    naught_count = 0
    for _ in range(num_games):
        result = play_game(board, player1, player2)
        if result == GameResult.CROSS_WIN:
            cross_count += 1
        elif result == GameResult.NAUGHT_WIN:
            naught_count += 1
        else:
            draw_count += 1

    if not silent:
        print(
            "After {} game we have draws: {}, Player 1 wins: {}, and Player 2 wins: {}."
            .format(num_games, draw_count, cross_count, naught_count))
        print(
            "Which gives percentages of draws: {:.2%}, Player 1 wins: {:.2%}, and Player 2 wins:  {:.2%}"
            .format(draw_count / num_games, cross_count / num_games,
                    naught_count / num_games))

    return cross_count, naught_count, draw_count
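
As a quick orientation, here is a minimal usage sketch for battle(). It assumes battle() lives in the util module and that the tic_tac_toe package is importable, as in the import lines of Examples #9 and #10.

from tic_tac_toe.RandomPlayer import RandomPlayer
from util import battle

# Two random players fight 10,000 games; the returned counts can be inspected directly.
cross_wins, naught_wins, draws = battle(RandomPlayer(), RandomPlayer(), num_games=10000)
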
Example #2
def evaluate_players(p1: Player,
                     p2: Player,
                     games_per_battle=100,
                     num_battles=100):
    """
    Runs `num_battles` battles of `games_per_battle` games each between `p1` and `p2`
    and returns the battle indices together with the per-battle win and draw counts.
    """
    board = Board()

    p1_wins = []
    p2_wins = []
    draws = []
    game_number = []
    game_counter = 0

    TFSessionManager.set_session(tf.Session())
    TFSessionManager.get_session().run(tf.global_variables_initializer())

    for i in range(num_battles):
        p1win, p2win, draw = battle(p1, p2, games_per_battle, False)
        p1_wins.append(p1win)
        p2_wins.append(p2win)
        draws.append(draw)
        game_counter += 1
        game_number.append(game_counter)

    TFSessionManager.set_session(None)
    return game_number, p1_wins, p2_wins, draws
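
The lists returned by evaluate_players() are convenient for plotting learning curves. A minimal sketch, assuming matplotlib is available as in Example #10 and that two RandomPlayer instances are acceptable inputs:

import matplotlib.pyplot as plt
from tic_tac_toe.RandomPlayer import RandomPlayer

game_number, p1_wins, p2_wins, draws = evaluate_players(RandomPlayer(), RandomPlayer())

# One data point per battle: wins and draws out of games_per_battle games.
plt.plot(game_number, p1_wins, label='Player 1 wins')
plt.plot(game_number, p2_wins, label='Player 2 wins')
plt.plot(game_number, draws, label='Draws')
plt.legend()
plt.show()
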
Example #3
    def get_v(self, board: Board) -> np.ndarray:
        """
        Returns all values when moving from current state of 'board'
        :param board: The current board state
        :return: List of values of all possible next board states
        """
        # We build the value dictionary in a lazy manner, only adding a state when it is actually used for the first time
        #
        board_hash = board.hash_value()  # needed because the value dictionary maps *hashed* states to values
        if board_hash in self.v:
            vals = self.v[board_hash]
        else:
            vals = np.full(9, self.v_init)  # default initial value
            # set values for winning states to WIN_VALUE
            # (player cannot end up in a losing state after a move
            # so losing states need not be considered):
            for pos in range(vals.size):  # vals.size = BOARD_SIZE
                if board.is_legal(pos):
                    b = Board(board.state)
                    b.move(pos, self.side)
                    if b.check_win():
                        vals[pos] = self.v_win
                    elif b.num_empty() == 0:
                        # if it is not a win, and there are no other positions
                        # available, then it is a draw
                        vals[pos] = self.v_draw
            # Update dictionary:
            self.v[board_hash] = vals
#            print("v[{}]={}".format(board_hash, self.v[board_hash]))
        return vals
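
The caching logic in get_v() follows a simple lazy-initialization pattern: look the hashed state up and build the default value array only on first access. The pattern in isolation, with illustrative names (lazy_lookup, table, default_value) that are not part of the player class:

import numpy as np

def lazy_lookup(table: dict, key: int, default_value: float) -> np.ndarray:
    # Create and store a default-initialized value array the first time a
    # state hash is seen; afterwards always return the cached array.
    if key not in table:
        table[key] = np.full(9, default_value)
    return table[key]

values = {}
first = lazy_lookup(values, 42, 0.6)    # creates np.full(9, 0.6) and caches it
second = lazy_lookup(values, 42, 0.6)   # returns the same cached array object
assert first is second
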
Example #4
    def _min(self, board: Board) -> (float, int):
        """
        Evaluate the board position `board` from the Minimizing player's point of view.

        :param board: The board position to evaluate
        :return: Tuple of (Best Result, Best Move in this situation). Returns -1 for best move if the game has already
        finished
        """

        #
        # First we check if we have seen this board position before, and if yes just return the cached value
        #
        board_hash = board.hash_value()
        if board_hash in self.cache:
            return self.cache[board_hash]

        #
        # Init the min value as well as action. Min value is set to DRAW as this value will pass through in case
        # of a draw
        #
        min_value = self.DRAW_VALUE
        action = -1

        #
        # If the game has already finished we return. Otherwise we look at possible continuations
        #
        winner = board.who_won()
        if winner == self.side:
            min_value = self.WIN_VALUE
            action = -1
        elif winner == board.other_side(self.side):
            min_value = self.LOSS_VALUE
            action = -1
        else:
            for index in [i for i, e in enumerate(board.state) if e == EMPTY]:
                b = Board(board.state)
                b.move(index, board.other_side(self.side))

                res, _ = self._max(b)
                if res < min_value or action == -1:
                    min_value = res
                    action = index

                    # Shortcut: Can't get better than that, so abort here and return this move
                    if min_value == self.LOSS_VALUE:
                        self.cache[board_hash] = (min_value, action)
                        return min_value, action

        # Cache the evaluation of this position (including terminal positions) before returning.
        self.cache[board_hash] = (min_value, action)
        return min_value, action
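
For orientation, a sketch of what the complementary _max() might look like under the same conventions (cache, WIN_VALUE/LOSS_VALUE/DRAW_VALUE, and the Board API used above). This is an assumption about its shape, not the repository's actual implementation:

    def _max(self, board: Board) -> (float, int):
        # Sketch only: mirror image of _min(), maximizing for self.side.
        board_hash = board.hash_value()
        if board_hash in self.cache:
            return self.cache[board_hash]

        max_value = self.DRAW_VALUE
        action = -1

        winner = board.who_won()
        if winner == self.side:
            max_value = self.WIN_VALUE
        elif winner == board.other_side(self.side):
            max_value = self.LOSS_VALUE
        else:
            for index in [i for i, e in enumerate(board.state) if e == EMPTY]:
                b = Board(board.state)
                b.move(index, self.side)

                res, _ = self._min(b)
                if res > max_value or action == -1:
                    max_value = res
                    action = index

                    # Shortcut: a guaranteed win cannot be improved upon.
                    if max_value == self.WIN_VALUE:
                        self.cache[board_hash] = (max_value, action)
                        return max_value, action

        self.cache[board_hash] = (max_value, action)
        return max_value, action
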
Example #5
    def _min(self, board: Board) -> (float, int):
        """
        Evaluate the board position `board` from the Minimizing player's point of view.
        :param board: The board position to evaluate
        :return: Tuple of (Best Result, Best Move in this situation), picked at random among
        equally good moves. Returns -1 for the best move if the game has already finished.
        """

        #
        # First we check if we have seen this board position before, and if yes just return a random choice
        # from the cached values
        #
        board_hash = board.hash_value()
        if board_hash in self.cache:
            return random.choice(self.cache[board_hash])

        #
        # If the game has already finished we return. Otherwise we look at possible continuations
        #
        winner = board.who_won()
        if winner == self.side:
            best_moves = {(self.WIN_VALUE, -1)}
        elif winner == board.other_side(self.side):
            best_moves = {(self.LOSS_VALUE, -1)}
        else:
            #
            # Init the min value as well as action. Min value is set to DRAW as this value will pass through in case
            # of a draw
            #
            min_value = self.DRAW_VALUE
            action = -1
            best_moves = {(min_value, action)}
            for index in [i for i, e in enumerate(board.state) if e == EMPTY]:
                b = Board(board.state)
                b.move(index, board.other_side(self.side))

                res, _ = self._max(b)
                if res < min_value or action == -1:
                    min_value = res
                    action = index
                    best_moves = {(min_value, action)}
                elif res == min_value:
                    action = index
                    best_moves.add((min_value, action))

        best_moves = tuple(best_moves)
        self.cache[board_hash] = best_moves

        return random.choice(best_moves)
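
The reason best_moves is converted to a tuple right before the return is that random.choice() needs an indexable sequence and cannot draw from a set. The pattern in isolation, with illustrative values:

import random

# All moves that tie for the best value are kept as (value, move) pairs in a set,
# then one of them is picked uniformly at random.
best_moves = {(0.0, 2), (0.0, 5), (0.0, 7)}
value, move = random.choice(tuple(best_moves))
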
Example #6
def play_random_game():
    board = Board()
    finished = False
    last_play = NAUGHT
    next_play = CROSS
    while not finished:
        _, result, finished = board.move(board.random_empty_spot(), next_play)
        print_board(board)
        # Swap turns; after the loop, `last_play` holds the side that made the final move.
        last_play, next_play = next_play, last_play
    if result == GameResult.DRAW:
        print("Game is a draw")
    elif last_play == CROSS:
        print("Cross won!")
    else:
        print("Naught won!")
Example #7
def battle(player1: Player,
           player2: Player,
           num_games: int = 100000,
           silent: bool = False):
    board = Board()
    draw_count = 0
    cross_count = 0
    naught_count = 0
    if not silent:
        print("Battling", end="", flush=True)

    for game in range(1, num_games + 1):
        result = play_game(board, player1, player2)
        if result == GameResult.CROSS_WIN:
            cross_count += 1
        elif result == GameResult.NAUGHT_WIN:
            naught_count += 1
        else:
            draw_count += 1
        if not silent and game % 1000 == 0:
            print(".", end="", flush=True)

    if not silent:
        print()
        print("After {} game we have draws: {}, {} wins: {}, and {} wins: {}.".
              format(num_games, draw_count, player1.name, cross_count,
                     player2.name, naught_count))

        print(
            "Which gives percentages of draws: {:.2%}, {} wins: {:.2%}, and {} wins:  {:.2%}"
            .format(draw_count / num_games, player1.name,
                    cross_count / num_games, player2.name,
                    naught_count / num_games))
        print()

    return cross_count, naught_count, draw_count
Example #8
    def final_result(self, result: GameResult):
        """
        This method is called once the game is over. If `self.training` is True, we execute a training run for
        the Neural Network.
        :param result: The result of the game that just finished.
        """

        self.game_counter += 1

        # Compute the final reward based on the game outcome
        if (result == GameResult.NAUGHT_WIN
                and self.side == NAUGHT) or (result == GameResult.CROSS_WIN
                                             and self.side == CROSS):
            reward = self.win_value  # type: float
        elif (result == GameResult.NAUGHT_WIN
              and self.side == CROSS) or (result == GameResult.CROSS_WIN
                                          and self.side == NAUGHT):
            reward = self.loss_value  # type: float
        elif result == GameResult.DRAW:
            reward = self.draw_value  # type: float
        else:
            raise ValueError("Unexpected game result {}".format(result))

        self.add_game_to_replay_buffer(reward)

        # If we are in training mode we run the optimizer.
        if self.training and (self.game_counter > self.pre_training_games):

            batch_third = self.batch_size // 3
            train_batch = self.replay_buffer_win.sample(batch_third)
            train_batch.extend(self.replay_buffer_loss.sample(batch_third))
            train_batch.extend(self.replay_buffer_draw.sample(batch_third))
            train_batch = np.array(train_batch)

            #
            # Compute the target Q values for all non-terminal moves.
            # We extract the resulting state, run it through the target network and
            # get the maximum Q value (over all valid moves).
            next_states = [s[2] for s in train_batch if s[2] is not None]
            target_qs = []

            if len(next_states) > 0:
                probs, qvals = self.get_valid_probs(
                    [self.board_state_to_nn_input(s) for s in next_states],
                    self.target_net, [Board(s) for s in next_states])

                i = 0
                for t in train_batch:
                    if t[2] is not None:
                        max_move = np.argmax(probs[i])
                        max_qval = qvals[i][max_move]
                        target_qs.append(max_qval * self.reward_discount)
                        i += 1
                    else:
                        target_qs.append(t[3])

                if i != len(next_states):
                    print("Mismatch: processed {} next states but expected {}!".format(i, len(next_states)))
            else:
                target_qs.extend(train_batch[:, 3])

            # We convert the input states we have recorded to feature vectors to feed into the training.
            nn_input = [
                self.board_state_to_nn_input(x[0]) for x in train_batch
            ]
            actions = train_batch[:, 1]

            # We run the training step with the recorded inputs and new Q value targets.
            summary, _ = TFSN.get_session().run(
                [self.q_net.merge, self.q_net.train_step],
                feed_dict={
                    self.q_net.input_positions: nn_input,
                    self.q_net.target_q: target_qs,
                    self.q_net.actions: actions
                })
            self.random_move_prob *= self.random_move_decrease

            if self.writer is not None:
                self.writer.add_summary(summary, self.game_counter)
                summary = tf.Summary(value=[
                    tf.Summary.Value(tag='Random_Move_Probability',
                                     simple_value=self.random_move_prob)
                ])
                self.writer.add_summary(summary, self.game_counter)

            TFSN.get_session().run(self.graph_copy_op)
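
The target computation above implements the usual Q-learning bootstrap: terminal transitions use the recorded final reward, non-terminal transitions use the discounted best Q value of the next state (with the maximizing move restricted to legal moves via get_valid_probs). A simplified standalone sketch; the names q_target, final_reward, next_q_values, and discount are illustrative, and the legal-move masking is omitted:

import numpy as np

def q_target(final_reward: float, next_q_values, discount: float) -> float:
    # Terminal transition: the target is just the recorded final reward.
    if next_q_values is None:
        return final_reward
    # Non-terminal transition: bootstrap from the best next-state Q value.
    return discount * float(np.max(next_q_values))

q_target(1.0, None, 0.95)                          # final, winning move -> 1.0
q_target(0.0, np.array([0.2, 0.7, 0.1]), 0.95)     # intermediate move -> 0.665
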
Example #9
from tic_tac_toe.Board import Board, GameResult
from tic_tac_toe.RandomPlayer import RandomPlayer
from tic_tac_toe.MinMaxAgent import MinMaxAgent
from tic_tac_toe.RndMinMaxAgent import RndMinMaxAgent
from tic_tac_toe.HumanPlayer import HumanPlayer
from tic_tac_toe.TQPlayer import TQPlayer
from tic_tac_toe.VFPlayer import VFPlayer
from util import *

# battle(RandomPlayer("RandomPlayer1"), RandomPlayer("RandomPlayer2"), num_games=10000)
# battle(MinMaxAgent(), RandomPlayer(), num_games=10000)
# battle(RandomPlayer(), MinMaxAgent(), num_games=10000)
# battle(MinMaxAgent(), RndMinMaxAgent(), num_games=10000)
#play_game(Board(), RndMinMaxAgent(), HumanPlayer(), silent=False)

#play_game(Board(), VFPlayer(), MinMaxAgent(), silent=False)

player1 = VFPlayer("VFPlayer1",
                   learning_rate=0.1,
                   exploration_rate=0.01,
                   v_init=0.6)
#player1 = TQPlayer()
eval_players(player1, RndMinMaxAgent(), 50)
player1.set_exloration_rate(0.0)
eval_players(player1, RndMinMaxAgent(), 50)
while True:
    play_game(Board(), player1, HumanPlayer(), silent=False)
Example #10
from tic_tac_toe.Board import Board, GameResult, CROSS, NAUGHT, EMPTY
from util import print_board, play_game, battle
from tic_tac_toe.RandomPlayer import RandomPlayer
from tic_tac_toe.MinMaxAgent import MinMaxAgent
from tic_tac_toe.RndMinMaxAgent import RndMinMaxAgent
from tic_tac_toe.TabularQPlayer import TQPlayer
from tic_tac_toe.SimpleNNQPlayer import NNQPlayer
from tic_tac_toe.TFSessionManager import TFSessionManager
import matplotlib.pyplot as plt
import tensorflow as tf
import random

board = Board()
#tf.reset_default_graph()

player1 = RandomPlayer()

player2 = RandomPlayer()

p1_wins = []
p1count = 0
p2_wins = []
p2count = 0
draws = []
drawcount = 0
count = []
num_battles = 100
games_per_battle = 10

TFSessionManager.set_session(tf.Session())
TFSessionManager.get_session().run(tf.global_variables_initializer())
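
The example breaks off right after the session setup. A plausible continuation, modeled on evaluate_players() in Example #2 and using only the variables already declared above, would run the battles, accumulate the counts, and plot them; this is an assumed sketch, not the original script's remaining lines:

for i in range(num_battles):
    p1win, p2win, draw = battle(player1, player2, games_per_battle, silent=True)
    p1count += p1win
    p2count += p2win
    drawcount += draw
    p1_wins.append(p1count)
    p2_wins.append(p2count)
    draws.append(drawcount)
    count.append((i + 1) * games_per_battle)

TFSessionManager.set_session(None)

plt.plot(count, draws, label='Draws')
plt.plot(count, p1_wins, label='Player 1 wins')
plt.plot(count, p2_wins, label='Player 2 wins')
plt.legend()
plt.show()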