Example #1
0
def train(epochs=10000, print_every_n=500, ep1=0.1, ep2=0.1):
    """Train two self-play agents against each other.

    Args:
        epochs: number of training games to play.
        print_every_n: print a progress line every this many games.
        ep1: exploration rate (epsilon) for player 1.
        ep2: exploration rate (epsilon) for player 2.

    Returns:
        Tuple of two numpy arrays of length ``epochs`` holding the running
        win rate of player 1 and player 2 after each game.
    """
    # Tag the experiment by its epsilon pair, e.g. ep1=0.1, ep2=0.1 -> 11.
    exp_number = int(ep1 * 100 + ep2 * 10)
    player1 = Player(epsilon=ep1, exp_number=exp_number)
    player2 = Player(epsilon=ep2, exp_number=exp_number)
    judge = Judge(player1, player2)
    player1_win = 0.0
    player2_win = 0.0
    # np.empty is the documented constructor for an uninitialized array
    # (direct np.ndarray(...) instantiation is discouraged by NumPy);
    # every slot is written inside the loop, so no zero-fill is needed.
    player1_win_rate_history = np.empty(epochs)
    player2_win_rate_history = np.empty(epochs)
    for i in range(1, epochs + 1):
        winner = judge.play(print_state=False)
        # winner is 1, -1, or neither (a tie) — the outcomes are mutually
        # exclusive, so elif avoids a pointless second comparison.
        if winner == 1:
            player1_win += 1
        elif winner == -1:
            player2_win += 1
        if i % print_every_n == 0:
            print('[Exp#%2d] Epoch %d, 1 win rate: %.02f, 2 win rate: %.02f' %
                  (exp_number, i, player1_win / i, player2_win / i))
        player1.backup()
        player2.backup()
        judge.reset()
        # Record the cumulative win rate after game i.
        player1_win_rate_history[i - 1] = player1_win / i
        player2_win_rate_history[i - 1] = player2_win / i

    player1.save_policy()
    player2.save_policy()
    return player1_win_rate_history, player2_win_rate_history
Example #2
0
def test_minimax_2():
    """Expert AI playing "X" must choose row 0, column 2 on this board."""
    board = Board()
    board.board = [["?", "O", "?"], ["X", "O", "?"], ["O", "X", "X"]]
    game = Game()
    game.board = copy.deepcopy(board)
    game.player1 = Player(1, "Human", "O")
    game.player2 = Player(2, "Computer", "X")
    result = game.expert_ai(game.board, -math.inf, math.inf, "X")
    assert result == (0, 0, 2)
Example #3
0
def test_minimax():
    """Expert AI playing "X" should pick the move at row 2, column 1."""
    game = Game()
    grid = Board()
    grid.board = [["O", "O", "X"], ["X", "?", "O"], ["?", "?", "X"]]
    game.board = grid
    game.player1 = Player(1, "Human", "O")
    game.player2 = Player(2, "Computer", "X")
    outcome = game.expert_ai(game.board, -math.inf, math.inf, "X")
    assert outcome == (1, 2, 1)
Example #4
0
def test_minimax_3():
    """With the computer ("X") to move, expert AI must play at (0, 1)."""
    game = Game()
    grid = Board()
    grid.board = [["?", "?", "?"], ["?", "O", "?"], ["X", "O", "X"]]
    game.board = grid
    game.player1 = Player(1, "Human", "O")
    game.player2 = Player(2, "Computer", "X")
    game.current_turn = game.player2
    move = game.expert_ai(game.board, -math.inf, math.inf, "X")
    # Only the row/column components of the result are pinned here.
    assert (move[1], move[2]) == (0, 1)
Example #5
0
def next_move_and_result(player, board_string):
    """Run the expert AI for ``player`` on a whitespace-separated 3x3 board.

    The chosen square is marked with "*" and a formatted summary string
    containing the position score and the rendered board is returned.
    """
    cells = board_string.split()
    # Fold the flat nine-cell list into three rows of three.
    grid = [cells[i:i + 3] for i in range(0, 9, 3)]
    board = Board()
    board.board = grid
    game = Game()
    game.board = board
    game.player1 = Player(1, "Human", "O")
    game.player2 = Player(2, "Computer", "X")
    score, best_row, best_column = game.expert_ai(game.board, player)
    grid[best_row][best_column] = "*"
    rendered = "\n    ".join(" ".join(row) for row in grid)
    return """
    Position score: {}
    {}
    """.format(score, rendered)
Example #6
0
def compete(turns, ep1, ep2):
    """Play ``turns`` games between two previously trained policies.

    Returns the (player1, player2) win fractions over all games.
    """
    # Same experiment tag as used during training, so the right policy
    # files are loaded.
    exp_number = int(ep1 * 100 + ep2 * 10)
    player1 = Player(epsilon=ep1, exp_number=exp_number)
    player2 = Player(epsilon=ep2, exp_number=exp_number)
    player1.load_policy()
    player2.load_policy()
    judge = Judge(player1, player2)
    player1_win = 0.0
    player2_win = 0.0
    for _ in range(turns):
        outcome = judge.play()
        # Boolean comparisons coerce to 0/1, so each line adds exactly one
        # point to the winner (ties add nothing to either side).
        player1_win += outcome == 1
        player2_win += outcome == -1
        judge.reset()
    print('[Exp#%02d] %d turns, player 1 win %.05f, player 2 win %.05f' %
          (exp_number, turns, player1_win / turns, player2_win / turns))
    return player1_win / turns, player2_win / turns
Example #7
0
        "initial_value": 0.1,
        "verbose": False
    }

    second_player_params = {
        "train": True,
        "epsilon": 0.7,
        "epsilon_decay": 0.9999,
        "min_epsilon": 0.,
        "gamma": 1.,
        "alpha": 0.4,
        "initial_value": 0.1,
        "verbose": False
    }

    first_player = Player(**first_player_params)
    second_player = Player(**second_player_params)

    players = {1: first_player, -1: second_player}
    game = Game()

    train(game,
          players,
          games=20000,
          verbose=False,
          plot=True,
          window_size=1000,
          seed=None)

    first_player.save_state_dict('states/first_player')
    second_player.save_state_dict('states/second_player')
Example #8
0
 def setUp(self):
     """Create the two players shared by every test in this case."""
     self.player2 = Player("Computer", sign_o)
     self.player = Player("Mike", sign_x)
Example #9
0
from tic_tac_toe import Game, Player
from tabulate import tabulate

# Number of head-to-head games to play between the two AI strategies.
games_to_play_counter = 1000
# Kept separately: the counter is decremented to 0 by the loop, while this
# copy remains the denominator for the win-rate fractions below.
games_to_play = games_to_play_counter
all_games = []

# Strategy codes handed to Player — their meaning ("E", "A") is defined in
# the Player class; presumably different AI difficulty levels (confirm).
ai_1 = "E"
ai_2 = "A"

# Play every game on a fresh Game/Player setup and record the outcome
# returned by ai_vs_ai(); outcomes counted below are "X", "O", or "tie".
while games_to_play_counter > 0:
    game = Game()
    game.player1 = Player(1, ai_1, "X")
    game.player2 = Player(2, ai_2, "O")
    game.current_turn = game.player1
    winner = game.ai_vs_ai()
    all_games.append(winner)
    games_to_play_counter -= 1

print(all_games)
# Aggregate the raw outcome list into counts and fractions for the
# tabulated summary printed afterwards.
wins_player_1 = all_games.count("X")
percent_wins_player_1 = wins_player_1 / games_to_play
wins_player_2 = all_games.count("O")
percent_wins_player_2 = wins_player_2 / games_to_play
ties = all_games.count("tie")
percent_ties = ties / games_to_play
print(
    tabulate(
        [[ai_1, "1", str(wins_player_1),
          str(percent_wins_player_1)],
         [ai_2, "2", str(wins_player_2),