def train(epochs=10000, print_every_n=500, ep1=0.1, ep2=0.1):
    """Play ``epochs`` games between two players and record running win rates.

    Two ``Player`` objects are created with ``epsilon`` values ``ep1`` and
    ``ep2`` and paired through a ``Judge``. After every game ``backup()`` is
    called on both players and the judge is reset; once all games have been
    played ``save_policy()`` is called on each player.

    Args:
        epochs: Number of games to play.
        print_every_n: A progress line is printed whenever the 1-based game
            index is a multiple of this value.
        ep1: ``epsilon`` passed to the first player.
        ep2: ``epsilon`` passed to the second player.

    Returns:
        A pair of arrays of length ``epochs``. Entry ``k`` of each array is
        the fraction of the first ``k + 1`` games counted as a win for
        player 1 and player 2 respectively.
    """
    # The same experiment number is handed to both players and shown in the
    # progress output.
    exp_number = int(ep1 * 100 + ep2 * 10)
    first = Player(epsilon=ep1, exp_number=exp_number)
    second = Player(epsilon=ep2, exp_number=exp_number)
    judge = Judge(first, second)

    first_wins = 0.0
    second_wins = 0.0
    # Every slot is written inside the loop, so no initial fill is needed.
    first_history = np.empty((epochs,))
    second_history = np.empty((epochs,))

    for game_index in range(1, epochs + 1):
        winner = judge.play(print_state=False)
        # A result of 1 is counted for player 1 and -1 for player 2; any
        # other value counts for neither.
        if winner == 1:
            first_wins += 1
        elif winner == -1:
            second_wins += 1

        first_rate = first_wins / game_index
        second_rate = second_wins / game_index
        if game_index % print_every_n == 0:
            print('[Exp#%2d] Epoch %d, 1 win rate: %.02f, 2 win rate: %.02f'
                  % (exp_number, game_index, first_rate, second_rate))

        first.backup()
        second.backup()
        judge.reset()

        first_history[game_index - 1] = first_rate
        second_history[game_index - 1] = second_rate

    first.save_policy()
    second.save_policy()
    return first_history, second_history
def test_minimax_2():
    """Check ``expert_ai`` for "X" on a position with three "?" cells.

    The game is given a deep copy of the prepared ``Board``; the call is
    expected to return ``(0, 0, 2)``.
    """
    game = Game()
    position = Board()
    position.board = [
        ["?", "O", "?"],
        ["X", "O", "?"],
        ["O", "X", "X"],
    ]
    game.board = copy.deepcopy(position)
    game.player1 = Player(1, "Human", "O")
    game.player2 = Player(2, "Computer", "X")

    outcome = game.expert_ai(game.board, -math.inf, math.inf, "X")
    assert outcome == (0, 0, 2)
def test_minimax():
    """Check ``expert_ai`` for "X" on a position with three "?" cells.

    The call is expected to return ``(1, 2, 1)``.
    """
    game = Game()
    position = Board()
    position.board = [
        ["O", "O", "X"],
        ["X", "?", "O"],
        ["?", "?", "X"],
    ]
    game.board = position
    game.player1 = Player(1, "Human", "O")
    game.player2 = Player(2, "Computer", "X")

    outcome = game.expert_ai(game.board, -math.inf, math.inf, "X")
    assert outcome == (1, 2, 1)
def test_minimax_3():
    """Check elements 1 and 2 of the ``expert_ai`` result for "X".

    The board has five "?" cells and the current turn is set to player 2.
    Element 1 of the result must be 0 and element 2 must be 1 (presumably
    the chosen row and column; element 0 is not checked).
    """
    game = Game()
    position = Board()
    position.board = [
        ["?", "?", "?"],
        ["?", "O", "?"],
        ["X", "O", "X"],
    ]
    game.board = position
    game.player1 = Player(1, "Human", "O")
    game.player2 = Player(2, "Computer", "X")
    game.current_turn = game.player2

    outcome = game.expert_ai(game.board, -math.inf, math.inf, "X")
    assert outcome[1] == 0
    assert outcome[2] == 1
def next_move_and_result(player, board_string):
    """Ask ``expert_ai`` for a move on a board given as text and render it.

    ``board_string`` is split on whitespace and its first nine tokens become
    the three rows of the board. The cell at the row and column returned by
    ``expert_ai`` is overwritten with ``"*"`` before the board is rendered.

    Args:
        player: Value forwarded to ``expert_ai`` as its second argument.
        board_string: Whitespace-separated cell symbols, row by row.

    Returns:
        A string containing the score returned by ``expert_ai`` followed by
        the marked board.
    """
    cells = board_string.split()
    rows = [cells[start:start + 3] for start in range(0, 9, 3)]

    board = Board()
    board.board = rows
    game = Game()
    game.board = board
    game.player1 = Player(1, "Human", "O")
    game.player2 = Player(2, "Computer", "X")

    # NOTE(review): the test functions in this file call expert_ai with
    # alpha/beta bounds (board, -inf, inf, player); confirm this two-argument
    # call still matches the current signature.
    score, row, column = game.expert_ai(game.board, player)

    # ``rows`` is the same list object stored on ``board.board``, so the
    # marker is visible through the Board as well.
    rows[row][column] = "*"
    rendered = "\n ".join(" ".join(line) for line in rows)
    return """ Position score: {} {} """.format(score, rendered)
def compete(turns, ep1, ep2):
    """Play ``turns`` games between two players that have loaded a policy.

    Two ``Player`` objects are created with ``epsilon`` values ``ep1`` and
    ``ep2``; ``load_policy()`` is called on each before they are paired
    through a ``Judge``. The judge is reset after every game and a summary
    line is printed once all games are finished.

    Args:
        turns: Number of games to play.
        ep1: ``epsilon`` passed to the first player.
        ep2: ``epsilon`` passed to the second player.

    Returns:
        A tuple ``(player 1 win fraction, player 2 win fraction)`` over the
        ``turns`` games.
    """
    exp_number = int(ep1 * 100 + ep2 * 10)
    first = Player(epsilon=ep1, exp_number=exp_number)
    second = Player(epsilon=ep2, exp_number=exp_number)
    first.load_policy()
    second.load_policy()
    judge = Judge(first, second)

    first_wins = 0.0
    second_wins = 0.0
    for _ in range(turns):
        winner = judge.play()
        # A result of 1 is counted for player 1 and -1 for player 2; any
        # other value counts for neither.
        if winner == 1:
            first_wins += 1
        elif winner == -1:
            second_wins += 1
        judge.reset()

    first_rate = first_wins / turns
    second_rate = second_wins / turns
    print('[Exp#%02d] %d turns, player 1 win %.05f, player 2 win %.05f'
          % (exp_number, turns, first_rate, second_rate))
    return first_rate, second_rate
"initial_value": 0.1, "verbose": False } second_player_params = { "train": True, "epsilon": 0.7, "epsilon_decay": 0.9999, "min_epsilon": 0., "gamma": 1., "alpha": 0.4, "initial_value": 0.1, "verbose": False } first_player = Player(**first_player_params) second_player = Player(**second_player_params) players = {1: first_player, -1: second_player} game = Game() train(game, players, games=20000, verbose=False, plot=True, window_size=1000, seed=None) first_player.save_state_dict('states/first_player') second_player.save_state_dict('states/second_player')
def setUp(self):
    """Build two ``Player`` objects and store them on the instance.

    ``self.player`` is "Mike" with ``sign_x`` and ``self.player2`` is
    "Computer" with ``sign_o``. Presumably a unittest-style fixture hook;
    the enclosing class is not visible here.
    """
    first = Player("Mike", sign_x)
    self.player = first
    second = Player("Computer", sign_o)
    self.player2 = second
from tic_tac_toe import Game, Player from tabulate import tabulate games_to_play_counter = 1000 games_to_play = games_to_play_counter all_games = [] ai_1 = "E" ai_2 = "A" while games_to_play_counter > 0: game = Game() game.player1 = Player(1, ai_1, "X") game.player2 = Player(2, ai_2, "O") game.current_turn = game.player1 winner = game.ai_vs_ai() all_games.append(winner) games_to_play_counter -= 1 print(all_games) wins_player_1 = all_games.count("X") percent_wins_player_1 = wins_player_1 / games_to_play wins_player_2 = all_games.count("O") percent_wins_player_2 = wins_player_2 / games_to_play ties = all_games.count("tie") percent_ties = ties / games_to_play print( tabulate( [[ai_1, "1", str(wins_player_1), str(percent_wins_player_1)], [ai_2, "2", str(wins_player_2),