# Plays num_games games between player1 and player2 and tallies the results.
# Player 1 plays cross, player 2 plays naught (see the result counting below).
def battle(player1: Player = RandomPlayer(), player2: Player = RandomPlayer(),
           num_games: int = 100000, silent: bool = False):
    board = Board()
    draw_count = 0
    cross_count = 0
    naught_count = 0
    for _ in range(num_games):
        result = play_game(board, player1, player2)
        if result == GameResult.CROSS_WIN:
            cross_count += 1
        elif result == GameResult.NAUGHT_WIN:
            naught_count += 1
        else:
            draw_count += 1

    if not silent:
        print("After {} games we have draws: {}, Player 1 wins: {}, and Player 2 wins: {}.".format(
            num_games, draw_count, cross_count, naught_count))
        print("Which gives percentages of draws: {:.2%}, Player 1 wins: {:.2%}, and Player 2 wins: {:.2%}.".format(
            draw_count / num_games, cross_count / num_games, naught_count / num_games))

    return cross_count, naught_count, draw_count
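# Example usage (a sketch, not part of the original code): pit a random player against
# the deterministic Min-Max agent. Player 1 plays cross here, and a correct Min-Max
# opponent never loses at Tic Tac Toe, so we expect zero Player 1 wins.
from tic_tac_toe.RandomPlayer import RandomPlayer
from tic_tac_toe.MinMaxAgent import MinMaxAgent

p1_win_count, p2_win_count, draw_count = battle(RandomPlayer(), MinMaxAgent(), num_games=1000)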
from tic_tac_toe.RndMinMaxAgent import RndMinMaxAgent
from tic_tac_toe.DirectPolicyAgent import DirectPolicyAgent

TENSORLOG_DIR = './graphs'

# Start from a clean TensorBoard log directory and a fresh default graph.
if tf.gfile.Exists(TENSORLOG_DIR):
    tf.gfile.DeleteRecursively(TENSORLOG_DIR)
tf.reset_default_graph()

nnplayer = DirectPolicyAgent("PolicyLearner1")
# nn2player = EGreedyNNQPlayer("QLearner2", win_value=100.0, loss_value=-100.0)
# nnplayer = EGreedyNNQPlayer("QLearner1")  # , learning_rate=0.001, win_value=10.0, loss_value=-10.0)
# nn2player = EGreedyNNQPlayer("QLearner2")  # , learning_rate=0.001, win_value=10.0, loss_value=-10.0)
mm_player = MinMaxAgent()
rndplayer = RandomPlayer()
rm_player = RndMinMaxAgent()

TFSessionManager.set_session(tf.Session())
sess = TFSessionManager.get_session()

# Write the graph for TensorBoard and hand the writer to the learning agent.
writer = tf.summary.FileWriter(TENSORLOG_DIR, sess.graph)
nnplayer.writer = writer

sess.run(tf.global_variables_initializer())

# game_number, p1_wins, p2_wins, draws = evaluate_players(rndplayer, nnplayer, num_battles=10000)  # , num_battles = 20)
# game_number, p1_wins, p2_wins, draws = evaluate_players(rndplayer, nnplayer)  # , num_battles = 20)
# game_number, p1_wins, p2_wins, draws = evaluate_players(mm_player, nnplayer, num_battles=300)  # , num_battles = 20)
game_number, p1_wins, p2_wins, draws = evaluate_players(nnplayer, rm_player, num_battles=1000, silent=True)
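# Side note (a sketch, not part of the original code): besides the graph written above,
# the same FileWriter can log scalar values for TensorBoard, e.g. the last per-battle
# win percentage returned by evaluate_players; the tag name here is made up.
win_summary = tf.Summary(value=[tf.Summary.Value(tag="p1_win_pct", simple_value=p1_wins[-1])])
writer.add_summary(win_summary, global_step=len(p1_wins))
writer.flush()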
# evaluate_players, as called above: runs num_battles battles of games_per_battle games
# each and records the outcome percentages per battle. The signature, the battle loop,
# and the `silent` guard around the plotting are assumptions inferred from that call and
# from battle()'s return values.
def evaluate_players(p1, p2, games_per_battle=100, num_battles=100, silent=False, loc='best'):
    p1_wins = []
    p2_wins = []
    draws = []
    game_number = []
    counter = 0

    for _ in range(num_battles):
        p1win, p2win, draw = battle(p1, p2, games_per_battle, silent=True)
        p1_wins.append(p1win * 100.0 / games_per_battle)
        p2_wins.append(p2win * 100.0 / games_per_battle)
        draws.append(draw * 100.0 / games_per_battle)
        counter += 1
        game_number.append(counter)

    if not silent:
        plt.ylabel('Game outcomes in %')
        plt.xlabel('Game number')
        plt.plot(game_number, draws, 'r-', label='Draw')
        plt.plot(game_number, p1_wins, 'g-', label='Player 1 wins')
        plt.plot(game_number, p2_wins, 'b-', label='Player 2 wins')
        plt.legend(loc=loc, shadow=True, fancybox=True, framealpha=0.7)
        plt.show()

    # TFSessionManager.set_session(None)
    return game_number, p1_wins, p2_wins, draws


# A separate experiment: Q-learning networks and an epsilon-greedy variant, with the
# greedy player evaluated against a random player.
# tf.reset_default_graph()
from tic_tac_toe.EGreedyNNQPlayer import EGreedyNNQPlayer  # import path assumed from the package layout used above

nnplayer = NNQPlayer("QLearner1")
randPlayer = RandomPlayer()
nnplayer2 = NNQPlayer("QLearner2")
greedyplayer = EGreedyNNQPlayer("Greedy")

# eval_players is not defined in this snippet; presumably an earlier variant of
# evaluate_players that leaves the plotting to the caller.
game_number, p1_wins, p2_wins, draws = eval_players(greedyplayer, randPlayer)

p = plt.plot(game_number, draws, 'r-', game_number, p1_wins, 'g-', game_number, p2_wins, 'b-')
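# Finishing touches for the plot above (a sketch, not part of the original snippet):
# label the three lines to match the colour scheme used in evaluate_players, show the
# figure, and then drop the TensorFlow session via the manager.
plt.ylabel('Game outcomes in %')
plt.xlabel('Game number')
plt.legend(p, ['Draw', 'Player 1 wins', 'Player 2 wins'], loc='best')
plt.show()

TFSessionManager.set_session(None)  # as the commented-out call in evaluate_players suggests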
from tic_tac_toe.Board import Board, GameResult, CROSS, NAUGHT, EMPTY
from util import print_board, play_game, battle
from tic_tac_toe.RandomPlayer import RandomPlayer
from tic_tac_toe.MinMaxAgent import MinMaxAgent
from tic_tac_toe.RndMinMaxAgent import RndMinMaxAgent
from tic_tac_toe.TabularQPlayer import TQPlayer
from tic_tac_toe.SimpleNNQPlayer import NNQPlayer
from tic_tac_toe.TFSessionManager import TFSessionManager

import matplotlib.pyplot as plt
import tensorflow as tf
import random

board = Board()

# tf.reset_default_graph()

player1 = RandomPlayer()
player2 = RandomPlayer()

p1_wins = []
p1count = 0
p2_wins = []
p2count = 0
draws = []
drawcount = 0
count = []

num_battles = 100
games_per_battle = 10

TFSessionManager.set_session(tf.Session())
TFSessionManager.get_session().run(tf.global_variables_initializer())
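# The counters and lists above are evidently set up for a loop along these lines
# (a sketch following the pattern of evaluate_players earlier; the original loop is
# not part of this snippet):
for i in range(num_battles):
    p1win, p2win, draw = battle(player1, player2, games_per_battle, silent=True)
    p1count += p1win
    p2count += p2win
    drawcount += draw
    p1_wins.append(p1win * 100.0 / games_per_battle)
    p2_wins.append(p2win * 100.0 / games_per_battle)
    draws.append(draw * 100.0 / games_per_battle)
    count.append(i * games_per_battle)

plt.plot(count, draws, 'r-', count, p1_wins, 'g-', count, p2_wins, 'b-')
plt.show()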