from collections import Counter

import numpy as np
from matplotlib import pyplot as plt

from gameplay import play_game
from policies import MCTSPolicy
# visualize_mcts_tree is assumed to be defined elsewhere in this project
# (it renders the search tree accumulated by an MCTSPolicy).


def run_experiment(player_policies, experiment_name, n):
    games = []
    winners = []
    X_wins = np.zeros(n)
    O_wins = np.zeros(n)
    X_win_rate = np.zeros(n)
    O_win_rate = np.zeros(n)

    for i in range(n):
        G, winner = play_game(player_policies)
        games.append(G)
        winners.append(winner)

        # Copy cumulative totals from previous timestep
        if i > 0:
            X_wins[i] = X_wins[i - 1]
            O_wins[i] = O_wins[i - 1]

        if winner == 'X':
            X_wins[i] += 1
        elif winner == 'O':
            O_wins[i] += 1

        X_win_rate[i] = X_wins[i] / (i + 1)
        O_win_rate[i] = O_wins[i] / (i + 1)

    # Print the winner tally and the cumulative statistics
    c = Counter(winners)
    print(c.items())
    print(X_win_rate)
    print(X_wins)

    # Plot cumulative win rate over time
    plt.plot(X_win_rate)
    plt.plot(O_win_rate)
    plt.legend(['X', 'O'])
    plt.xlabel('simulation')
    plt.ylabel('cumulative win rate')
    plt.ylim([-0.1, 1.1])
    plt.title('Experiment ID: {}'.format(experiment_name.replace('_', ' ').upper()))

    # Save figure to disk with a unique identifier.
    # `simulation_time` (e.g. the MCTS time budget) is assumed to be defined
    # at module level in the original project.
    plt.savefig('simulation_time_{}_{}.png'.format(simulation_time, experiment_name))
    plt.cla()

    # Save a visualization of the first n levels of
    # the MCTS trees used by each player that used MCTS
    for policy in player_policies:
        if isinstance(policy, MCTSPolicy):
            visualize_mcts_tree(mcts=policy, depth=0,
                                filename='{}_{}_{}'.format(experiment_name, policy.player, n))
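# Hedged usage sketch, not part of the original script: it assumes MCTSPolicy
# and RandomPolicy can be constructed with no arguments (as in the multi-game
# graph script below), that play_game pairs the first policy with X and the
# second with O, and that `simulation_time` is the module-level MCTS budget
# expected by run_experiment above; the value used here is only illustrative.
if __name__ == '__main__':
    from policies import MCTSPolicy, RandomPolicy

    simulation_time = 1.0  # illustrative placeholder for the MCTS time budget
    run_experiment([MCTSPolicy(), RandomPolicy()], 'mcts_vs_random', n=100)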
from gameplay import play_game
from policies import RandomPolicy, MCTSPolicy

import numpy as np
import networkx as nx

player_policies = [MCTSPolicy(), RandomPolicy()]

# For reproducibility
np.random.seed(0)

games = []
for i in range(100):
    games.append(play_game(player_policies))

# Merge the per-game graphs into one graph; nodes with the same label are
# collapsed across games by compose_all.
graphs = [game[0] for game in games]
dot_graph_combined = nx.compose_all(graphs)

# In networkx >= 2.0 this helper lives in nx.nx_pydot; older versions exposed
# it as nx.to_pydot.
dot_graph = nx.nx_pydot.to_pydot(dot_graph_combined)
dot_graph.set_graph_defaults(fontname='Courier')
dot_graph.write_png('multiple_game_graph.png')
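# Hedged follow-up sketch, not part of the original script: each element of
# `games` above is assumed to be a (graph, winner) pair, consistent with how
# play_game is unpacked in run_experiment, so the outcomes of the 100 composed
# games can be tallied directly.
from collections import Counter

winner_counts = Counter(game[1] for game in games)
print(winner_counts)  # counts per winner label, e.g. 'X', 'O', and draws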
from collections import Counter

import numpy as np
from matplotlib import pyplot as plt

from gameplay import play_game


def run_experiment(player_policies, experiment_name):
    # For reproducibility
    np.random.seed(0)

    # Number of games to play per experiment
    n = 100

    games = []
    winners = []
    X_wins = np.zeros(n)
    O_wins = np.zeros(n)
    X_win_rate = np.zeros(n)
    O_win_rate = np.zeros(n)

    for i in range(n):
        G, winner = play_game(player_policies)
        games.append(G)
        winners.append(winner)

        # Copy cumulative totals from previous timestep
        if i > 0:
            X_wins[i] = X_wins[i - 1]
            O_wins[i] = O_wins[i - 1]

        if winner == "X":
            X_wins[i] += 1
        elif winner == "O":
            O_wins[i] += 1

        X_win_rate[i] = X_wins[i] / (i + 1)
        O_win_rate[i] = O_wins[i] / (i + 1)

    # Print the winner tally and the cumulative statistics
    c = Counter(winners)
    print(c.items())
    print(X_win_rate)
    print(X_wins)

    # Plot cumulative win count over time
    # plt.plot(X_wins)
    # plt.plot(O_wins)
    # plt.legend(['X wins', 'O wins'])

    # Plot cumulative win rate over time
    plt.plot(X_win_rate)
    plt.plot(O_win_rate)
    plt.legend(["X", "O"])
    plt.xlabel("simulation")
    plt.ylabel("cumulative win rate")
    plt.ylim([-0.1, 1.1])
    plt.title("Experiment ID: {}".format(experiment_name.replace("_", " ").upper()))

    # Save figure to disk, using the experiment name as a unique identifier
    plt.savefig("{}.png".format(experiment_name))
    # plt.show()
    plt.cla()
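# Hedged usage sketch, not part of the original script: it assumes MCTSPolicy
# and RandomPolicy can be constructed with no arguments, as in the multi-game
# graph script above. Each call plays 100 games and writes one PNG named after
# its experiment.
if __name__ == "__main__":
    from policies import MCTSPolicy, RandomPolicy

    run_experiment([MCTSPolicy(), RandomPolicy()], "mcts_vs_random")
    run_experiment([RandomPolicy(), MCTSPolicy()], "random_vs_mcts")
    run_experiment([RandomPolicy(), RandomPolicy()], "random_vs_random")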