def play_chomp(mode):
    """Start Chomp game and training.

    Args:
        mode: One of 'train' (train an agent and plot/save stats),
            'hyper' (grid-search epsilon/learning-rate), or
            'demo' (load a saved Q-table and play).
    """
    print('=====CHOMP=====')
    # Square board has optimal strategy to allow for easy sanity check
    # that agent is learning.
    game = Chomp(rows=4, cols=4)
    if mode == 'train':
        # Train agent to go first
        agent = Agent(game, epsilon=9e-3, learning_rate=25e-2)
        n = 10000
        history = agent.train(n)
        print('After {} Episodes'.format(n))
        # Plot Reward Stats: same series over three episode windows
        # (first 100, first 1000, all n) to show convergence at each scale.
        rfig, raxs = plt.subplots(nrows=3, ncols=1)
        for ax in raxs:
            ax.grid()
        raxs[0].plot(history[0][:100], history[1][:100])
        raxs[0].set(ylabel='Cumulative Reward',
                    title='Chomp 4x4 Cumulative Reward')
        raxs[1].plot(history[0][:1000], history[1][:1000], color='g')
        raxs[1].set(ylabel='Cumulative Reward')
        raxs[2].plot(history[0][:n], history[1][:n], color='r')
        raxs[2].set(xlabel='Episode', ylabel='Cumulative Reward')
        rfig.savefig('chomp_reward.png')
        # Plot Qtable Memory Usage Stats
        memfig, memaxs = plt.subplots(nrows=3, ncols=1)
        for ax in memaxs:
            ax.grid()
        memaxs[0].plot(history[0][:100], history[2][:100])
        memaxs[0].set(ylabel='Size (KB)', title='Chomp 4x4 QTable Size')
        memaxs[1].plot(history[0][:1000], history[2][:1000], color='g')
        memaxs[1].set(ylabel='Size (KB)')
        memaxs[2].plot(history[0][:n], history[2][:n], color='r')
        memaxs[2].set(xlabel='Episode', ylabel='Size (KB)')
        # FIX: the memory figure was built but never written to disk,
        # unlike the tictactoe/connectfour counterparts.
        memfig.savefig('chomp_memory.png')
        plt.show()
        agent.save_values(path='data/chomp_qtable.json')
        agent.demo()
    elif mode == 'hyper':
        # Hyper parameter optimization: exhaustive grid search, keeping
        # the (epsilon, learning_rate) pair with the best final reward.
        max_e = 0.0
        max_lr = 0.0
        max_reward = 0.0
        epsilons = [1e-1, 2e-1, 9e-2, 1e-2, 9e-3]
        learning_rates = [1e-1, 2e-1, 3e-1, 25e-2, 9e-2]
        for epsilon in epsilons:
            for learning_rate in learning_rates:
                agent = Agent(game, qtable={}, player='X', epsilon=epsilon,
                              learning_rate=learning_rate)
                n = 10000
                history = agent.train(n, history=[])
                # Final cumulative reward for this parameter pair.
                total = history[1][-1]
                print(total)
                if total > max_reward:
                    max_reward = total
                    max_e = epsilon
                    max_lr = learning_rate
        print('Max e: {}'.format(max_e))
        print('Max lr: {}'.format(max_lr))
        print('Max reward: {}'.format(max_reward))
    elif mode == 'demo':
        # FIX: use a context manager so the file handle is closed;
        # json.load(open(...)) leaked it.
        with open('data/chomp_qtable.json') as f:
            qtable = json.load(f)
        agent = Agent(game, qtable=qtable)
        agent.demo()
    else:
        print('Mode {} is invalid.'.format(mode))
def play_tictactoe(mode):
    """Start TicTacToe game with RL Agent.

    Args:
        mode: 'train' to train an agent, plot/save stats, and demo it;
            'demo' to load a saved Q-table and play.
    """
    print('==TIC TAC TOE==')
    game = TicTacToe()
    if mode == 'train':
        agent = Agent(game)
        history = agent.train(10000)
        print('After 10000 Episodes')
        # Plot Reward Stats: same series over three episode windows
        # (first 100, first 1000, all 10000) to show convergence at each scale.
        rfig, raxs = plt.subplots(nrows=3, ncols=1)
        for ax in raxs:
            ax.grid()
        raxs[0].plot(history[0][:100], history[1][:100])
        raxs[0].set(ylabel='Cumulative Reward',
                    title='Tic Tac Toe Cumulative Reward Episodes')
        raxs[1].plot(history[0][:1000], history[1][:1000], color='g')
        raxs[1].set(ylabel='Cumulative Reward')
        raxs[2].plot(history[0][:10000], history[1][:10000], color='r')
        raxs[2].set(xlabel='Episode', ylabel='Cumulative Reward')
        rfig.savefig('tictactoe_reward.png')
        # Plot Qtable Memory Usage Stats
        memfig, memaxs = plt.subplots(nrows=3, ncols=1)
        for ax in memaxs:
            ax.grid()
        memaxs[0].plot(history[0][:100], history[2][:100])
        memaxs[0].set(ylabel='Size (KB)',
                      title='Tic Tac Toe QTable Size Episodes')
        memaxs[1].plot(history[0][:1000], history[2][:1000], color='g')
        memaxs[1].set(ylabel='Size (KB)')
        memaxs[2].plot(history[0][:10000], history[2][:10000], color='r')
        memaxs[2].set(xlabel='Episode', ylabel='Size (KB)')
        memfig.savefig('tictactoe_memory.png')
        plt.show()
        agent.save_values(path='data/tictactoe_qtable.json')
        agent.stats()
        agent.demo()
    elif mode == 'demo':
        # FIX: use a context manager so the file handle is closed;
        # json.load(open(...)) leaked it.
        with open('data/tictactoe_qtable.json') as f:
            qtable = json.load(f)
        agent = Agent(game, qtable=qtable)
        agent.demo()
    else:
        print('Mode {} is invalid.'.format(mode))
def play_connectfour(mode):
    """Start Connect Four game and training.

    Args:
        mode: 'train' to train an agent, plot/save stats, and demo it;
            'demo' to load a saved Q-table and play.
    """
    print('==CONNECT FOUR==')
    game = ConnectFour()
    if mode == 'train':
        agent = Agent(game)
        history = agent.train(10000)
        print('After 10000 Episodes')
        # Plot Reward Stats: same series over three episode windows
        # (first 100, first 1000, all 10000) to show convergence at each scale.
        rfig, raxs = plt.subplots(nrows=3, ncols=1)
        for ax in raxs:
            ax.grid()
        raxs[0].plot(history[0][:100], history[1][:100])
        raxs[0].set(ylabel='Cumulative Reward',
                    title='Connect Four Cumulative Reward (3 Column State)')
        raxs[1].plot(history[0][:1000], history[1][:1000], color='g')
        raxs[1].set(ylabel='Cumulative Reward')
        raxs[2].plot(history[0][:10000], history[1][:10000], color='r')
        raxs[2].set(xlabel='Episode', ylabel='Cumulative Reward')
        rfig.savefig('connectfour_reward.png')
        # Plot Qtable Memory Usage Stats
        memfig, memaxs = plt.subplots(nrows=3, ncols=1)
        for ax in memaxs:
            ax.grid()
        memaxs[0].plot(history[0][:100], history[2][:100])
        memaxs[0].set(ylabel='Size (KB)',
                      title='Connect Four QTable Size (3 Column State)')
        memaxs[1].plot(history[0][:1000], history[2][:1000], color='g')
        memaxs[1].set(ylabel='Size (KB)')
        memaxs[2].plot(history[0][:10000], history[2][:10000], color='r')
        memaxs[2].set(xlabel='Episode', ylabel='Size (KB)')
        memfig.savefig('connectfour_memory.png')
        plt.show()
        agent.save_values(path='data/connectfour_qtable.json')
        agent.demo()
    elif mode == 'demo':
        # FIX: use a context manager so the file handle is closed;
        # json.load(open(...)) leaked it.
        with open('data/connectfour_qtable.json') as f:
            qtable = json.load(f)
        agent = Agent(game, qtable=qtable)
        agent.demo()
    else:
        print('Mode {} is invalid.'.format(mode))