def test_evaluate_d1(self):
    '''
    X|X|X
    O|O|-
    -|-|-
    '''
    ai_player = Player("AI_1", Token.A, True)
    game = TicTacToe(p1=ai_player)
    minmax = Minmax(ai_player, 7)
    depth = 10

    self.assertEqual(minmax._evaluate(game, depth, Token.A), Minmax.DRAW_POINT)
    self.assertEqual(minmax._evaluate(game, depth, Token.B), Minmax.DRAW_POINT)

    game.play(Point(0, 0))
    game.play(Point(0, 1))
    game.play(Point(1, 0))
    game.play(Point(1, 1))
    game.play(Point(2, 0))

    self.assertEqual(minmax._evaluate(game, depth, Token.A), Minmax.WIN_POINT + depth)
    self.assertEqual(minmax._evaluate(game, depth, Token.B), Minmax.LOOSE_POINT - depth)
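# What the assertions above pin down: before anyone has won, _evaluate scores
# the position as DRAW_POINT for either token; once X has won it returns
# WIN_POINT + depth for the winner's token and LOOSE_POINT - depth for the
# loser's, so more remaining depth (a faster win) scores higher. A minimal
# sketch of that rule; `game.winner.token` is an assumed attribute, the
# constants are the Minmax ones used above.
def _evaluate_sketch(game, depth, token):
    if game.winner is None:
        return Minmax.DRAW_POINT
    if game.winner.token == token:
        return Minmax.WIN_POINT + depth
    return Minmax.LOOSE_POINT - depth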
def test_minmax_d1(self):
    '''
    XX-
    OO-
    XO-

    Simulate the max loop by calling the min evaluation directly.
    '''
    ai_player = Player("AI_1", Token.A, True)
    game = TicTacToe(p1=ai_player)
    depth = 1
    minmax = Minmax(ai_player, depth)

    game.play(Point(0, 0))
    game.play(Point(0, 1))
    game.play(Point(1, 0))
    game.play(Point(1, 1))
    game.play(Point(0, 2))
    game.play(Point(1, 2))
    game.play(Point(2, 0))

    val, _ = minmax._min(game, depth)
    self.assertEqual(val, Minmax.WIN_POINT + depth)
    game.undo()

    game.play(Point(2, 1))
    val, _ = minmax._min(game, depth)
    self.assertEqual(val, Minmax.DRAW_POINT)
    game.undo()

    game.play(Point(2, 2))
    val, _ = minmax._min(game, depth)
    self.assertEqual(val, Minmax.LOOSE_POINT)
    game.undo()
def test_minmax_d2(self):
    '''
    O|-|X
    X|X|O
    O|-|-
    '''
    ai_player = Player("AI_1", Token.A, True)
    game = TicTacToe(p1=ai_player)
    depth = 2
    minmax = Minmax(ai_player, depth)

    game.play(Point(2, 0))
    game.play(Point(0, 0))
    game.play(Point(0, 1))
    game.play(Point(0, 2))
    game.play(Point(1, 1))
    game.play(Point(2, 1))
    game.play(Point(1, 0))

    val, _ = minmax._min(game, depth)
    self.assertEqual(val, Minmax.DRAW_POINT)
    game.undo()

    game.play(Point(1, 2))
    val, _ = minmax._min(game, depth)
    self.assertEqual(val, Minmax.DRAW_POINT)
    game.undo()
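# A compact sketch of the min step these tests call directly: the opponent
# picks the move that minimizes the AI's score, recursing into the max step.
# `self._max` and `self._player` are assumed names; generate_moves, play,
# undo, _is_leaf and _evaluate are the methods exercised by the tests above.
import math

def _min_sketch(self, game, depth):
    if self._is_leaf(game, depth):
        return self._evaluate(game, depth, self._player.token), None
    best_val, best_move = math.inf, None
    for move in game.generate_moves():
        game.play(move)
        val, _ = self._max(game, depth - 1)
        game.undo()
        if val < best_val:
            best_val, best_move = val, move
    return best_val, best_move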
def main():
    pr = cProfile.Profile()
    pr.enable()

    depth = 9
    p1 = Player("AI_1", Token.A, True)
    game = TicTacToe(p1=p1)
    minmax = Minmax_AlphaBeta(p1, depth)

    # moves = [Point(4, 4), Point(3, 3), Point(4, 3),
    #          Point(3, 4), Point(3, 2), Point(4, 5)]
    # for m in moves:
    #     game.play(m)

    minmax.compute(game)
    pr.disable()

    # Construct stats
    ps = pstats.Stats(pr)
    ps.strip_dirs()
    # ps.sort_stats(SortKey.CUMULATIVE)
    ps.sort_stats('tottime')
    ps.print_stats()
    ps.print_callers()
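# Optionally the profile can be persisted for later inspection with pstats or
# an external viewer. dump_stats is part of the standard pstats API; the
# filename here is arbitrary:
#   ps.dump_stats('minmax_tictactoe.prof')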
def test_win_vertical_player1(self):
    game = TicTacToe()
    self.assertTrue(game.play(Point(0, 0)))
    self.assertTrue(game.play(Point(1, 0)))
    self.assertTrue(game.play(Point(0, 1)))
    self.assertTrue(game.play(Point(1, 1)))
    self.assertTrue(game.play(Point(0, 2)))
    self.assertTrue(game.is_over)
    self.assertEqual(game.winner, game._p1)
def test_win_diag_up_player2(self):
    game = TicTacToe()
    self.assertTrue(game.play(Point(0, 0)))
    self.assertTrue(game.play(Point(0, 2)))
    self.assertTrue(game.play(Point(1, 0)))
    self.assertTrue(game.play(Point(1, 1)))
    self.assertTrue(game.play(Point(0, 1)))
    self.assertTrue(game.play(Point(2, 0)))
    self.assertTrue(game.is_over)
    self.assertEqual(game.winner, game._p2)
def test_next_player(self):
    game = TicTacToe()
    self.assertEqual(game._current_player, game._p1)
    self.assertTrue(game.play(Point(0, 0)))
    self.assertEqual(game._current_player, game._p2)
    self.assertTrue(game.play(Point(1, 0)))
    self.assertEqual(game._current_player, game._p1)

    # Cell not free: the move is rejected and the current player is unchanged
    self.assertFalse(game.play(Point(1, 0)))
    self.assertEqual(game._current_player, game._p1)
def make_game(game_name, p1, p2):
    game = None
    if game_name == 'TicTacToe':
        game = TicTacToe(p1=p1, p2=p2)
    elif game_name == 'Connect Four':
        game = ConnectFour(p1=p1, p2=p2)
    elif game_name == 'Gomoku':
        game = Gomoku(p1=p1, p2=p2)
    else:
        assert False, "Unknown game: {}".format(game_name)
    return game
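# A hedged alternative sketch: a dict-based factory removes the if/elif chain
# and fails with an explicit error for unknown names. The GAMES table and the
# function name are inventions for illustration; the game classes and their
# p1/p2 keyword arguments are the ones used above.
GAMES = {
    'TicTacToe': TicTacToe,
    'Connect Four': ConnectFour,
    'Gomoku': Gomoku,
}

def make_game_from_table(game_name, p1, p2):
    try:
        return GAMES[game_name](p1=p1, p2=p2)
    except KeyError:
        raise ValueError("Unknown game: {}".format(game_name))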
def test_draw(self):
    game = TicTacToe()
    self.assertTrue(game.play(Point(0, 0)))
    self.assertTrue(game.play(Point(1, 0)))
    self.assertTrue(game.play(Point(2, 0)))
    self.assertTrue(game.play(Point(0, 1)))
    self.assertTrue(game.play(Point(1, 1)))
    self.assertTrue(game.play(Point(2, 2)))
    self.assertTrue(game.play(Point(1, 2)))
    self.assertTrue(game.play(Point(0, 2)))
    self.assertTrue(game.play(Point(2, 1)))
    self.assertTrue(game.is_over)
    self.assertIsNone(game.winner)
def test_generate_moves(self):
    game = TicTacToe()
    moves = game.generate_moves()
    self.assertEqual(moves, [
        Point(0, 0), Point(1, 0), Point(2, 0),
        Point(0, 1), Point(1, 1), Point(2, 1),
        Point(0, 2), Point(1, 2), Point(2, 2)
    ])
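# A sketch of the enumeration the assertion above implies: free cells listed
# in row-major order (y outer, x inner). The `board.is_free` accessor and the
# fixed 3x3 size are assumptions, not the repo's actual board API.
def generate_moves_sketch(board, size=3):
    return [Point(x, y)
            for y in range(size)
            for x in range(size)
            if board.is_free(Point(x, y))]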
def test_is_leaf(self):
    ai_player = Player("AI_1", Token.A, True)
    game = TicTacToe(p1=ai_player)
    minmax = Minmax(ai_player, 7)

    self.assertFalse(minmax._is_leaf(game, 1))
    self.assertTrue(minmax._is_leaf(game, 0))

    game.play(Point(0, 0))
    game.play(Point(0, 1))
    game.play(Point(1, 0))
    game.play(Point(1, 1))
    game.play(Point(2, 0))

    self.assertTrue(minmax._is_leaf(game, 1))
    self.assertTrue(minmax._is_leaf(game, 0))
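# The assertions above fix the contract: a node is a leaf when the depth
# budget is exhausted or the game has ended. A one-line sketch of the body
# (only its observable behaviour is tested here):
def _is_leaf_sketch(game, depth):
    return depth == 0 or game.is_over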
def test_win_diag_up_p1(self):
    '''
      0 1 2
    0|X|O|X|
    1|O|X|O|
    2|X|-|-|
    '''
    game = TicTacToe()
    self.assertTrue(game.play(Point(0, 0)))
    self.assertTrue(game.play(Point(1, 0)))
    self.assertTrue(game.play(Point(2, 0)))
    self.assertTrue(game.play(Point(0, 1)))
    self.assertTrue(game.play(Point(1, 1)))
    self.assertTrue(game.play(Point(2, 1)))
    self.assertTrue(game.play(Point(0, 2)))
    self.assertTrue(game.is_over)
    self.assertEqual(game.winner, game._p1)
def test_tictactoe(self):
    from game.tictactoe import TicTacToe

    n = 10
    depth = 9
    duration = 0
    p1 = Player("AI_1", Token.A, True)

    for _ in range(n):
        minmax = Minmax_AB_Parallel(p1, depth)
        game = TicTacToe(p1=p1)
        start = time.time()
        minmax.compute(game)
        duration += time.time() - start

    print("Duration {}".format(duration / n))

    # Allow a 3% tolerance around the expected mean duration
    r = expected['tictactoe']
    delta = r * 3 / 100
    self.assertAlmostEqual(duration / n, r, delta=delta)
def play_game(game, *players):
    # Signature and loop scaffolding assumed (the standard aima-python
    # play_game pattern); the original fragment started at the for-loop.
    state = game.initial
    while True:
        for player in players:
            move = player(game, state)
            state = game.result(state, move)
            if game.terminal_test(state):
                print(game.utility(state, game.to_move(game.initial)))
                return game.utility(state, game.to_move(game.initial))


times_random = []
res = []
for i in range(1, 2):
    t = 0
    temp = []
    for j in range(1, i + 1):
        game = TicTacToe(4, 4, 4)
        t1 = time()
        winner = play_game(game, monte_carlo_player, alphabeta_player)
        t2 = time()
        t += t2 - t1
        temp.append(winner)
    times_random.append(t)
    res.append(temp)

print("times_random =", times_random)
print("Winner = ")
print("\n".join(" ".join(map(str, val)) for val in res))

# Per-row counts and percentages of player-1 wins, player-2 wins, and draws
accuracy = []
for row in res:
    accuracy.append([row.count(1), row.count(-1), row.count(0), len(row),
                     100.0 * row.count(1) / len(row),
                     100.0 * row.count(-1) / len(row),
                     100.0 * row.count(0) / len(row)])
def cmd_evaluate(game, playerType, args):
    player = playerType(game)
    if args.load:
        player.load_weights('./checkpoints/tictactoe')

    game = game.clone()
    ap = game.activePlayer()
    while game.activePlayer() is not None:
        inp = game.inputs()
        mp, ev = player.predict([inp])
        # Pick the move with the highest predicted probability
        p, m = max((p, i) for (i, p) in enumerate(mp[0]))
        game.move(m)
        print("%d %0.2f %0.2f" % (m, p, ev[0][ap]))
    print(game.winner()[ap])


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument('--action', default='evaluate')
    parser.add_argument('--load', action="store_true")
    parser.add_argument('--k', type=int, default=1)
    parser.add_argument('--position', default="")
    args = parser.parse_args()

    moves = [int(m) for m in args.position.split()]
    if args.action == 'train':
        cmd_train(TicTacToe(moves), Player, args)
    elif args.action == 'evaluate':
        cmd_evaluate(TicTacToe(moves), Player, args)
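# A hypothetical invocation (the script filename is an assumption):
#   python evaluate_tictactoe.py --action evaluate --load --position "4 0 8"
# --position is a space-separated list of already-played move indices, and
# --load restores the weights saved under ./checkpoints/tictactoe.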
    # Tail of a play_game variant that records every board state
    states.append(game.get_board_verbose(state))
    if game.terminal_test(state):
        return {
            "path": states,
            "winner": game.utility(state, game.to_move(game.initial))
        }

# for i in range(1, 501):
#     game = TicTacToe(5, 5, 5)
#     game_simulation = play_game(game, random_player, random_player)
#     file.write(str(game_simulation))
#     file.write("\n")

# Record 500 games of the random player against the Monte Carlo player
for i in range(1, 501):
    game = TicTacToe(5, 5, 5)
    game_simulation = play_game(game, random_player, monte_carlo_player)
    file.write(str(game_simulation))
    file.write("\n")

# for i in range(1, 501):
#     game = TicTacToe(5, 5, 5)
#     game_simulation = play_game(game, random_player, minimax_player)
#     file.write(str(game_simulation))
#     file.write("\n")

# for i in range(1, 501):
#     game = TicTacToe(5, 5, 5)
#     game_simulation = play_game(game, monte_carlo_player, monte_carlo_player)
#     file.write(str(game_simulation))
#     file.write("\n")
def tearDown(self):
    """Reinitialize RL Agent."""
    game = TicTacToe()
    self.agent = Agent(game)
def setUp(self):
    """Initialize RL Agent."""
    game = TicTacToe()
    self.agent = Agent(game)
def play_tictactoe(mode):
    """Start TicTacToe game with RL Agent."""
    print('==TIC TAC TOE==')
    game = TicTacToe()
    if mode == 'train':
        agent = Agent(game)
        history = agent.train(10000)
        print('After 10000 Episodes')

        # Plot Reward Stats
        rfig, raxs = plt.subplots(nrows=3, ncols=1)
        rax_reward1 = raxs[0]
        rax_reward1.grid()
        rax_reward2 = raxs[1]
        rax_reward2.grid()
        rax_reward3 = raxs[2]
        rax_reward3.grid()
        rax_reward1.plot(history[0][:100], history[1][:100])
        rax_reward1.set(ylabel='Cumulative Reward',
                        title='Tic Tac Toe Cumulative Reward Episodes')
        rax_reward2.plot(history[0][:1000], history[1][:1000], color='g')
        rax_reward2.set(ylabel='Cumulative Reward')
        rax_reward3.plot(history[0][:10000], history[1][:10000], color='r')
        rax_reward3.set(xlabel='Episode', ylabel='Cumulative Reward')
        rfig.savefig('tictactoe_reward.png')

        # Plot Qtable Memory Usage Stats
        memfig, memaxs = plt.subplots(nrows=3, ncols=1)
        memax_reward1 = memaxs[0]
        memax_reward1.grid()
        memax_reward2 = memaxs[1]
        memax_reward2.grid()
        memax_reward3 = memaxs[2]
        memax_reward3.grid()
        memax_reward1.plot(history[0][:100], history[2][:100])
        memax_reward1.set(ylabel='Size (KB)',
                          title='Tic Tac Toe QTable Size Episodes')
        memax_reward2.plot(history[0][:1000], history[2][:1000], color='g')
        memax_reward2.set(ylabel='Size (KB)')
        memax_reward3.plot(history[0][:10000], history[2][:10000], color='r')
        memax_reward3.set(xlabel='Episode', ylabel='Size (KB)')
        memfig.savefig('tictactoe_memory.png')
        plt.show()

        agent.save_values(path='data/tictactoe_qtable.json')
        agent.stats()
        agent.demo()
    elif mode == 'demo':
        # Close the file handle properly instead of json.load(open(...))
        with open('data/tictactoe_qtable.json') as f:
            qtable = json.load(f)
        agent = Agent(game, qtable=qtable)
        agent.demo()
    else:
        print('Mode {} is invalid.'.format(mode))
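# The two plotting blocks above are near-duplicates. A hedged refactor
# sketch: one helper that draws the same three-panel figure for any history
# series. The helper name and signature are inventions for illustration.
import matplotlib.pyplot as plt

def plot_history_panels(history, series_idx, ylabel, title, filename):
    """Plot a history series at 100/1000/10000 episodes in three panels."""
    fig, axs = plt.subplots(nrows=3, ncols=1)
    for ax, limit, color in zip(axs, (100, 1000, 10000), (None, 'g', 'r')):
        ax.grid()
        ax.plot(history[0][:limit], history[series_idx][:limit], color=color)
        ax.set(ylabel=ylabel)
    axs[0].set(title=title)
    axs[2].set(xlabel='Episode')
    fig.savefig(filename)

# Usage matching the calls above:
#   plot_history_panels(history, 1, 'Cumulative Reward',
#                       'Tic Tac Toe Cumulative Reward Episodes',
#                       'tictactoe_reward.png')
#   plot_history_panels(history, 2, 'Size (KB)',
#                       'Tic Tac Toe QTable Size Episodes',
#                       'tictactoe_memory.png')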
def test_compute_ending(self):
    game = TicTacToe()
    self.assertEqual(game._board.cell_used_count, 0)
    game.play(Point(0, 0))
    self.assertEqual(game._board.cell_used_count, 1)
def test_tictactoe(self):
    ai_player = Player("AI_1", Token.A, True)
    game = TicTacToe(p1=ai_player)
    depth = 2
    minmax = Minmax_AB_Parallel(ai_player, depth)
    minmax.compute(game)
def test_init(self):
    game = TicTacToe()
    self.assertFalse(game.is_over)
    self.assertIsNone(game.winner)
    self.assertEqual(len(game.history), 0)
    self.assertEqual(game._current_player, game._p1)