def test_choose_optimal_move():
    '''(5 points) choose_optimal_move()

    Builds a search tree from several fixed positions and checks that
    MCTSPlayer.choose_optimal_move() returns the expected (row, column),
    first for TicTacToe and then for Othello.
    '''
    # ---------------------
    # Game: TicTacToe
    g = TicTacToe()
    p = MCTSPlayer()

    # (board rows, player to move, second argument of build_tree, expected move)
    # NOTE(review): the build_tree argument is presumably the number of
    # search iterations -- confirm against MCNode.build_tree.
    scenarios = (
        ([[1, -1, 1], [0, 0, -1], [0, 1, -1]], 1, 100, (2, 0)),
        ([[1, -1, 1], [0, 1, -1], [0, 1, -1]], -1, 100, (2, 0)),
        ([[1, -1, 1], [0, 0, 0], [0, 0, 0]], -1, 200, (1, 1)),
    )
    for rows, turn, budget, (want_r, want_c) in scenarios:
        root = MCNode(np.array(rows), x=turn)
        root.build_tree(g, budget)
        r, c = p.choose_optimal_move(root)
        assert r == want_r
        assert c == want_c

    # ---------------------
    # The AI agent should be compatible with both games: TicTacToe and Othello.
    # Game: Othello
    g = Othello()
    s = np.zeros((8, 8), dtype=int)
    s[0, :4] = [0, -1, 1, -1]  # only the top row holds stones
    snapshot = s.copy()
    root = MCNode(s, x=1)  # it's X player's turn
    root.build_tree(g, 100)
    # building the tree must leave the caller's board untouched
    assert np.allclose(s, snapshot)
    r, c = p.choose_optimal_move(root)
    assert r == 0
    assert c == 0
def A3C_train(shared_model: nn.Module, optimizer, counter, n):
    """Worker loop that trains ``shared_model`` with an actor-critic update.

    A private ``Net(n)`` copy is synchronised from ``shared_model`` before
    every rollout, used to play ``Othello(n)``, and its gradients are handed
    to ``shared_model`` so that ``optimizer`` can apply them.

    Args:
        shared_model: model whose parameters receive the updates.
        optimizer: optimizer stepped after every rollout; presumably built
            over ``shared_model.parameters()`` -- confirm at the call site.
        counter: object exposing ``.value`` and ``.get_lock()`` (looks like a
            ``multiprocessing.Value``); counts finished episodes.
        n: value passed to ``Net`` and ``Othello``; a rollout is capped at
            ``n * n`` environment steps.

    The loop ends once ``counter.value`` reaches 10000.
    """
    model = Net(n)
    model.train()

    env = Othello(n)
    env.play((4, 6))
    state = env.data

    max_episode_count = 10000
    while counter.value < max_episode_count:
        # Start every rollout from the latest shared weights.
        model.load_state_dict(shared_model.state_dict())

        history = []  # (state, action, reward)
        done = False
        episode_length = 0
        while not done and episode_length < n * n:
            action = model.sample_action(
                torch.from_numpy(state.astype(np.float32)).unsqueeze(0))
            next_state, reward, done = env.step(action)
            history.append((state, action, reward))
            state = next_state
            # Fix: the counter was never advanced, so the n * n cap in the
            # loop condition could never stop a rollout.
            episode_length += 1

        states, actions, rewards = zip(*history)
        return_ = reward_to_return(rewards, 0.95, 0)

        policy_logit, value = model(torch.Tensor(states))
        td = torch.Tensor(return_) - value.squeeze(1)
        policy_dist = [
            logits_to_dist(pl)
            for pl in policy_logit.view(policy_logit.size(0), -1)
        ]

        # The advantage is treated as a constant in the policy term.
        advantage = td.detach()
        value_loss = td.pow(2).mean()
        # np.where(a.flatten())[0] yields the flat indices of the non-zero
        # entries of each action (presumably a one-hot board mask -- confirm).
        policy_loss = (torch.cat([
            -pd.log_prob(torch.from_numpy(np.where(a.flatten())[0]))
            for pd, a in zip(policy_dist, actions)
        ]) * advantage).sum()
        loss = value_loss + policy_loss

        optimizer.zero_grad()
        # Fix: also clear the worker's own gradients. optimizer.zero_grad()
        # only touches the parameters the optimizer owns, and when it sets
        # gradients to None the aliasing created below is lost, so without
        # this call the local gradients pile up across updates.
        model.zero_grad()
        loss.backward()

        # Hand the local gradients over to shared_model.
        for param, shared_param in zip(model.parameters(),
                                       shared_model.parameters()):
            if shared_param.grad is not None:
                break
            shared_param._grad = param.grad
        optimizer.step()

        # If the episode finished, count it and start a new game.
        if done:
            with counter.get_lock():
                counter.value += 1
            episode_length = 0
            env = Othello(n)
            state = env.data
class ReversiEnv(object):
    """Small environment-style wrapper around an ``Othello`` game object.

    The game itself is only created by ``reset()``; ``action_space()`` and
    ``step()`` read ``self.game`` and therefore need ``reset()`` to have been
    called first.
    """

    def __init__(self):
        super(ReversiEnv, self).__init__()
        # board side length used by length()
        self.n = 8

    def length(self):
        """Return ``n * n``, the number of squares for side length ``n``."""
        side = self.n
        return side * side

    def reset(self):
        """Create a new game and return its ``(state, turn)`` pair."""
        game = Othello()
        self.game = game
        return game.get_state(), game.get_turn()

    def action_space(self):
        """Return whatever the current game reports from ``get_actions()``."""
        return self.game.get_actions()

    def step(self, action):
        """Apply ``action`` and return ``(state, turn, reward)``.

        The reward is the value of ``get_winner()`` read right after the move.
        """
        game = self.game
        game.move(action)
        reward = game.get_winner()
        return game.get_state(), game.get_turn(), reward
def test_choose_a_move():
    '''(5 points) random choose_a_move()

    Checks that RandomPlayer only returns empty squares, never modifies the
    board it is given, spreads its choices over all valid moves, and can be
    used to play complete Othello games.
    '''
    # ---------------------
    # Game: TicTacToe
    g = TicTacToe()
    p = RandomPlayer()

    # the only empty squares are on the main diagonal
    diagonal_open = [[0, 1, 1], [1, 0, -1], [1, 1, 0]]
    s = np.array(diagonal_open)
    s_ = np.array(diagonal_open)
    count = np.zeros(3)
    for _ in range(100):
        r, c = p.choose_a_move(g, s, x=1)
        assert s[r, c] == 0  # player needs to choose a valid move
        assert np.allclose(s, s_)  # the player should never change the game state object
        assert r == c
        assert -1 < r < 3
        count[c] += 1
    # the random player should give roughly equal chance to each valid move
    for tally in count:
        assert tally > 20

    # the only empty squares are on the anti-diagonal
    s = np.array([[1, 1, 0], [1, 0, -1], [0, 1, 1]])
    for _ in range(100):
        r, c = p.choose_a_move(g, s, x=1)
        assert s[r, c] == 0
        assert r == 2 - c
        assert -1 < r < 3

    # ---------------------
    # The AI agent should be compatible with both games: TicTacToe and Othello.
    # Game: Othello
    g = Othello()
    s = np.zeros((8, 8), dtype=int)
    s[3, 3:6] = -1
    s[4, 3:6] = 1
    p = RandomPlayer()
    s_ = s.copy()
    count = np.zeros(5)
    for _ in range(200):
        r, c = p.choose_a_move(g, s, x=1)
        assert np.allclose(s, s_)  # the player should never change the game state object
        assert s[r, c] == 0  # player needs to choose a valid move
        assert r == 2
        assert 1 < c < 7
        count[c - 2] += 1
    # the random player should give roughly equal chance to each valid move
    for tally in count:
        assert tally > 20

    # test whether we can run a game using random player
    s = np.zeros((8, 8), dtype=int)
    s[0, :5] = [0, 0, -1, 1, -1]
    for _ in range(10):
        assert g.run_a_game(p, p, s=s, x=1) == -1

    s = np.zeros((8, 8), dtype=int)
    s[0, :4] = [0, -1, 1, -1]
    # sum of the results of ten random games from this position
    w = sum(g.run_a_game(p, p, s=s, x=1) for _ in range(10))
    assert np.abs(w) < 9
# --- demo configuration -------------------------------------------------
players = 0     # how many of the two seats are taken by a Player() instead of an AI
size = 8        # board size handed to Othello and EdgeScore
gui = True      # open the GUI instead of running the batch of games below
verbose = True

# --- pick the two opponents ---------------------------------------------
if players == 2:
    player1, player2 = Player(), Player()
elif players == 1:
    player1, player2 = Player(), AI(greedy_move, 'Greedy AI')
else:
    player1, player2 = (
        AI(greedy_move, 'Greedy AI'),
        AI(Negamax(4, score=EdgeScore(size)), 'Negamax AI'),
    )

game = Othello(player1, player2, size, verbose)

# --- run ----------------------------------------------------------------
if gui:
    # from here on `size` is the second argument passed to GUI, not the board size
    size = 500
    margin = 50
    GUI(game, size, margin).mainloop()
elif players == 0:
    # AI against AI: play a batch of games and tally the winners
    games = 100
    results = dict.fromkeys((player1, player2, GridGame.DRAW), 0)
    for i in range(games):
        game.play()
        results[game.winner] += 1
        game.reset()
    print(results)
def run_a_game(p):
    '''
    Run an interactive Othello game in a pygame window.

    The human plays X (x == 1) with the mouse, and the AI player `p` plays
    O (x == -1). Pressing F restarts the game; pressing ESC or closing the
    window exits the process via sys.exit().

    Input:
        p: the AI player that you are playing with; it must provide
           choose_a_move(g, s, x) returning a (row, column) pair.
    '''
    # initialize the game and the window
    g = Othello()
    win = init_screen()

    # initialize the game state
    s = g.initial_game_state()
    x = 1  # current turn (X player's turn)

    # draw empty board
    draw_empty_board(win, g, s, x)
    canPlay = True
    pygame.display.update()

    # run the game
    while True:
        event = pygame.event.wait()

        # close the window
        if event.type == pygame.QUIT:
            pygame.quit()
            sys.exit()

        # Press Key
        if event.type == pygame.KEYDOWN:
            # press F button (restart game)
            if event.key == pygame.K_f:
                s = g.initial_game_state()
                x = 1  # X player's turn
                draw_empty_board(win, g, s, x)
                canPlay = True
                pygame.display.update()
            # press ESC button (exit game)
            if event.key == pygame.K_ESCAPE:
                pygame.quit()
                sys.exit()

        # Click Mouse
        # Fix: compare the event type with ==, not `is`. Event types are
        # plain integers, and identity comparison of integers is only
        # reliable for small cached values, so clicks could be ignored.
        if event.type == pygame.MOUSEBUTTONDOWN and canPlay and x == 1:
            # Human player's turn to choose a move
            # get mouse position
            (mouseX, mouseY) = pygame.mouse.get_pos()
            # convert to board grid (row,column)
            r, c = map_mouse_to_board(mouseX, mouseY)
            # if the move is valid
            if g.check_valid_move(s, r, c, x):
                # update game state; apply_a_move returns whose turn is next
                x = g.apply_a_move(s, r, c, x)
                # draw the board
                draw_board(win, g, s, x)
                # check if the game has ended already
                result = g.check_game(s)
                if result is not None:
                    draw_result(win, result)
                    canPlay = False
                # wake the loop up so the computer branch below can run
                pygame.event.post(pygame.event.Event(pygame.USEREVENT))
                print("X player chooses:", str(r), str(c))

        if event.type == pygame.USEREVENT and x == -1 and canPlay:
            # computer's turn to choose a move
            r, c = p.choose_a_move(g, s, x)
            # the AI must return a valid move
            assert g.check_valid_move(s, r, c, x)
            # update game state
            x = g.apply_a_move(s, r, c, x)
            # draw the board
            draw_board(win, g, s, x)
            # check if the game has ended already
            result = g.check_game(s)
            if result is not None:
                draw_result(win, result)
                canPlay = False
            # post again: if it is still O's turn the computer moves once more
            pygame.event.post(pygame.event.Event(pygame.USEREVENT))
            print("O player chooses:", str(r), str(c))

        # update the UI display
        pygame.display.update()
def main():
    """Play one Othello game: MCTS moves first, a uniformly weighted sampler replies.

    At most 32 rounds are played; the loop stops early as soon as the game
    reports game_over(). The final state, score and winner are printed.
    """
    searcher = MCTS()
    game = Othello()

    for _ in range(32):
        # MCTS side
        game.move(searcher.get_action(game))
        if game.game_over():
            break

        # opponent: every available action gets the same weight
        legal = game.get_actions()
        weights = np.ones(legal.shape[0])
        game.move(sample(weights, legal))
        if game.game_over():
            break

    print(game.get_true_state())
    print(game.get_score())
    print(game.get_winner())
    print('game over')
def test_choose_optimal_move():
    '''(5 points) choose_optimal_move()

    For each position the full tree is built and evaluated, then
    MiniMaxPlayer.choose_optimal_move() must return the expected
    (row, column).
    '''
    # ---------------------
    # Game: TicTacToe
    g = TicTacToe()
    p = MiniMaxPlayer()

    # (board rows, player to move, expected move)
    scenarios = (
        ([[1, -1, 1], [0, 0, -1], [0, 1, -1]], 1, (2, 0)),
        ([[1, -1, 1], [0, 1, -1], [0, 1, -1]], -1, (2, 0)),
        ([[1, -1, 1], [0, 0, 0], [0, 0, 0]], -1, (1, 1)),
        ([[1, -1, 1], [0, 1, -1], [-1, 1, -1]], 1, (1, 0)),
    )
    for rows, turn, (want_r, want_c) in scenarios:
        root = MMNode(GameState(np.array(rows), x=turn))
        root.build_tree(g)
        root.compute_v(g)
        r, c = p.choose_optimal_move(root)
        assert r == want_r
        assert c == want_c

    # ---------------------
    # The AI agent should also be compatible with Othello.
    # Game: Othello
    g = Othello()
    b = np.zeros((8, 8), dtype=int)
    b[0, :4] = [0, -1, 1, -1]  # only the top row holds stones
    snapshot = b.copy()
    root = MMNode(GameState(b, x=1))  # it's X player's turn
    root.build_tree(g)
    root.compute_v(g)
    # searching must not alter the root position
    assert np.allclose(root.s.b, snapshot)
    r, c = p.choose_optimal_move(root)
    assert r == 0
    assert c == 0
def test_compute_v():
    '''(5 points) compute_v()

    Checks the value MMNode.compute_v() stores in n.v: 1 when X wins,
    -1 when O wins and 0 for a tie, for TicTacToe and Othello positions.
    '''

    def othello_board(*top_rows):
        # 8x8 integer board; the given (possibly shortened) rows fill the
        # top of the board and every other square stays empty
        grid = np.zeros((8, 8), dtype=int)
        for i, row in enumerate(top_rows):
            grid[i, :len(row)] = row
        return grid

    def searched_value(game, board, turn):
        # build the full tree below the position and return its value
        node = MMNode(board, x=turn)
        node.build_tree(game)
        node.compute_v(game)
        return node.v

    # ---------------------
    # Game: TicTacToe
    g = TicTacToe()

    # the value of a terminal node is its game result (no tree is built)
    finished = (
        ([[1, 0, 0], [0, 1, -1], [0, -1, 1]], -1, 1),    # X player won the game
        ([[1, 1, -1], [-1, 1, 1], [1, -1, -1]], -1, 0),  # a tie
        ([[1, 0, 1], [0, 0, 1], [-1, -1, -1]], 1, -1),   # O player won the game
    )
    for rows, turn, expected in finished:
        node = MMNode(np.array(rows), x=turn)
        node.compute_v(g)
        assert node.v == expected

    # non-terminal positions: X takes the max over its children, O the min
    searched = (
        # X to move, two of the three children are wins for X
        ([[0, -1, 1], [0, 1, -1], [0, -1, 1]], 1, 1),
        # O to move, one of the two children is a win for O
        ([[0, 1, -1], [0, -1, 1], [1, 1, -1]], -1, -1),
        # a tie after one move
        ([[-1, 1, -1], [-1, 1, 1], [0, -1, 1]], 1, 0),
        # optimal moves lead to: O player wins
        ([[-1, 1, -1], [1, 0, 0], [1, 0, 0]], -1, -1),
        # optimal moves lead to a tie
        ([[0, 1, 0], [0, 1, 0], [0, 0, -1]], -1, 0),
        # optimal moves lead to: X player wins
        ([[1, -1, 1], [0, 0, 0], [0, -1, 0]], 1, 1),
        ([[1, -1, 1], [0, 0, 0], [0, 0, -1]], 1, 1),
        ([[1, -1, 1], [0, 0, -1], [0, 0, 0]], 1, 1),
        ([[1, -1, 1], [-1, 0, 0], [0, 0, 0]], 1, 1),
        ([[1, -1, 1], [0, 0, 0], [-1, 0, 0]], 1, 1),
        ([[1, -1, 1], [0, 0, 1], [0, 0, -1]], -1, -1),
        ([[1, -1, 1], [0, 0, -1], [0, 1, -1]], 1, 1),
        ([[1, -1, 1], [0, 0, 0], [0, 1, -1]], -1, 0),
    )
    for rows, turn, expected in searched:
        assert searched_value(g, np.array(rows), turn) == expected

    # ---------------------
    # The AI agent should be compatible with both games: TicTacToe and Othello.
    # Game: Othello
    g = Othello()

    s = othello_board([0, -1, 1, -1])
    snapshot = s.copy()
    value = searched_value(g, s, 1)  # it's X player's turn
    assert np.allclose(s, snapshot)  # the search must not modify the board
    assert value == 1

    s = othello_board([0, 0, -1, 1, -1])
    assert searched_value(g, s, 1) == -1  # it's X player's turn

    # the same board evaluated once for each player to move
    s = othello_board([0, 0, -1, 1])
    assert searched_value(g, s, -1) == -1  # it's O player's turn
    assert searched_value(g, s, 1) == 1    # it's X player's turn

    s = othello_board([0, -1, 1, -1, 1, -1], [1], [1], [-1])
    assert searched_value(g, s, 1) == 1  # it's X player's turn
def test_expand():
    '''(5 points) expand

    MCNode.expand() must add one child per valid move, leave the node's own
    state untouched and return one of the newly created children.
    '''

    def othello_board(*top_rows):
        # 8x8 integer board; the given (possibly shortened) rows fill the
        # top of the board and every other square stays empty
        grid = np.zeros((8, 8), dtype=int)
        for i, row in enumerate(top_rows):
            grid[i, :len(row)] = row
        return grid

    def expect_unvisited(node, parent, turn):
        # a freshly created child: linked to its parent, no children, no statistics
        assert node.s.x == turn
        assert node.p == parent
        assert node.c == []
        assert node.v == 0
        assert node.N == 0

    def expect_child(children, board, move, turn=None):
        # some child must show `board`; every such child must record `move`
        # (and, when given, the player to move next)
        hits = [node for node in children if np.allclose(node.s.b, board)]
        for node in hits:
            assert node.m == move
            if turn is not None:
                assert node.s.x == turn
        assert hits

    def expect_selected(children, selected):
        # the node returned by expand() should be in the children list
        assert [node for node in children if selected == node]

    # ---------------------
    # Game: TicTacToe
    g = TicTacToe()

    # Root: X to move
    root_rows = [[0, 1, -1], [0, -1, 1], [0, 1, -1]]
    n = MCNode(GameState(np.array(root_rows), x=1))
    sc = n.expand(g)
    assert n.s.x == 1
    assert len(n.c) == 3
    assert type(sc) == MCNode
    expect_unvisited(sc, n, -1)
    # the current game state should not change after expanding
    assert np.allclose(n.s.b, np.array(root_rows))
    for child in n.c:
        expect_unvisited(child, n, -1)
    expect_child(n.c, np.array([[1, 1, -1], [0, -1, 1], [0, 1, -1]]), (0, 0))
    expect_child(n.c, np.array([[0, 1, -1], [1, -1, 1], [0, 1, -1]]), (1, 0))
    expect_child(n.c, np.array([[0, 1, -1], [0, -1, 1], [1, 1, -1]]), (2, 0))
    expect_selected(n.c, sc)

    # --------------------------
    # Root: O to move
    n = MCNode(GameState(np.array([[1, 1, -1], [0, -1, 1], [0, 1, -1]]), x=-1))
    sc = n.expand(g)
    assert n.s.x == -1
    assert len(n.c) == 2
    assert type(sc) == MCNode
    expect_unvisited(sc, n, 1)
    for child in n.c:
        expect_unvisited(child, n, 1)
    expect_child(n.c, np.array([[1, 1, -1], [-1, -1, 1], [0, 1, -1]]), (1, 0))
    expect_child(n.c, np.array([[1, 1, -1], [0, -1, 1], [-1, 1, -1]]), (2, 0))
    expect_selected(n.c, sc)

    # ---------------------------
    # Empty board: nine children, each with exactly one X stone
    n = MCNode(GameState(np.zeros((3, 3)), x=1))
    sc = n.expand(g)
    assert n.s.x == 1
    assert len(n.c) == 9
    for child in n.c:
        expect_unvisited(child, n, -1)
        assert np.sum(child.s.b) == 1
    expect_selected(n.c, sc)

    # ---------------------
    # The AI agent should be compatible with both games: TicTacToe and Othello.
    # Game: Othello
    g = Othello()
    b = othello_board([0, -1, 1, -1])
    snapshot = b.copy()
    n = MCNode(GameState(b, x=1))  # it's X player's turn
    n.expand(g)
    assert len(n.c) == 2
    assert n.s.x == 1
    # the current game state should not change after expanding
    assert np.allclose(n.s.b, snapshot)
    for child in n.c:
        assert type(child) == MCNode
        assert child.p == n
        assert child.c == []
        assert child.v == 0
    # it is still X player's turn because there is no valid move for O player
    expect_child(n.c, othello_board([1, 1, 1, -1]), (0, 0), turn=1)
    expect_child(n.c, othello_board([0, -1, 1, 1, 1]), (0, 4), turn=-1)

    # ---------------------
    b = othello_board([0, 1, -1, 1], [0, 0, 1])
    snapshot = b.copy()
    n = MCNode(GameState(b, x=-1))  # it's O player's turn
    n.expand(g)
    print(n.c)
    assert len(n.c) == 3
    assert n.s.x == -1
    # the current game state should not change after expanding
    assert np.allclose(n.s.b, snapshot)
    for child in n.c:
        assert type(child) == MCNode
        assert child.p == n
        assert child.c == []
        assert child.v == 0
    # no valid move for X player, so O moves again
    expect_child(n.c, othello_board([-1, -1, -1, 1], [0, 0, 1]), (0, 0), turn=-1)
    expect_child(n.c, othello_board([0, 1, -1, -1, -1], [0, 0, 1]), (0, 4), turn=1)
    expect_child(n.c, othello_board([0, 1, -1, 1], [0, 0, -1], [0, 0, -1]), (2, 2), turn=1)
def reset(self):
    """Replace ``self.game`` with a new ``Othello()`` and return ``(state, turn)``."""
    game = Othello()
    self.game = game
    return game.get_state(), game.get_turn()
def test_build_tree():
    '''(5 points) build_tree

    MMNode.build_tree() must create the complete tree below a position
    (children, grandchildren, ...) without computing any values and without
    modifying the root state. Checked on TicTacToe, Othello and GO.
    '''

    def othello_board(*top_rows):
        # 8x8 integer board; the given (possibly shortened) rows fill the
        # top of the board and every other square stays empty
        grid = np.zeros((8, 8), dtype=int)
        for i, row in enumerate(top_rows):
            grid[i, :len(row)] = row
        return grid

    def children_with(children, board, move, turn=None):
        # return the children showing `board`; each must record `move`
        # (and, when given, the player to move next); at least one must exist
        hits = [node for node in children if np.allclose(node.s.b, board)]
        for node in hits:
            assert node.m == move
            if turn is not None:
                assert node.s.x == turn
        assert hits
        return hits

    def expect_bare_root(root):
        # a root has no value yet, no parent and no move leading to it
        assert root.v == None
        assert root.p == None
        assert root.m == None

    def expect_only_leaf(node, turn):
        # `node` has exactly one child, which itself has no children
        assert len(node.c) == 1
        leaf = node.c[0]
        assert leaf.s.x == turn
        assert leaf.p == node
        assert leaf.v == None
        assert leaf.c == []

    def expect_reply(parent, move, board):
        # the first child of `parent` is a childless node reached by `move`
        reply = parent.c[0]
        assert reply.p == parent
        assert reply.v == None
        assert reply.m == move
        assert reply.c == []
        assert np.allclose(reply.s.b, board)

    # ---------------------
    # Game: TicTacToe
    g = TicTacToe()

    # current node (root node), X to move
    b = np.array([[0, 1, -1], [0, -1, 1], [0, 1, -1]])
    snapshot = b.copy()
    n = MMNode(GameState(b, x=1))
    n.build_tree(g)
    # the current game state should not change after building the tree
    assert np.allclose(b, snapshot)
    assert len(n.c) == 3
    assert n.s.x == 1
    expect_bare_root(n)
    assert np.allclose(n.s.b, snapshot)
    for child in n.c:
        assert type(child) == MMNode
        assert child.s.x == -1
        assert child.p == n
        assert len(child.c) == 2
        assert child.v == None

    # the three children of the root
    ca = children_with(n.c, np.array([[1, 1, -1], [0, -1, 1], [0, 1, -1]]), (0, 0))[-1]
    cb = children_with(n.c, np.array([[0, 1, -1], [1, -1, 1], [0, 1, -1]]), (1, 0))[-1]
    cc = children_with(n.c, np.array([[0, 1, -1], [0, -1, 1], [1, 1, -1]]), (2, 0))[-1]

    # Child Node A's children
    for grandchild in ca.c:
        assert grandchild.s.x == 1
        assert grandchild.p == ca
        assert grandchild.v == None
    # grand child A1 has one great grand child (A11), which has no children
    for node in children_with(ca.c, np.array([[1, 1, -1], [-1, -1, 1], [0, 1, -1]]), (1, 0)):
        expect_only_leaf(node, -1)
    # grand child A2 has no children
    for node in children_with(ca.c, np.array([[1, 1, -1], [0, -1, 1], [-1, 1, -1]]), (2, 0)):
        assert node.c == []

    # Child Node B's children: both have no children
    for grandchild in cb.c:
        assert grandchild.s.x == 1
        assert grandchild.p == cb
        assert grandchild.c == []
        assert grandchild.v == None
    children_with(cb.c, np.array([[-1, 1, -1], [1, -1, 1], [0, 1, -1]]), (0, 0))
    children_with(cb.c, np.array([[0, 1, -1], [1, -1, 1], [-1, 1, -1]]), (2, 0))

    # Child Node C's children
    for grandchild in cc.c:
        assert grandchild.s.x == 1
        assert grandchild.p == cc
        assert grandchild.v == None
    # grand child C1 has no children
    for node in children_with(cc.c, np.array([[-1, 1, -1], [0, -1, 1], [1, 1, -1]]), (0, 0)):
        assert node.c == []
    # grand child C2 has one great grand child (C21), which has no children
    for node in children_with(cc.c, np.array([[0, 1, -1], [-1, -1, 1], [1, 1, -1]]), (1, 0)):
        expect_only_leaf(node, -1)

    # -----------------------
    # O to move with four empty squares
    n = MMNode(GameState(np.array([[0, 0, 1], [0, 1, 1], [-1, 0, -1]]), x=-1))
    n.build_tree(g)
    assert len(n.c) == 4
    assert n.s.x == -1
    expect_bare_root(n)
    # (child board, move, number of children; None = terminal node, no child)
    replies = (
        (np.array([[-1, 0, 1], [0, 1, 1], [-1, 0, -1]]), (0, 0), 3),
        (np.array([[0, -1, 1], [0, 1, 1], [-1, 0, -1]]), (0, 1), 3),
        (np.array([[0, 0, 1], [-1, 1, 1], [-1, 0, -1]]), (1, 0), 3),
        (np.array([[0, 0, 1], [0, 1, 1], [-1, -1, -1]]), (2, 1), None),
    )
    for child in n.c:
        assert child.s.x == 1
        assert child.v == None
        assert child.p == n
        for board, move, fanout in replies:
            if np.allclose(child.s.b, board):
                assert child.m == move
                if fanout is None:
                    assert child.c == []
                else:
                    assert len(child.c) == fanout

    # ---------------------
    # The AI agent should be compatible with both games: TicTacToe and Othello.
    # Game: Othello
    g = Othello()
    b = othello_board([0, 0, -1, 1, -1])
    snapshot = b.copy()
    n = MMNode(GameState(b, x=1))  # it's X player's turn
    n.build_tree(g)
    # the current game state should not change after building the tree
    assert np.allclose(n.s.b, snapshot)
    assert len(n.c) == 2
    assert n.s.x == 1
    expect_bare_root(n)
    for child in n.c:
        assert type(child) == MMNode
        assert child.s.x == -1
        assert child.p == n
        assert child.v == None
        assert len(child.c) == 1
    ca = children_with(n.c, othello_board([0, 0, -1, 1, 1, 1]), (0, 5))[-1]
    cb = children_with(n.c, othello_board([0, 1, 1, 1, -1]), (0, 1))[-1]
    # grand child node A1
    expect_reply(ca, (0, 6), othello_board([0, 0, -1, -1, -1, -1, -1]))
    # grand child node B1
    expect_reply(cb, (0, 0), othello_board([-1, -1, -1, -1, -1]))

    # ------------------------------------
    b = othello_board([0, -1, 1, 1, -1])
    snapshot = b.copy()
    n = MMNode(GameState(b, x=1))  # it's X player's turn
    n.build_tree(g)
    # the current game state should not change after building the tree
    assert np.allclose(n.s.b, snapshot)
    assert len(n.c) == 2
    assert n.s.x == 1
    expect_bare_root(n)
    for child in n.c:
        assert type(child) == MMNode
        assert child.p == n
        assert child.v == None
        assert len(child.c) == 1
    # there is no valid move for O player, so O player needs to give up the chance
    ca = children_with(n.c, othello_board([1, 1, 1, 1, -1]), (0, 0), turn=1)[-1]
    cb = children_with(n.c, othello_board([0, -1, 1, 1, 1, 1]), (0, 5), turn=-1)[-1]
    # grand child node A1
    expect_reply(ca, (0, 5), othello_board([1, 1, 1, 1, 1, 1]))
    # grand child node B1
    expect_reply(cb, (0, 6), othello_board([0, -1, -1, -1, -1, -1, -1]))

    # ---------------------
    # The AI agent should also be compatible with the game: GO
    g = GO(board_size=2)  # game (2 x 2 board)
    b = np.array([[1, 1], [1, 0]])
    n = MMNode(GO_state(b, x=1, a=1))  # it's X player's turn
    n.build_tree(g)
    assert len(n.c) == 1
    only = n.c[0]
    assert only.s.x == -1
    assert only.s.a == 2
    assert len(only.c) == 0

    # same board object, now with O to move and one board string stored in p
    l = np.array([[0, 0], [0, -1]])
    p = {np.array2string(l)}
    n = MMNode(GO_state(b, x=-1, p=p, a=1))  # it's O player's turn
    n.build_tree(g)
    assert len(n.c) == 1
    only = n.c[0]
    assert only.s.x == 1
    assert only.s.a == 2
    assert len(only.c) == 0

    g = GO(board_size=2, max_game_length=1)  # game (2 x 2 board)
    b = np.array([[1, 1], [1, 0]])
    n = MMNode(GO_state(b, x=-1))  # x=-1: O player to move
    n.build_tree(g)
    assert len(n.c) == 2
    assert n.c[0].s.x == 1
    assert len(n.c[0].c) == 0

    g = GO(board_size=2, max_game_length=2)  # game (2 x 2 board)
    b = np.array([[1, 1], [1, 0]])
    n = MMNode(GO_state(b, x=-1))  # x=-1: O player to move
    n.build_tree(g)
    assert len(n.c) == 2
    for child in n.c:
        assert child.s.x == 1
        # the child whose board equals the root board has one child, the other has four
        expected_fanout = 1 if np.allclose(child.s.b, b) else 4
        assert len(child.c) == expected_fanout
def test_expand():
    '''(5 points) expand

    MMNode.expand() must add exactly one level of children (one per valid
    move) and leave the node's own state unchanged. Checked on TicTacToe,
    Othello and GO.
    '''

    def othello_board(*top_rows):
        # 8x8 integer board; the given (possibly shortened) rows fill the
        # top of the board and every other square stays empty
        grid = np.zeros((8, 8), dtype=int)
        for i, row in enumerate(top_rows):
            grid[i, :len(row)] = row
        return grid

    def expect_child(children, board, move, turn=None):
        # some child must show `board`; every such child must record `move`
        # (and, when given, the player to move next)
        hits = [node for node in children if np.allclose(node.s.b, board)]
        for node in hits:
            assert node.m == move
            if turn is not None:
                assert node.s.x == turn
        assert hits

    def expect_new_node(node, parent):
        # a node created by expand(): right type, linked to its parent,
        # no children of its own and no value yet
        assert type(node) == MMNode
        assert node.p == parent
        assert node.c == []
        assert node.v == None

    # ---------------------
    # Game: TicTacToe
    g = TicTacToe()

    # Root: X to move
    root_rows = [[0, 1, -1], [0, -1, 1], [0, 1, -1]]
    n = MMNode(GameState(np.array(root_rows), x=1))
    n.expand(g)
    assert len(n.c) == 3
    assert n.s.x == 1
    # the current game state should not change after expanding
    assert np.allclose(n.s.b, np.array(root_rows))
    for child in n.c:
        # only add one level of children nodes, not two levels
        expect_new_node(child, n)
        assert child.s.x == -1
    expect_child(n.c, np.array([[1, 1, -1], [0, -1, 1], [0, 1, -1]]), (0, 0))
    expect_child(n.c, np.array([[0, 1, -1], [1, -1, 1], [0, 1, -1]]), (1, 0))
    expect_child(n.c, np.array([[0, 1, -1], [0, -1, 1], [1, 1, -1]]), (2, 0))

    # --------------------------
    # Root: O to move
    n = MMNode(GameState(np.array([[1, 1, -1], [0, -1, 1], [0, 1, -1]]), x=-1))
    n.expand(g)
    assert n.s.x == -1
    assert len(n.c) == 2
    for child in n.c:
        assert child.s.x == 1
        assert child.p == n
        assert child.c == []
    expect_child(n.c, np.array([[1, 1, -1], [-1, -1, 1], [0, 1, -1]]), (1, 0))
    expect_child(n.c, np.array([[1, 1, -1], [0, -1, 1], [-1, 1, -1]]), (2, 0))

    # ---------------------------
    # Empty board: nine children, each with exactly one X stone
    n = MMNode(GameState(np.zeros((3, 3)), x=1))  # it's X player's turn
    n.expand(g)
    assert n.s.x == 1
    assert len(n.c) == 9
    for child in n.c:
        assert child.s.x == -1
        assert child.p == n
        assert child.c == []
        assert np.sum(child.s.b) == 1
        assert child.v == None

    # ---------------------
    # The AI agent should also be compatible with Othello game.
    # Game: Othello
    g = Othello()
    b = othello_board([0, -1, 1, -1])
    snapshot = b.copy()
    n = MMNode(GameState(b, x=1))  # it's X player's turn
    n.expand(g)
    assert len(n.c) == 2
    assert n.s.x == 1
    # the current game state should not change after expanding
    assert np.allclose(n.s.b, snapshot)
    for child in n.c:
        expect_new_node(child, n)
    # it is still X player's turn because there is no valid move for O player
    expect_child(n.c, othello_board([1, 1, 1, -1]), (0, 0), turn=1)
    expect_child(n.c, othello_board([0, -1, 1, 1, 1]), (0, 4), turn=-1)

    # ---------------------
    b = othello_board([0, 1, -1, 1], [0, 0, 1])
    snapshot = b.copy()
    n = MMNode(GameState(b, x=-1))  # it's O player's turn
    n.expand(g)
    print(n.c)
    assert len(n.c) == 3
    assert n.s.x == -1
    # the current game state should not change after expanding
    assert np.allclose(n.s.b, snapshot)
    for child in n.c:
        expect_new_node(child, n)
    # no valid move for X player, so O moves again
    expect_child(n.c, othello_board([-1, -1, -1, 1], [0, 0, 1]), (0, 0), turn=-1)
    expect_child(n.c, othello_board([0, 1, -1, -1, -1], [0, 0, 1]), (0, 4), turn=1)
    expect_child(n.c, othello_board([0, 1, -1, 1], [0, 0, -1], [0, 0, -1]), (2, 2), turn=1)

    # ---------------------
    # The AI agent should also be compatible with the game: GO
    g = GO(board_size=2)  # game (2 x 2 board)
    b = np.array([[0, 1], [1, 0]])
    snapshot = b.copy()
    n = MMNode(GO_state(b, x=-1))  # it's O player's turn
    n.expand(g)
    assert len(n.c) == 1  # only one valid move for O player: 'pass'
    assert n.s.x == -1
    # the current game state should not change after expanding
    assert np.allclose(n.s.b, snapshot)
    passed = n.c[0]
    expect_new_node(passed, n)
    assert np.allclose(passed.s.b, snapshot)
    # a pass is recorded as a move whose two entries are both None
    assert passed.m[0] is None
    assert passed.m[1] is None

    n = MMNode(GO_state(b, x=1))  # it's X player's turn
    n.expand(g)
    assert len(n.c) == 3
def test_choose_a_move():
    '''(5 points) random choose_a_move()

    Checks that RandomPlayer.choose_a_move() only returns valid moves, leaves
    the game state it is given untouched, and spreads its choices over every
    valid move, for TicTacToe, Othello and GO.  Also runs complete Othello
    games between two random players.
    '''
    blank = [0] * 8  # one empty row of an 8 x 8 Othello board

    #---------------------
    # Game: TicTacToe
    g = TicTacToe()  # game
    p = RandomPlayer()
    b = np.array([[0, 1, 1],
                  [1, 0, -1],
                  [1, 1, 0]])
    b_ = b.copy()  # independent reference copy, used to detect mutation
    s = GameState(b, x=1)
    count = np.zeros(3)
    for _ in range(100):
        r, c = p.choose_a_move(g, s)
        assert b_[r, c] == 0  # player needs to choose a valid move
        assert np.allclose(s.b, b_)  # the player should never change the game state object
        assert r == c  # in this example the valid moves are on the diagonal of the matrix
        assert -1 < r < 3
        count[c] += 1
    # the random player should give roughly equal chance to each valid move
    assert np.all(count > 20)

    b = np.array([[1, 1, 0],
                  [1, 0, -1],
                  [0, 1, 1]])
    s = GameState(b, x=1)
    for _ in range(100):
        r, c = p.choose_a_move(g, s)
        assert b[r, c] == 0
        assert r == 2 - c  # here the valid moves are on the anti-diagonal
        assert -1 < r < 3

    #---------------------
    # The AI agent should also be compatible with the game Othello.
    # now let's test on the game "Othello":
    g = Othello()  # game
    b = np.array([blank] * 3 +
                 [[0, 0, 0, -1, -1, -1, 0, 0],
                  [0, 0, 0, 1, 1, 1, 0, 0]] +
                 [blank] * 3)
    b_ = b.copy()
    p = RandomPlayer()
    s = GameState(b, x=1)
    count = np.zeros(5)
    for _ in range(200):
        r, c = p.choose_a_move(g, s)
        assert np.allclose(b, b_)  # the player should never change the game state object
        assert b[r, c] == 0  # player needs to choose a valid move
        assert r == 2
        assert 1 < c < 7
        count[c - 2] += 1  # columns 2..6 map to counters 0..4
    # the random player should give roughly equal chance to each valid move
    assert np.all(count > 20)

    # test whether we can run a game using random player
    # (every game from this position is expected to end with result -1)
    b = np.array([[0, 0, -1, 1, -1, 0, 0, 0]] + [blank] * 7)
    s = GameState(b, x=1)
    for _ in range(10):
        e = g.run_a_game(p, p, s=s)
        assert e == -1

    # from this position the summed results of 10 random games must not be
    # (almost) all in favour of one side
    b = np.array([[0, -1, 1, -1, 0, 0, 0, 0]] + [blank] * 7)
    s = GameState(b, x=1)
    w = 0
    for _ in range(10):
        w += g.run_a_game(p, p, s=s)
    assert np.abs(w) < 9

    #---------------------
    # The AI agent should also be compatible with the game: GO
    # now let's test on the game "GO":
    g = GO(board_size=2)  # game (2 x 2 board)
    s = g.initial_game_state()
    p = RandomPlayer()
    b_ = s.b.copy()
    # counters 0..3: board cells indexed as 2 * r + c, counter 4: 'pass'
    count = np.zeros(5)
    for _ in range(200):
        r, c = p.choose_a_move(g, s)
        assert np.allclose(s.b, b_)  # the player should never change the game state object
        assert s.a == 0
        if r is None and c is None:
            # the player chose to pass without placing any stone in the step
            count[-1] += 1
        else:
            count[2 * r + c] += 1
    # the random player should give roughly equal chance to each valid move
    assert np.all(count > 20)
def test_expand():
    '''(5 points) expand

    Checks that MMNode.expand() adds exactly one level of children (one per
    valid move), links each child to its parent, records the move that led to
    it, and leaves the expanded node's own state untouched.  Exercised on
    TicTacToe and Othello.
    '''
    blank = [0] * 8  # one empty row of an 8 x 8 Othello board

    def expect_child(parent, board, move, turn=None):
        '''Assert `parent` has a child holding `board`, reached through `move`.

        When `turn` is given, the child's player-to-move (`x`) must equal it.
        '''
        matches = [kid for kid in parent.c if np.allclose(kid.s, board)]
        assert matches  # at least one child must hold the expected board
        for kid in matches:
            assert kid.m == move
            if turn is not None:
                assert kid.x == turn

    #---------------------
    # Game: TicTacToe
    g = TicTacToe()  # game

    # Current Node (root)
    s = np.array([[0, 1, -1],
                  [0, -1, 1],
                  [0, 1, -1]])
    s_ = s.copy()  # independent reference copy, used to detect mutation
    n = MMNode(s, x=1)  # it's X player's turn
    # expand
    n.expand(g)
    assert len(n.c) == 3
    assert n.x == 1
    # the current game state should not change after expanding
    assert np.allclose(n.s, s_)
    for kid in n.c:
        assert isinstance(kid, MMNode)
        assert kid.x == -1
        assert kid.p == n
        assert kid.c == []  # only add one level of children nodes, not two levels.
        assert kid.v is None

    # child node A
    expect_child(n, [[1, 1, -1], [0, -1, 1], [0, 1, -1]], (0, 0))
    # child node B
    expect_child(n, [[0, 1, -1], [1, -1, 1], [0, 1, -1]], (1, 0))
    # child node C
    expect_child(n, [[0, 1, -1], [0, -1, 1], [1, 1, -1]], (2, 0))

    #--------------------------
    # Current Node (root)
    s = np.array([[1, 1, -1],
                  [0, -1, 1],
                  [0, 1, -1]])
    n = MMNode(s, -1)  # it's O player's turn
    n.expand(g)
    assert n.x == -1
    assert len(n.c) == 2
    for kid in n.c:
        assert kid.x == 1
        assert kid.p == n
        assert kid.c == []

    # child node A
    expect_child(n, [[1, 1, -1], [-1, -1, 1], [0, 1, -1]], (1, 0))
    # child node B
    expect_child(n, [[1, 1, -1], [0, -1, 1], [-1, 1, -1]], (2, 0))

    #---------------------------
    # empty board: every one of the 9 cells is a valid move
    n = MMNode(np.zeros((3, 3)), 1)
    n.expand(g)
    assert n.x == 1
    assert len(n.c) == 9
    for kid in n.c:
        assert kid.x == -1
        assert kid.p == n
        assert kid.c == []
        assert np.sum(kid.s) == 1  # exactly one X stone has been placed
        assert kid.v is None

    # The AI agent should be compatible with both games: TicTacToe and Othello.
    # now let's test on the game "Othello":
    #---------------------
    # Game: Othello
    g = Othello()  # game
    s = np.array([[0, -1, 1, -1, 0, 0, 0, 0]] + [blank] * 7)
    s_ = s.copy()
    n = MMNode(s, x=1)  # it's X player's turn
    # expand
    n.expand(g)
    assert len(n.c) == 2
    assert n.x == 1
    # the current game state should not change after expanding
    assert np.allclose(n.s, s_)
    for kid in n.c:
        assert isinstance(kid, MMNode)
        assert kid.p == n
        assert kid.c == []
        assert kid.v is None

    # child node A
    # it is still X player's turn because there is no valid move for O player
    expect_child(n, [[1, 1, 1, -1, 0, 0, 0, 0]] + [blank] * 7, (0, 0), turn=1)
    # child node B
    expect_child(n, [[0, -1, 1, 1, 1, 0, 0, 0]] + [blank] * 7, (0, 4), turn=-1)

    #---------------------
    s = np.array([[0, 1, -1, 1, 0, 0, 0, 0],
                  [0, 0, 1, 0, 0, 0, 0, 0]] + [blank] * 6)
    s_ = s.copy()
    n = MMNode(s, x=-1)  # it's O player's turn
    # expand
    n.expand(g)
    assert len(n.c) == 3
    assert n.x == -1
    # the current game state should not change after expanding
    assert np.allclose(n.s, s_)
    for kid in n.c:
        assert isinstance(kid, MMNode)
        assert kid.p == n
        assert kid.c == []
        assert kid.v is None

    # child node A (no valid move for X player, so it stays O player's turn)
    expect_child(n,
                 [[-1, -1, -1, 1, 0, 0, 0, 0],
                  [0, 0, 1, 0, 0, 0, 0, 0]] + [blank] * 6,
                 (0, 0), turn=-1)
    # child node B
    expect_child(n,
                 [[0, 1, -1, -1, -1, 0, 0, 0],
                  [0, 0, 1, 0, 0, 0, 0, 0]] + [blank] * 6,
                 (0, 4), turn=1)
    # child node C
    expect_child(n,
                 [[0, 1, -1, 1, 0, 0, 0, 0],
                  [0, 0, -1, 0, 0, 0, 0, 0],
                  [0, 0, -1, 0, 0, 0, 0, 0]] + [blank] * 5,
                 (2, 2), turn=1)
def test_build_tree():
    '''(5 points) build_tree

    Checks that MMNode.build_tree() expands the complete game tree below the
    root: every node is linked to its parent, records the move that produced
    it, has no value assigned yet, and terminal positions have no children.
    The root's board must be left untouched.  Exercised on TicTacToe and
    Othello.
    '''
    blank = [0] * 8  # one empty row of an 8 x 8 Othello board

    def find_child(parent, board, move, turn=None):
        '''Return the child of `parent` holding `board`, checking its move.

        Asserts that such a child exists and was reached through `move`;
        when `turn` is given, the child's player-to-move (`x`) must equal it.
        '''
        matches = [kid for kid in parent.c if np.allclose(kid.s, board)]
        assert matches  # at least one child must hold the expected board
        for kid in matches:
            assert kid.m == move
            if turn is not None:
                assert kid.x == turn
        return matches[-1]

    def check_first_child(parent, board, move):
        '''Assert the first child of `parent` is a childless, unevaluated node
        holding `board`, reached through `move`.'''
        kid = parent.c[0]
        assert kid.p == parent
        assert kid.v is None
        assert kid.m == move
        assert kid.c == []
        assert np.allclose(kid.s, board)

    def check_root(root, turn, n_children):
        '''Assert `root` is an unevaluated root node with `n_children` children.'''
        assert len(root.c) == n_children
        assert root.x == turn
        assert root.v is None
        assert root.p is None
        assert root.m is None

    #---------------------
    # Game: TicTacToe
    g = TicTacToe()  # game
    # current node (root node)
    s = np.array([[0, 1, -1],
                  [0, -1, 1],
                  [0, 1, -1]])
    s_ = s.copy()
    n = MMNode(s, x=1)  # it's X player's turn
    n.build_tree(g)
    # the current game state should not change after building the tree
    assert np.allclose(s, s_)
    check_root(n, turn=1, n_children=3)
    assert np.allclose(n.s, s_)
    for kid in n.c:
        assert isinstance(kid, MMNode)
        assert kid.x == -1
        assert kid.p == n
        assert len(kid.c) == 2
        assert kid.v is None

    #-----------------------
    # child node A
    ca = find_child(n, [[1, 1, -1], [0, -1, 1], [0, 1, -1]], (0, 0))
    # child node B
    cb = find_child(n, [[0, 1, -1], [1, -1, 1], [0, 1, -1]], (1, 0))
    # child node C
    cc = find_child(n, [[0, 1, -1], [0, -1, 1], [1, 1, -1]], (2, 0))

    #-----------------------
    # Child Node A's children
    for kid in ca.c:
        assert kid.x == 1
        assert kid.p == ca
        assert kid.v is None

    # grand child node A1
    a1 = find_child(ca, [[1, 1, -1], [-1, -1, 1], [0, 1, -1]], (1, 0))
    assert len(a1.c) == 1
    # Great Grand Child Node A11
    a11 = a1.c[0]
    assert a11.x == -1
    assert a11.p == a1
    assert a11.v is None
    assert a11.c == []

    # grand child node A2
    a2 = find_child(ca, [[1, 1, -1], [0, -1, 1], [-1, 1, -1]], (2, 0))
    assert a2.c == []

    #-----------------------
    # Child Node B's children
    for kid in cb.c:
        assert kid.x == 1
        assert kid.p == cb
        assert kid.c == []
        assert kid.v is None

    # grand child node B1
    find_child(cb, [[-1, 1, -1], [1, -1, 1], [0, 1, -1]], (0, 0))
    # grand child node B2
    find_child(cb, [[0, 1, -1], [1, -1, 1], [-1, 1, -1]], (2, 0))

    #-----------------------
    # Child Node C's children
    for kid in cc.c:
        assert kid.x == 1
        assert kid.p == cc
        assert kid.v is None

    # grand child node C1
    c1 = find_child(cc, [[-1, 1, -1], [0, -1, 1], [1, 1, -1]], (0, 0))
    assert c1.c == []

    # grand child node C2
    c2 = find_child(cc, [[0, 1, -1], [-1, -1, 1], [1, 1, -1]], (1, 0))
    assert len(c2.c) == 1
    # Great Grand Child Node C21
    c21 = c2.c[0]
    assert c21.x == -1
    assert c21.p == c2
    assert c21.v is None
    assert c21.c == []

    #-----------------------
    s = np.array([[0, 0, 1],
                  [0, 1, 1],
                  [-1, 0, -1]])
    n = MMNode(s, x=-1)  # it's O player's turn
    n.build_tree(g)
    check_root(n, turn=-1, n_children=4)

    # (board after O's move, the move, expected number of children)
    expected = [
        ([[-1, 0, 1], [0, 1, 1], [-1, 0, -1]], (0, 0), 3),
        ([[0, -1, 1], [0, 1, 1], [-1, 0, -1]], (0, 1), 3),
        ([[0, 0, 1], [-1, 1, 1], [-1, 0, -1]], (1, 0), 3),
        # terminal node, no child
        ([[0, 0, 1], [0, 1, 1], [-1, -1, -1]], (2, 1), 0),
    ]
    for kid in n.c:
        assert kid.x == 1
        assert kid.v is None
        assert kid.p == n
        for board, move, n_kids in expected:
            if np.allclose(kid.s, board):
                assert kid.m == move
                assert len(kid.c) == n_kids

    # The AI agent should be compatible with both games: TicTacToe and Othello.
    # now let's test on the game "Othello":
    #---------------------
    # Game: Othello
    g = Othello()  # game
    s = np.array([[0, 0, -1, 1, -1, 0, 0, 0]] + [blank] * 7)
    s_ = s.copy()
    n = MMNode(s, x=1)  # it's X player's turn
    n.build_tree(g)
    # the current game state should not change after building the tree
    assert np.allclose(s, s_)
    check_root(n, turn=1, n_children=2)
    for kid in n.c:
        assert isinstance(kid, MMNode)
        assert kid.x == -1
        assert kid.p == n
        assert kid.v is None
        assert len(kid.c) == 1

    #-----------------------
    # child node A
    ca = find_child(n, [[0, 0, -1, 1, 1, 1, 0, 0]] + [blank] * 7, (0, 5))
    # child node B
    cb = find_child(n, [[0, 1, 1, 1, -1, 0, 0, 0]] + [blank] * 7, (0, 1))

    #-----------------------
    # Child Node A's children
    # grand child node A1
    check_first_child(ca, [[0, 0, -1, -1, -1, -1, -1, 0]] + [blank] * 7, (0, 6))
    #-----------------------
    # Child Node B's children
    # grand child node B1
    check_first_child(cb, [[-1, -1, -1, -1, -1, 0, 0, 0]] + [blank] * 7, (0, 0))

    #------------------------------------
    s = np.array([[0, -1, 1, 1, -1, 0, 0, 0]] + [blank] * 7)
    s_ = s.copy()
    n = MMNode(s, x=1)  # it's X player's turn
    n.build_tree(g)
    # the current game state should not change after building the tree
    assert np.allclose(s, s_)
    check_root(n, turn=1, n_children=2)
    for kid in n.c:
        assert isinstance(kid, MMNode)
        assert kid.p == n
        assert kid.v is None
        assert len(kid.c) == 1

    #-----------------------
    # child node A
    # there is no valid move for O player, so O player needs to give up the chance
    ca = find_child(n, [[1, 1, 1, 1, -1, 0, 0, 0]] + [blank] * 7, (0, 0), turn=1)
    # child node B
    cb = find_child(n, [[0, -1, 1, 1, 1, 1, 0, 0]] + [blank] * 7, (0, 5), turn=-1)

    #-----------------------
    # Child Node A's children
    # grand child node A1
    check_first_child(ca, [[1, 1, 1, 1, 1, 1, 0, 0]] + [blank] * 7, (0, 5))
    #-----------------------
    # Child Node B's children
    # grand child node B1
    check_first_child(cb, [[0, -1, -1, -1, -1, -1, -1, 0]] + [blank] * 7, (0, 6))
def test_sample():
    '''(5 points) sample

    Checks MCNode.sample(): each call returns a game result (-1, 0 or 1), the
    results follow the expected distribution for the given position, terminal
    positions always return the same result, and the node's game state is
    left untouched.  Exercised on TicTacToe and Othello.
    '''
    blank = [0] * 8  # one empty row of an 8 x 8 Othello board

    #---------------------
    # Game: TicTacToe
    g = TicTacToe()  # game
    #------------------------
    b = np.array([[0, 1, 1],
                  [0, -1, 1],
                  [-1, 1, -1]])
    bc = b.copy()  # independent reference copy, used to detect mutation
    s = GameState(b, x=-1)  # "O" player's turn
    n = MCNode(s)
    assert np.allclose(n.s.b, bc)  # creating the node must not alter the state
    v = 0
    for _ in range(100):
        e = n.sample(g)
        assert e in (-1, 1)
        v += e
    assert np.abs(v) < 25  # the two results should have roughly the same chance
    # the game state should not change after simulation
    assert np.allclose(n.s.b, bc)

    #------------------------
    b = np.array([[0, 1, 1],
                  [-1, -1, 1],
                  [-1, 1, -1]])
    s = GameState(b, x=1)  # "X" player's turn
    n = MCNode(s)
    for _ in range(100):
        assert n.sample(g) == 1

    #------------------------
    b = np.array([[0, 1, 0],
                  [-1, -1, 1],
                  [-1, 1, 1]])
    s = GameState(b, x=-1)  # "O" player's turn
    n = MCNode(s)
    for _ in range(100):
        assert n.sample(g) == -1

    #------------------------
    b = np.array([[0, 1, 1],
                  [0, -1, 1],
                  [0, -1, -1]])
    s = GameState(b, x=1)  # "X" player's turn
    n = MCNode(s)
    v = 0
    for _ in range(100):
        e = n.sample(g)
        assert e in (-1, 1)
        v += e
    assert np.abs(v) < 25  # X player has 1/2 chance to win and 1/2 to lose

    #------------------------
    # Terminal node, the game has already ended, the simulation result should always be the same.
    b = np.array([[-1, 0, 0],
                  [1, -1, 1],
                  [0, 1, -1]])  # terminal node: O player won
    b_ = b.copy()
    s = GameState(b, x=1)  # "X" player's turn
    n = MCNode(s)
    for _ in range(100):
        assert n.sample(g) == -1
    assert np.allclose(n.s.b, b_)  # the game state should not change after simulation

    b = np.array([[-1, -1, 1],
                  [1, 1, -1],
                  [-1, 1, 1]])
    s = GameState(b, x=1)  # "X" player's turn
    n = MCNode(s)
    for _ in range(100):
        assert n.sample(g) == 0

    #------------------------
    b = np.array([[0, 0, 0],
                  [0, 1, 0],
                  [0, 0, 0]])
    s = GameState(b, x=-1)  # "O" player's turn
    n = MCNode(s)
    v = 0
    for _ in range(1000):
        e = n.sample(g)
        assert e in (-1, 0, 1)
        v += e
    # the summed results of 1000 samples are expected to land near +500
    assert np.abs(v - 500) < 100

    #-----------------------------
    # The AI agent should be compatible with both games: TicTacToe and Othello.
    # now let's test on the game "Othello":
    #---------------------
    # Game: Othello
    g = Othello()  # game
    #------------------------
    b = np.array([[0, 0, -1, 1, -1, 0, 0, 0]] + [blank] * 7)
    s = GameState(b, x=1)  # "X" player's turn
    n = MCNode(s)
    for _ in range(10):
        assert n.sample(g) == -1

    #------------------------
    b = np.array([[0, -1, 1, -1, 0, 0, 0, 0]] + [blank] * 7)
    s = GameState(b, x=1)  # "X" player's turn
    n = MCNode(s)
    w = 0
    for _ in range(10):
        w += n.sample(g)
    # the two results (1 and -1) should roughly have the same chance
    assert np.abs(w) < 9
def test_minmax_choose_a_move(): '''(10 points) minmax choose_a_move()''' #--------------------- # Game: TicTacToe g = TicTacToe() # game # two possible moves: one leads to win p = MiniMaxPlayer() s = np.array([[0, -1, 1], [-1, 1, 1], [0, 1, -1]]) s_ = s.copy() r, c = p.choose_a_move(g, s, x=1) assert np.allclose(s, s_) assert r == 2 assert c == 0 # three possible moves, one leads to win p = MiniMaxPlayer() s = np.array([[1, -1, 1], [0, 0, -1], [0, 1, -1]]) r, c = p.choose_a_move(g, s, x=1) assert r == 2 assert c == 0 #------------------------- p = MiniMaxPlayer() s = np.array([[1, -1, 1], [0, 0, 0], [0, 0, 0]]) r, c = p.choose_a_move(g, s, x=-1) # O player's turn assert r == 1 assert c == 1 #------------------------- # play against random player in the game p1 = MiniMaxPlayer() p2 = RandomPlayer() # X Player: MinMax # O Player: Random s = np.array([[1, -1, 1], [0, 0, 0], [0, 0, -1]]) for i in range(10): e = g.run_a_game(p1, p2, s=s, x=1) assert e == 1 #------------------------- # play against MinMax player in the game # X Player: MinMax # O Player: MinMax s = np.array([[1, -1, 1], [0, 0, -1], [0, 1, -1]]) for i in range(10): e = g.run_a_game(p1, p1, s=s, x=1) assert e == 1 s = np.array([[0, 0, 1], [0, -1, 0], [1, -1, 0]]) e = g.run_a_game(p1, p1, s=s) assert e == 0 s = np.array([[0, 0, 0], [0, -1, 0], [1, 0, 0]]) e = g.run_a_game(p1, p1, s=s) assert e == 0 s = np.array([[0, 0, 0], [0, 0, 0], [1, -1, 0]]) e = g.run_a_game(p1, p1, s=s) assert e == 1 s = np.array([[0, 0, 0], [0, 1, 0], [0, -1, 0]]) e = g.run_a_game(p1, p1, s) assert e == 1 s = np.array([[0, 0, 0], [0, 1, 0], [-1, 0, 0]]) e = g.run_a_game(p1, p1, s) assert e == 0 #****************************************************** #*******************(TRY ME)*************************** #****************************************************** '''Run A Complete Game (TicTacToe): the following code will run a complete TicTacToe game using MiniMaxPlayer, if you want to try this, uncomment the following three lines of 
code. Note: it may take 1 or 2 minutes to run ''' #g = TicTacToe() #e = g.run_a_game(p1,p1) #assert e==0 #****************************************************** #****************************************************** #****************************************************** #---------------------------------------------- # The AI agent should be compatible with both games: TicTacToe and Othello. # now let's test on the game "Othello": #--------------------- # Game: Othello g = Othello() # game s = np.array([[0, -1, 1, -1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]]) for i in range(10): e = g.run_a_game(p1, p2, s=s, x=1) assert e == 1 w = 0 for i in range(10): e = g.run_a_game(p2, p2, s=s, x=1) w += e assert np.abs(w) < 9 #****************************************************** #*******************(DO NOT TRY ME:)******************* #****************************************************** ''' Run A Complete Game (Othello):
def test_MCTS_choose_a_move():
    '''(10 points) MCTS choose_a_move

    Checks that MCTSPlayer.choose_a_move() finds the winning / blocking move
    in simple TicTacToe positions, and that complete games played by
    MCTSPlayer against RandomPlayer, MiniMaxPlayer and itself end with the
    expected results.  Also runs MCTS vs. random games on Othello.
    '''
    blank = [0] * 8  # one empty row of an 8 x 8 Othello board

    def play(first, second, rows, turn=1):
        '''Run one game on `g` from a freshly built board; return its result.

        A new board array is created for every game so that no game can be
        affected by a previous one.
        '''
        return g.run_a_game(first, second, GameState(np.array(rows), x=turn))

    #---------------------
    # Game: TicTacToe
    g = TicTacToe()  # game
    p = MCTSPlayer()

    b = np.array([[0, -1, -1],
                  [0, 1, 0],
                  [0, 0, 0]])
    s = GameState(b, x=1)
    r, c = p.choose_a_move(g, s)
    assert r == 0
    assert c == 0

    b = np.array([[0, 0, -1],
                  [0, 1, -1],
                  [0, 0, 0]])
    s = GameState(b, x=1)
    r, c = p.choose_a_move(g, s)
    assert r == 2
    assert c == 2

    b = np.array([[0, 0, 1],
                  [0, -1, 1],
                  [0, 0, 0]])
    s = GameState(b, x=-1)
    r, c = p.choose_a_move(g, s)
    assert r == 2
    assert c == 2

    p1 = MCTSPlayer()
    p2 = RandomPlayer()
    p3 = MiniMaxPlayer()

    # MCTS (first player) vs random (second player)
    for _ in range(10):
        assert play(p1, p2, [[0, -1, 1], [-1, 1, -1], [0, -1, -1]]) == 1

    for _ in range(10):
        assert play(p1, p2, [[0, -1, 1], [-1, 1, -1], [-1, 1, 0]]) == 0

    # MCTS (first player) vs Minimax (second player)
    for _ in range(10):
        assert play(p1, p3, [[0, 0, 1], [0, -1, 0], [1, -1, 0]]) == 0

    w = 0
    for _ in range(10):
        w += play(p1, p3, [[0, 0, 0], [0, 0, 0], [1, -1, 0]])
    assert w > 1

    # MCTS vs MCTS
    w = 0
    for _ in range(10):
        w += play(p1, p1, [[0, 0, 0], [1, -1, 0], [0, 0, 0]])
    assert np.abs(w) < 5

    # MCTS(n_iter=100) (first player) vs MCTS(n_iter=1) (second player)
    pm1 = MCTSPlayer(1)
    pm100 = MCTSPlayer(100)
    w = 0
    for _ in range(10):
        w += play(pm100, pm1, [[0, 0, 0], [0, 0, 0], [1, -1, 0]])
    assert np.abs(w) > 4

    #----------------------------------------------
    # The AI agent should be compatible with both games: TicTacToe and Othello.
    # now let's test on the game "Othello":
    #---------------------
    # Game: Othello
    g = Othello()  # game
    b = np.array([[0, -1, 1, -1, 0, 0, 0, 0]] + [blank] * 7)
    for _ in range(10):
        # each game gets its own copy of the board
        s = GameState(b.copy(), x=1)
        e = g.run_a_game(p1, p2, s)
        assert e == 1

    # MCTS vs random
    s = GameState(b, x=1)
    e = g.run_a_game(p1, p2, s)
    assert e == 1