def test_board_full(self):
    """Check ``rules.board_full`` on boards with and without ``0`` cells.

    Boards that still contain at least one ``0`` entry must be reported as
    not full; boards whose cells are all non-zero must be reported as full.
    """
    boards_with_gaps = (
        [[0, 0, 0], [0, 0, 0], [0, 0, 0]],
        [[-1, 1, 1], [0, 1, -2], [1, 1, -1]],
    )
    for cells in boards_with_gaps:
        self.assertFalse(rules.board_full(np.asarray(cells)))

    filled_boards = (
        [[1, 1, 1], [1, 1, 1], [1, 1, 1]],
        [[1, -1, 1], [1, 1, -1], [-1, 1, 1]],
    )
    for cells in filled_boards:
        self.assertTrue(rules.board_full(np.asarray(cells)))
def mcts(self, board):
    """Run random playouts from ``board`` and return the preferred move.

    A fresh search tree is rooted at ``board``. Playouts are repeated until
    either ``self.time_budget`` seconds have elapsed or
    ``self.max_playouts`` playouts have been completed. Each playout picks
    uniformly random moves (adding unseen positions to the tree) until the
    game is won or the board is full, then propagates the result back up
    to, but not including, the root.

    Args:
        board: the current board state; it is passed to ``TreeNode`` and
            copied before any move is applied, so it is not modified here.

    Returns:
        Whatever ``root_node.best_move()`` returns for the searched tree.
    """
    max_time = time.time() + self.time_budget
    root_node = TreeNode(board)
    playout_count = 0

    while time.time() < max_time and playout_count < self.max_playouts:
        # Every playout starts at the tree root (the current actual state)
        # with this agent to move.
        current_node = root_node
        current_player = self.side

        while True:
            # Stop descending once the position is terminal.
            winner = rules.winner(current_node.state)
            if winner or rules.board_full(current_node.state):
                break

            # Pick a random move among the empty cells.
            empty_cells = rules.empty_cells(current_node.state)
            move = tuple(random.choice(empty_cells))

            # Add the resulting position to the tree if it is new.
            children = current_node.child_nodes
            if move not in children:
                new_board = current_node.state.copy()
                new_board[move] = current_player
                children[move] = TreeNode(new_board, current_node)

            current_node = children[move]
            current_player = -current_player

        # Score the playout from this agent's point of view.
        if winner == self.side:
            result = 1.0
        elif winner == -self.side:
            result = 0.0
        else:
            result = 0.5

        # Back-propagate along the visited path; the root itself is not
        # updated.
        while current_node is not root_node:
            current_node.visits += 1
            current_node.wins += result
            current_node = current_node.parent

        playout_count += 1

    # Single-argument call form prints the same text on Python 2 and 3.
    print("Number of MCTS playouts: {0}".format(playout_count))

    # Store the root explicitly rather than relying on the loop variable,
    # which is undefined when no playout was run.
    self.root_node = root_node

    return root_node.best_move()
def play(self):
    """
    Plays the game, alternating turns between the players.

    Moves are requested sequentially from each player in turn until there
    is a winner or the board is full. Each move is checked for validity
    before it is applied to ``self.board``.

    Returns:
        int: the side of the winning player, or None if there was a draw

    Raises:
        ValueError: if a player returns a move that ``rules.valid_move``
            rejects
    """
    # Fetch the player list once so that the optional shuffle and the turn
    # cycle are guaranteed to operate on the same list object.
    players = self.players()
    if self.shuffle:
        random.shuffle(players)

    # Request moves from each player until there is a win or draw
    for player in cycle(players):
        # Check for a win before asking for the next move
        winning_side = rules.winner(self.board)
        if winning_side is not None:
            winner = self.player(winning_side)
            if self.logger:
                self.logger.info("{2}\nGame over: {0} win ({1})".format(
                    rules.side_name(winning_side),
                    type(winner).__name__,
                    rules.board_str(self.board)))
            # Return the side of the winning player
            return winning_side

        if rules.board_full(self.board):
            # The board is full so the game concluded with a draw
            if self.logger:
                self.logger.info("{0}\nGame over: Draw".format(
                    rules.board_str(self.board)))
            return None

        # Players receive a copy so they cannot alter the real board
        move = player.move(self.board.copy())

        if not rules.valid_move(self.board, move):
            if self.logger:
                self.logger.fatal("Invalid move")
            raise ValueError("Not a valid move: {0}".format(move))

        # Apply the validated move
        self.board[move] = player.side
def mcts(self, board):
    """Run one select / expand-rollout / back-propagate pass on the tree.

    The pass starts at ``self.root_node`` with ``self.side`` to move:

    1. Select: while the current node has children and no untried moves,
       descend to the child with the highest ``self.ucb1_score``.
    2. Expand / rollout: play random untried moves until the position is
       won or the board is full. Every position reached is added to the
       tree as a ``UCTTreeNode``.
    3. Back-propagate: every node from the final one up to the root gets
       one more visit and a reward of 1 (win for ``self.side``), 0 (loss)
       or 0.5 (anything else).

    ``self.playout_count`` is incremented once per call.

    Args:
        board: not used by this method; the search always starts from
            ``self.root_node``.
    """
    current_node = self.root_node
    current_player = self.side

    # Select
    while current_node.child_nodes and not current_node.untried_moves:
        # Fully expanded, non-terminal node: follow the best UCB1 child.
        # Iterating in reverse makes max() return the last of any tied
        # children, matching a stable ascending sort followed by [-1].
        children = list(current_node.child_nodes.values())
        current_node = max(
            reversed(children),
            key=lambda child: self.ucb1_score(child, current_player))
        current_player = -current_player

    # Expand / rollout
    if current_node.untried_moves:
        while True:
            # Stop once a terminal state is reached
            winner = rules.winner(current_node.state)
            if winner or rules.board_full(current_node.state):
                break

            # Take one untried move at random, removing it from the list
            untried = current_node.untried_moves
            move = tuple(untried.pop(random.randrange(len(untried))))

            # Apply the move on a copy and add the new node to the tree.
            # Note that every rollout position is added here, not only
            # the first new one.
            new_board = current_node.state.copy()
            new_board[move] = current_player
            child = UCTTreeNode(new_board, current_player, current_node)
            current_node.child_nodes[move] = child

            # Move down the tree and swap players
            current_node = child
            current_player = -current_player

    # Back-propagate: the reward is always from self.side's point of view
    # and is the same for every node on the path, so compute it once.
    winner = rules.winner(current_node.state)
    if winner == self.side:
        reward = 1
    elif winner == -self.side:
        reward = 0
    else:
        reward = 0.5

    # Walk up until the parent link runs out (explicit None check so the
    # loop does not depend on how node objects evaluate as booleans).
    while current_node is not None:
        current_node.visits += 1
        current_node.wins += reward
        current_node = current_node.parent

    self.playout_count += 1
def minimax(self, board, player):
    """
    Recursive method that returns the value of a position and, on this
    agent's turn, the optimal next moves.

    Each empty cell is tried in turn by writing ``player`` into ``board``,
    recursing for the other side, and then resetting the cell to
    ``rules.EMPTY``, so ``board`` is unchanged once the call returns.

    Args:
        board (numpy.ndarray): two dimensional array representing the
            board state
        player (int): the side of the player to move

    Returns:
        result (int): 1 if the position is a win for ``self.side``, -1 if
            it is a win for the other side, 0 for a draw
        optimal_moves: the rows of ``rules.empty_cells(board)`` that reach
            the best score when ``player`` is ``self.side``; None for
            terminal positions and for the opponent's turn
    """
    # Base case: the previous move ended the game
    winner = rules.winner(board)
    if winner is not None:
        # Scores are always from this agent's point of view
        return (1 if winner == self.side else -1), None
    if rules.board_full(board):
        # Board is full so return score for a draw
        return 0, None

    # Only non-terminal positions need the list of candidate moves
    empty_cells = rules.empty_cells(board)

    # Evaluate every child move recursively
    results_list = []
    for cell in empty_cells:
        cell = tuple(cell)
        board[cell] = player  # make the move
        result, _ = self.minimax(board, -player)
        results_list.append(result)
        board[cell] = rules.EMPTY  # undo the move

    # Compare sides by value, not identity: ``is`` is unreliable for
    # integers that are not the very same object.
    if player == self.side:
        # Agent's turn: keep every move that achieves the best score
        max_score = max(results_list)
        max_inds = [
            i for i, x in enumerate(results_list) if x == max_score
        ]
        return max_score, empty_cells[max_inds]

    # Opponent's turn: assume the reply that is worst for the agent; the
    # move itself is not needed by the caller.
    return min(results_list), None