def move(self, board):
    empty_cells = rules.empty_cells(board)
    # Check if any of the empty cells represents a winning move
    for cell in empty_cells:
        cell = tuple(cell)
        new_board = board.copy()
        new_board[cell] = self.side
        if rules.winning_move(new_board):
            return cell
    # Check if any of the empty cells represents a winning move for the
    # other player, if so block it
    for cell in empty_cells:
        cell = tuple(cell)
        new_board = board.copy()
        new_board[cell] = -self.side
        if rules.winning_move(new_board):
            if self.logger:
                self.logger.debug("Blocked {0}".format(cell))
            return cell
    else:
        # Otherwise pick a random cell
        return tuple(empty_cells[random.randint(0, len(empty_cells) - 1)])

def test_empty_cells(self):
    board = np.asarray([[1, -1, -1], [-1, 1, 1], [1, -1, 1]])
    expected = []
    empty_cells = rules.empty_cells(board)
    self.assertEqual(list(empty_cells), expected)

    board = np.asarray([[1, 1, 1], [1, 1, 1], [0, 1, 1]])
    expected = [[2, 0]]
    empty_cells = rules.empty_cells(board)
    np.testing.assert_array_equal(empty_cells, expected)

    board = np.asarray([[1, 0, 1], [1, 1, 1], [0, 1, 1]])
    expected = [[0, 1], [2, 0]]
    empty_cells = rules.empty_cells(board)
    np.testing.assert_array_equal(empty_cells, expected)

    board = np.asarray([[1, 1, 1], [0, 0, 0], [-1, 1, 1]])
    expected = [[1, 0], [1, 1], [1, 2]]
    empty_cells = rules.empty_cells(board)
    np.testing.assert_array_equal(empty_cells, expected)

def mcts(self, board):
    max_time = time.time() + self.time_budget
    root_node = TreeNode(board)
    playout_count = 0
    while time.time() < max_time and playout_count < self.max_playouts:
        # Start at tree root (current actual state)
        current_node = root_node
        current_player = self.side
        while True:
            # Check for terminal state
            winner = rules.winner(current_node.state)
            if winner or rules.board_full(current_node.state):
                break
            # Pick a random move
            empty_cells = rules.empty_cells(current_node.state)
            move = tuple(random.choice(empty_cells))
            # Add to tree if not present
            if move not in current_node.child_nodes:
                # If not, create a TreeNode for it
                new_board = current_node.state.copy()
                new_board[move] = current_player
                current_node.child_nodes[move] = TreeNode(new_board, current_node)
            current_node = current_node.child_nodes[move]
            # Swap players
            current_player = -current_player
        # Terminal state reached so backpropagate result
        if winner == self.side:
            result = 1.0
        elif winner == -self.side:
            result = 0.0
        else:
            result = 0.5
        while current_node is not root_node:
            current_node.visits += 1
            current_node.wins += result
            current_node = current_node.parent
        playout_count += 1
    print("Number of MCTS playouts: {0}".format(playout_count))
    # Keep the search tree for later inspection
    self.root_node = root_node
    # Return move with highest score
    best_move = root_node.best_move()
    return best_move

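The mcts() method above ends by calling root_node.best_move(), which is not shown in this section. Below is a minimal sketch of one common choice (pick the child with the highest win rate), assuming child_nodes maps moves to nodes carrying wins and visits counters; the project's actual selection rule may differ.

def best_move(self):
    # Hypothetical TreeNode.best_move(): choose the child with the highest
    # win rate (wins / visits). This is an illustrative assumption, not the
    # original project's code.
    def win_rate(item):
        _, node = item
        return node.wins / node.visits if node.visits else 0.0
    move, _ = max(self.child_nodes.items(), key=win_rate)
    return move
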
def valid_move(board, move):
    """
    Returns whether the move is valid for the given board, i.e. whether it
    is one of the empty cells.

    Args:
        board (numpy.ndarray): two dimensional array representing the game
            board
        move ((int, int)): tuple with the coordinates of the new move (x, y)

    Returns:
        bool: True if the move is valid, False otherwise
    """
    return list(move) in rules.empty_cells(board).tolist()

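A short usage sketch for valid_move(), assuming rules.empty_cells() returns the coordinates of the zero-valued cells as in the test above; the board contents are only illustrative.

import numpy as np

board = np.asarray([[1, 0, -1],
                    [0, 1, 0],
                    [0, 0, -1]])
print(valid_move(board, (0, 1)))   # True: the cell is empty
print(valid_move(board, (0, 0)))   # False: the cell is already taken
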
def move(self, board):
    empty_cells = rules.empty_cells(board)
    # Check if any of the empty cells represents a winning move
    for cell in empty_cells:
        cell = tuple(cell)
        new_board = board.copy()
        new_board[cell] = self.side
        if rules.winning_move(new_board):
            return cell
    else:
        # Otherwise pick a random cell
        return tuple(empty_cells[random.randint(0, len(empty_cells) - 1)])

def move(self, board):
    empty_cells = rules.empty_cells(board)
    # Check if any of the empty cells represents a winning move
    for cell in empty_cells:
        cell = tuple(cell)
        new_board = board.copy()
        new_board[cell] = self.side
        if rules.winning_move(new_board, cell):
            return cell
    else:
        # Otherwise pick a random cell
        return tuple(empty_cells[random.randint(0, len(empty_cells) - 1)])

def move_values(self, board):
    """
    Returns a list of the possible moves for the given board with the value
    of each move.

    Args:
        board (numpy.ndarray): two dimensional array representing the board

    Returns:
        [[cell, value]]: a list of cell-value pairs
    """
    values = []
    for cell in rules.empty_cells(board):
        values.append([cell, self.move_value(cell, board)])
    return np.asarray(values)

def __init__(self, board, side, parent=None):
    """
    Constructor.

    Args:
        board (numpy.ndarray): two dimensional array representing the game
            board
        side (int): the player side, defined in the game rules
        parent (int): id of the parent of this node or None
    """
    self.id = UCTTreeNode.new_id()  # get a unique number to identify the node
    self.state = board
    self.side = side
    self.parent = parent
    self.visits = 0
    self.wins = 0
    self.ucb1_score = None
    self.untried_moves = rules.empty_cells(self.state).tolist()
    self.child_nodes = {}

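The constructor above initialises ucb1_score, but the scoring code is not shown here. Below is a minimal sketch of the standard UCB1 formula such a field could hold, written as a free function so it makes no assumption about how parent links are stored; the exploration constant sqrt(2) is the usual textbook choice, not necessarily the project's.

import math

def ucb1(wins, visits, parent_visits, c=math.sqrt(2)):
    # Illustrative UCB1 value: exploitation term plus exploration bonus.
    # An unvisited node gets +inf so it is always tried at least once.
    if visits == 0:
        return float("inf")
    return float(wins) / visits + c * math.sqrt(math.log(parent_visits) / visits)
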
def move(self, board):
    # Look up the possible moves in the state values list
    empty_cells = rules.empty_cells(board)
    possible_moves = []  # [[cell, value]]
    for cell in empty_cells:
        possible_moves.append([cell, self.move_value(cell, board)])
    # Sort moves by value (last element has highest value)
    possible_moves = np.asarray(possible_moves)
    possible_moves = possible_moves[possible_moves[:, 1].argsort()]
    # Choose move behaviour based on the bias probability
    # TODO: adjust bias down over time
    if random.random() < self.BIAS:
        self.state = self.EXPLORING
    else:
        self.state = self.EXPLOITING
    # Choose either highest value (exploit) or a random other cell (explore)
    if self.state == self.EXPLOITING or len(possible_moves) == 1:
        # Find the highest value and get all free cells with this value,
        # then choose one at random
        best_value = possible_moves[-1][1]
        best_cells = [x[0] for x in possible_moves if x[1] == best_value]
        i = random.randint(0, len(best_cells) - 1)
        cell = tuple(best_cells[i])
    elif self.state == self.EXPLORING:
        # Choose a random cell that does not have the highest value
        # TODO: weight other moves according to value?
        i = random.randint(0, len(possible_moves) - 2)
        cell = tuple(possible_moves[i][0])
    else:
        raise ValueError("State is unexpected value: {0}".format(self.state))
    # Record move state for later
    move_state = board.copy()
    move_state[cell] = self.side
    self.move_states.append(move_state)
    return cell

def move(self, board):
    empty_cells = rules.empty_cells(board)
    # Check if any of the empty cells represents a winning move
    for cell in empty_cells:
        cell = tuple(cell)
        new_board = board.copy()
        new_board[cell] = self.side
        if rules.winning_move(new_board, cell):
            return cell
    # Check if any of the empty cells represents a winning move for the
    # other player, if so block it
    for cell in empty_cells:
        cell = tuple(cell)
        new_board = board.copy()
        new_board[cell] = -self.side
        if rules.winning_move(new_board, cell):
            if self.logger:
                self.logger.debug("Blocked {0}".format(cell))
            return cell
    else:
        # Otherwise pick a random cell
        return tuple(empty_cells[random.randint(0, len(empty_cells) - 1)])

def minimax(self, board, player):
    """
    Recursive method that returns the value of the given board for this
    agent along with the optimal next moves.

    Args:
        board (numpy.ndarray): two dimensional array representing the board
            state
        player (int): the side of the current player

    Returns:
        result (int): the value of the board from this agent's point of view
            (1 for a win, 0 for a draw, -1 for a loss)
        optimal_moves ([(int, int)]): a list of the optimal next moves, or
            None for terminal states and opponent nodes
    """
    empty_cells = rules.empty_cells(board)

    # Choose default cell if board is empty to reduce processing time
    # if len(empty_cells) == board.size:
    #     import numpy as np
    #     return None, np.asarray([(0, 0)])

    # Check if this move resulted in a win or draw (base case)
    winner = rules.winner(board)
    if winner is not None:
        if winner == self.side:
            # Player won so return score for a win
            return 1, None
        else:
            # Opponent won so return score for a loss
            return -1, None
    elif rules.board_full(board):
        # Board is full so return score for a draw
        return 0, None

    # Test each child move recursively and add results to the list
    results_list = []
    for cell in empty_cells:
        # Make the move
        cell = tuple(cell)
        board[cell] = player
        # Get the value of this child move and add it to the results
        result, _ = self.minimax(board, -player)
        results_list.append(result)
        # Reverse the move
        board[cell] = rules.EMPTY

    if player == self.side:
        # Return best move for player from list of child moves
        max_score = max(results_list)
        max_inds = [i for i, x in enumerate(results_list) if x == max_score]
        optimal_moves = empty_cells[max_inds]
        return max_score, optimal_moves
    else:
        # Return worst move for opponent from list of child moves
        min_element = min(results_list)
        # move = tuple(empty_cells[results_list.index(min_element)])
        # return min_element, move
        return min_element, None  # don't need the actual move

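A hedged sketch of how an agent's move() might wrap minimax() above, in the style of the other move() methods in this section: evaluate the current board for this agent's side and pick one of the returned optimal moves at random. The project's actual wrapper is not shown here.

def move(self, board):
    # Hypothetical wrapper around minimax(); assumes random is imported at
    # module level, as in the other snippets in this section.
    _, optimal_moves = self.minimax(board.copy(), self.side)
    i = random.randint(0, len(optimal_moves) - 1)
    return tuple(optimal_moves[i])
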
def move(self, board):
    # Select an empty cell at random
    empty_cells = rules.empty_cells(board)
    return tuple(empty_cells[random.randint(0, len(empty_cells) - 1)])

def move(self, board):
    # Select the first empty cell
    empty_cells = rules.empty_cells(board)
    return tuple(empty_cells[0])