def expand_leaf(parent_node: MCTSNode, state, board):
    """Add a new leaf to the tree by creating a child of ``parent_node``.

    The node's untried actions are refreshed from the legal actions of the
    state produced by ``ravel_states(board, state, parent_node)`` before one
    of them is expanded.

    Args:
        parent_node: The node for which a child will be added.
        state: The state of the game, handed to ``ravel_states``.
        board: The game setup; supplies ``legal_actions``.

    Returns:
        The added child node, or None when the node has no legal actions.
    """
    # presumably ravel_states rebuilds the state reached at parent_node;
    # verify against its definition.
    current_state = ravel_states(board, state, parent_node)
    parent_node.untried_actions = board.legal_actions(current_state)

    if len(parent_node.untried_actions) == 0:
        print("Cant expand leaf there are no possıble plays proceed.")
        # Penalise the dead end. The root has no parent, so guard the
        # parent update instead of raising AttributeError on None.
        if parent_node.parent is not None:
            parent_node.parent.wins = -inf
        parent_node.wins = -inf
        return None

    # pop() takes the last legal action, so the pick is deterministic.
    p_action = parent_node.untried_actions.pop()

    # Give the child its own list so later mutation of either node's
    # untried actions cannot affect the other.
    # NOTE(review): these are the parent's remaining actions, not the legal
    # actions of the child's state; they are refreshed when the child is
    # itself expanded.
    new_node = MCTSNode(parent=parent_node,
                        parent_action=p_action,
                        action_list=list(parent_node.untried_actions))
    parent_node.child_nodes[p_action] = new_node
    return new_node
def expand_leaf(node, state):
    """Add a new leaf to the tree by creating a child node for ``node``.

    Expansion phase: a random untried action is applied to ``state`` and a
    child node for that action is attached to ``node``.

    Args:
        node: The node for which a child will be added.
        state: The state of the game; the chosen move is applied to it.

    Returns:
        The added child node, or None when ``node`` has no untried actions.
    """
    # Nothing left to expand from this node.
    if not node.untried_actions:
        return None

    # Random order reduces bias between siblings.
    move = choice(node.untried_actions)
    state.apply_move(move)

    new_node = MCTSNode(parent=node,
                        parent_action=move,
                        action_list=state.legal_moves)
    node.untried_actions.remove(move)

    # The original re-assigns these attributes after construction; kept as-is.
    new_node.parent = node
    new_node.parent_action = move
    new_node.action_list = state.legal_moves

    node.child_nodes[move] = new_node
    return new_node
def think(board, state):
    """Perform MCTS by sampling games and building the game tree.

    Args:
        board: The game setup.
        state: The state of the game.

    Returns:
        The action whose root child has the best wins/visits ratio, or None
        when the root has no children.
    """
    identity_of_bot = board.current_player(state)

    # Start at root.
    root_node = MCTSNode(parent=None, parent_action=None)
    node = root_node
    # NOTE(review): `fun_board` is not defined in this function; presumably a
    # module-level board object - confirm it should not simply be `board`.
    root_node.untried_actions = fun_board.legal_actions(state)

    for step in range(num_nodes):
        sampled_game = state

        node = root_node
        node.state = sampled_game

        node = traverse_nodes(node, sampled_game, identity_of_bot)
        leaf_node = expand_leaf(node, sampled_game)
        sampled_game = rollout(leaf_node.state)

        # A missing result counts as "not won".
        outcome = board.win_values(sampled_game)
        won = outcome is not None and outcome[identity_of_bot] == 1

        backpropagate(leaf_node, won)

    best_action = None
    best_ratio = 0
    for action, child_node in root_node.child_nodes.items():
        # An unvisited child has no statistics yet; treat its ratio as 0
        # instead of dividing by zero.
        if child_node.visits:
            ratio = child_node.wins / child_node.visits
        else:
            ratio = 0
        if ratio >= best_ratio:
            best_ratio = ratio
            best_action = action

    if best_action is None:
        print(node)
    print(best_action)
    return best_action
def think(state):
    """Perform MCTS for a fixed time budget and pick an action.

    Games are sampled from copies of ``state`` until 10 seconds have elapsed.
    The chosen node comes from ``make_choice`` and the tree is appended to
    'mcts_modified.out'.

    Args:
        state: The state of the game.

    Returns:
        The action to be taken.
    """
    start = time()
    time_elapsed = 0

    identity_of_bot = state.player_turn

    def get_result(sampled_game):
        # Score margin from the bot's point of view.
        reds = sampled_game.score.get('red', 0)
        blues = sampled_game.score.get('blue', 0)
        return reds - blues if identity_of_bot == 'red' else blues - reds

    root_node = MCTSNode(parent=None, parent_action=None,
                         action_list=state.legal_moves)

    while time_elapsed < 10:
        # Copy the game for sampling a playthrough.
        sampled_game = state.copy()

        # Start at root.
        node = root_node

        node = traverse_nodes(node, sampled_game, identity_of_bot)
        node = expand_leaf(node, sampled_game)
        rollout(sampled_game)
        backpropagate(node, get_result(sampled_game))

        time_elapsed = time() - start

    # Make choice based on tree.
    best_node = make_choice(root_node, state, identity_of_bot)
    action = best_node.parent_action

    # Write tree to file for time testing. The context manager closes the
    # handle, which the previous bare open() never did.
    with open('mcts_modified.out', 'a') as file:
        file.write(root_node.tree_to_string(horizon=100, indent=1))

    return action
def expand_leaf(node, board, state):
    """Add a new leaf to the tree by creating a child node for ``node``.

    Args:
        node: The node for which a child will be added.
        board: The game setup; supplies ``next_state`` and ``legal_actions``.
        state: The state of the game at ``node``.

    Returns:
        The added child node.

    Raises:
        IndexError: If ``node`` has no untried actions (from ``choice``).
    """
    new_move = choice(node.untried_actions)

    # Use the state returned by next_state. Previously the result was
    # discarded, so the child's action list was taken from the parent state.
    child_state = board.next_state(state, new_move)
    new_node = MCTSNode(parent=node,
                        parent_action=new_move,
                        action_list=board.legal_actions(child_state))

    # The move is no longer untried. Without this the same move could be
    # expanded again and overwrite its existing child.
    node.untried_actions.remove(new_move)
    node.child_nodes[new_move] = new_node
    return new_node
def expand_leaf(node, state):
    """Add a new leaf to the tree by creating a child node for ``node``.

    Args:
        node: The node for which a child will be added.
        state: The state of the game; the chosen move is applied to it.

    Returns:
        The added child node, or ``node`` itself when it has no untried
        actions left.
    """
    # Guard: nothing to expand, hand back the node unchanged.
    if not node.untried_actions:
        return node

    # Pick a random untried move and play it on the sampled game.
    picked = choice(node.untried_actions)
    state.apply_move(picked)

    # The child starts with the legal moves of the updated state.
    child = MCTSNode(node, picked, state.legal_moves)
    node.child_nodes[picked] = child
    node.untried_actions.remove(picked)
    return child
def think(game, state):
    """Perform MCTS by sampling games and building the game tree.

    Args:
        game: The game setup.
        state: The state of the game.

    Returns:
        The action of the root child that sorts last by visit count.
    """
    identity_of_bot = game.current_player(state)
    root_node = MCTSNode(parent=None,
                         parent_action=None,
                         action_list=game.legal_actions(state))

    for step in range(num_nodes):
        sampled_game = state  # state sampled along the current path
        node = root_node      # start at root

        # Selection: descend while the node is fully expanded and has children.
        while node.untried_actions == [] and node.child_nodes != {}:
            node = traverse_nodes(node, game, sampled_game, identity_of_bot)
            sampled_game = game.next_state(sampled_game, node.parent_action)

        # Expansion: add one child when something is still untried.
        if node.untried_actions != []:
            node = expand_leaf(node, game, sampled_game)
            sampled_game = game.next_state(sampled_game, node.parent_action)

        # Simulation and backpropagation.
        points = rollout(game, sampled_game, identity_of_bot)
        backpropagate(node, points[identity_of_bot])

    # Stable sort: among equally visited children the last one wins.
    by_visits = sorted(root_node.child_nodes.values(), key=lambda c: c.visits)
    picked = by_visits[-1].parent_action
    print("MCTS bot ", identity_of_bot, " picking ", picked)
    return picked
def expand_leaf(node, board, state):
    """Add a new leaf to the tree by creating a child node for ``node``.

    Args:
        node: The node for which a child will be added.
        board: The game setup.
        state: The state of the game.

    Returns:
        The added child node, or None when ``node`` has no untried actions.
    """
    if not node.untried_actions:
        return None

    picked = choice(node.untried_actions)
    child_state = board.next_state(state, picked)

    # The child records the legal actions and the player to move in the
    # state reached by the picked action.
    child = MCTSNode(parent=node,
                     parent_action=picked,
                     action_list=board.legal_actions(child_state),
                     player=board.current_player(child_state))

    node.child_nodes[picked] = child
    node.untried_actions.remove(picked)
    return child
def expand_leaf(node, state):
    """Add a new leaf to the tree by creating a child node for ``node``.

    Args:
        node: The node for which a child will be added.
        state: The state of the game; the chosen move is applied to it.

    Returns:
        The added child node, or ``node`` itself when it has no untried
        actions left.
    """
    # Make sure there are still actions to be taken.
    if not node.untried_actions:
        return node

    move = choice(node.untried_actions)
    state.apply_move(move)
    node.untried_actions.remove(move)

    # The child's untried actions are the legal moves of the state after the
    # move. Previously the parent's own remaining list was passed, which was
    # the wrong move set and shared one list object between both nodes.
    new_node = MCTSNode(node, move, state.legal_moves)
    node.child_nodes[move] = new_node
    return new_node
def expand_leaf(node, board, state):  # expansion
    """Add new leaves to the tree by expanding every untried action of ``node``.

    Each untried action is removed from the node in random order and a child
    node is created for the state that action leads to.

    Args:
        node: The node for which children will be added.
        board: The game setup.
        state: The state of the game at ``node``.

    Returns:
        A list of all added child nodes (empty when nothing was untried).
    """
    created = []
    pending = node.untried_actions

    while pending:
        action = choice(pending)
        pending.remove(action)

        child_state = board.next_state(state, action)
        child = MCTSNode(node, action, board.legal_actions(child_state))

        node.child_nodes[action] = child
        created.append(child)

    return created
def expand_leaf(node, board, state):
    """Add a new leaf to the tree by creating a child node for ``node``.

    Args:
        node: The node for which a child will be added.
        board: The game setup.
        state: The state of the game at ``node``.

    Returns:
        A ``(child_node, child_state)`` tuple, or ``(node, state)`` unchanged
        when ``node`` has no untried actions.
    """
    if not node.untried_actions:
        if node.child_nodes:
            print(
                "error: node was traversed to without untried actions, but with child nodes"
            )
        # Nothing can be expanded either way. Return the inputs instead of
        # falling through to choice() on an empty list (IndexError).
        return node, state

    random_move = choice(node.untried_actions)
    state = board.next_state(state, random_move)
    next_node = MCTSNode(parent=node,
                         parent_action=random_move,
                         action_list=board.legal_actions(state))

    # Remove the move from the list of untried moves.
    node.untried_actions.remove(random_move)
    node.child_nodes[random_move] = next_node
    return next_node, state
def expand_leaf(node, board, state):
    """Add a new leaf to the tree by creating a child node for ``node``.

    Args:
        node: The node for which a child will be added.
        board: The game setup.
        state: The state of the game at ``node``.

    Returns:
        The added child node, or ``node`` itself when it has no untried
        actions.
    """
    # Explicit emptiness check instead of a bare `except:` around pop(),
    # which also swallowed unrelated errors and KeyboardInterrupt.
    if not node.untried_actions:
        return node

    action = node.untried_actions.pop()
    state = board.next_state(state, action)

    # Create the new leaf with the legal actions of the resulting state.
    new_node = MCTSNode(parent=node,
                        parent_action=action,
                        action_list=board.legal_actions(state))

    # Register the child under the action that leads to it.
    node.child_nodes[action] = new_node
    return new_node
def think(board, state):
    """Perform MCTS by sampling games and building the game tree.

    Nodes are grown from a queue of leaves, then every queued leaf is rolled
    out once and its result backpropagated.

    Args:
        board: The game setup.
        state: The state of the game.

    Returns:
        The root action with the best wins/visits ratio; defaults to the
        first legal action.
    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None,
                         parent_action=None,
                         action_list=board.legal_actions(state))
    actions = board.legal_actions(state)

    leaves = []
    sampled_game = state
    node = root_node
    exhausted = False

    for step in range(num_nodes):
        # Advance to the next queued leaf while the current node has nothing
        # left to try.
        while not node.untried_actions:
            if not leaves:
                exhausted = True
                break
            node = leaves[0]
            leaves.remove(node)
            sampled_game = nodeState(node, state, board)
        if exhausted:
            break

        leaf = traverse_nodes(node, board, sampled_game, identity_of_bot)
        leaves.append(leaf)

    # Roll out each remaining leaf and record whether the bot won.
    for leaf in leaves:
        leaf_state = nodeState(leaf, state, board)
        score = rollout(board, leaf_state)
        won = score[identity_of_bot] == 1
        backpropagate(leaf, won)

    best_ratio = -1
    best_action = actions[0]
    for key, branch in root_node.child_nodes.items():
        # Once a positive ratio is held, only branches visited more than
        # once may replace it.
        if not (best_ratio <= 0 or branch.visits > 1):
            continue
        if branch.visits == 0:
            continue
        ratio = branch.wins / branch.visits
        if best_ratio < ratio:
            best_action = key
            best_ratio = ratio

    return best_action
def expand_leaf(node, board, state):
    """Add a new leaf to the tree by creating a child node for ``node``.

    Args:
        node: The node for which a child will be added.
        board: The game setup.
        state: The state of the game at ``node``.

    Returns:
        A ``(child_node, updated_state)`` tuple, or ``(node, state)``
        unchanged when ``node`` has no untried actions.
    """
    if not node.untried_actions:
        if node.child_nodes:
            print(
                "error: node without untried actions and children is not expandable"
            )
        # Nothing can be expanded either way. Return the inputs instead of
        # falling through to choice() on an empty list (IndexError).
        return node, state

    next_move = choice(node.untried_actions)       # random untried action
    state = board.next_state(state, next_move)      # state after that action
    available_actions = board.legal_actions(state)  # actions for the child

    new_child = MCTSNode(parent=node,
                         parent_action=next_move,
                         action_list=available_actions)

    # The move is now tried; register the child under it.
    node.untried_actions.remove(next_move)
    node.child_nodes[next_move] = new_child
    return new_child, state
def expand_leaf(node, board, state):
    """Add a new leaf to the tree by creating a child node for ``node``.

    Args:
        node: The node for which a child will be added.
        board: The game setup.
        state: The state of the game at ``node``.

    Returns:
        The added child node.

    Raises:
        IndexError: If ``state`` has no legal actions.
    """
    legal_here = board.legal_actions(state)

    # NOTE(review): this always expands the first legal action and never
    # consults or updates node.untried_actions - confirm whether a random
    # untried action was intended.
    action = legal_here[0]

    child_state = board.next_state(state, action)
    child_node = MCTSNode(parent=node,
                          parent_action=action,
                          action_list=board.legal_actions(child_state))

    # Register the child under the action that produced it. Previously it was
    # also stored under the child's own first legal action, behind a bare
    # except that skipped registration when the child state had no actions.
    node.child_nodes[action] = child_node
    return child_node
def think(state):
    """Perform MCTS by sampling games and building the game tree.

    Args:
        state: The state of the game.

    Returns:
        The action of the most visited child of the root.
    """
    identity_of_bot = state.player_turn
    root_node = MCTSNode(parent=None,
                         parent_action=None,
                         action_list=state.legal_moves)

    for step in range(num_nodes):
        # Each iteration plays on its own copy of the game.
        sampled_game = state.copy()

        # Select, then expand.
        selected = traverse_nodes(root_node, sampled_game, identity_of_bot)
        expanded = expand_leaf(selected, sampled_game)

        # Roll out on the sampled game and score it for the bot.
        rollout(sampled_game)
        result = 1 if identity_of_bot == sampled_game.winner else 0

        backpropagate(expanded, result)

    most_visited = max(root_node.child_nodes.values(), key=lambda c: c.visits)
    return most_visited.parent_action
def expand_leaf(node, board, state):
    """Add a new leaf to the tree by creating a child node for ``node``.

    The first untried action of ``node`` is taken (no random choice).

    Args:
        node: The node for which a child will be added.
        board: The game setup.
        state: The state of the game at ``node``.

    Returns:
        The added child node.

    Raises:
        IndexError: If ``node`` has no untried actions (from ``pop``).
    """
    next_action = node.untried_actions.pop(0)
    child_state = board.next_state(state, next_action)

    child = MCTSNode(node, next_action, board.legal_actions(child_state))

    # Key the child by the action stored on it.
    node.child_nodes[child.parent_action] = child
    return child
def expand_leaf(node, board, state):
    """Add a new leaf to the tree by creating a child node for ``node``.

    Args:
        node: The node for which a child will be added.
        board: The game setup.
        state: The state of the game at ``node``.

    Returns:
        The added child node, or ``node`` itself when it has no untried
        actions.
    """
    if not node.untried_actions:
        return node

    # Take a random action away from the parent.
    picked = choice(node.untried_actions)
    node.untried_actions.remove(picked)

    # The child's action list comes from the state the action leads to.
    child_state = board.next_state(state, picked)
    child = MCTSNode(node, picked, board.legal_actions(child_state))

    # Record the child in the parent's dictionary.
    node.child_nodes[picked] = child
    return child
def expand_leaf(node, board, state):
    """Add a new leaf to the tree by creating a child node for ``node``.

    Args:
        node: The node for which a child will be added.
        board: The game setup.
        state: The state of the game at ``node``.

    Returns:
        A ``(child_node, child_state)`` tuple, or ``(node, state)`` unchanged
        when the game has already ended in ``state``.

    Raises:
        IndexError: If the game has not ended but ``node`` has no untried
            actions (from ``choice``).
    """
    # A finished game cannot be expanded.
    if board.is_ended(state):
        return node, state

    # Pick a random untried action and mark it as tried.
    picked = choice(node.untried_actions)
    node.untried_actions.remove(picked)

    # Advance the state, then build the child from it.
    child_state = board.next_state(state, picked)
    child = MCTSNode(parent=node,
                     parent_action=picked,
                     action_list=board.legal_actions(child_state))

    node.child_nodes[picked] = child
    return child, child_state
def expand_leaf(node, board, state):
    """Add a new leaf to the tree by creating a child node for ``node``.

    Args:
        node: The node for which a child will be added.
        board: The game setup.
        state: The state of the game at ``node``.

    Returns:
        A ``(child_node, child_state)`` tuple, or ``(node, state)`` unchanged
        when ``node`` has no untried actions.
    """
    # No untried action means nothing to expand, whether or not the node has
    # children. Previously a node with children fell through to choice() on
    # an empty list and raised IndexError.
    if not node.untried_actions:
        return node, state

    # Try an untried action in the current node.
    random_action = choice(node.untried_actions)
    state = board.next_state(state, random_action)
    next_node = MCTSNode(parent=node,
                         parent_action=random_action,
                         action_list=board.legal_actions(state))

    # The action is now tried; attach the new leaf to the current node.
    node.untried_actions.remove(random_action)
    node.child_nodes[random_action] = next_node

    return next_node, state
def expand_leaf(node, board, state, child_action, test):
    """Create a child node of ``node`` for the given action.

    The child is only constructed and returned. This function does not add it
    to ``node.child_nodes`` or touch ``node.untried_actions``.

    Args:
        node: The node for which a child will be created.
        board: The game setup.
        state: The state of the game at ``node``.
        child_action: The action leading from ``node`` to the child.
        test: Unused in this function.

    Returns:
        The created child node.
    """
    # State reached by playing child_action.
    child_state = board.next_state(state, child_action)

    # The child's action list is the set of legal actions in that state.
    legal_after = board.legal_actions(child_state)
    return MCTSNode(parent=node,
                    parent_action=child_action,
                    action_list=legal_after)
def expand_leaf(node, board, state):
    """Add a new leaf to the tree by creating a child node for ``node``.

    Args:
        node: The node for which a child will be added.
        board: The game setup.
        state: The state of the game at ``node``.

    Returns:
        A ``(child_node, child_state)`` tuple, or ``(node, state)`` unchanged
        when ``node`` has no untried actions.
    """
    # Dead end: nothing to expand.
    if not node.untried_actions:
        return node, state

    # Pick an arbitrary untried action and move to the state it produces.
    picked = choice(node.untried_actions)
    child_state = board.next_state(state, picked)

    # Build the child with the actions available after the move.
    child = MCTSNode(node, picked, board.legal_actions(child_state))

    # Update the parent's child dictionary and untried list.
    node.child_nodes[picked] = child
    node.untried_actions.remove(picked)

    return child, child_state
def think(board, state):
    """Skeleton for MCTS: set up the root and loop, but search nothing.

    The loop body only resets the per-iteration variables; no selection,
    expansion, rollout or backpropagation is performed yet.

    Args:
        board: The game setup.
        state: The state of the game.

    Returns:
        None (no action is chosen by this skeleton).
    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None,
                         parent_action=None,
                         action_list=board.legal_actions(state))

    for step in range(num_nodes):
        sampled_game = state  # state used for sampling a playthrough
        node = root_node      # every iteration starts at the root

    # A finished implementation would return the most visited action or the
    # one with the best estimated win rate.
    return None
def think(board, state):
    """Perform MCTS by sampling games and building the game tree.

    Args:
        board: The game setup.
        state: The state of the game.

    Returns:
        The action of a root child with the best wins/visits ratio (ties
        broken at random), or None when the root has no children.
    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None,
                         parent_action=None,
                         action_list=board.legal_actions(state))

    for step in range(num_nodes):
        sampled_game = state
        node = root_node

        # Traverse the tree until a leaf is reached; get the state there.
        leaf, new_state = traverse_nodes(node, board, sampled_game,
                                         identity_of_bot)

        # Expand unless the reached leaf is a game-ending state.
        if not board.is_ended(new_state):
            child = expand_leaf(leaf, board, new_state)
            new_state = board.next_state(new_state, child.parent_action)
        else:
            child = leaf

        # Simulate a possible outcome and backpropagate it.
        won = rollout(board, new_state, identity_of_bot)
        backpropagate(child, won)

    # Start below every possible ratio so that children with negative ratios
    # are still candidates. A 0 start left the list empty in that case and
    # choice() raised IndexError.
    best_ratio = float('-inf')
    best_children = []
    for child in root_node.child_nodes.values():
        # An unvisited child has no statistics; treat its ratio as 0.
        ratio = child.wins / float(child.visits) if child.visits else 0.0
        if ratio == best_ratio:
            best_children.append(child)
        elif ratio > best_ratio:
            best_children = [child]
            best_ratio = ratio

    if not best_children:
        # Root has no children (nothing was expanded).
        return None

    best_child = choice(best_children)
    print("MCTS vanilla picking {} with ratio {}".format(
        best_child.parent_action, best_ratio))
    return best_child.parent_action
def think(board, state):
    """Perform MCTS by sampling games and building the game tree.

    Args:
        board: The game setup.
        state: The state of the game.

    Returns:
        The root action with the best wins/visits ratio, or None when the
        root has no children.
    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None,
                         parent_action=None,
                         action_list=board.legal_actions(state))

    for step in range(num_nodes):
        sampled_game = state
        node = root_node
        won = False

        # NOTE(review): the next_state calls below always start from the root
        # `state`, not from `sampled_game`, so moves deeper than one ply are
        # applied to the root position - confirm this is intended.
        while node.untried_actions == [] and node.child_nodes != {}:
            node.visits += 1
            node = traverse_nodes(node, board, sampled_game, identity_of_bot)
            if node.parent_action is not None:
                sampled_game = board.next_state(state, node.parent_action)

        if node.untried_actions != []:
            node = expand_leaf(node, board, sampled_game)
            node = traverse_nodes(node, board, sampled_game, identity_of_bot)
            sampled_game = board.next_state(state, node.parent_action)

        sampled_game = rollout(board, sampled_game)
        point = board.points_values(sampled_game)[identity_of_bot]
        if point == 1:
            won = True
        backpropagate(node, won)

        if board.is_ended(state) == True:
            break

    # Start below any ratio so a move is returned even when no child has won
    # yet. A 0 start with a strict comparison returned None in that case.
    best_value = -1
    best_action = None
    for action, test_node in root_node.child_nodes.items():
        if test_node.visits == 0:
            value = 0
        else:
            value = test_node.wins / test_node.visits
        if value > best_value:
            best_action = action
            best_value = value
    return best_action
def think(board, state):
    """Perform MCTS by sampling games and building the game tree.

    Results are always read from ``points_values(...)[1]`` and the sign is
    flipped at selection time when the bot is not player 1.

    Args:
        board: The game setup.
        state: The state of the game.

    Returns:
        The root action with the best signed wins/visits ratio, or None when
        the root has no children.
    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None,
                         parent_action=None,
                         action_list=board.legal_actions(state))

    for _ in range(num_nodes):
        sampled_game = state
        node = root_node

        node = traverse_nodes(node, board, sampled_game, identity_of_bot)

        # Rebuild the state at the selected node by replaying the actions on
        # the path from the root.
        selected_node = node
        select_actions = []
        while selected_node.parent:
            select_actions.append(selected_node.parent_action)
            selected_node = selected_node.parent
        select_actions.reverse()
        for action in select_actions:
            sampled_game = board.next_state(sampled_game, action)

        if node.untried_actions:
            # Expand from the selection and simulate from the new node.
            node = expand_leaf(node, board, sampled_game)
            sampled_game = board.next_state(sampled_game, node.parent_action)
            sampled_game = rollout(board, sampled_game)
        # else: nothing left to try at the selected node; score its state
        # directly.
        won = board.points_values(sampled_game)[1]

        backpropagate(node, won)

    sign = 1 if identity_of_bot == 1 else -1

    # Initialised so that an empty root returns None instead of raising
    # UnboundLocalError.
    best_action = None
    best_winrate = -inf
    for action, child in root_node.child_nodes.items():
        child_winrate = (child.wins / child.visits) * sign
        if child_winrate > best_winrate:
            best_action = action
            best_winrate = child_winrate
    return best_action
def think(board, state):
    """Perform MCTS by sampling games and building the game tree.

    Args:
        board: The game setup.
        state: The state of the game.

    Returns:
        The action of the most visited child of the root.
    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None,
                         parent_action=None,
                         action_list=board.legal_actions(state))

    for step in range(num_nodes):
        # Reset the state so it excludes the previously expanded leaf.
        sampled_game = state
        node = root_node

        # Selection.
        leaf = traverse_nodes(node, board, sampled_game, identity_of_bot)

        # Expansion.
        new_leaf = expand_leaf(leaf, board, sampled_game)
        sampled_game = board.next_state(sampled_game, new_leaf.parent_action)

        # Rollout.
        # NOTE(review): the return value of rollout is discarded; if rollout
        # returns the final state, the scoring below sees the pre-rollout
        # state - confirm.
        if not board.is_ended(sampled_game):
            rollout(board, sampled_game)

        # Who wins.
        score = board.points_values(sampled_game)
        winner = 'draw'
        if score is not None:
            if score[1] == 1:
                winner = 1
            elif score[2] == 1:
                winner = 2

        # Compare by value. `is` only worked by relying on interpreter
        # caching of small integers.
        i_won = 1 if winner == identity_of_bot else 0

        # NOTE(review): results are backed up from `leaf`, not `new_leaf`, so
        # the newly added node is not handed to backpropagate - confirm.
        backpropagate(leaf, i_won)

    # Most frequently visited child of the root.
    best_child = max(root_node.child_nodes.items(),
                     key=lambda item: item[1].visits)[1]
    return best_child.parent_action
def think(state):
    """Perform MCTS by sampling games and building the game tree.

    Args:
        state: The state of the game.

    Returns:
        The root action whose child has the most wins; a random root action
        when no child has more than zero wins.
    """
    identity_of_bot = state.player_turn
    root_node = MCTSNode(parent=None,
                         parent_action=None,
                         action_list=state.legal_moves)

    for step in range(num_nodes):
        # Fresh copy of the game for this playthrough.
        sampled_game = state.copy()

        # Traversal from the root.
        leaf, sampled_game = traverse_nodes(root_node, sampled_game,
                                            identity_of_bot)

        if len(leaf.untried_actions) > 0:
            # Expansion followed by a rollout.
            new_node, sampled_game = expand_leaf(leaf, sampled_game)
            won = rollout(sampled_game, identity_of_bot)
        else:
            # Nothing to expand: score the reached state directly.
            new_node = leaf
            won = sampled_game.winner == identity_of_bot

        backpropagate(new_node, won)

    best_action = None
    best_wins = 0
    for action, child in root_node.child_nodes.items():
        if child.wins > best_wins:
            best_wins = child.wins
            best_action = action

    # Prospects are bad: no child has a win, so pick any root action.
    if best_action is None:
        best_action = choice(list(root_node.child_nodes.keys()))

    return best_action
def think(state):
    """Perform MCTS for a fixed time budget and pick an action.

    Games are sampled from copies of ``state`` until 10 seconds have elapsed.
    The chosen node comes from ``make_choice`` and the tree is appended to
    'mcts_vanilla.out'.

    Args:
        state: The state of the game.

    Returns:
        The action to be taken.
    """
    start = time()
    time_elapsed = 0

    identity_of_bot = state.player_turn

    def get_result(sampled_game):
        # 1 for a win of the bot, 0.5 for a tie, -1 for anything else.
        if sampled_game.winner == identity_of_bot:
            return 1
        if sampled_game.winner == 'tie':
            return 0.5
        return -1

    root_node = MCTSNode(parent=None, parent_action=None,
                         action_list=state.legal_moves)

    while time_elapsed < 10:
        # Copy the game for sampling a playthrough.
        sampled_game = state.copy()

        # Start at root.
        node = root_node

        node = traverse_nodes(node, sampled_game, identity_of_bot)
        node = expand_leaf(node, sampled_game)
        rollout(sampled_game)
        backpropagate(node, get_result(sampled_game))

        time_elapsed = time() - start

    # Make choice based on tree.
    best_node = make_choice(root_node, state, identity_of_bot)
    action = best_node.parent_action

    # Write tree to file for time testing. The context manager closes the
    # handle, which the previous bare open() never did.
    with open('mcts_vanilla.out', 'a') as file:
        file.write(root_node.tree_to_string(horizon=100, indent=1))

    return action
def think(board, state):
    """Perform MCTS by sampling games and building the game tree.

    Each iteration expands a batch of nodes from one leaf. Rollout results
    are reused within a batch for nodes whose sorted untried actions match,
    and the global ``explore_faction`` is nudged after every fresh rollout.

    Args:
        board: The game setup.
        state: The state of the game.

    Returns:
        The action of the root child with the best wins/visits ratio
        (``root_node.parent_action`` when no child qualifies).
    """
    global explore_faction

    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None,
                         parent_action=None,
                         action_list=board.legal_actions(state))

    step = 0
    while step < num_nodes:
        leaf_node, sampled_game = traverse_nodes(root_node, board, state,
                                                 identity_of_bot)
        new_nodes = expand_leaf(leaf_node, board, sampled_game)
        if not new_nodes:
            break
        step += len(new_nodes)

        done_rollouts = {}
        for roll_node in new_nodes:
            signature = tuple(sorted(roll_node.untried_actions))

            if signature in done_rollouts:
                # Same remaining actions as an earlier sibling: reuse result.
                backpropagate(roll_node, done_rollouts[signature])
                continue

            rolled_state = board.next_state(sampled_game,
                                            roll_node.parent_action)
            won = rollout(board, rolled_state)[identity_of_bot]
            backpropagate(roll_node, won)

            if won == 1:
                # If a game is won, seek out games along this path.
                explore_faction += .25
            elif won == 0 and explore_faction > 0.25:
                # If a game is lost, seek fewer games from this path.
                explore_faction -= 0.25

            done_rollouts[signature] = won

    maximum = -1
    max_node = root_node
    for child in root_node.child_nodes.values():
        if child is not None and (child.wins / child.visits) > maximum:
            max_node = child
            maximum = (child.wins / child.visits)

    return max_node.parent_action
def think(board, state):
    """Perform MCTS by sampling games and building the game tree.

    Args:
        board: The game setup.
        state: The state of the game.

    Returns:
        The root action selected by win rate and visit count, or None when
        no child qualifies.
    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None,
                         parent_action=None,
                         action_list=board.legal_actions(state),
                         player=identity_of_bot)

    for step in range(num_nodes):
        sampled_game = state
        node = root_node

        node = traverse_nodes(node, board, sampled_game, identity_of_bot)
        sampled_game = update_state(node, board, state)

        node = expand_leaf(node, board, sampled_game)
        # Stop before touching the node when expansion produced nothing.
        # Previously update_state was called on the result first.
        if not node:
            break

        sampled_game = update_state(node, board, state)
        won = rollout(board, sampled_game)
        backpropagate(node, won, identity_of_bot)

    # With the completed tree, get the action with the best rate.
    best_action = None
    best_rate = float('-inf')
    highest_visits = 0
    for action, r in root_node.child_nodes.items():
        # Count wins from the bot's point of view: for a node owned by the
        # other player, the visits that were not wins are used instead.
        child_wins = r.visits - r.wins if r.player != identity_of_bot else r.wins
        child_rate = child_wins / r.visits
        child_visits = r.visits
        # A child must beat the best rate and be visited at least as often
        # as the current best.
        if child_rate > best_rate and child_visits >= highest_visits:
            best_action = action
            best_rate = child_rate
            highest_visits = child_visits

    print("MCTS Vanilla bot " + str(identity_of_bot) +
          " picking %s with expected win rate %f" %
          (str(best_action), best_rate))
    return best_action
def think(board, state):
    """Perform MCTS by sampling games and building the game tree.

    The chosen action is also recorded in the external ``micro_actions``
    mapping, keyed by its first two components.

    Args:
        board: The game setup.
        state: The state of the game.

    Returns:
        The action of the most visited root child, or None when no child has
        been visited.
    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None,
                         parent_action=None,
                         action_list=board.legal_actions(state))
    max_child_visits = 0
    selected_action = None
    # NOTE(review): next_state is never reset to `state`, so moves accumulate
    # across iterations, and `sampled_game` is only passed to
    # traverse_nodes - confirm this is intended.
    next_state = state

    for step in range(num_nodes):
        sampled_game = state
        node = root_node

        child_node = traverse_nodes(node, sampled_game, identity_of_bot, board)

        # Check whether the game is over at the selected node.
        if child_node.parent is not None:
            check_win_state = board.next_state(next_state,
                                               child_node.parent_action)
            has_won = board.is_ended(check_win_state)
        else:
            has_won = board.is_ended(next_state)

        if not has_won:
            expanded_node = expand_leaf(child_node, board, next_state)
            next_state = board.next_state(next_state,
                                          expanded_node.parent_action)
            win_dict = rollout(next_state, board)
            backpropagate(expanded_node, win_dict)

    # Pick the most visited child of the root.
    for children in root_node.child_nodes.values():
        if children.visits > max_child_visits:
            max_child_visits = children.visits
            selected_action = children.parent_action

    # Nothing was selected (no visited children): return None instead of
    # raising TypeError by indexing None below.
    if selected_action is None:
        return None

    micro_actions[(selected_action[0], selected_action[1])].append(
        (selected_action[2], selected_action[3]))
    return selected_action
def onClick(self, event):
    """Handle a board click: place the player's piece, then let the AI reply.

    Clicks for which ``get_intersection`` returns -1 for either coordinate,
    and clicks on an occupied cell, are ignored. After the player's move the
    winner is checked. If there is none, an MCTS search picks the AI's move,
    which is placed and checked for a win as well.

    Args:
        event: The click event; its ``x`` and ``y`` attributes are read.
    """
    x, y = self.get_intersection(event.x, event.y)
    if x == -1 or y == -1:
        return

    board_coords = self.get_board_coordinates(x, y)
    row, col = board_coords[1], board_coords[0]

    # After the first move the target cell must be empty.
    if not self.first_move:
        board = self.board_state.get_board()
        if board[row][col] != 0:
            return

    # Player is able to place a piece.
    if self.first_move:
        self.first_move = False
        self.placed_pieces.append(
            Piece(row, col, self.player_turn, self.placePiece(x, y), self))

        # The first move creates the initial board state from an empty grid.
        new_board = [[0] * BOARD_SIZE for _ in range(BOARD_SIZE)]
        new_board[row][col] = BLACK
        self.board_state = BoardState(grid=new_board,
                                      recent_move=(row, col),
                                      turn=BLACK,
                                      search_breadth=1)
        self.past_board_states.append(self.board_state)
        self.player_turn = (-1) * self.player_turn
    else:
        self.placed_pieces.append(
            Piece(row, col, self.player_turn, self.placePiece(x, y), self))
        # The state before the move is recorded, then the move is played.
        self.past_board_states.append(self.board_state)
        self.board_state = self.board_state.play(row, col)
        self.player_turn = (-1) * self.player_turn

    possible_winner = self.board_state.get_winner()
    if possible_winner != 0:
        self.winner(possible_winner)
        return

    # No winner yet: let the MCTS-based AI choose the next board state.
    ai_mcts_node = MCTSNode(self.board_state)
    ai_mcts_tree = MCTSTree(ai_mcts_node)
    next_state = ai_mcts_tree.best_move(time_cutoff=WAIT_TIME)
    self.board_state = next_state

    ai_move = next_state.get_recent_move()
    print("("+str(ai_move[0])+", "+str(ai_move[1])+")")

    self.placed_pieces.append(
        Piece(ai_move[1], ai_move[0], self.player_turn,
              self.placePiece((ai_move[1]+1)*self.grid_interval,
                              (ai_move[0]+1)*self.grid_interval),
              self))
    self.past_board_states.append(self.board_state)
    self.player_turn = (-1) * self.player_turn

    possible_winner = self.board_state.get_winner()
    if possible_winner != 0:
        self.winner(possible_winner)