def play_as_mcts(batch_data, networks, config, connection):
    """ Play a batch of games as MCTS vs. MCTS. """
    roots = create_roots(batch_data)
    # Get network evaluation from the main process.
    data = pack_data_for_eval(batch_data, networks, roots)
    connection.send(("evaluate", data))
    policies, values = connection.recv()
    log(f"Root policies:\n{policies}")
    log(f"Original root values: {values}")
    values = expand_nodes(batch_data, roots, policies, values)
    prepare_actions(batch_data, roots)
    for _ in range(config.MCTS_ITERATIONS):
        selected_nodes = select_nodes(batch_data, roots)
        data = pack_data_for_eval(batch_data, networks, selected_nodes)
        connection.send(("evaluate", data))
        policies, values = connection.recv()
        values = expand_nodes(batch_data, selected_nodes, policies, values)
        backprop_nodes(batch_data, selected_nodes, values)
    return roots
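# The worker above talks to the main process over a Pipe, sending
# ("evaluate", data) requests and receiving (policies, values) replies.
# Below is a minimal, hypothetical sketch of what the main-process side
# of that protocol could look like; the function name and the
# network.predict call are assumptions, not the project's actual code.
def evaluation_server(connection, network):
    while True:
        command, payload = connection.recv()
        if command == "evaluate":
            # Batched network inference; stand-in for the real call.
            policies, values = network.predict(payload)
            connection.send((policies, values))
        elif command == "log":
            # Matches the ("log", [status, pid]) messages sent by play_games.
            message, pid = payload
            print(f"[worker {pid}] {message}")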
def __init__(self, game, playouts=None):
    super().__init__(game)
    if playouts:
        self.ITERATIONS = playouts
    else:
        self.ITERATIONS = Config.MCTS_ITERATIONS
    log("MCTS is using {} playouts.".format(self.ITERATIONS))
def execute_action(self, state): super.__doc__ log("Waiting for player input...") self.state = None self.gui.add_action_listener(self) while self.state is None and self.gui.active: sleep(0.1) return self.state
def execute_action(self, state):
    """ See the base class docstring. """
    actions = self.game.actions(state)
    # Pick a uniformly random legal action.
    index = int(uniform(0, len(actions)))
    chosen = actions[index]
    # self.game.store_random_statistics({a: uniform() for a in actions})
    log("Random action: {}".format(chosen))
    return self.game.result(state, chosen)
def __init__(self, game):
    GameAI.__init__(self, game)
    self.tpt = dict()  # Transposition table.
    game_name = type(self.game).__name__
    # Scale the search depth inversely with board size.
    if game_name == "Latrunculi":
        self.MAX_DEPTH = (12 - self.game.size) if self.game.size < 8 else 5
    elif game_name == "Connect_Four":
        self.MAX_DEPTH = 13 - self.game.size
    elif game_name == "Othello":
        self.MAX_DEPTH = 15 - self.game.size
    log(f"Minimax is using a max search depth of {self.MAX_DEPTH}")
def softmax_sample(child_nodes, visit_counts, temperature=2.5):
    """
    Perform softmax sampling on a set of nodes, based on a probability
    distribution of their visit counts.
    """
    sum_visits = sum(visit_counts)
    # Multiplying the normalized visit counts by the temperature sharpens
    # the distribution for temperatures above 1 (the inverse of the usual
    # divide-by-temperature convention).
    prob_visits = [(v / sum_visits) * temperature for v in visit_counts]
    exps = np.exp(prob_visits)
    log(f"Probabilities of softmax: {exps / sum(exps)}")
    return np.random.choice(child_nodes, p=exps / sum(exps))
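# A quick standalone check of the scaling above, using hypothetical visit
# counts. Assumes numpy is imported as np, as in softmax_sample.
def _softmax_temperature_demo():
    visit_counts = np.array([10, 5, 1])
    probs = visit_counts / visit_counts.sum()
    for t in (1.0, 2.5):
        exps = np.exp(probs * t)
        print(t, exps / exps.sum())
    # t=1.0 -> approx. [0.43, 0.32, 0.25]
    # t=2.5 -> approx. [0.59, 0.27, 0.14] (sharper toward the most-visited node)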
def show_move(self, state, action):
    player_color = self.player_color(state.player)
    if action:
        source, dest = action.source, action.dest
        log("{} moved from {} to {}".format(player_color, source, dest))
        self.draw_status_text("{} moved from {} to {}".format(
            player_color, source, dest))
    else:
        log("{} has no moves, pass made.".format(player_color))
        self.draw_status_text("{} passed".format(player_color))
    result = self.game.result(state, action)
    self.update(result)
def __init__(self, game, playouts=Config.MCTS_ITERATIONS):
    super().__init__(game)
    if playouts is not None:
        self.ITERATIONS = playouts
        self.MAX_MOVES = 5000
    elif self.game.size > 3:
        # Size-based table, used when playouts=None is passed explicitly:
        # larger boards get fewer playouts but a higher move cap.
        playout_options = [800, 200, 35, 20, 10, 5, 5]
        max_moves = [400, 1200, 1600, 2400, 5000, 5000, 5000]
        self.ITERATIONS = playout_options[self.game.size - 4]
        self.MAX_MOVES = max_moves[self.game.size - 4]
    log("MCTS is using {} playouts and {} max moves.".format(
        self.ITERATIONS, self.MAX_MOVES))
def getorigin(self, eventorigin):
    # Mouse-click handler: builds a (source, dest) move from successive clicks.
    x = eventorigin.x
    y = eventorigin.y
    coords = self.field_clicked(x, y, self.state.board, self.left_space,
                                self.top_space, self.board_field_size)
    # print("Coords: {}".format(coords))
    if self.is_currentPlayer_piece(
            self.state.player, self.state.board[coords]
    ) and self.mouseclick_move_list == [] and self.has_legal_move(coords):
        # First click on one of the current player's pieces: select it as source.
        self.mouseclick_move_list.append(coords)
        self.draw_status_text(
            "Selected source coords: ({})".format(coords))
    elif self.game.action_type == "single":
        if self.is_legal_move(None, coords):
            self.make_move(self.state, Action(None, coords))
            self.mouseclick_move_list.clear()
    else:
        if self.mouseclick_move_list == [] and self.is_currentPlayer_piece(
                self.state.player,
                int(self.state.board[coords[0], coords[1]] * -0.5)):
            self.make_move(self.state, Action(coords, coords))
        elif len(self.mouseclick_move_list) == 1:
            if self.mouseclick_move_list[0] == coords:
                # Clicking the selected piece again deselects it.
                self.mouseclick_move_list.pop()
            elif self.is_currentPlayer_piece(self.state.player,
                                             self.state.board[coords]):
                # Clicking another own piece changes the selection.
                self.mouseclick_move_list.pop()
                self.mouseclick_move_list.append(coords)
            else:
                if self.is_legal_move(self.mouseclick_move_list[0], coords):
                    self.draw_status_text(
                        "Selected destination coords: ({})".format(coords))
                    self.mouseclick_move_list.append(coords)
                    self.make_move(
                        self.state,
                        Action(self.mouseclick_move_list[0],
                               self.mouseclick_move_list[1]))
                    self.mouseclick_move_list.clear()
        elif len(self.mouseclick_move_list) > 2:
            self.mouseclick_move_list.pop()
    log("mouseclick_move_list\n{}".format(self.mouseclick_move_list))
    self.update(self.state)
def execute_action(self, state):
    """ See the base class docstring. """
    self.player = state.player
    actions = self.game.actions(state)
    if actions == [None]:
        return self.game.result(state, None)
    best_action = None
    highest_value = -float("inf")
    self.time_started = time()
    # Traverse the possible actions, using minimax to find the best one.
    for action in actions:
        result = self.game.result(state, action)
        value = self.minimax(result, self.MAX_DEPTH, False, -10000, 10000)
        if value > highest_value:
            highest_value = value
            best_action = action
    log("Minimax action: {} worth: {}".format(best_action, highest_value))
    return self.game.result(state, best_action)
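# execute_action above delegates to a recursive self.minimax helper that is
# not shown in this section. Below is a minimal, hypothetical alpha-beta
# sketch matching the call minimax(result, MAX_DEPTH, False, -10000, 10000);
# the heuristic self.evaluate call is an assumption, not the project's code.
def minimax(self, state, depth, maximizing, alpha, beta):
    if depth == 0 or self.game.terminal_test(state):
        return self.evaluate(state)  # Assumed heuristic evaluation.
    best = -float("inf") if maximizing else float("inf")
    for action in self.game.actions(state):
        value = self.minimax(self.game.result(state, action),
                             depth - 1, not maximizing, alpha, beta)
        if maximizing:
            best = max(best, value)
            alpha = max(alpha, best)
        else:
            best = min(best, value)
            beta = min(beta, best)
        if beta <= alpha:
            break  # Prune: the opponent will never allow this line.
    return best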
def execute_action(self, node):
    """ See the base class docstring. """
    best_node = self.choose_action(node)
    self.chosen_node = best_node
    log(f"Root: {node}")
    for n in node.children.values():
        log(n.pretty_desc())
    log("MCTS action: {}, q value: {}.".format(best_node.action,
                                               best_node.q_value))
    self.game.store_search_statistics(node)
    return best_node.state
def execute_action(self, state): super.__doc__ log("MCTS is calculating the best move...") root_node = Node(state, None) # Perform iterations of selection, simulation, expansion, and back propogation. # After the iterations are done, the child of the root node with the highest # number of mean value (value/visits) are chosen as the best action. for _ in range(self.ITERATIONS): node = self.select(root_node) if node.visits > 0 and not self.game.terminal_test(node.state): # Expand tree from available actions. Select first expanded node as # new current and simulate an action from this nodes possible actions. actions = self.game.actions(node.state) self.expand(node, actions) node = node.children[ actions[0]] # Select first child of expanded Node. # Perform rollout, simulate till end of game and return outcome. value = self.rollout(root_node.state, node) self.back_propagate( node, -value if node.state.player == root_node.state.player else value) node = root_node for node in root_node.children.values(): log(node.pretty_desc()) best_node = max(root_node.children.values(), key=lambda n: n.visits) root_node = None log("MCTS action: {}, likelihood of win: {}%".format( best_node.action, int((best_node.mean_value * 50) + 50))) return best_node.state
def play_games(games, w_players, b_players, config,
               network_steps=None, gui=None, connection=None):
    """
    Play a number of games to the end, with capabilities for playing as
    any type of agent and any type of game.

    Parameters:
        games         - List of game objects to be played out. The state of the
                        games (result and state history) is updated during the process.
        w_players     - List of agents controlling the white pieces.
        b_players     - List of agents controlling the black pieces.
        config        - Config object with a variety of parameters to be used
                        during the games.
        network_steps - Dictionary of game -> dict, mapping which game should
                        target which generation of neural network. Only relevant
                        if MCTS is used.
        gui           - GUI object used to visualize the games. Only available if
                        batch play is not active, meaning only one game is played.
        connection    - Pipe object with a connection to the main process. Used
                        when requesting network evaluations, among other things.
    """
    # List of lists, each containing a game to be played, the current state
    # of that game, the agent playing as player 1, and the agent playing as
    # player 2.
    batch_data = [[games[i], games[i].start_state(), w_players[i], b_players[i]]
                  for i in range(len(games))]
    total_games = len(games)
    counters = [0 for _ in games]  # Counts the number of moves in each game.

    if gui is not None:
        sleep(1)
        # Update the GUI to clear the board, if several games are played sequentially.
        gui.update(batch_data[0][1])

    while batch_data:
        player = (batch_data[0][2] if batch_data[0][0].player(batch_data[0][1])
                  else batch_data[0][3])
        time_turn = time()
        if is_mcts(player):
            # Run MCTS simulations and get the resulting root nodes.
            roots = play_as_mcts(batch_data, network_steps, config, connection)

        finished_games_indexes = []
        for (i, (game, state, player_1, player_2)) in enumerate(batch_data):
            player = player_1 if game.player(state) else player_2
            # execute_action receives a root node if the player is MCTS,
            # otherwise it receives a state.
            state = player.execute_action(roots[i] if is_mcts(player) else state)
            batch_data[i][1] = state

            if gui is not None:
                if type(player_1).__name__ != "Human" and not state.player:
                    sleep(config.GUI_AI_SLEEP)
                elif type(player_2).__name__ != "Human" and state.player:
                    sleep(config.GUI_AI_SLEEP)
                gui.update(state)

            log(state)
            counters[i] = counters[i] + 1
            if game.terminal_test(state) or counters[i] > config.LATRUNCULI_MAX_MOVES:
                finished_games_indexes.append(i)
                util = game.utility(state, True)
                game.terminal_value = util
                winner = "White" if util == 1 else "Black" if util == -1 else "Draw"
                log(f"Game over! Winner: {winner}")
            else:
                # Append the state to the game history, unless the state is terminal.
                game.history.append(state)

        turn_took = "{0:.3f}".format(time() - time_turn)
        num_active = len(batch_data)
        num_moves = len(batch_data[0][0].history)
        name_1 = type(batch_data[0][2]).__name__
        name_2 = type(batch_data[0][3]).__name__

        # Remove games that are finished.
        elems_removed = 0
        for i in finished_games_indexes:
            batch_data.pop(i - elems_removed)
            elems_removed += 1
        num_active -= elems_removed

        if connection:
            # Send logging information to the main process if playing as MCTS.
            status = (f"Moves: {num_moves}. Active games: " +
                      f"{num_active}/{total_games}. Turn took {turn_took} s")
            if name_1 != "MCTS" or name_2 != "MCTS":
                status += " - Eval vs. {}".format(name_1 if name_2 == "MCTS" else name_2)
            elif network_steps is not None:
                status += " - Eval vs. Macro Networks"
            connection.send(("log", [status, getpid()]))
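# play_games relies on an is_mcts helper that is not shown in this section.
# A one-line hypothetical sketch, assuming agent classes are named after
# their type (as the name_1/name_2 string checks above suggest):
def is_mcts(player):
    return type(player).__name__ == "MCTS"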
def notify(self, observable, *args, **kwargs):
    log("Got {}, {} {}".format(args, kwargs, observable))
    self.state = observable.actions(self.state)
    self.update(self.state)