MC_EXPLORATION_CONSTANT, a_net=ANET, epsilon=EPSILON) for i in tqdm(range(EPISODES + 1)): # No action needed to reach initial state action = None state = mc_game.get_simple_state() # Init Monte Carlo root root = Node(state, player, None, action, mc_game.get_reversed_binary()) while not actual_game.is_terminal_state(): if i in DISPLAY_INDICES: visualizer.draw(actual_game.get_state(), DISPLAY_DELAY) # Find the best move using MCTS new_root, prev_root_children = mc.tree_search( root, MC_NUMBER_SEARCH_GAMES) # Distribution of visit counts along all arcs emanating from root D = [ child.visits / root.visits for child in prev_root_children ] # Add case to RBUF RBUF.add(root.state, root.reversed_state, D)
def tournament(self):
    """Play every configured model pairing and print how often the second model wins.

    For each pair in ``self.vs`` a fresh ``HexGame`` of size ``self.size`` is
    created and ``self.number_of_games`` games are played on it. The model
    that moves first alternates between the two entries of the pair from
    game to game (``players[i % 2]``), while the board is always reset with
    starting player 1.

    A win is credited to the second model of the pair when the name of the
    player who made the last move equals ``self.model_postfixes[players[1]]``.
    NOTE(review): this assumes each AI's ``name`` is its model postfix and
    that the last mover is the winner -- confirm against ``init_AI`` and
    ``HexGame``.

    Results are printed to stdout; nothing is returned.
    """
    self.init_AI()

    # Chance of sampling a move from the model's predicted distribution
    # instead of taking its preferred index. When displaying games the
    # threshold is 0, so the preferred move is used (random() is almost
    # never exactly 0).
    explore_threshold = 0 if self.display else 0.2

    for players in self.vs:
        # Resolved once per pairing: it does not change between games, and
        # the summary below must work even when no game is played.
        smarter = self.model_postfixes[players[1]]
        print(
            f"Model {self.model_postfixes[players[0]]} vs. model {smarter}"
        )

        starter = 1
        game = HexGame(self.size, starter)
        visualizer = HexMapVisualizer(game.board.cells.values(),
                                      True,
                                      self.size,
                                      game_type="hex")
        smarter_wins = 0

        for i in range(self.number_of_games):
            # Alternate which model of the pair makes the first move.
            player = self.AI[players[i % 2]]
            # Reset per game so a stale winner from the previous game can
            # never be counted if no move is made.
            prev_player = None

            while not game.is_terminal_state():
                if self.display:
                    visualizer.draw(game.get_state(), self.delay)

                state = game.get_simple_state()
                legal_moves = game.get_reversed_binary()
                possible_states = game.generate_possible_child_states()
                # `pred` is used as a probability vector over the child
                # states and `idx` as the model's chosen index into them.
                pred, idx = player.model.get_move(state, legal_moves)

                if random() > explore_threshold:
                    best_index = idx
                else:
                    best_index = np.random.choice(np.arange(len(pred)),
                                                  p=pred)

                game.do_action(possible_states[best_index]["action"])
                prev_player = player.name
                player = self.change_turn(players, player)

            if self.display:
                # Linger on the final position longer than on regular moves.
                visualizer.draw(game.get_state(), self.delay * 3)

            if prev_player == smarter:
                smarter_wins += 1

            game.reset(starter, hard=True)

        # Guard the percentage so an empty tournament reports 0.0% instead
        # of raising ZeroDivisionError.
        if self.number_of_games:
            win_percentage = smarter_wins / self.number_of_games * 100
        else:
            win_percentage = 0.0
        print(
            f"Model {smarter} won {smarter_wins} out of {self.number_of_games} games ({win_percentage}%)"
        )