new_root, prev_root_children = mc.tree_search( root, MC_NUMBER_SEARCH_GAMES) # Distribution of visit counts along all arcs emanating from root D = [ child.visits / root.visits for child in prev_root_children ] # Add case to RBUF RBUF.add(root.state, root.reversed_state, D) root = new_root action = root.action actual_game.do_action(action) mc.game.do_action(action) root.reset() if i in DISPLAY_INDICES: visualizer.draw(actual_game.get_state(), DISPLAY_DELAY, show=True) state_batch, legal_moves, d_batch = RBUF.get_sample() ANET.train(state_batch, legal_moves, d_batch) # Mix starting players if MIXED_START:
def tournament(self):
    """Play every configured model pairing against each other and print results.

    For each entry in ``self.vs`` (a pair of indices into ``self.AI`` and
    ``self.model_postfixes``), ``self.number_of_games`` games of Hex are
    played on a ``self.size`` board. The model that moves first alternates
    with the game index (``players[i % 2]``), while the board itself is
    always reset with starter ``1``.

    After each pairing, a summary line is printed reporting how many games
    were won by the model at ``players[1]``. A win is credited when the
    ``name`` of the player that made the last move equals that model's
    postfix.

    When ``self.display`` is truthy every position is drawn and the move
    suggested by the model is always used; otherwise a move is sampled from
    the model's predicted distribution roughly 20% of the time.
    """
    self.init_AI()
    for players in self.vs:
        first_label = self.model_postfixes[players[0]]
        # Label of the model whose wins are counted for this pairing.
        # Looked up once per pairing so the summary below is always defined,
        # even when no games are played.
        smarter = self.model_postfixes[players[1]]
        print(f"Model {first_label} vs. model {smarter}")

        starter = 1
        game = HexGame(self.size, starter)
        visualizer = HexMapVisualizer(
            game.board.cells.values(), True, self.size, game_type="hex"
        )
        smarter_wins = 0
        total_games = self.number_of_games

        for i in range(total_games):
            # Alternate which model makes the first move.
            player = self.AI[players[i % 2]]
            # Name of whoever moved last; stays None if no move was made so
            # a result from an earlier game can never be reused by accident.
            prev_player = None

            while not game.is_terminal_state():
                if self.display:
                    visualizer.draw(game.get_state(), self.delay)

                state = game.get_simple_state()
                legal_moves = game.get_reversed_binary()
                possible_states = game.generate_possible_child_states()
                # pred is used as a probability vector over the child
                # states; idx is presumably the model's preferred index
                # (verify against get_move).
                pred, idx = player.model.get_move(state, legal_moves)

                # With display on the threshold is 0, so the model's own
                # choice is (practically) always taken; otherwise about
                # one move in five is sampled from pred instead.
                if random() > (0 if self.display else 0.2):
                    best_index = idx
                else:
                    best_index = np.random.choice(np.arange(len(pred)), p=pred)

                data = possible_states[best_index]
                game.do_action(data["action"])
                prev_player = player.name
                player = self.change_turn(players, player)

            if self.display:
                # Hold the final position on screen a little longer.
                visualizer.draw(game.get_state(), self.delay * 3)

            # The player that made the last move is treated as the winner.
            if prev_player == smarter:
                smarter_wins += 1

            game.reset(starter, hard=True)

        # Guard against a tournament configured with zero games.
        win_pct = smarter_wins / total_games * 100 if total_games > 0 else 0.0
        print(
            f"Model {smarter} won {smarter_wins} out of {total_games} games ({win_pct}%)"
        )