コード例 #1
0
        mc_game = HexGame(SIZE, player)
        mc = MCTS(mc_game,
                  MC_EXPLORATION_CONSTANT,
                  a_net=ANET,
                  epsilon=EPSILON)

        for i in tqdm(range(EPISODES + 1)):

            # No action needed to reach initial state
            action = None

            state = mc_game.get_simple_state()

            # Init Monte Carlo root
            root = Node(state, player, None, action,
                        mc_game.get_reversed_binary())

            while not actual_game.is_terminal_state():
                if i in DISPLAY_INDICES:
                    visualizer.draw(actual_game.get_state(), DISPLAY_DELAY)

                # Find the best move using MCTS
                new_root, prev_root_children = mc.tree_search(
                    root, MC_NUMBER_SEARCH_GAMES)

                # Distribution of visit counts along all arcs emanating from root
                D = [
                    child.visits / root.visits for child in prev_root_children
                ]

                # Add case to RBUF
コード例 #2
0
    def tournament(self):
        """Play every configured model pairing and print a win summary.

        For each entry in ``self.vs`` (an indexable pair of model keys), the
        two models play ``self.number_of_games`` games of Hex on a board of
        size ``self.size``. The model that makes the first move alternates
        from game to game. After each pairing, the number of games won by the
        second-listed model is printed.

        Reads ``self.vs``, ``self.model_postfixes``, ``self.AI``,
        ``self.size``, ``self.number_of_games``, ``self.display`` and
        ``self.delay``; calls ``self.init_AI()`` once up front. Returns
        ``None``; results are reported via ``print``.
        """
        self.init_AI()

        for players in self.vs:
            print(
                f"Model {self.model_postfixes[players[0]]} vs. model {self.model_postfixes[players[1]]}"
            )
            # The board's starting player is always 1; which *model* moves
            # first is alternated per game via ``i % 2`` below.
            starter = 1
            game = HexGame(self.size, starter)
            visualizer = HexMapVisualizer(game.board.cells.values(),
                                          True,
                                          self.size,
                                          game_type="hex")

            # Loop-invariant, so bind it once here. This also keeps the
            # summary below valid when ``number_of_games`` is 0.
            # NOTE(review): presumably the second-listed model is the more
            # trained one, and ``player.name`` equals its postfix -- confirm.
            smarter = self.model_postfixes[players[1]]
            smarter_wins = 0

            for i in range(self.number_of_games):
                # Alternate which model makes the first move.
                player = self.AI[players[i % 2]]
                # Name of the model that made the most recent move; stays
                # None if the game is already terminal before any move.
                prev_player = None

                while not game.is_terminal_state():
                    if self.display:
                        visualizer.draw(game.get_state(), self.delay)

                    state = game.get_simple_state()
                    legal_moves = game.get_reversed_binary()
                    possible_states = game.generate_possible_child_states()

                    pred, idx = player.model.get_move(state, legal_moves)

                    # Without display, roughly 20% of moves are sampled from
                    # the model's move distribution instead of taking ``idx``.
                    # With display the threshold is 0, so ``idx`` is
                    # (practically) always used.
                    if random() > (0 if self.display else 0.2):
                        best_index = idx
                    else:
                        best_index = np.random.choice(np.arange(len(pred)),
                                                      p=pred)

                    game.do_action(possible_states[best_index]["action"])

                    prev_player = player.name
                    player = self.change_turn(players, player)

                if self.display:
                    # Hold the final position on screen a little longer.
                    visualizer.draw(game.get_state(), self.delay * 3)

                # The model that made the last move before the terminal state
                # is counted as the winner of this game.
                if prev_player == smarter:
                    smarter_wins += 1

                game.reset(starter, hard=True)

            # Guard against a zero-game tournament instead of dividing by 0.
            if self.number_of_games:
                win_percentage = smarter_wins / self.number_of_games * 100
            else:
                win_percentage = 0.0
            print(
                f"Model {smarter} won {smarter_wins} out of {self.number_of_games} games ({win_percentage}%)"
            )