Example 1
    def simulation(self, node):
        """
        Leaf Evaluation - Estimating the value of a leaf node in the tree by doing a roll-out simulation using the
        default policy from the leaf node’s state to a final state.
        :return: int - 1 if player 1 won the simulated game, otherwise 0
        """
        current_node = node
        children = self.state_manager.get_child_nodes(current_node.state)
        player = node.player
        while len(children) != 0:
            # Use the default policy (random) to select a child
            current_node = random.choice(children)
            player = get_next_player(player)
            children = self.state_manager.get_child_nodes(current_node.state)
        # The winner is the previous player, i.e. the one who made the last move
        winner = get_next_player(player)
        return int(winner == 1)
    def simulate(self):
        """
        Run G consecutive games (a.k.a. episodes) of self.game_type using fixed values for the game parameters:
        N and K for NIM, B_init for Ledge. When the G games have finished, the simulator summarizes the win-loss
        statistics with a simple statement such as (for G = 50): Player 1 wins 40 of 50 games (80%).
        """
        wins = 0  # Number of times player 1 wins

        # Actual games being played
        for episode in range(1, self.episodes + 1):
            logging.info("Episode: {}".format(episode))
            # The actual game being played this episode
            game = get_new_game(self.game_type,
                                self.game_config,
                                verbose=self.verbose)

            # For each game, a new Monte Carlo Search Tree is made
            mcts = MonteCarloSearchTree(self.game_type, self.game_config)
            state, player = game.get_current_state(), self.get_start_player()
            mcts.set_root(Node(state, None, player=player))

            # While the actual game is not finished
            while not game.is_winning_state():
                # Before selecting each new action, perform M simulations in the MCTS
                for _ in range(self.num_sim):
                    # One iteration of Monte Carlo Tree Search consists of four steps
                    # 1. Selection
                    leaf = mcts.selection()
                    # 2. Expand selected leaf node
                    sim_node = mcts.expansion(leaf)
                    # 3. Simulation
                    z = mcts.simulation(sim_node)
                    # 4. Backward propagation
                    mcts.backward(sim_node, z)

                # Now use the search tree to choose next action
                new_root = mcts.select_actual_action(player)

                # Perform this action, moving the game from state s -> s'
                game.perform_action(player, new_root.action)

                # Update player
                player = get_next_player(player)

                # Set new root of the MCTS
                mcts.set_root(new_root)

            # If the next player to move would be 2, player 1 made the move that ended the game
            if player == 2:
                wins += 1

        # Report statistics
        logging.info("Player1 wins {} of {} games ({}%)".format(
            wins, self.episodes, round(100 * (wins / self.episodes))))
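The examples above and below rely on a couple of helpers that are never shown: get_next_player and the Node class passed to mcts.set_root. A minimal sketch of what they could look like, assuming a two-player game and the attributes the examples actually touch (state, action, player, parent, children) plus the visits/value bookkeeping that backpropagation needs; the exact implementations are assumptions, not taken from the source:

def get_next_player(player):
    # Two-player convention assumed: 1 -> 2, 2 -> 1
    return 2 if player == 1 else 1


class Node:
    """Assumed minimal search-tree node matching the attributes used in the examples."""

    def __init__(self, state, action, player=1, parent=None):
        self.state = state      # Game state housed by this node
        self.action = action    # Action that led from parent.state to this state (None for the root)
        self.player = player    # Player to move in this state
        self.parent = parent    # Parent node (None for the root)
        self.children = []      # Child nodes, filled in by expansion()
        self.visits = 0         # Visit count, updated during backpropagation
        self.value = 0.0        # Value estimate, updated during backpropagation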
Example 3
    def play_game(self, p1, p2):
        """
        Play one game and return the winner
        :param p1: Actor - player 1
        :param p2: Actor - player 2
        :return: tuple - the winning actor's name and the log of actions played
        """
        actors = {1: p1, 2: p2}
        self.state_manager.init_new_game()
        player = random.randint(1, 2)  # Choose random player to start
        action_log = []
        while not self.state_manager.is_winning_state():
            current_state = self.state_manager.get_current_state()
            action_index = actors[player].topp_policy(player, current_state)
            action = self.state_manager.get_action(player, action_index)
            self.state_manager.perform_actual_action(action)
            player = get_next_player(player)
            action_log.append(action)

        winner = get_next_player(player)  # The previous player made the winning move
        return actors[winner].name, action_log
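play_game returns the winning actor's name together with the action log, which suggests it is driven by a tournament loop. The driver below is purely an illustrative assumption (the actors list, games_per_pair and the win bookkeeping are made up); only play_game itself comes from the example above:

    def run_tournament(self, actors, games_per_pair=25):
        # Round-robin: every pair of actors meets games_per_pair times
        wins = {actor.name: 0 for actor in actors}
        for i, p1 in enumerate(actors):
            for p2 in actors[i + 1:]:
                for _ in range(games_per_pair):
                    winner_name, _ = self.play_game(p1, p2)
                    wins[winner_name] += 1
        for name, count in wins.items():
            logging.info("{} won {} games".format(name, count))
        return wins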
Example 4
    def simulation(self, node):
        """
        Leaf Evaluation - Estimating the value of a leaf node in the tree by doing a roll-out simulation using the
        default policy from the leaf node’s state to a final state.
        :return: float - The estimated value of the node: the critic's estimate, or 1 if player 1 won the rollout and 0 otherwise
        """
        current_state, player = node.state, node.player

        # Use the critic's value estimate with probability 1 - epsilon_critic
        if random.random() > self.actor.epsilon_critic:
            reward = self.actor.value_function(player, current_state)
        # If not, simulate to end of game
        else:
            while not self.state_manager.verify_winning_state(current_state):
                # Get next action using the default policy
                action_index = self.actor.default_policy(player, current_state)
                current_state = self.state_manager.get_next_state(
                    player, current_state, action_index)
                player = get_next_player(player)

            # The winner is the previous player, i.e. the one who made the last move
            winner = get_next_player(player)
            reward = int(winner == 1)
        return reward
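The value z (or reward) produced by simulation is consumed by mcts.backward, which the examples call but never define. A minimal backpropagation sketch, assuming the visits/value attributes from the Node sketch above, where value holds the running mean of the evaluations seen through a node:

    def backward(self, node, z):
        """
        Backpropagation - passing the evaluation z of the simulated node back up the tree,
        updating visit counts and value estimates on the path to the root.
        """
        current = node
        while current is not None:
            current.visits += 1
            # Incremental mean of all evaluations that have passed through this node
            current.value += (z - current.value) / current.visits
            current = current.parent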
Example 5
    def expansion(self, leaf):
        """
        Node Expansion - Generating some or all child states of a parent state, and then connecting the tree node
        housing the parent state (a.k.a. parent node) to the nodes housing the child states (a.k.a. child nodes).
        :return: Node - the expanded leaf node
        """
        # Get all legal child states from leaf state
        leaf.children = self.state_manager.get_child_nodes(leaf.state)

        # Set leaf as their parent node
        child_player = get_next_player(leaf.player)
        for child in leaf.children:
            child.player = child_player
            child.parent = leaf
        # The tree is now expanded; return the leaf so a rollout can be simulated from it
        return leaf
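selection, the first step of every MCTS iteration, is the remaining step the examples call without showing. A UCT-style tree policy sketch, assuming the visits/value attributes from the Node sketch above, the exploration constant c passed to MonteCarloSearchTree in the last example, and an available math import; since the rollout values are from player 1's perspective, player 2 exploits 1 - value:

    def selection(self):
        """
        Tree Search - traversing the tree from the root to a leaf node using the tree policy (UCT).
        :return: Node - the selected leaf node
        """
        current = self.root
        while len(current.children) != 0:
            best, best_score = None, -float("inf")
            for child in current.children:
                # Exploitation term, seen from the player to move at the current node
                q = child.value if current.player == 1 else 1 - child.value
                # Exploration bonus favouring rarely visited children
                u = self.c * math.sqrt(
                    math.log(current.visits + 1) / (child.visits + 1))
                if q + u > best_score:
                    best, best_score = child, q + u
            current = best
        return current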
Example 6
    def simulate(self):
        """
        Run G consecutive games (aka. episodes) of the self.game_type using fixed values for the game parameters
        """
        save_interval = int(self.episodes /
                            (self.save_interval - 1))  # Number of episodes between each saved ANET
        visualizer = Visualizer(
            self.game_config)  # Visualizer that visualizes the played games
        actor = Actor(self.anet_config)  # Initialize the Actor, which holds the ANET
        rbuf = ReplayBuffer()  # Buffer for saving training data (node, D)
        game = StateManager(
            self.game_config
        )  # Init a StateManager that takes care of the actual game
        wins = 0  # Number of times player 1 wins

        # Actual games being played
        for episode in range(1, self.episodes + 1):
            logging.info("Episode: {}".format(episode))

            # Initialize the actual game
            game.init_new_game()
            action_log = []

            # Initialize the MonteCarloSearchTree to a single node with the initialized game state
            state, player = game.get_current_state(), self.get_start_player()
            mcts = MonteCarloSearchTree(actor,
                                        self.game_config,
                                        c=self.mcts_config["c"])
            mcts.set_root(Node(state, None, player=player))

            # While the actual game is not finished
            while not game.is_winning_state():
                # Before selecting each new action, perform M simulations in the MCTS
                for _ in range(self.num_sim):
                    # One iteration of Monte Carlo Tree Search consists of four steps
                    leaf = mcts.selection()
                    sim_node = mcts.expansion(leaf)
                    z = mcts.simulation(sim_node)
                    mcts.backward(sim_node, z)

                # Get the probability distribution over actions from current root/state.
                D = mcts.get_root_distribution()

                # Modifying D for obvious wins or other heuristics
                D = game.apply_heuristics(mcts.root, D)

                # Add training data to the ReplayBuffer (node, D, reward)
                rbuf.add_case((mcts.root, D, mcts.root.value))

                # Select actual move based on D
                new_root = mcts.select_actual_action(player)
                action_log.append(new_root.action)

                # Perform this action, moving the game from state s -> s'
                game.perform_actual_action(new_root.action)

                # Update player
                player = get_next_player(player)

                # Set new root of the MCTS
                mcts.set_root(new_root)

            # End of episode
            visualizer.add_game_log(action_log)

            # Update epsilon for next round of simulations
            actor.update_epsilon()

            # Train ANET and CNET on a random mini-batch of cases from ReplayBuffer
            actor.train(rbuf.get_batch(self.batch_size))

            # Save ANET
            if episode % save_interval == 0 or episode == 1:
                path = "./pretrained/ANET_E{}.pth".format(episode)
                logging.info("Saving model to file {}".format(path))
                torch.save(actor.anet.state_dict(), path)

            # Save visualization of last game
            if self.visualize and episode % self.visualize_interval == 0:
                visualizer.animate_latest_game()

            # If the next player to move would be 2, player 1 made the move that ended the game
            if player == 2:
                wins += 1

        actor.visualize_loss()
        logging.info("Player1 wins {} of {} games ({}%)".format(
            wins, self.episodes, round(100 * (wins / self.episodes))))
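Two more MonteCarloSearchTree calls used above, get_root_distribution and select_actual_action, are also not shown. A common choice, and the assumption in this sketch, is to derive both from the root children's visit counts: the normalized counts become the training target D, and the most visited child becomes the actual move:

    def get_root_distribution(self):
        """
        Return the visit-count distribution D over the root's children,
        used as the training target for the ANET.
        """
        visits = [child.visits for child in self.root.children]
        total = sum(visits)
        return [v / total for v in visits]

    def select_actual_action(self, player):
        """
        Pick the child of the root to actually play: here simply the most visited child.
        (The player argument is unused in this greedy variant.)
        :return: Node - the child node representing the chosen move
        """
        return max(self.root.children, key=lambda child: child.visits)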