Ejemplo n.º 1
0
class MonteCarloSearchTree:
    def __init__(self, game_type, game_config):
        """
        Create the state manager for the requested game and start a new game on it.
        :param game_type: passed through unchanged to StateManager
        :param game_config: mapping passed through to StateManager; must contain the key "c"
        """
        manager = StateManager(game_type, game_config)
        self.state_manager = manager
        self.root = None  # The tree starts out without a root node
        self.c = game_config["c"]  # Exploration constant

        manager.init_new_game()

    def set_root(self, node):
        """
        Replace the node this tree treats as its root.
        :param node: Node - stored as self.root
        :return: None
        """
        self.root = node

    def get_augmented_value(self, node, player):
        """
        Calculation needed in order to perform the Tree Policy
        :param node: Node
        :param player: int
        :return: float
        """
        c = self.c if player == 1 else -self.c
        return node.value + c * sqrt(log(node.parent.total) / (1 + node.total))

    def select(self, root):
        """
        Calculate the the augmented value for each child, and select the best path for the current player to take.
        :param root: Node
        :return:
        """
        # Calculate the augmented values needed for the tree policy
        children = [(node, self.get_augmented_value(node, root.player))
                    for node in root.children]

        # Tree Policy = Maximise for P1 and minimize for P2
        if root.player == 1:
            root, value = max(children, key=operator.itemgetter(1))
        else:
            root, value = min(children, key=operator.itemgetter(1))
        return root

    def selection(self):
        """
        Tree search - Traversing the tree from the root to a leaf node by using the tree policy.
        :return: Node
        """
        root = self.root
        children = root.get_children()

        # While root is not a leaf node
        while len(children) != 0:
            root = self.select(root)
            children = root.get_children()

        return root

    def expansion(self, leaf):
        """
        Node Expansion - ask the state manager for the child nodes of the leaf's state and
        attach them to the leaf, so that the leaf becomes their parent node.
        :param leaf: Node - node to expand; its children attribute is overwritten
        :return: Node - the same leaf, now holding its children
        """
        # Get all legal child states from leaf state
        successors = self.state_manager.get_child_nodes(leaf.state)
        leaf.children = successors

        # Every child is played by the player that follows the leaf's player
        mover = get_next_player(leaf.player)
        for successor in successors:
            successor.player = mover
            successor.parent = leaf

        # Tree is now expanded, return the leaf, and simulate to game over
        return leaf

    def simulation(self, node):
        """
        Leaf Evaluation - play a random roll-out from the node's state until the state manager
        reports no further child nodes, then report whether player 1 won.
        :param node: Node - starting point of the roll-out; its state and player are read
        :return: int - 1 if the winner is player 1, else 0
        """
        successors = self.state_manager.get_child_nodes(node.state)
        to_move = node.player
        while len(successors) != 0:
            # Use the default policy (random) to select a child
            chosen = random.choice(successors)
            to_move = get_next_player(to_move)
            successors = self.state_manager.get_child_nodes(chosen.state)
        # The side to move in the final state has no moves left, so the win is credited
        # to the other side, i.e. the one who made the last move
        return int(get_next_player(to_move) == 1)

    @staticmethod
    def backward(sim_node, z):
        """
        Backward propagation - Passing the evaluation of a final state back up the tree, updating relevant data
        (see course lecture notes) at all nodes and edges on the path from the final state to the tree root.
        :param sim_node: Node - leaf node to go backward from
        :param z: int - 1 if player 1 won, else 0
        :return: None
        """
        node = sim_node
        node.total += 1
        while node.parent:
            node.parent.total += 1
            node.value += (z - node.value) / node.total
            node = node.parent

    def select_actual_action(self, player):
        """
        To select the actual action to take in the game, select the edge with the highest visit count
        :return: Node
        """
        children = [(child, child.value) for child in self.root.children]

        # Tree Policy = Maximise for P1 and minimize for P2
        if player == 1:
            root, value = max(children, key=operator.itemgetter(1))
        else:
            root, value = min(children, key=operator.itemgetter(1))
        return root

    def tree_print(self):
        nodes = [self.root]
        while nodes:
            curr = nodes[0]
            nodes = nodes[1:]
            print((curr.total, curr.player))
            nodes += curr.children