Exemple #1
0
def run_mcts(config: MuZeroConfig, root: Node, action_history: ActionHistory, network: BaseNetwork):
    """
    Run Monte Carlo Tree Search starting from an existing root node.

    Performs ``config.num_simulations`` simulations. Every simulation starts
    at ``root``, walks down through already-expanded nodes using
    ``select_child``, expands the first unexpanded node it reaches with the
    network's recurrent inference, and backpropagates the predicted value
    along the visited path.

    ``root`` must already be expanded when this is called: the leaf's parent
    is read from the second-to-last entry of the visited path, which does not
    exist (IndexError) if the descent never leaves the root.
    """
    value_bounds = MinMaxStats(config.known_bounds)

    for _ in range(config.num_simulations):
        # Each simulation records its actions on a clone of the given history.
        sim_history = action_history.clone()
        leaf = root
        visited = [leaf]

        # Descend until we reach a node that has not been expanded yet.
        while leaf.expanded():
            chosen_action, leaf = select_child(config, leaf, value_bounds)
            sim_history.add_action(chosen_action)
            visited.append(leaf)

        # Inside the tree the dynamics function produces the next hidden state
        # from the parent's hidden state and the action that led to the leaf.
        leaf_parent = visited[-2]
        prediction = network.recurrent_inference(
            leaf_parent.hidden_state,
            sim_history.last_action(),
        )
        expand_node(
            leaf,
            sim_history.to_play(),
            sim_history.action_space(),
            prediction,
        )

        backpropagate(
            visited,
            prediction.value,
            sim_history.to_play(),
            config.discount,
            value_bounds,
        )
def run_mcts(config: MuZeroConfig, action_history: ActionHistory,
             network: BaseNetwork, game, train):
    """
    Core Monte Carlo Tree Search algorithm.

    Creates a fresh root node, expands it with the network's initial
    inference on the game's observation, and then runs
    ``config.num_simulations`` simulations. Each simulation starts at the
    root, follows ``select_child`` until it reaches an unexpanded node,
    expands that node via the network's recurrent inference, and
    backpropagates the predicted value along the visited path.

    Side effects: prints ``game.make_observation_str()`` once, and prints two
    timing lines per simulation. "cpu time" is the time spent outside the
    ``recurrent_inference`` call (tree descent, expansion, backpropagation);
    "gpu time" is the time spent inside that call.

    Args:
        config: Supplies ``num_simulations`` and ``discount``; also forwarded
            to ``select_child`` and ``add_exploration_noise``.
        action_history: Cloned at the start of every simulation.
        network: Supplies ``initial_inference`` and ``recurrent_inference``.
        game: Supplies ``make_observation``, ``make_observation_str``,
            ``to_play`` and ``legal_actions``.
        train: When truthy, exploration noise is added to the root.

    Returns:
        The root node of the search tree after all simulations.
    """
    root = Node(0)
    # NOTE(review): -1 presumably selects the most recent state; confirm
    # against game.make_observation.
    current_observation = game.make_observation(-1)

    print(game.make_observation_str())

    expand_node(root, game.to_play(), game.legal_actions(),
                network.initial_inference(current_observation))
    if train:
        add_exploration_noise(config, root)

    for _ in range(config.num_simulations):
        # perf_counter is monotonic and high-resolution, so the differences
        # below are reliable even for very short simulations; time.time() is
        # a wall clock that can jump and may tick coarsely.
        t0 = time.perf_counter()
        history = action_history.clone()
        node = root
        search_path = [node]

        while node.expanded():
            action, node = select_child(config, node)
            history.add_action(action)
            search_path.append(node)

        # Inside the search tree we use the dynamics function to obtain the next
        # hidden state given an action and the previous hidden state.
        # This needs at least two entries in search_path, i.e. the root must
        # have been expanded above so the descent took at least one step.
        parent = search_path[-2]
        t1 = time.perf_counter()
        network_output = network.recurrent_inference(
            parent.hidden_state,
            history.last_action().index)
        t2 = time.perf_counter()
        expand_node(node, history.to_play(), history.action_space(),
                    network_output)

        backpropagate(search_path, network_output.value, history.to_play(),
                      config.discount)
        t3 = time.perf_counter()
        # (t1 - t0) is the descent, (t3 - t2) is expansion + backpropagation.
        print("cpu time", t1 - t0 + t3 - t2)
        print("gpu time", t2 - t1)
    return root
Exemple #3
0
def run_mcts(config: MuZeroConfig, root: Node, action_history: ActionHistory, network: BaseNetwork):
    """
    Grow the search tree under ``root`` with repeated MCTS simulations.

    Runs ``config.num_simulations`` iterations. Each one follows
    ``select_child`` from ``root`` down to the first unexpanded node, expands
    that node from the network's recurrent inference, and then backpropagates
    the predicted value over the traversed path.

    ``root`` is expected to be expanded already; otherwise the traversed path
    holds only the root and looking up the leaf's parent raises IndexError.
    """
    stats = MinMaxStats(config.known_bounds)

    for _ in range(config.num_simulations):
        rollout_history = action_history.clone()
        cursor = root
        trail = [cursor]

        # Follow selected children until an unexpanded node is reached.
        while cursor.expanded():
            picked, cursor = select_child(config, cursor, stats)
            rollout_history.add_action(picked)
            trail.append(cursor)

        # Within the tree, the dynamics function yields the next hidden state
        # from the previous hidden state and the action taken.
        previous = trail[-2]
        inferred = network.recurrent_inference(
            previous.hidden_state,
            rollout_history.last_action(),
        )
        expand_node(
            cursor,
            rollout_history.to_play(),
            rollout_history.action_space(),
            inferred,
        )

        backpropagate(
            trail,
            inferred.value,
            rollout_history.to_play(),
            config.discount,
            stats,
        )
Exemple #4
0
 def action_history(self) -> ActionHistory:
     """Wrap the recorded actions in a new ActionHistory.

     The result is constructed from this object's ``history`` and
     ``action_space_size`` and represents the actions executed inside the
     search.
     """
     recorded_actions = self.history
     num_actions = self.action_space_size
     return ActionHistory(recorded_actions, num_actions)
Exemple #5
0
 def clone(self):
     """Return a new ActionHistory built from this object's state.

     The new instance is constructed from ``self.history`` and
     ``self.action_space_size``.
     """
     # NOTE(review): whether the history is copied or shared depends on the
     # ActionHistory constructor, which is not visible here; confirm.
     recorded_actions = self.history
     num_actions = self.action_space_size
     return ActionHistory(recorded_actions, num_actions)