Ejemplo n.º 1
0
    def get_move(self, game_state: go.GameState):
        """Run the configured number of playouts and pick the most visited action.

        Each of the ``self._n_playout`` playouts is handed its own copy of
        ``game_state``, so the caller's state object is never passed to
        ``_playout`` directly.

        game_state: the current game state

        Return: the action of the root child with the highest ``_n_visits``
        (the first such child in iteration order on ties), or ``PASS_MOVE``
        when the root is still a leaf after all playouts.
        """
        for _ in range(self._n_playout):
            self._playout(game_state.copy())

        root = self._root
        if root.is_leaf():
            # Nothing was expanded under the root, so there is no child to pick.
            return PASS_MOVE

        best_action, _ = max(
            root._children.items(),
            key=lambda action_and_node: action_and_node[1]._n_visits,
        )
        return best_action
Ejemplo n.º 2
0
    def get_move_probs(self, game_state: go.GameState):
        """Run the configured number of playouts and return actions with probabilities.

        Each of the ``self._n_playout`` playouts is handed its own copy of
        ``game_state``. The probabilities are derived from the visit counts of
        the root's children, scaled by ``1 / self._temperature`` in log space
        and passed through ``softmax``.

        game_state: the current game state

        Return: ``(acts, act_probs)`` where ``acts`` is a tuple of the root's
        child actions and ``act_probs`` is the result of ``softmax`` over the
        temperature-scaled log visit counts, in the same order. Returns
        ``(None, None)`` when the root is still a leaf after all playouts.
        """
        for _ in range(self._n_playout):
            self._playout(game_state.copy())

        root = self._root
        if root.is_leaf():
            return None, None

        # Split the root's children into parallel tuples of actions and visits.
        acts, visits = zip(
            *((act, node._n_visits) for act, node in root._children.items())
        )

        inverse_temperature = 1.0 / self._temperature
        # The small epsilon keeps log() finite for children with zero visits.
        log_visits = np.log(np.array(visits) + 1e-10)
        act_probs = softmax(inverse_temperature * log_visits)

        return acts, act_probs