コード例 #1
0
    def backup(self, score=0):
        """Record one visit worth `score` on this node, then recurse upward.

        The node's visit count is incremented and `score` is folded into its
        running average reward. Unless this node is the root, the parent's
        `backup` is then invoked. The score handed to the parent is negated
        when the parent is not a decision node and the parent's acting
        player differs from this node's acting player.
        """
        self.num_visits += 1
        # Incremental mean: shift the average toward `score` by 1/num_visits.
        delta = score - self._avg_reward
        self._avg_reward += delta / self.num_visits

        node_stats = {
            'num_visits': self.num_visits,
            'score': score,
            'avg_reward': self._avg_reward,
            'is_root': self.is_root()
        }
        debug.log(node_stats, level=logging.INFO)

        if self.is_root():
            return

        parent = self.parent
        flip_sign = (not parent.is_decision_node()
                     and self.acting_player != parent.acting_player)
        parent_score = -score if flip_sign else score

        parent_stats = {
            'parent.is_decision_node': parent.is_decision_node(),
            'parent.acting_player': parent.acting_player,
            'acting_player': self.acting_player,
            'parent.score': parent_score
        }
        debug.log(parent_stats, level=logging.INFO)

        parent.backup(score=parent_score)
コード例 #2
0
    def backup(self, score=0):
        """Record one visit worth `score` on this node, then recurse upward.

        The node's visit count is incremented and `score` is folded into its
        running average reward. Unless this node is the root, the parent's
        `backup` is then invoked. The score handed to the parent is negated
        when the parent is not a decision node and the parent's acting
        player differs from this node's acting player.

        Log payloads are passed to `debug.log` as callables, so the
        dictionaries are only built if the logger chooses to call them.
        """
        self.num_visits += 1
        # Incremental mean: shift the average toward `score` by 1/num_visits.
        delta = score - self._avg_reward
        self._avg_reward += delta / self.num_visits

        def node_stats():
            return {
                'num_visits': self.num_visits,
                'score': score,
                'avg_reward': self._avg_reward,
                'is_root': self.is_root()
            }

        debug.log(node_stats, level=logging.INFO)

        if self.is_root():
            return

        parent = self.parent
        negate = (not parent.is_decision_node()
                  and self.acting_player != parent.acting_player)
        if negate:
            # Rebind `score` itself so the logging closures observe the
            # value that is propagated to the parent.
            score = -score

        def parent_stats():
            return {
                'parent.is_decision_node': parent.is_decision_node(),
                'parent.acting_player': parent.acting_player,
                'acting_player': self.acting_player,
                'parent.score': score
            }

        debug.log(parent_stats, level=logging.INFO)

        parent.backup(score=score)
コード例 #3
0
    def select_action(self, game_state, time_allowed_s=-1, **_):
        """Select an action in game state, `game_state`.

        Each legal action is played in turn and scored as the negation of
        `self.value(...)` evaluated in the resulting state; the action with
        the highest score is returned.

        If `self.value` raises `self.TimeIsUp`, the search stops and the best
        action found so far is returned (or the action being evaluated when
        no better candidate exists yet).

        Parameters:
        `game_state`: The state to choose an action in. It must provide
        `legal_actions()` and a `play(action)` context manager.
        `time_allowed_s`: Time budget in seconds. A non-positive value
        implies no time limit.

        Additional keyword arguments are accepted and ignored.

        Returns the selected action, or `None` if there are no legal actions.
        """
        # `time.clock()` was removed in Python 3.8; `time.perf_counter()` is
        # the monotonic, high-resolution replacement for measuring elapsed
        # time.
        start_time = time.perf_counter() if time_allowed_s > 0 else 0
        best_action = None
        action_value = -INF

        def time_remaining():
            return time_allowed_s - (time.perf_counter() - start_time)

        debug.log({'Time available in seconds': time_allowed_s},
                  level=logging.INFO)
        debug.log(str(game_state), level=logging.INFO, raw=True)

        self._tree = {'children': [], 'value': action_value}

        for action in game_state.legal_actions():
            child = {'action': action}
            self._tree['children'].append(child)
            with game_state.play(action):
                try:
                    action_value = -self.value(
                        game_state,
                        time_allowed_s=time_allowed_s,
                        time_used=time.perf_counter() - start_time,
                        tree=child)
                except self.TimeIsUp:
                    # Out of time: fall back to the current action only if
                    # nothing has been selected yet.
                    if best_action is None:
                        best_action = action
                        self._tree['value'] = action_value
                    debug.log({'Time is up': True,
                               'Best action so far:': best_action,
                               'Value': self._tree['value']},
                              level=logging.INFO)
                    break
                else:
                    debug.log({'Time remaining in seconds': time_remaining(),
                               'Best action so far:': best_action,
                               'New value': action_value,
                               'Value': self._tree['value'],
                               'Tree': self.to_dict()},
                              level=logging.INFO)
                    if action_value > self._tree['value']:
                        best_action = action
                        self._tree['value'] = action_value

                        debug.log(
                            {'Time remaining in seconds': time_remaining(),
                             'Best action so far:': best_action,
                             'Value': self._tree['value']},
                            level=logging.INFO)
        debug.log({'Time remaining in seconds': time_remaining(),
                   'Best action so far:': best_action,
                   'Value': self._tree['value'],
                   'Tree': self.to_dict()}, level=logging.INFO)
        return best_action
コード例 #4
0
    def select_action(self, game_state, time_allowed_s=-1, max_depth=-1):
        """
        Select an action in game state, `game_state`.

        Each legal action is played in turn and scored as the negation of
        `self.value(...)` called with the full `(-INF, INF)` window; the
        action with the highest score is returned.

        If `self.value` raises `self.TimeIsUp`, the search stops and the best
        action found so far is returned (or the action being evaluated when
        no better candidate exists yet). A non-positive `time_allowed_s`
        implies no time limit.

        If max_depth is positive the search only goes to max_depth below the
        rootnode before applying a heuristic.

        `self.pruned_nodes` is reset to zero at the start of every call.

        Returns the selected action, or `None` if there are no legal actions.
        """
        self.pruned_nodes = 0
        # `time.clock()` was removed in Python 3.8; `time.perf_counter()` is
        # the monotonic, high-resolution replacement for measuring elapsed
        # time.
        start_time = time.perf_counter() if time_allowed_s > 0 else 0
        best_action = None
        action_value = -INF

        debug.log({'Time available in seconds': time_allowed_s},
                  level=logging.INFO)
        debug.log(str(game_state), level=logging.INFO, raw=True)

        self._tree = {'children': [], 'value': action_value}

        for action in game_state.legal_actions():
            child = {'action': action}
            self._tree['children'].append(child)
            with game_state.play(action):
                try:
                    action_value = -self.value(
                        game_state, -INF, INF, time_allowed_s,
                        time.perf_counter() - start_time, max_depth - 1,
                        child)
                except self.TimeIsUp:
                    # Out of time: fall back to the current action only if
                    # nothing has been selected yet.
                    if best_action is None:
                        best_action = action
                        self._tree['value'] = action_value
                    debug.log(
                        {
                            'Time is up': True,
                            'Best action so far:': best_action,
                            'Value': self._tree['value']
                        },
                        level=logging.INFO)
                    break
                else:
                    log_time = time_allowed_s - (
                        time.perf_counter() - start_time)
                    debug.log(
                        {
                            'Time remaining in seconds': log_time,
                            'Best action so far:': best_action,
                            'New value': action_value,
                            'Value': self._tree['value'],
                            'Tree': self.to_dict()
                        },
                        level=logging.INFO)
                    if action_value > self._tree['value']:
                        best_action = action
                        self._tree['value'] = action_value

                        debug.log(
                            {
                                'Time remaining in seconds': log_time,
                                'Best action so far:': best_action,
                                'Value': self._tree['value']
                            },
                            level=logging.INFO)

        log_time = time_allowed_s - (time.perf_counter() - start_time)
        debug.log(
            {
                'Time remaining in seconds': log_time,
                'Best action so far:': best_action,
                'Value': self._tree['value'],
                'Tree': self.to_dict(),
                'Pruned nodes': self.pruned_nodes
            },
            level=logging.INFO)
        return best_action
コード例 #5
0
    def search(self, root_state, time_allowed_s=-1, num_iterations=-1):
        """Execute MCTS from `root_state`.

        Parameters:
        `root_state`: The state of the game from which to search.
        Must adhere to the generic game state interface
        described by `games_puzzles_algorithms.games.fake_game_state`.
        `time_allowed_s`: The time allotted to search for a good action.
        Negative values imply that there is no time limit.
        Setting this to zero will ensure that no search is done.
        `num_iterations`: The number of search iterations (rollouts) to
        complete. Negative values imply that there is no iteration
        limit. Setting this to zero will ensure that no search is done.

        If `time_allowed_s` and `num_iterations` are both negative,
        `num_iterations` will be set to 1.

        Returns `None` if `root_state` is terminal. Otherwise returns a
        dictionary with the keys `num_iterations_completed`, `time_used_s`
        and `num_nodes_expanded`.
        """
        if time_allowed_s < 0 and num_iterations < 0:
            num_iterations = 1
        if root_state.is_terminal():
            return None

        # `time.clock()` was removed in Python 3.8; `time.perf_counter()` is
        # the monotonic, high-resolution replacement for measuring elapsed
        # time.
        start_time = time.perf_counter()

        self._root.expand(root_state)

        debug.log(
            {
                'Initial search tree': self._root.info_strings_to_dict(),
                'Time available in seconds': time_allowed_s,
                '# iterations': num_iterations
            },
            level=logging.INFO)
        debug.log(str(root_state), level=logging.INFO, raw=True)

        num_iterations_completed = 0
        time_used_s = 0

        def time_is_available():
            # Also refreshes `time_used_s` so the caller can report it.
            nonlocal time_used_s
            time_used_s = time.perf_counter() - start_time
            return (time_allowed_s < 0 or time_used_s < time_allowed_s)

        # Only a negative `num_iterations` means "no iteration limit"; zero
        # must perform no iterations, as documented. (Comparing against 1
        # here treated zero as unlimited.)
        while (num_iterations < 0
               or num_iterations_completed < num_iterations):
            try:
                node, game_state, num_actions = self.select_node(
                    self._root,
                    root_state,
                    time_is_available=time_is_available)
            except TimeIsUp:
                break

            debug.log("Executing roll-out from (player {} is acting):".format(
                game_state.player_to_act()),
                      level=logging.INFO)
            debug.log(str(game_state), level=logging.INFO, raw=True)

            rollout_results = self.roll_out(game_state, node.acting_player)
            debug.log({'Roll-out results': rollout_results})
            node.backup(**rollout_results)

            debug.log(
                {
                    'Updated search tree': self._root.info_strings_to_dict(),
                    'Seconds used': time_used_s,
                    '# iterations completed': num_iterations_completed + 1
                },
                level=logging.INFO)

            # Rewind the actions reported by `select_node` so the next
            # iteration starts from the root state again.
            for _ in range(num_actions):
                game_state.undo()
            num_iterations_completed += 1
        return {
            'num_iterations_completed': num_iterations_completed,
            'time_used_s': time_used_s,
            'num_nodes_expanded': self._root.num_nodes()
        }
コード例 #6
0
    def search(self, root_state, time_allowed_s=-1, num_iterations=-1):
        """Execute MCTS from `root_state`.

        Parameters:
        `root_state`: The state of the game from which to search.
        Must adhere to the generic game state interface
        described by `games_puzzles_algorithms.games.fake_game_state`.
        `time_allowed_s`: The time allotted to search for a good action.
        Negative values imply that there is no time limit.
        Setting this to zero will ensure that no search is done.
        `num_iterations`: The number of search iterations (rollouts) to
        complete. Negative values imply that there is no iteration
        limit. Setting this to zero will ensure that no search is done.

        If `time_allowed_s` and `num_iterations` are both negative,
        `num_iterations` will be set to 1.

        Log payloads are passed to `debug.log` as callables, so they are
        only built if the logger chooses to call them.

        Returns `None` if `root_state` is terminal. Otherwise returns a
        dictionary with the keys `num_iterations_completed`, `time_used_s`
        and `num_nodes_expanded`.
        """
        if time_allowed_s < 0 and num_iterations < 0:
            num_iterations = 1
        if root_state.is_terminal():
            return None

        # `time.clock()` was removed in Python 3.8; `time.perf_counter()` is
        # the monotonic, high-resolution replacement for measuring elapsed
        # time.
        start_time = time.perf_counter()

        self._root.expand(root_state)

        debug.log(lambda:
            {
                'Initial search tree': self._root.info_strings_to_dict(),
                'Time available in seconds': time_allowed_s,
                '# iterations': num_iterations
            },
            level=logging.INFO)
        debug.log(lambda: str(root_state), level=logging.INFO, raw=True)

        num_iterations_completed = 0
        time_used_s = 0

        def time_is_available():
            # Also refreshes `time_used_s` so the caller can report it.
            nonlocal time_used_s
            time_used_s = time.perf_counter() - start_time
            return (time_allowed_s < 0 or time_used_s < time_allowed_s)

        # Only a negative `num_iterations` means "no iteration limit"; zero
        # must perform no iterations, as documented. (Comparing against 1
        # here treated zero as unlimited.)
        while (num_iterations < 0
               or num_iterations_completed < num_iterations):
            try:
                node, game_state, num_actions = self.select_node(
                    self._root,
                    root_state,
                    time_is_available=time_is_available)
            except TimeIsUp:
                break

            debug.log(lambda: "Executing roll-out from (player {} is acting):"
                      .format(game_state.player_to_act()),
                      level=logging.INFO)
            debug.log(lambda: str(game_state), level=logging.INFO, raw=True)

            rollout_results = self.roll_out(game_state, node.acting_player)
            debug.log(lambda: {'Roll-out results': rollout_results})
            node.backup(**rollout_results)

            debug.log(lambda:
                {
                    'Updated search tree': self._root.info_strings_to_dict(),
                    'Seconds used': time_used_s,
                    '# iterations completed': num_iterations_completed + 1
                },
                level=logging.INFO)

            # Rewind the actions reported by `select_node` so the next
            # iteration starts from the root state again.
            for _ in range(num_actions):
                game_state.undo()
            num_iterations_completed += 1

        return {'num_iterations_completed': num_iterations_completed,
                'time_used_s': time_used_s,
                'num_nodes_expanded': self._root.num_nodes()}
コード例 #7
0
    def select_action(self, game_state, time_allowed_s=-1, max_depth=-1):
        """
        Select an action in game state, `game_state`.

        Each legal action is played in turn and scored as the negation of
        `self.value(...)` called with the full `(-INF, INF)` window; the
        action with the highest score is returned.

        If `self.value` raises `self.TimeIsUp`, the search stops and the best
        action found so far is returned (or the action being evaluated when
        no better candidate exists yet). A non-positive `time_allowed_s`
        implies no time limit.

        If max_depth is positive the search only goes to max_depth below the
        rootnode before applying a heuristic.

        `self.pruned_nodes` is reset to zero at the start of every call.
        Log payloads are passed to `debug.log` / `debug.log_t` as callables,
        so they are only built if the logger chooses to call them.

        Returns the selected action, or `None` if there are no legal actions.
        """
        self.pruned_nodes = 0
        # `time.clock()` was removed in Python 3.8; `time.perf_counter()` is
        # the monotonic, high-resolution replacement for measuring elapsed
        # time.
        start_time = time.perf_counter() if time_allowed_s > 0 else 0
        best_action = None
        action_value = -INF

        debug.log(lambda: {'Time available in seconds': time_allowed_s},
                  level=logging.INFO)
        debug.log(lambda: str(game_state), level=logging.INFO, raw=True)

        self._tree = {'children': [], 'value': action_value}

        for action in game_state.legal_actions():
            self._tree['children'].append({'action': action})
            with game_state.play(action):
                try:
                    action_value = -self.value(
                        game_state,
                        -INF,
                        INF,
                        time_allowed_s,
                        time.perf_counter() - start_time,
                        max_depth - 1,
                        self._tree['children'][-1])
                except self.TimeIsUp:
                    # Out of time: fall back to the current action only if
                    # nothing has been selected yet.
                    if best_action is None:
                        best_action = action
                        self._tree['value'] = action_value
                    debug.log(lambda: {'Time is up': True,
                                       'Best action so far:': best_action,
                                       'Value': self._tree['value']},
                              level=logging.INFO)
                    break
                else:
                    log_time = time_allowed_s - (
                        time.perf_counter() - start_time)
                    debug.log(lambda: {'Time remaining in seconds': log_time,
                                       'Best action so far:': best_action,
                                       'New value': action_value,
                                       'Value': self._tree['value'],
                                       'Tree': self.to_dict()},
                              level=logging.INFO)
                    if action_value > self._tree['value']:
                        best_action = action
                        self._tree['value'] = action_value

                        debug.log(
                            lambda: {'Time remaining in seconds': log_time,
                                     'Best action so far:': best_action,
                                     'Value': self._tree['value']},
                            level=logging.INFO)

        log_time = time_allowed_s - (time.perf_counter() - start_time)
        debug.log(lambda: {'Time remaining in seconds': log_time,
                           'Best action so far:': best_action,
                           'Value': self._tree['value'],
                           'Tree': self.to_dict(),
                           'Pruned nodes': self.pruned_nodes},
                  level=logging.INFO)
        debug.log_t(lambda: {'Time remaining in seconds': log_time,
                             'Best action so far:': best_action,
                             'Value': self._tree['value'],
                             'Pruned nodes': self.pruned_nodes},
                    level=logging.INFO)
        return best_action