コード例 #1
0
        def recurse(state: State, alpha, beta) -> Tuple[int, object]:
            """
            Minimax search with alpha-beta pruning, scored for ``self.player``.

            :param state: the position to evaluate
            :param alpha: best score the maximizing side (``self.player``) is
                          already assured of; raised while scanning a
                          maximizing node
            :param beta:  best score the minimizing side (the opponent) is
                          already assured of; lowered while scanning a
                          minimizing node
            :return: a tuple ``(utility, action)``. For a finished game the
                     utility is 1 if ``self.player`` won, -1 if the opponent
                     won and 0 otherwise, and the action is ``None``.
            """
            finished, victor = state.get_state_result()
            if finished:
                if victor == self.player:
                    return 1, None
                if victor == get_opponent(self.player):
                    return -1, None
                return 0, None

            candidates = state.get_available_actions()

            if state.player == self.player:
                # Maximizing node: keep the first action with the highest score.
                best_score, best_action = float("-inf"), None
                for move in candidates:
                    score = recurse(state.get_next_state(move), alpha, beta)[0]
                    if score > best_score:
                        best_score, best_action = score, move
                    alpha = max(alpha, best_score)
                    if beta <= alpha:
                        # The window is closed; remaining moves are skipped.
                        break
                return best_score, best_action

            if state.player == get_opponent(self.player):
                # Minimizing node: keep the first action with the lowest score.
                worst_score, worst_action = float("inf"), None
                for move in candidates:
                    score = recurse(state.get_next_state(move), alpha, beta)[0]
                    if score < worst_score:
                        worst_score, worst_action = score, move
                    beta = min(beta, worst_score)
                    if beta <= alpha:
                        # The window is closed; remaining moves are skipped.
                        break
                return worst_score, worst_action

            # The side to move matched neither player: no result is produced.
            return None
コード例 #2
0
        def recurse(state: State) -> Tuple[int, object]:
            """
            Exhaustive negamax search: score ``state`` for the side to move.

            The utility is always expressed from the point of view of
            ``state.player``; the value returned for a successor state is
            negated before being compared.

            :param state: the position to evaluate
            :return: a tuple ``(utility, action)``. For a finished game the
                     utility is 1 if ``state.player`` won, -1 if the opponent
                     won and 0 otherwise, and the action is ``None``.
                     Otherwise it is the best achievable utility together
                     with the first action that reaches it.
            :raises ValueError: if the game is not over but there is no
                                available action
            """
            is_over, winner = state.get_state_result()
            if is_over:
                if winner == state.player:
                    return 1, None
                elif winner == get_opponent(state.player):
                    return -1, None
                else:
                    return 0, None

            # A plain scan keeps the action objects and the utility as they
            # are, instead of round-tripping them through a pandas index.
            best_value, best_action = None, None
            for action in state.get_available_actions():
                value = -recurse(state.get_next_state(action))[0]
                # Strict comparison: on ties the earliest action is kept.
                if best_value is None or value > best_value:
                    best_value, best_action = value, action

            if best_value is None:
                raise ValueError(
                    "state is not terminal but has no available actions")
            return best_value, best_action