Ejemplo n.º 1
0
def pr_max_1(mdp: MDP, T: List[int], connected: List[bool] = []) -> List[int]:
    """
    Compute the states s of the MDP such that the maximum probability to reach T from s is 1.

    The computation alternates two phases until no state is left to remove:

    1. every state that is not connected to T is removed; each (state, action index) pair listed in
       ``mdp._alpha_pred`` for a removed state gets that action disabled (target states are never
       touched), and a state whose actions are all disabled is removed in turn;
    2. a sub-MDP is rebuilt from the remaining states and actions, and connectivity to T is recomputed
       on it with ``connected_to``.

    The ``connected`` list supplied by the caller is never modified: the function works on a private
    copy of it. The default empty list is only read, never mutated.

    :param mdp: a MDP.
    :param T: a target states list of the MDP.
    :param connected: (optional) list, indexed by state, telling whether each state of the MDP is connected
                      to T. If this parameter is not provided (or is empty), it is computed in the function.
    :return: the list of states s of the MDP such that the maximum probability to reach T from s is 1.
    """
    if connected:
        # Entries are cleared below; copy so the caller's list stays intact.
        connected = list(connected)
    else:
        connected = connected_to(mdp, T)

    removed_state = [False] * mdp.number_of_states
    T_set = set(T)
    # disabled_action[s][i] is True once the i-th action of s has been disabled.
    disabled_action = [[False] * len(mdp.act(s))
                       for s in range(mdp.number_of_states)]
    # Number of actions disabled so far for each state.
    disabled_count = [0] * mdp.number_of_states

    U = [s for s in range(mdp.number_of_states) if not connected[s]]
    while U:
        # Propagate the removal of U backwards (FIFO: appendleft / pop).
        R = deque(U)
        while R:
            u = R.pop()
            # presumably (predecessor state, action index) pairs leading to u -- confirm in MDP
            for (t, alpha_i) in mdp._alpha_pred[u]:
                if (connected[t] and not disabled_action[t][alpha_i]
                        and t not in T_set):
                    disabled_action[t][alpha_i] = True
                    disabled_count[t] += 1
                    # NOTE(review): after the first round `mdp` is the rebuilt sub-MDP, while
                    # disabled_action was sized from the original MDP; this comparison and the
                    # alpha_i indices assume enable_action keeps action indices aligned -- confirm.
                    if disabled_count[t] == len(mdp.act(t)):
                        # t has no usable action left: schedule it for removal.
                        R.appendleft(t)
                        connected[t] = False
            removed_state[u] = True

        # Rebuild the MDP restricted to the surviving states and actions.
        sub_mdp = MDP([], [], [],
                      number_of_states=mdp.number_of_states,
                      validation=False)
        for s in range(mdp.number_of_states):
            if not removed_state[s]:
                for alpha_i in range(len(mdp.act(s))):
                    if not disabled_action[s][alpha_i]:
                        # Keep only the successor entries whose first component
                        # (the successor state) has not been removed.
                        sub_mdp.enable_action(
                            s, mdp._enabled_actions[s][0][alpha_i],
                            filter(
                                lambda succ_pr: not removed_state[succ_pr[0]],
                                mdp._enabled_actions[s][1][alpha_i]))
        mdp = sub_mdp

        # States that lost their connection to T in the sub-MDP are removed next round.
        connected = connected_to(mdp, T)
        U = [
            s for s in range(mdp.number_of_states)
            if not connected[s] and not removed_state[s]
        ]

    return [s for s in range(mdp.number_of_states) if not removed_state[s]]
Ejemplo n.º 2
0
def build_strategy(mdp: MDP,
                   T: List[int],
                   solver: pulp = pulp.GLPK_CMD(),
                   msg=0) -> Callable[[int], int]:
    """
    Build a memoryless strategy mapping each state s of the MDP to the action that minimizes
    the expected length of paths to a set of target states T.

    For every state, the chosen action is the one whose weight plus the probability-weighted sum of
    the successors' values (as returned by ``min_expected_cost``) is the smallest according to ``argmin``.

    As a side effect, the vector returned by ``min_expected_cost`` is stored in the module-level
    global ``v``.

    :param mdp: a MDP for which the strategy will be built.
    :param T: a target states list.
    :param solver: (optional) a LP solver allowed in puLp (e.g., GLPK or CPLEX).
    :param msg: (optional) value forwarded unchanged to ``min_expected_cost``.
    :return: the strategy built.
    """
    global v
    expected_cost = min_expected_cost(mdp, T, solver=solver, msg=msg)
    v = expected_cost

    best_action = []
    for state in range(mdp.number_of_states):
        actions = mdp.act(state)
        # One cost per enabled action, in the order given by alpha_successors.
        action_costs = []
        for (alpha, succ_list) in mdp.alpha_successors(state):
            successors_value = sum(
                entry[1] * expected_cost[entry[0]] for entry in succ_list)
            action_costs.append(mdp.w(alpha) + successors_value)
        best_action.append(actions[argmin(action_costs)])

    def strategy(s):
        # Plain table lookup: the strategy is memoryless.
        return best_action[s]

    return strategy