def pr_max_1(mdp: MDP, T: List[int], connected: List[bool] = None) -> List[int]:
    """
    Compute the states s of the MDP such that the maximum probability to
    reach T from s is 1.

    :param mdp: a MDP.
    :param T: a target states list of the MDP.
    :param connected: (optional) list of the states of the MDP connected to T.
                      If this parameter is not provided (or is empty), it is
                      computed in the function. NOTE: when provided, this list
                      is mutated in place by the algorithm.
    :return: the list of states s of the MDP such that the maximum probability
             to reach T from s is 1.
    """
    # `None` (instead of a mutable `[]` default, which would be shared across
    # calls) marks "not provided"; `not connected` also covers an explicitly
    # passed empty list, matching the previous behavior.
    if not connected:
        connected = connected_to(mdp, T)
    removed_state = [False] * mdp.number_of_states
    T_set = set(T)  # O(1) membership tests for target states
    disabled_action = [[False] * len(mdp.act(s))
                       for s in range(mdp.number_of_states)]
    no_disabled_actions = [0] * mdp.number_of_states
    # U: states currently not connected to T. They — and every action that can
    # lead to them — are iteratively removed until a fixed point is reached.
    U = [s for s in range(mdp.number_of_states) if not connected[s]]
    while len(U) > 0:
        R = deque(U)
        while len(R) > 0:
            u = R.pop()
            # Disable each action of a predecessor t that can reach the
            # removed state u; if t runs out of enabled actions, remove t too.
            for (t, alpha_i) in mdp._alpha_pred[u]:
                if connected[t] and not disabled_action[t][alpha_i] \
                        and t not in T_set:
                    disabled_action[t][alpha_i] = True
                    no_disabled_actions[t] += 1
                    if no_disabled_actions[t] == len(mdp.act(t)):
                        R.appendleft(t)
                        connected[t] = False
            removed_state[u] = True
        # Build the sub-MDP restricted to the surviving states and actions
        # (state indices are kept, so removed_state stays valid for it).
        sub_mdp = MDP([], [], [], number_of_states=mdp.number_of_states,
                      validation=False)
        for s in range(mdp.number_of_states):
            if not removed_state[s]:
                for alpha_i in range(len(mdp.act(s))):
                    if not disabled_action[s][alpha_i]:
                        sub_mdp.enable_action(
                            s, mdp._enabled_actions[s][0][alpha_i],
                            filter(
                                lambda succ_pr: not removed_state[succ_pr[0]],
                                mdp._enabled_actions[s][1][alpha_i]))
        mdp = sub_mdp
        # Recompute connectivity on the sub-MDP; iterate until no additional
        # state becomes disconnected from T.
        connected = connected_to(mdp, T)
        U = [
            s for s in range(mdp.number_of_states)
            if not connected[s] and not removed_state[s]
        ]
    pr_1 = [s for s in range(mdp.number_of_states) if not removed_state[s]]
    return pr_1
def build_strategy(mdp: MDP, T: List[int], solver: pulp = None,
                   msg=0) -> Callable[[int], int]:
    """
    Build a memoryless strategy that returns, following a state s of the MDP,
    the action that minimize the expected length of paths to a set of target
    states T.

    :param mdp: a MDP for which the strategy will be built.
    :param T: a target states list.
    :param solver: (optional) a LP solver allowed in puLp (e.g., GLPK or
                   CPLEX). Defaults to a fresh pulp.GLPK_CMD() per call.
    :param msg: (optional) verbosity flag forwarded to the LP solver.
    :return: the strategy built.
    """
    # Build the default solver here rather than in the signature: a default
    # of pulp.GLPK_CMD() would be instantiated once at import time and the
    # same mutable solver object would be shared by every call.
    if solver is None:
        solver = pulp.GLPK_CMD()
    x = min_expected_cost(mdp, T, solver=solver, msg=msg)
    # NOTE(review): publishing the expected-cost vector through a module-level
    # global is a side channel; kept as-is for backward compatibility with
    # existing code that reads `v`.
    global v
    v = x
    states = range(mdp.number_of_states)
    # For each state s, select the action alpha minimising
    # w(alpha) + sum_{s'} P(s' | s, alpha) * x[s'].
    act_min = [
        mdp.act(s)[argmin([
            mdp.w(alpha) +
            sum(map(lambda succ_pr: succ_pr[1] * x[succ_pr[0]], succ_list))
            for (alpha, succ_list) in mdp.alpha_successors(s)
        ])] for s in states
    ]
    return lambda s: act_min[s]