def import_from_yaml(stream) -> MDP:
    """
    Import a yaml file (as stream) into a MDP.

    The document must have a top-level ``mdp`` mapping with two entries:

    * ``states``: a list of mappings, each with a ``name`` and an
      ``enabled actions`` list; every enabled action has a ``name`` and a
      ``transitions`` list of ``target`` / ``probability`` mappings;
    * ``actions``: a list of mappings, each with a ``name`` and a ``weight``.

    States and actions are numbered by their position in these lists.

    :param stream: yaml file stream.
    :return: the MDP imported from the yaml file
    :raises KeyError: if a required key is missing, or if a transition target
                      or an enabled action name was not declared.
    """
    # NOTE(review): this used to be ``yaml.load(stream)``. Without an explicit
    # Loader that call raises TypeError on PyYAML >= 6 and, on older versions,
    # can construct arbitrary Python objects from the input. ``safe_load``
    # only builds plain data (dict / list / str / numbers), which is all this
    # importer reads.
    mdp_dict = yaml.safe_load(stream)['mdp']
    mdp_states = mdp_dict['states']
    mdp_actions = mdp_dict['actions']

    # name -> index tables (a later duplicate name overrides an earlier one)
    states = [state['name'] for state in mdp_states]
    state_from_name = {name: i for i, name in enumerate(states)}
    actions = [action['name'] for action in mdp_actions]
    w = [int(action['weight']) for action in mdp_actions]
    action_from_name = {name: i for i, name in enumerate(actions)}

    mdp = MDP(states, actions, w)
    for s, state in enumerate(mdp_states):
        for enabled_action in state['enabled actions']:
            # probabilities go through str() first so that str_to_float
            # always receives a string, whatever type the yaml parser produced
            transitions = [
                (state_from_name[transition['target']],
                 str_to_float(str(transition['probability'])))
                for transition in enabled_action['transitions']
            ]
            alpha = enabled_action['name']
            # enable this action in the MDP
            mdp.enable_action(s, action_from_name[alpha], transitions)
    return mdp
def build_strategy(mdp: MDP, T: List[int],
                   solver: pulp = pulp.GLPK_CMD(),
                   msg=0) -> Callable[[int], int]:
    """
    Build a memoryless strategy mapping each state s to an action that
    maximises the reachability probability to T from s.

    The construction has three phases:

    1. for every state, collect the actions whose expected value (w.r.t. the
       vector returned by ``reach``) is maximal;
    2. build the MDP restricted to those actions;
    3. in that restricted MDP, pick for every state an action having a
       successor one step closer to T (according to
       ``minimal_steps_number_to``).

    :param mdp: a MDP for which the strategy will be built.
    :param T: a target states list.
    :param solver: (optional) a LP solver allowed in puLp (e.g., GLPK or
                   CPLEX).
    :param msg: (optional) forwarded unchanged to ``reach``
                (presumably the solver verbosity flag -- to be confirmed).
    :return: the strategy built.
    """
    x = reach(mdp, T, solver=solver, msg=msg)
    states = range(mdp.number_of_states)

    # phase 1: maximising actions of every state
    act_max = [[] for _ in states]
    for s in states:
        best = 0
        for alpha, successor_list in mdp.alpha_successors(s):
            value = sum(pr * x[succ] for succ, pr in successor_list)
            if value > best:
                # strictly better: restart the list of maximising actions
                best = value
                act_max[s] = [alpha]
            elif value == best:
                act_max[s].append(alpha)

    # phase 2: M^max, the MDP where only the maximising actions are enabled
    mdp_max = MDP([], [], mdp._w, mdp.number_of_states, validation=False)
    for s in states:
        wanted = act_max[s]
        matched = 0
        # act_max[s] was filled in iteration order, so a single forward pass
        # with one cursor is enough to find each wanted action again
        for alpha, successor_list in mdp.alpha_successors(s):
            if alpha != wanted[matched]:
                continue
            mdp_max.enable_action(s, alpha, successor_list)
            matched += 1
            if matched == len(wanted):
                break

    # phase 3: the final strategy
    minimal_steps = minimal_steps_number_to(mdp_max, T)
    strategy: List[int] = []
    for s in states:
        if x[s] == 0 or minimal_steps[s] == 0:
            # nothing to optimise here: any maximising action will do
            strategy.append(act_max[s][0])
            continue
        closer = minimal_steps[s] - 1
        for alpha, successor_list in mdp_max.alpha_successors(s):
            if any(minimal_steps[succ] == closer
                   for succ, _ in successor_list):
                strategy.append(alpha)
            # stop as soon as the entry of state s has been written
            if len(strategy) == s + 1:
                break
    return lambda s: strategy[s]
def pr_max_1(mdp: MDP, T: List[int],
             connected: List[bool] = None) -> List[int]:
    """
    Compute the states s of the MDP such that the maximum probability to reach
    T from s is 1.

    The states that cannot reach T are removed, together with every action
    (of a non-target state) that may lead to a removed state; a state whose
    actions have all been disabled is removed in turn. Connectivity to T is
    then recomputed on the pruned MDP, and the process is repeated until no
    remaining state is disconnected from T.

    :param mdp: a MDP. It is not modified.
    :param T: a target states list of the MDP.
    :param connected: (optional) list of the states of the MDP connected to T.
                      If this parameter is not provided (or is empty), it is
                      computed in the function. The list is copied: the
                      caller's list is never modified.
    :return: the list of states s of the MDP such that the maximum probability
             to reach T from s is 1.
    """
    n = mdp.number_of_states
    # Work on a private copy: entries are flipped to False below, and the
    # caller may still need its own connectivity information afterwards.
    connected = list(connected) if connected else connected_to(mdp, T)
    T_set = set(T)
    removed_state = [False] * n
    # All the bookkeeping below is indexed by the action positions of the
    # ORIGINAL mdp. The pruned copy built at the end of each round is only
    # used to recompute connectivity: its action positions are shifted as soon
    # as one action has been disabled, so it must not be used for indexing.
    disabled_action = [[False] * len(mdp.act(s)) for s in range(n)]
    no_disabled_actions = [0] * n

    U = [s for s in range(n) if not connected[s]]
    while U:
        R = deque(U)
        while R:
            u = R.pop()
            for (t, alpha_i) in mdp._alpha_pred[u]:
                if (connected[t] and not disabled_action[t][alpha_i]
                        and t not in T_set):
                    # this action may lead to the removed state u
                    disabled_action[t][alpha_i] = True
                    no_disabled_actions[t] += 1
                    if no_disabled_actions[t] == len(mdp.act(t)):
                        # t has no action left: remove it as well
                        R.appendleft(t)
                        connected[t] = False
            removed_state[u] = True

        # Rebuild the MDP without the removed states / disabled actions.
        sub_mdp = MDP([], [], [], number_of_states=n, validation=False)
        for s in range(n):
            if removed_state[s]:
                continue
            actions_of_s = mdp._enabled_actions[s][0]
            successors_of_s = mdp._enabled_actions[s][1]
            for alpha_i in range(len(mdp.act(s))):
                if not disabled_action[s][alpha_i]:
                    # a real list (not a lazy one-shot filter object), so the
                    # successors can safely be stored and iterated repeatedly
                    sub_mdp.enable_action(
                        s, actions_of_s[alpha_i],
                        [succ_pr for succ_pr in successors_of_s[alpha_i]
                         if not removed_state[succ_pr[0]]])

        connected = connected_to(sub_mdp, T)
        U = [s for s in range(n)
             if not connected[s] and not removed_state[s]]

    return [s for s in range(n) if not removed_state[s]]
def random_MDP(n: int, a: int,
               strictly_A: bool = False,
               complete_graph: bool = False,
               weights_interval: Tuple[int, int] = (1, 1),
               force_weakly_connected_to: bool=False) -> MDP:
    """
    Generate a random MDP.

    :param n: number of states of the generated MDP.
    :param a: number of actions of the generated MDP.
    :param strictly_A: (optional) set this parameter to True to force each
                       state of the generated MDP to have exactly a actions,
                       i.e. |A(s)| = a for all state s. Otherwise each state
                       gets a random non-empty sample of the actions.
    :param complete_graph: (optional) set this parameter to True to force the
                           MDP to have a complete underlying graph: the last
                           action of each state additionally receives every
                           state not yet reached by the previous actions.
    :param weights_interval: (optional) set an interval (w1, w2) for weights
                             of each action. Following this parameter,
                             w(α) ∈ [w1, w2] for each action α of the
                             generated MDP.
    :param force_weakly_connected_to: (optional) set this parameter to True
                                      to make some randomly chosen actions
                                      self-loops (the only successor of s is
                                      s). As consequence, some states should
                                      not be connected to a target state T and
                                      more states can have a reachability
                                      probability to T < 1.
    :return: a randomly generated MDP.
    :raises ValueError: if weights_interval does not satisfy 1 <= w1 <= w2.
    """
    states = list(range(n))
    actions = list(range(a))

    w1, w2 = weights_interval
    if w1 < 1 or w1 > w2:
        raise ValueError("weights_interval (w1, w2) must be 1 <= w1 <= w2")
    w = [random.randint(w1, w2) for _ in range(a)]

    mdp = MDP([], [], w, n)
    for s in states:
        if strictly_A:
            alpha_list = actions
        else:
            action_count = random.randint(1, a)
            alpha_list = random.sample(actions, action_count)

        if complete_graph:
            # states already used as a successor by some action of s
            covered = set()

        for alpha in alpha_list:
            successor_count = random.randint(1, n)
            successors = random.sample(states, successor_count)
            if force_weakly_connected_to and random.random() >= 0.7:
                # turn this action into a self-loop
                successors = [s]
            if complete_graph:
                covered.update(successors)
                if alpha == alpha_list[-1]:
                    # last action of s: add every state still missing
                    successors.extend(
                        succ for succ in states if succ not in covered)

            probabilities = random_probability(len(successors))
            mdp.enable_action(
                s, alpha,
                [(successors[i], p) for i, p in enumerate(probabilities)])
    return mdp
def complete_MDP(n: int, a: int, w: List[int]=[]) -> MDP:
    """
    Worst case of MDP: every action is enabled in every state and every
    action leads to all the n states.

    The probabilities are the values i / (1 + 2 + ... + n) for i = 1..n,
    rotated by one position before each (state, action) pair is enabled.

    :param n: number of states
    :param a: number of actions
    :param w: weights (all the weights are set to 1 when omitted or empty)
    :return: the MDP generated.
    """
    if not w:
        w = [1] * a
    mdp = MDP([], [], w, number_of_states=n)

    total = sum(range(1, n + 1))
    pr = [float(i) / total for i in range(1, n + 1)]

    for s in range(n):
        for alpha in range(a):
            # rotate the distribution one position to the left
            pr = pr[1:] + pr[0:1]
            # pr always holds n values, so this pairs each state with its
            # probability: [(0, pr[0]), ..., (n - 1, pr[n - 1])]
            mdp.enable_action(s, alpha, list(enumerate(pr)))
    return mdp