Code example #1
import yaml
# MDP and str_to_float are assumed to come from the surrounding module


def import_from_yaml(stream) -> MDP:
    """
    Import a YAML file (as a stream) into an MDP.

    :param stream: YAML file stream.
    :return: the MDP imported from the YAML file.
    """
    # safe_load avoids arbitrary object construction; since PyYAML 6,
    # yaml.load requires an explicit Loader argument anyway
    mdp_dict = yaml.safe_load(stream)['mdp']
    mdp_states = mdp_dict['states']
    mdp_actions = mdp_dict['actions']
    states = [state['name'] for state in mdp_states]
    state_from_name = {name: i for i, name in enumerate(states)}
    actions = [action['name'] for action in mdp_actions]
    w = [int(action['weight']) for action in mdp_actions]
    action_from_name = {name: i for i, name in enumerate(actions)}

    mdp = MDP(states, actions, w)
    for s in range(len(states)):
        enabled_actions = mdp_states[s]['enabled actions']
        for enabled_action in enabled_actions:
            transitions = [(state_from_name[transition['target']],
                            str_to_float(str(transition['probability'])))
                           for transition in enabled_action['transitions']]

            alpha = enabled_action['name']

            # enable this action in the MDP
            mdp.enable_action(s, action_from_name[alpha], transitions)

    return mdp
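
To make the expected input concrete, here is a minimal sketch of a YAML document exposing exactly the keys the loader reads ('mdp', 'states', 'actions', 'enabled actions', 'transitions'), followed by a hypothetical call; the file name and the state/action names are illustrative only:

# example.yaml (hypothetical), minimal schema accepted by import_from_yaml:
#
# mdp:
#   states:
#     - name: s0
#       enabled actions:
#         - name: a
#           transitions:
#             - target: s0
#               probability: 1
#   actions:
#     - name: a
#       weight: 1

with open('example.yaml') as stream:
    mdp = import_from_yaml(stream)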
Code example #2
import pulp
from typing import Callable, List
# MDP, reach and minimal_steps_number_to are assumed from the surrounding module


def build_strategy(mdp: MDP,
                   T: List[int],
                   solver: pulp.LpSolver = pulp.GLPK_CMD(),
                   msg=0) -> Callable[[int], int]:
    """
    Build a memoryless strategy that, for each state s given as argument, returns
    an action maximising the probability of reaching T from s.

    :param mdp: an MDP for which the strategy will be built.
    :param T: a list of target states.
    :param solver: (optional) an LP solver supported by PuLP (e.g., GLPK or CPLEX).
    :param msg: (optional) verbosity flag forwarded to the LP solver (0 = silent).
    :return: the strategy built, as a function mapping each state to an action.
    """
    # x[s] is assumed to be the maximal probability of reaching T from s,
    # as computed by the LP in reach
    x = reach(mdp, T, solver=solver, msg=msg)

    states = range(mdp.number_of_states)
    # act_max[s]: the actions of s achieving the maximal reachability probability
    act_max = [[] for _ in states]

    for s in states:
        pr_max = 0
        for (alpha, successor_list) in mdp.alpha_successors(s):
            pr = sum(p * x[t] for (t, p) in successor_list)
            if pr == pr_max:
                act_max[s].append(alpha)
            elif pr > pr_max:
                pr_max = pr
                act_max[s] = [alpha]

    # compute M^max, the sub-MDP keeping only the maximising actions
    mdp_max = MDP([], [], mdp._w, mdp.number_of_states, validation=False)
    for s in states:
        i = 0
        for (alpha, successor_list) in mdp.alpha_successors(s):
            if alpha == act_max[s][i]:
                i += 1
                mdp_max.enable_action(s, alpha, successor_list)
            if i == len(act_max[s]):
                break

    # final strategy: in states that can reach T, pick a maximising action that
    # strictly decreases the minimal number of steps to T (this avoids cycling)
    minimal_steps = minimal_steps_number_to(mdp_max, T)
    strategy: List[int] = []
    for s in states:
        if x[s] == 0 or minimal_steps[s] == 0:
            strategy.append(act_max[s][0])
        else:
            for (alpha, successor_list) in mdp_max.alpha_successors(s):
                for (succ, pr) in successor_list:
                    if minimal_steps[succ] == minimal_steps[s] - 1:
                        strategy.append(alpha)
                        break
                if len(strategy) == s + 1:
                    break
    return lambda s: strategy[s]
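
A hypothetical usage sketch: build the strategy for an MDP and a target list (both assumed to exist, e.g. from the loader above), then query it state by state:

# Hypothetical usage: mdp and the target list [3, 4] are assumptions.
strategy = build_strategy(mdp, [3, 4])
for s in range(mdp.number_of_states):
    print('in state %d, play action %d' % (s, strategy(s)))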
Code example #3
from collections import deque
from typing import List, Optional
# MDP and connected_to are assumed from the surrounding module


def pr_max_1(mdp: MDP, T: List[int],
             connected: Optional[List[bool]] = None) -> List[int]:
    """
    Compute the states s of the MDP such that the maximum probability to reach T from s is 1.

    :param mdp: an MDP.
    :param T: a list of target states of the MDP.
    :param connected: (optional) a boolean list recording, for each state of the MDP, whether it is
                      connected to T. If this parameter is not provided, it is computed in the function.
    :return: the list of states s of the MDP such that the maximum probability to reach T from s is 1.
    """
    if not connected:
        connected = connected_to(mdp, T)
    removed_state = [False] * mdp.number_of_states
    T_set = set(T)
    disabled_action = [[False] * len(mdp.act(s))
                       for s in range(mdp.number_of_states)]
    no_disabled_actions = [0] * mdp.number_of_states

    # U: states not connected to T; remove them and disable every action that
    # may lead to a removed state, repeating until a fixed point is reached
    U = [s for s in range(mdp.number_of_states) if not connected[s]]
    while len(U) > 0:
        R = deque(U)
        while len(R) > 0:
            u = R.pop()
            for (t, alpha_i) in mdp._alpha_pred[u]:
                if (connected[t] and not disabled_action[t][alpha_i]
                        and t not in T_set):
                    disabled_action[t][alpha_i] = True
                    no_disabled_actions[t] += 1
                    if no_disabled_actions[t] == len(mdp.act(t)):
                        R.appendleft(t)
                        connected[t] = False
            removed_state[u] = True
        # rebuild the MDP without the removed states and disabled actions
        sub_mdp = MDP([], [], [],
                      number_of_states=mdp.number_of_states,
                      validation=False)
        for s in range(mdp.number_of_states):
            if not removed_state[s]:
                for alpha_i in range(len(mdp.act(s))):
                    if not disabled_action[s][alpha_i]:
                        sub_mdp.enable_action(
                            s, mdp._enabled_actions[s][0][alpha_i],
                            filter(
                                lambda succ_pr: not removed_state[succ_pr[0]],
                                mdp._enabled_actions[s][1][alpha_i]))
        mdp = sub_mdp
        connected = connected_to(mdp, T)
        U = [
            s for s in range(mdp.number_of_states)
            if not connected[s] and not removed_state[s]
        ]
    pr_1 = [s for s in range(mdp.number_of_states) if not removed_state[s]]
    return pr_1
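
A hypothetical call, reusing an mdp built earlier; the returned list contains exactly the states from which some strategy reaches T with probability 1:

# Hypothetical usage: mdp and the target set T = [0] are assumptions.
print(pr_max_1(mdp, [0]))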
Code example #4
import random
from typing import Tuple
# MDP and random_probability are assumed from the surrounding module


def random_MDP(n: int, a: int,
               strictly_A: bool = False,
               complete_graph: bool = False,
               weights_interval: Tuple[int, int] = (1, 1),
               force_weakly_connected_to: bool = False) -> MDP:
    """
    Generate a random MDP.

    :param n: number of states of the generated MDP.
    :param a: number of actions of the generated MDP.
    :param strictly_A: (optional) set this parameter to True to force each state of the generated MDP to have
                       exactly a actions, i.e. |A(s)| = a for every state s.
    :param complete_graph: (optional) set this parameter to True to force the MDP to have a complete underlying graph.
    :param weights_interval: (optional) an interval (w1, w2) from which the weight of each action is drawn. Following
                             this parameter, w(α) ∈ [w1, w2] for each action α of the generated MDP.
    :param force_weakly_connected_to: (optional) set this parameter to True to make some randomly chosen states
                                      absorbing. As a consequence, some states may not be connected to a target
                                      set T, and more states can have a reachability probability to T below 1.
    :return: a randomly generated MDP.
    """
    states = list(range(n))
    actions = list(range(a))
    w1, w2 = weights_interval
    if not (1 <= w1 <= w2):
        raise ValueError("weights_interval (w1, w2) must satisfy 1 <= w1 <= w2")
    w = [random.randint(w1, w2) for _ in range(a)]
    mdp = MDP([], [], w, n)

    for s in states:
        if strictly_A:
            alpha_list = actions
        else:
            alpha_list = random.sample(actions, random.randint(1, a))
        if complete_graph:
            successors_set = set()
        for alpha in alpha_list:
            successors = random.sample(states, random.randint(1, n))
            # occasionally make s absorbing, weakening connectivity to any target set
            if force_weakly_connected_to and random.random() >= 0.7:
                successors = [s]
            if complete_graph:
                successors_set |= set(successors)
                if alpha == alpha_list[-1]:
                    # the last enabled action covers every state not yet reached,
                    # making the underlying graph complete
                    for succ in filter(lambda succ: succ not in successors_set, states):
                        successors.append(succ)
            probabilities = random_probability(len(successors))
            mdp.enable_action(s, alpha, list(zip(successors, probabilities)))

    return mdp
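
A hypothetical invocation; the seed is only there to make the sketch reproducible:

# Hypothetical usage: a reproducible 10-state, 3-action random MDP whose
# action weights are drawn uniformly from [1, 5].
random.seed(42)
mdp = random_MDP(10, 3, weights_interval=(1, 5))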
Code example #5
from typing import List, Optional
# MDP is assumed from the surrounding module


def complete_MDP(n: int, a: int, w: Optional[List[int]] = None) -> MDP:
    """
    Generate a complete MDP, a worst-case instance: every action is enabled in
    every state and every action can lead to every state with positive probability.

    :param n: number of states.
    :param a: number of actions.
    :param w: (optional) weights of the actions; defaults to 1 for each action.
    :return: the MDP generated.
    """
    if not w:
        w = [1] * a
    mdp = MDP([], [], w, number_of_states=n)
    # fixed distribution: pr[i] = (i + 1) / (1 + 2 + ... + n)
    total = sum(range(1, n + 1))
    pr = [i / total for i in range(1, n + 1)]
    for s in range(n):
        for alpha in range(a):
            # rotate the distribution so successive actions receive different ones
            pr = pr[1:] + pr[:1]
            mdp.enable_action(s, alpha, list(enumerate(pr)))
    return mdp
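
And a short hypothetical call for the generator above:

# Hypothetical usage: a complete MDP with 4 states and 2 unit-weight actions;
# every state enables both actions and every action can reach every state.
mdp = complete_MDP(4, 2)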