Example #1
def build_strategy(mdp: MDP,
                   T: List[int],
                   solver: pulp = pulp.GLPK_CMD(),
                   msg=0) -> Callable[[int], int]:
    """
    Build a memoryless strategy that returns the action that maximises the reachability probability to T
    of each state s in parameter of this strategy.

    :param mdp: a MDP for which the strategy will be built.
    :param T: a target states list.
    :param solver: (optional) a LP solver allowed in puLp (e.g., GLPK or CPLEX).
    :return: the strategy built.
    """
    x = reach(mdp, T, solver=solver, msg=msg)

    states = range(mdp.number_of_states)
    act_max = [[] for _ in states]

    # for each state, collect the enabled actions that achieve the maximal reachability probability
    for s in states:
        pr_max = 0
        for (alpha, successor_list) in mdp.alpha_successors(s):
            pr = sum(
                map(lambda succ_pr: succ_pr[1] * x[succ_pr[0]],
                    successor_list))
            if pr == pr_max:
                act_max[s].append(alpha)
            elif pr > pr_max:
                pr_max = pr
                act_max[s] = [alpha]

    # build the sub-MDP M^max in which only these optimal actions remain enabled
    mdp_max = MDP([], [], mdp._w, mdp.number_of_states, validation=False)
    for s in states:
        i = 0
        for (alpha, successor_list) in mdp.alpha_successors(s):
            if alpha == act_max[s][i]:
                i += 1
                mdp_max.enable_action(s, alpha, successor_list)
            if i == len(act_max[s]):
                break

    # build the final strategy: among the optimal actions of each state, keep one that
    # strictly decreases the minimal number of steps to T in M^max
    minimal_steps = minimal_steps_number_to(mdp_max, T)
    strategy: List[int] = []
    for s in states:
        if x[s] == 0 or minimal_steps[s] == 0:
            strategy.append(act_max[s][0])
        else:
            for (alpha, successor_list) in mdp_max.alpha_successors(s):
                for (succ, pr) in successor_list:
                    if minimal_steps[succ] == minimal_steps[s] - 1:
                        strategy.append(alpha)
                        break
                if len(strategy) == s + 1:
                    break
    return lambda s: strategy[s]
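A hypothetical usage sketch for build_strategy (the MDP instance mdp and the target states are assumed to be constructed elsewhere; the solver keeps its GLPK default):

# Hypothetical usage: `mdp` is an already-built MDP, [3, 7] a list of target states.
strategy = build_strategy(mdp, [3, 7])
for s in range(mdp.number_of_states):
    # the returned callable maps each state to the action it prescribes
    print(mdp.state_name(s), '->', mdp.act_name(strategy(s)))
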
def export_to_yaml(mdp: MDP, file_name: str) -> None:
    """
    Serialise a MDP instance into a yaml file.

    :param mdp: a MDP
    :param file_name: the name of the yaml file
    """
    mdp_dict = {'mdp': {'states': [], 'actions': []}}
    for s in range(mdp.number_of_states):
        mdp_dict['mdp']['states'].append({})
        mdp_dict['mdp']['states'][-1]['name'] = mdp.state_name(s)
        mdp_dict['mdp']['states'][-1]['enabled actions'] = []
        for (alpha, succ_list) in mdp.alpha_successors(s):
            mdp_dict['mdp']['states'][-1]['enabled actions'].append({})
            mdp_dict['mdp']['states'][-1]['enabled actions'][-1][
                'name'] = mdp.act_name(alpha)
            mdp_dict['mdp']['states'][-1]['enabled actions'][-1][
                'transitions'] = []
            for (succ, pr) in succ_list:
                mdp_dict['mdp']['states'][-1]['enabled actions'][-1][
                    'transitions'].append({})
                mdp_dict['mdp']['states'][-1]['enabled actions'][-1][
                    'transitions'][-1]['target'] = mdp.state_name(succ)
                mdp_dict['mdp']['states'][-1]['enabled actions'][-1][
                    'transitions'][-1]['probability'] = pr
    for alpha in range(mdp.number_of_actions):
        mdp_dict['mdp']['actions'].append({})
        mdp_dict['mdp']['actions'][-1]['name'] = mdp.act_name(alpha)
        mdp_dict['mdp']['actions'][-1]['weight'] = mdp.w(alpha)
    if file_name:
        with open(file_name + '.yaml', 'w') as yaml_file:
            yaml.dump(mdp_dict, yaml_file, default_flow_style=False)
    else:
        print(yaml.dump(mdp_dict, default_flow_style=False))
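A hypothetical call of export_to_yaml (assuming mdp already exists); as the final else branch shows, an empty file name prints the YAML document rather than writing a file:

# Hypothetical usage: serialise `mdp` into the file 'my_mdp.yaml'.
export_to_yaml(mdp, 'my_mdp')
# With an empty file name, the YAML document is printed to the console instead.
export_to_yaml(mdp, '')
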
def export_mdp(mdp: MDP, mdp_name: str, strategy: List[int] = []) -> None:
    """
    Render the MDP as a Graphviz digraph and open the resulting view.

    :param mdp: an MDP.
    :param mdp_name: the base name of the generated .gv file.
    :param strategy: (optional) a list of actions indexed by state; the actions it
                     prescribes are highlighted in red.
    """
    states = range(mdp.number_of_states)

    g = Digraph(mdp_name, filename=mdp_name + '.gv')

    g.attr('node', shape='circle')
    for s in states:
        g.node('s%d' % s, label=mdp.state_name(s))

    g.attr('node', shape='point')
    for s in states:
        for (alpha, succ_list) in mdp.alpha_successors(s):
            if strategy and strategy[s] == alpha:
                color = 'red'
            else:
                color = 'black'
            g.node('s%d->a%d' % (s, alpha),
                   xlabel=' ' + mdp.act_name(alpha) + ' | ' +
                   str(mdp.w(alpha)) + ' ',
                   fontsize='8',
                   fontcolor=color,
                   color=color)
            g.edge('s%d' % s, 's%d->a%d' % (s, alpha))
            for (succ, pr) in succ_list:
                g.edge('s%d->a%d' % (s, alpha),
                       's%d' % succ,
                       label=str(round(pr, 4)),
                       fontsize='8')

    g.view()
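A hypothetical sketch combining export_mdp with the strategy built above (it assumes Graphviz is installed so that g.view() can open the rendered graph):

# Hypothetical usage: draw the MDP and highlight the actions chosen by a strategy in red.
strategy = build_strategy(mdp, [3, 7])
# export_mdp expects a plain list of actions indexed by state, not a callable.
export_mdp(mdp, 'my_mdp', [strategy(s) for s in range(mdp.number_of_states)])
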
Example #4
def build_strategy(mdp: MDP,
                   T: List[int],
                   solver: pulp = pulp.GLPK_CMD(),
                   msg=0) -> Callable[[int], int]:
    """
    Build a memoryless strategy that returns, following a state s of the MDP, the action that minimize
    the expected length of paths to a set of target states T.

    :param mdp: a MDP for which the strategy will be built.
    :param T: a target states list.
    :param solver: (optional) a LP solver allowed in puLp (e.g., GLPK or CPLEX).
    :return: the strategy built.
    """
    x = min_expected_cost(mdp, T, solver=solver, msg=msg)
    # store the computed expected values in the module-level variable v
    global v
    v = x

    states = range(mdp.number_of_states)
    act_min = [
        mdp.act(s)[argmin([
            mdp.w(alpha) +
            sum(map(lambda succ_pr: succ_pr[1] * x[succ_pr[0]], succ_list))
            for (alpha, succ_list) in mdp.alpha_successors(s)
        ])] for s in states
    ]

    return lambda s: act_min[s]
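Usage mirrors the reachability variant; a hypothetical sketch, again assuming mdp and the target list already exist:

# Hypothetical usage: for each state, pick the action minimising the expected
# length of paths to the target states.
strategy = build_strategy(mdp, [0])
print(mdp.act_name(strategy(5)))  # action prescribed in state 5
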
Example #5
def reach(mdp: MDP,
          T: List[int],
          msg=0,
          solver: pulp = pulp.GLPK_CMD()) -> List[float]:
    """
    Compute the maximum reachability probability to T for each state of the MDP in parameter and get a vector x (as list)
    such that x[s] is the maximum reachability probability to T of the state s.

    :param mdp: a MDP for which the maximum reachability probability will be computed for each of its states.
    :param T: a list of target states.
    :param msg: (optional) set this parameter to 1 to activate the debug mode in the console.
    :param solver: (optional) a LP solver allowed in puLp (e.g., GLPK or CPLEX).
    :return: the a list x such that x[s] is the maximum reachability probability to T.
    """
    states = list(range(mdp.number_of_states))
    # x[s] is the Pr^max to reach T
    x = [-1] * mdp.number_of_states
    connected = connected_to(mdp, T)

    # find all states s such that s is not connected to T
    for s in filter(lambda s: not connected[s], states):
        x[s] = 0
    # find all states s such that Pr^max to reach T is 1
    for s in pr_max_1(mdp, T, connected=connected):
        x[s] = 1

    # if some states remain with a maximal reachability probability strictly between 0 and 1,
    # an LP is generated and solved for them
    untreated_states = list(filter(lambda s: x[s] == -1, states))
    if untreated_states:

        # formulate the LP problem
        linear_program = pulp.LpProblem("reachability", pulp.LpMinimize)
        # initialize variables
        for s in untreated_states:
            x[s] = pulp.LpVariable(mdp.state_name(s), lowBound=0, upBound=1)
        # objective function
        linear_program += sum(x)
        # constraints
        for s in untreated_states:
            for (alpha, successors_list) in mdp.alpha_successors(s):
                linear_program += x[s] >= sum(
                    pr * x[succ] for (succ, pr) in successors_list)

        if msg:
            print(linear_program)

        # solve the LP
        solver.msg = msg
        linear_program.solve(solver)

        for s in untreated_states:
            x[s] = x[s].varValue

    if msg:
        print_optimal_solution(x, states, mdp.state_name)

    # store the computed probabilities in the module-level variable v
    global v
    v = x

    return x
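A hypothetical call of reach (assuming mdp exists and state 4 is the single target); x[s] is then the maximum probability of eventually reaching state 4 from s:

# Hypothetical usage: maximum reachability probabilities towards state 4.
x = reach(mdp, [4], msg=1)  # msg=1 prints the generated LP and the optimal solution
print(x)                    # every x[s] lies in [0, 1]
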
Example #6
def min_expected_cost(mdp: MDP,
                      T: List[int],
                      msg=0,
                      solver: pulp = pulp.GLPK_CMD()) -> List[float]:
    """
    Compute the minimum expected length of paths to the set of targets T from each state in the MDP.

    :param mdp: a MDP.
    :param T: a list of target states of the MDP.
    :param msg: (optional) set this parameter to 1 to activate the debug mode in the console.
    :param solver: (optional) a LP solver allowed in puLp (e.g., GLPK or CPLEX).
    :return: a list x such that x[s] is the mimum expected length of paths to the set of targets T from the state s of
             the MDP.
    """
    states = range(mdp.number_of_states)
    x = [float('inf')] * mdp.number_of_states
    expect_inf = [True] * mdp.number_of_states

    # mark the states for which x[s] != inf, i.e. those reaching T with maximal probability 1
    for s in pr_max_1(mdp, T):
        x[s] = -1
        expect_inf[s] = False
    for t in T:
        x[t] = 0

    # formulate the LP problem
    linear_program = pulp.LpProblem(
        "minimum expected length of path to target", pulp.LpMaximize)
    # initialize variables
    for s in filter(lambda s: x[s] == -1, states):
        x[s] = pulp.LpVariable(mdp.state_name(s), lowBound=0)
    # objective function
    linear_program += sum(
        map(lambda s: x[s], filter(lambda s: not expect_inf[s], states)))
    # constraints
    for s in filter(lambda s: x[s] == -1, states):
        for (alpha, successor_list) in mdp.alpha_successors(s):
            if not list(
                    filter(lambda succ_pr: expect_inf[succ_pr[0]],
                           successor_list)):
                linear_program += x[s] <= mdp.w(alpha) + sum(
                    map(lambda succ_pr: succ_pr[1] * x[succ_pr[0]],
                        successor_list))
    if msg:
        print(linear_program)

    # solve the LP
    solver.msg = msg
    if linear_program.variables():
        linear_program.solve(solver)

    for s in states:
        if x[s] != 0 and x[s] != float('inf'):
            x[s] = x[s].varValue

    if msg:
        print_optimal_solution(x, states, mdp.state_name)

    return x
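A hypothetical call of min_expected_cost (assuming mdp exists); states that cannot reach T with probability 1 keep the value float('inf'):

# Hypothetical usage: minimum expected weighted length of paths to the target set.
x = min_expected_cost(mdp, [4])
print(x)  # x[4] == 0; x[s] == float('inf') when T is not reached almost surely from s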