def export_to_yaml(mdp: MDP, file_name: str) -> None:
    """
    Serialise a MDP instance into a yaml file.

    The document has the shape {'mdp': {'states': [...], 'actions': [...]}}
    where each state lists its enabled actions and each enabled action lists
    its outgoing transitions (target state name and probability).

    :param mdp: a MDP
    :param file_name: the name of the yaml file; if empty/falsy, the yaml
                      document is printed to stdout instead.
    """
    state_entries = []
    for s in range(mdp.number_of_states):
        enabled_actions = []
        for (alpha, succ_list) in mdp.alpha_successors(s):
            transitions = [
                {'target': mdp.state_name(succ), 'probability': pr}
                for (succ, pr) in succ_list
            ]
            enabled_actions.append({
                'name': mdp.act_name(alpha),
                'transitions': transitions,
            })
        state_entries.append({
            'name': mdp.state_name(s),
            'enabled actions': enabled_actions,
        })
    action_entries = [
        {'name': mdp.act_name(alpha), 'weight': mdp.w(alpha)}
        for alpha in range(mdp.number_of_actions)
    ]
    mdp_dict = {'mdp': {'states': state_entries, 'actions': action_entries}}
    if file_name:
        with open(file_name + '.yaml', 'w') as yaml_file:
            yaml.dump(mdp_dict, yaml_file, default_flow_style=False)
    else:
        print(yaml.dump(mdp_dict, default_flow_style=False))
def export_mdp(mdp: MDP, mdp_name: str, strategy: List[int] = None) -> None:
    """
    Render a MDP as a graphviz graph, write it to <mdp_name>.gv and open it.

    States are drawn as circles; each enabled action is drawn as an
    intermediate point node labelled with the action name and its weight,
    and each transition as an edge labelled with its (rounded) probability.
    If a strategy is given, the action it prescribes in each state is
    highlighted in red.

    :param mdp: a MDP.
    :param mdp_name: base name of the generated graphviz file (.gv).
    :param strategy: (optional) a list mapping each state index to the
                     action chosen in that state.
    """
    # Fix: avoid a mutable default argument ([]); create the list per call.
    if strategy is None:
        strategy = []
    states = range(mdp.number_of_states)
    g = Digraph(mdp_name, filename=mdp_name + '.gv')
    g.attr('node', shape='circle')
    for s in states:
        g.node('s%d' % s, label=mdp.state_name(s))
    g.attr('node', shape='point')
    for s in states:
        for (alpha, succ_list) in mdp.alpha_successors(s):
            # Highlight the action prescribed by the strategy, if any.
            if strategy and strategy[s] == alpha:
                color = 'red'
            else:
                color = 'black'
            g.node('s%d->a%d' % (s, alpha),
                   xlabel=' ' + mdp.act_name(alpha) + ' | '
                          + str(mdp.w(alpha)) + ' ',
                   fontsize='8', fontcolor=color, color=color)
            g.edge('s%d' % s, 's%d->a%d' % (s, alpha))
            for (succ, pr) in succ_list:
                g.edge('s%d->a%d' % (s, alpha), 's%d' % succ,
                       label=str(round(pr, 4)), fontsize='8')
    g.view()
def build_strategy(mdp: MDP, T: List[int], solver: pulp = None,
                   msg=0) -> Callable[[int], int]:
    """
    Build a memoryless strategy that returns, following a state s of the MDP,
    the action that minimize the expected length of paths to a set of target
    states T.

    :param mdp: a MDP for which the strategy will be built.
    :param T: a target states list.
    :param solver: (optional) a LP solver allowed in puLp (e.g., GLPK or
                   CPLEX). Defaults to a fresh GLPK_CMD instance.
    :param msg: (optional) set to 1 to activate the solver's debug output.
    :return: the strategy built.
    """
    # Fix: a solver instance used as a default argument would be shared
    # across calls (and mutated via its `msg` attribute in
    # min_expected_cost); create a fresh one per call instead.
    if solver is None:
        solver = pulp.GLPK_CMD()
    x = min_expected_cost(mdp, T, solver=solver, msg=msg)
    # Side effect kept for backward compatibility: the computed expected
    # costs are exposed through the module-level global `v`.
    global v
    v = x
    states = range(mdp.number_of_states)
    # For each state, pick the enabled action minimising
    # w(alpha) + sum_{succ} Pr(succ) * x[succ].
    act_min = [
        mdp.act(s)[argmin([
            mdp.w(alpha)
            + sum(map(lambda succ_pr: succ_pr[1] * x[succ_pr[0]], succ_list))
            for (alpha, succ_list) in mdp.alpha_successors(s)
        ])]
        for s in states
    ]
    return lambda s: act_min[s]
def min_expected_cost(mdp: MDP, T: List[int], msg=0,
                      solver: pulp = None) -> List[float]:
    """
    Compute the minimum expected length of paths to the set of targets T
    from each state in the MDP.

    :param mdp: a MDP.
    :param T: a list of target states of the MDP.
    :param msg: (optional) set this parameter to 1 to activate the debug mode
                in the console.
    :param solver: (optional) a LP solver allowed in puLp (e.g., GLPK or
                   CPLEX). Defaults to a fresh GLPK_CMD instance.
    :return: a list x such that x[s] is the minimum expected length of paths
             to the set of targets T from the state s of the MDP.
    """
    # Fix: a solver instance used as a default argument would be shared
    # across calls and mutated below (solver.msg = msg); create it per call.
    if solver is None:
        solver = pulp.GLPK_CMD()
    states = range(mdp.number_of_states)
    x = [float('inf')] * mdp.number_of_states
    expect_inf = [True] * mdp.number_of_states
    # Determine states for which x[s] != inf: states reaching T with
    # probability 1 get the sentinel -1 and later become LP variables.
    for s in pr_max_1(mdp, T):
        x[s] = -1
        expect_inf[s] = False
    # Target states cost 0 (already in T).
    for t in T:
        x[t] = 0
    # Formulate the LP problem.
    linear_program = pulp.LpProblem(
        "minimum expected length of path to target", pulp.LpMaximize)
    # Initialize variables for the states still marked with the -1 sentinel.
    for s in filter(lambda s: x[s] == -1, states):
        x[s] = pulp.LpVariable(mdp.state_name(s), lowBound=0)
    # Objective function: maximise the sum of the finite expectations.
    linear_program += sum(
        map(lambda s: x[s], filter(lambda s: not expect_inf[s], states)))
    # Constraints: for each LP-variable state and each enabled action whose
    # successors all have finite expectation,
    #   x[s] <= w(alpha) + sum_{succ} Pr(succ) * x[succ].
    # Fix: select variable states with isinstance instead of `x[s] == -1`,
    # which compared an LpVariable with an int and relied on pulp's operator
    # overloading to yield a truthy constraint object.
    for s in filter(lambda s: isinstance(x[s], pulp.LpVariable), states):
        for (alpha, successor_list) in mdp.alpha_successors(s):
            if not list(
                    filter(lambda succ_pr: expect_inf[succ_pr[0]],
                           successor_list)):
                linear_program += x[s] <= mdp.w(alpha) + sum(
                    map(lambda succ_pr: succ_pr[1] * x[succ_pr[0]],
                        successor_list))
    if msg:
        print(linear_program)
    # Solve the LP.
    solver.msg = msg
    if linear_program.variables():
        linear_program.solve(solver)
    # Replace each LpVariable by its solved value so the result is a plain
    # list of floats. Fix: explicit isinstance check instead of comparing an
    # LpVariable with 0/inf via `!=`, which again went through pulp's
    # overloaded comparison operators.
    for s in states:
        if isinstance(x[s], pulp.LpVariable):
            x[s] = x[s].varValue
    if msg:
        print_optimal_solution(x, states, mdp.state_name)
    return x