def export_to_yaml(mdp: MDP, file_name: str) -> None:
    """
    Serialise a MDP instance into a yaml file.

    The document has the shape {'mdp': {'states': [...], 'actions': [...]}}
    where each state lists its enabled actions and each enabled action lists
    its outgoing transitions (target state name and probability).

    :param mdp: a MDP
    :param file_name: the name of the yaml file; if empty/falsy, the yaml
                      document is printed to stdout instead.
    """
    state_entries = []
    for s in range(mdp.number_of_states):
        enabled_actions = []
        for (alpha, succ_list) in mdp.alpha_successors(s):
            transitions = [
                {'target': mdp.state_name(succ), 'probability': pr}
                for (succ, pr) in succ_list
            ]
            enabled_actions.append({
                'name': mdp.act_name(alpha),
                'transitions': transitions,
            })
        state_entries.append({
            'name': mdp.state_name(s),
            'enabled actions': enabled_actions,
        })
    action_entries = [
        {'name': mdp.act_name(alpha), 'weight': mdp.w(alpha)}
        for alpha in range(mdp.number_of_actions)
    ]
    mdp_dict = {'mdp': {'states': state_entries, 'actions': action_entries}}
    if file_name:
        with open(file_name + '.yaml', 'w') as yaml_file:
            yaml.dump(mdp_dict, yaml_file, default_flow_style=False)
    else:
        print(yaml.dump(mdp_dict, default_flow_style=False))
def export_mdp(mdp: MDP, mdp_name: str, strategy: List[int] = None) -> None:
    """
    Render a MDP as a graphviz graph, write it to <mdp_name>.gv and open it.

    States are drawn as circles; each enabled action is drawn as an
    intermediate point node labelled with the action name and its weight,
    and each transition as an edge labelled with its (rounded) probability.
    If a strategy is given, the action it prescribes in each state is
    highlighted in red.

    :param mdp: a MDP.
    :param mdp_name: base name of the generated graphviz file (.gv).
    :param strategy: (optional) a list mapping each state index to the
                     action chosen in that state.
    """
    # Fix: avoid a mutable default argument ([]); create the list per call.
    if strategy is None:
        strategy = []
    states = range(mdp.number_of_states)
    g = Digraph(mdp_name, filename=mdp_name + '.gv')
    g.attr('node', shape='circle')
    for s in states:
        g.node('s%d' % s, label=mdp.state_name(s))
    g.attr('node', shape='point')
    for s in states:
        for (alpha, succ_list) in mdp.alpha_successors(s):
            # Highlight the action prescribed by the strategy, if any.
            if strategy and strategy[s] == alpha:
                color = 'red'
            else:
                color = 'black'
            g.node('s%d->a%d' % (s, alpha),
                   xlabel=' ' + mdp.act_name(alpha) + ' | '
                          + str(mdp.w(alpha)) + ' ',
                   fontsize='8', fontcolor=color, color=color)
            g.edge('s%d' % s, 's%d->a%d' % (s, alpha))
            for (succ, pr) in succ_list:
                g.edge('s%d->a%d' % (s, alpha), 's%d' % succ,
                       label=str(round(pr, 4)), fontsize='8')
    g.view()
def build_strategy(mdp: MDP, T: List[int], solver: pulp = None,
                   msg=0) -> Callable[[int], int]:
    """
    Build a memoryless strategy that returns, following a state s of the MDP,
    the action that minimize the expected length of paths to a set of target
    states T.

    :param mdp: a MDP for which the strategy will be built.
    :param T: a target states list.
    :param solver: (optional) a LP solver allowed in puLp (e.g., GLPK or
                   CPLEX). Defaults to a fresh GLPK_CMD instance.
    :param msg: (optional) set to 1 to activate the solver's debug output.
    :return: the strategy built.
    """
    # Fix: a solver instance used as a default argument would be shared
    # across calls (and mutated via its `msg` attribute in
    # min_expected_cost); create a fresh one per call instead.
    if solver is None:
        solver = pulp.GLPK_CMD()
    x = min_expected_cost(mdp, T, solver=solver, msg=msg)
    # Side effect kept for backward compatibility: the computed expected
    # costs are exposed through the module-level global `v`.
    global v
    v = x
    states = range(mdp.number_of_states)
    # For each state, pick the enabled action minimising
    # w(alpha) + sum_{succ} Pr(succ) * x[succ].
    act_min = [
        mdp.act(s)[argmin([
            mdp.w(alpha)
            + sum(map(lambda succ_pr: succ_pr[1] * x[succ_pr[0]], succ_list))
            for (alpha, succ_list) in mdp.alpha_successors(s)
        ])]
        for s in states
    ]
    return lambda s: act_min[s]
def min_expected_cost(mdp: MDP, T: List[int], msg=0,
                      solver: pulp = None) -> List[float]:
    """
    Compute the minimum expected length of paths to the set of targets T
    from each state in the MDP.

    :param mdp: a MDP.
    :param T: a list of target states of the MDP.
    :param msg: (optional) set this parameter to 1 to activate the debug mode
                in the console.
    :param solver: (optional) a LP solver allowed in puLp (e.g., GLPK or
                   CPLEX). Defaults to a fresh GLPK_CMD instance.
    :return: a list x such that x[s] is the minimum expected length of paths
             to the set of targets T from the state s of the MDP.
    """
    # Fix: a solver instance used as a default argument would be shared
    # across calls and mutated below (solver.msg = msg); create it per call.
    if solver is None:
        solver = pulp.GLPK_CMD()
    states = range(mdp.number_of_states)
    x = [float('inf')] * mdp.number_of_states
    expect_inf = [True] * mdp.number_of_states
    # Determine states for which x[s] != inf: states reaching T with
    # probability 1 get the sentinel -1 and later become LP variables.
    for s in pr_max_1(mdp, T):
        x[s] = -1
        expect_inf[s] = False
    # Target states cost 0 (already in T).
    for t in T:
        x[t] = 0
    # Formulate the LP problem.
    linear_program = pulp.LpProblem(
        "minimum expected length of path to target", pulp.LpMaximize)
    # Initialize variables for the states still marked with the -1 sentinel.
    for s in filter(lambda s: x[s] == -1, states):
        x[s] = pulp.LpVariable(mdp.state_name(s), lowBound=0)
    # Objective function: maximise the sum of the finite expectations.
    linear_program += sum(
        map(lambda s: x[s], filter(lambda s: not expect_inf[s], states)))
    # Constraints: for each LP-variable state and each enabled action whose
    # successors all have finite expectation,
    #   x[s] <= w(alpha) + sum_{succ} Pr(succ) * x[succ].
    # Fix: select variable states with isinstance instead of `x[s] == -1`,
    # which compared an LpVariable with an int and relied on pulp's operator
    # overloading to yield a truthy constraint object.
    for s in filter(lambda s: isinstance(x[s], pulp.LpVariable), states):
        for (alpha, successor_list) in mdp.alpha_successors(s):
            if not list(
                    filter(lambda succ_pr: expect_inf[succ_pr[0]],
                           successor_list)):
                linear_program += x[s] <= mdp.w(alpha) + sum(
                    map(lambda succ_pr: succ_pr[1] * x[succ_pr[0]],
                        successor_list))
    if msg:
        print(linear_program)
    # Solve the LP.
    solver.msg = msg
    if linear_program.variables():
        linear_program.solve(solver)
    # Replace each LpVariable by its solved value so the result is a plain
    # list of floats. Fix: explicit isinstance check instead of comparing an
    # LpVariable with 0/inf via `!=`, which again went through pulp's
    # overloaded comparison operators.
    for s in states:
        if isinstance(x[s], pulp.LpVariable):
            x[s] = x[s].varValue
    if msg:
        print_optimal_solution(x, states, mdp.state_name)
    return x