def build_strategy(mdp: MDP, T: List[int], solver: pulp = pulp.GLPK_CMD(),
                   msg=0) -> Callable[[int], int]:
    """
    Build a memoryless strategy that returns the action that maximises the
    reachability probability to T of each state s in parameter of this
    strategy.

    :param mdp: a MDP for which the strategy will be built.
    :param T: a target states list.
    :param solver: (optional) a LP solver allowed in puLp (e.g., GLPK or
                   CPLEX).
    :param msg: (optional) set to 1 to activate solver debug output.
    :return: the strategy built, as a callable mapping a state index to the
             chosen action.
    """
    # x[s] = Pr^max of reaching T from s (floats after the LP is solved).
    x = reach(mdp, T, solver=solver, msg=msg)
    states = range(mdp.number_of_states)
    # act_max[s] = list of actions of s achieving the maximal probability.
    act_max = [[] for _ in states]
    # update act_max
    for s in states:
        pr_max = 0
        for (alpha, successor_list) in mdp.alpha_successors(s):
            # Probability of reaching T when playing alpha in s:
            # sum over successors of p(succ | s, alpha) * x[succ].
            pr = sum(
                map(lambda succ_pr: succ_pr[1] * x[succ_pr[0]],
                    successor_list))
            # NOTE(review): exact float equality on LP results — presumably
            # fine for this solver's output, but ties may be missed if the
            # solver returns values differing by rounding error; confirm.
            if pr == pr_max:
                act_max[s].append(alpha)
            elif pr > pr_max:
                pr_max = pr
                act_max[s] = [alpha]
    # compute M^max: the sub-MDP keeping only the optimal actions of each
    # state. validation=False skips input checks on this internally-built MDP.
    mdp_max = MDP([], [], mdp._w, mdp.number_of_states, validation=False)
    for s in states:
        # i walks act_max[s]; this relies on alpha_successors enumerating
        # actions in the same order as the first pass above, so act_max[s]
        # is a sorted subsequence of that enumeration.
        i = 0
        for (alpha, successor_list) in mdp.alpha_successors(s):
            if alpha == act_max[s][i]:
                i += 1
                mdp_max.enable_action(s, alpha, successor_list)
                if i == len(act_max[s]):
                    break
    # compute the final strategy: among optimal actions, prefer one that
    # strictly decreases the minimal number of steps to T in M^max, so the
    # strategy actually makes progress towards T.
    minimal_steps = minimal_steps_number_to(mdp_max, T)
    strategy: List[int] = []
    for s in states:
        # States that cannot reach T (x[s] == 0) or are already at distance
        # 0 get an arbitrary optimal action.
        if x[s] == 0 or minimal_steps[s] == 0:
            strategy.append(act_max[s][0])
        else:
            for (alpha, successor_list) in mdp_max.alpha_successors(s):
                for (succ, pr) in successor_list:
                    if minimal_steps[succ] == minimal_steps[s] - 1:
                        strategy.append(alpha)
                        break
                # strategy grew to s+1 entries iff an action was chosen for
                # s; stop scanning further actions in that case.
                if len(strategy) == s + 1:
                    break
    return lambda s: strategy[s]
def export_to_yaml(mdp: MDP, file_name: str) -> None:
    """
    Serialise a MDP instance into a yaml file.

    :param mdp: a MDP
    :param file_name: the name of the yaml file; if falsy, the yaml document
                      is printed to stdout instead of written to disk.
    """
    state_entries = []
    for s in range(mdp.number_of_states):
        action_entries = []
        for (alpha, succ_list) in mdp.alpha_successors(s):
            # One transition entry per (successor, probability) pair.
            transition_entries = []
            for (succ, pr) in succ_list:
                transition_entries.append(
                    {'target': mdp.state_name(succ), 'probability': pr})
            action_entries.append({'name': mdp.act_name(alpha),
                                   'transitions': transition_entries})
        state_entries.append({'name': mdp.state_name(s),
                              'enabled actions': action_entries})
    # Global action list with weights, independent of any state.
    action_list = []
    for alpha in range(mdp.number_of_actions):
        action_list.append({'name': mdp.act_name(alpha),
                            'weight': mdp.w(alpha)})
    mdp_dict = {'mdp': {'states': state_entries, 'actions': action_list}}
    if file_name:
        with open(file_name + '.yaml', 'w') as yaml_file:
            yaml.dump(mdp_dict, yaml_file, default_flow_style=False)
    else:
        print(yaml.dump(mdp_dict, default_flow_style=False))
def export_mdp(mdp: MDP, mdp_name: str, strategy: List[int] = None) -> None:
    """
    Render the MDP as a Graphviz digraph and open it in the default viewer.

    States are drawn as circles; each enabled action is drawn as an
    intermediate point node labelled "name | weight", with probability-
    labelled edges to its successors.

    :param mdp: a MDP to render.
    :param mdp_name: base name used for the generated .gv file.
    :param strategy: (optional) a list mapping each state s to the action a
                     strategy selects in s; the selected action of each state
                     is highlighted in red.
    """
    # Fixed: the default used to be a mutable `[]` literal, which is shared
    # across calls (classic Python pitfall). The None-sentinel idiom is
    # behaviourally identical for all callers.
    if strategy is None:
        strategy = []
    states = range(mdp.number_of_states)
    g = Digraph(mdp_name, filename=mdp_name + '.gv')
    g.attr('node', shape='circle')
    for s in states:
        g.node('s%d' % s, label=mdp.state_name(s))
    # Action nodes are small points between a state and its successors.
    g.attr('node', shape='point')
    for s in states:
        for (alpha, succ_list) in mdp.alpha_successors(s):
            # Highlight the action chosen by the strategy (if any) in red.
            if strategy and strategy[s] == alpha:
                color = 'red'
            else:
                color = 'black'
            g.node('s%d->a%d' % (s, alpha),
                   xlabel=' ' + mdp.act_name(alpha) + ' | ' +
                   str(mdp.w(alpha)) + ' ',
                   fontsize='8', fontcolor=color, color=color)
            g.edge('s%d' % s, 's%d->a%d' % (s, alpha))
            for (succ, pr) in succ_list:
                g.edge('s%d->a%d' % (s, alpha), 's%d' % succ,
                       label=str(round(pr, 4)), fontsize='8')
    g.view()
def build_strategy(mdp: MDP, T: List[int], solver: pulp = pulp.GLPK_CMD(),
                   msg=0) -> Callable[[int], int]:
    """
    Build a memoryless strategy that returns, following a state s of the
    MDP, the action that minimize the expected length of paths to a set of
    target states T.

    :param mdp: a MDP for which the strategy will be built.
    :param T: a target states list.
    :param solver: (optional) a LP solver allowed in puLp (e.g., GLPK or
                   CPLEX).
    :return: the strategy built.
    """
    # x[s] = minimum expected length of paths from s to T.
    x = min_expected_cost(mdp, T, solver=solver, msg=msg)
    # Expose the computed vector at module level (existing file convention).
    global v
    v = x
    act_min = []
    for s in range(mdp.number_of_states):
        # Expected cost of playing alpha in s:
        # w(alpha) + sum over successors of p(succ | s, alpha) * x[succ].
        expected_costs = [
            mdp.w(alpha) + sum(pr * x[succ] for (succ, pr) in succ_list)
            for (alpha, succ_list) in mdp.alpha_successors(s)
        ]
        # Keep the enabled action of s with the smallest expected cost.
        act_min.append(mdp.act(s)[argmin(expected_costs)])
    return lambda s: act_min[s]
def reach(mdp: MDP, T: List[int], msg=0,
          solver: pulp = pulp.GLPK_CMD()) -> List[float]:
    """
    Compute the maximum reachability probability to T for each state of the
    MDP in parameter and get a vector x (as list) such that x[s] is the
    maximum reachability probability to T of the state s.

    :param mdp: a MDP for which the maximum reachability probability will be
                computed for each of its states.
    :param T: a list of target states.
    :param msg: (optional) set this parameter to 1 to activate the debug
                mode in the console.
    :param solver: (optional) a LP solver allowed in puLp (e.g., GLPK or
                   CPLEX).
    :return: the a list x such that x[s] is the maximum reachability
             probability to T.
    """
    states = list(range(mdp.number_of_states))
    # x[s] is the Pr^max to reach T.
    # NOTE: x holds ints (-1 sentinel, then 0/1) first, LpVariables during
    # LP construction, and floats after solving — the statement order below
    # is load-bearing.
    x = [-1] * mdp.number_of_states
    connected = connected_to(mdp, T)
    # find all states s such that s is not connected to T: Pr^max is 0
    for s in filter(lambda s: not connected[s], states):
        x[s] = 0
    # find all states s such that Pr^max to reach T is 1
    for s in pr_max_1(mdp, T, connected=connected):
        x[s] = 1
    # if there exist some other states such that Pr^max to reach T is in
    # ]0, 1[, a LP is generated for these states
    untreated_states = list(filter(lambda s: x[s] == -1, states))
    if untreated_states:
        # formulate the LP problem: minimise sum(x) subject to
        # x[s] >= sum_succ pr * x[succ] for every enabled action, which
        # yields the least fixed point, i.e. the maximal probabilities.
        linear_program = pulp.LpProblem("reachability", pulp.LpMinimize)
        # initialize variables (the fixed 0/1 entries of x act as constants
        # in the constraints below)
        for s in untreated_states:
            x[s] = pulp.LpVariable(mdp.state_name(s), lowBound=0, upBound=1)
        # objective function
        linear_program += sum(x)
        # constraints: one per (state, enabled action) pair
        for s in untreated_states:
            for (alpha, successors_list) in mdp.alpha_successors(s):
                linear_program += x[s] >= sum(
                    pr * x[succ] for (succ, pr) in successors_list)
        if msg:
            print(linear_program)
        # solve the LP; the solver object is mutated to carry the verbosity
        solver.msg = msg
        linear_program.solve(solver)
        # replace each LpVariable by its solved value
        for s in untreated_states:
            x[s] = x[s].varValue
    if msg:
        print_optimal_solution(x, states, mdp.state_name)
    # Expose the computed vector at module level (existing file convention).
    global v
    v = x
    return x
def min_expected_cost(mdp: MDP, T: List[int], msg=0,
                      solver: pulp = pulp.GLPK_CMD()) -> List[float]:
    """
    Compute the minimum expected length of paths to the set of targets T
    from each state in the MDP.

    :param mdp: a MDP.
    :param T: a list of target states of the MDP.
    :param msg: (optional) set this parameter to 1 to activate the debug
                mode in the console.
    :param solver: (optional) a LP solver allowed in puLp (e.g., GLPK or
                   CPLEX).
    :return: a list x such that x[s] is the mimum expected length of paths
             to the set of targets T from the state s of the MDP.
    """
    states = range(mdp.number_of_states)
    # x[s] starts at inf; states that reach T with probability 1 get a
    # finite expectation. Like in reach(), x mixes sentinels, LpVariables
    # and floats over the course of this function.
    x = [float('inf')] * mdp.number_of_states
    expect_inf = [True] * mdp.number_of_states
    # determine states for which x[s] != inf: exactly those with
    # Pr^max(reach T) = 1; mark them with the -1 sentinel
    for s in pr_max_1(mdp, T):
        x[s] = -1
        expect_inf[s] = False
    # target states cost 0 (overwrites the -1 sentinel for states in T)
    for t in T:
        x[t] = 0
    # formulate the LP problem: maximise sum(x) subject to
    # x[s] <= w(alpha) + sum_succ pr * x[succ], whose optimum is the
    # minimum expected cost (dual/greatest-solution formulation)
    linear_program = pulp.LpProblem(
        "minimum expected length of path to target", pulp.LpMaximize)
    # initialize variables for the -1-marked states
    for s in filter(lambda s: x[s] == -1, states):
        x[s] = pulp.LpVariable(mdp.state_name(s), lowBound=0)
    # objective function: only states with a finite expectation contribute
    linear_program += sum(
        map(lambda s: x[s], filter(lambda s: not expect_inf[s], states)))
    # constraints
    # NOTE(review): at this point no entry of x equals the int -1 anymore;
    # this filter works because `LpVariable == -1` returns a (truthy)
    # LpConstraint in pulp, so it selects exactly the variable states.
    # Fragile but apparently intended — do not "simplify" without checking.
    for s in filter(lambda s: x[s] == -1, states):
        for (alpha, successor_list) in mdp.alpha_successors(s):
            # skip actions that can lead to a state with infinite
            # expectation: they cannot be part of an optimal constraint
            if not list(
                    filter(lambda succ_pr: expect_inf[succ_pr[0]],
                           successor_list)):
                linear_program += x[s] <= mdp.w(alpha) + sum(
                    map(lambda succ_pr: succ_pr[1] * x[succ_pr[0]],
                        successor_list))
    if msg:
        print(linear_program)
    # solve the LP; the solver object is mutated to carry the verbosity
    solver.msg = msg
    if linear_program.variables():
        linear_program.solve(solver)
    # replace each LpVariable by its solved value; 0 (targets) and inf
    # entries are plain floats and are kept as-is. LpVariable has no
    # custom __ne__ in pulp, so `!= 0` is an identity test here — truthy
    # for every variable, which is what makes this loop work.
    for s in states:
        if x[s] != 0 and x[s] != float('inf'):
            x[s] = x[s].varValue
    if msg:
        print_optimal_solution(x, states, mdp.state_name)
    return x