cost_state_fn = BallSetFn(int(N/2), 0.25) cost_fn = CostWrapper(cost_state_fn) state_dim = 1 action_dim = 1 gen_model = GenerativeModel(trans_fn, boundary, cost_fn, state_dim, action_dim) action_boundary = [(-1,1)] discount = 0.95 problem = Problem(gen_model, action_boundary, discount) return problem if __name__ == '__main__': parser = ArgumentParser(__file__, 'Generates a double integrator problem') parser.add_argument('save_file', metavar='FILE', help='save file') args = parser.parse_args() problem = make_di_problem() dump(problem,args.save_file)
mdp_obj = builder.build_mdp() return (mdp_obj,discretizer) if __name__ == '__main__': parser = ArgumentParser(__file__,'Generates an MDP from continuous problem') parser.add_argument('problem_in_file', metavar='FILE', help='problem file') parser.add_argument('num_states', metavar='N', type=int, help='number of states per dimension') parser.add_argument('num_actions', metavar='A', type=int, help='number of actions per dimension') parser.add_argument('mdp_out_file', metavar='FILE', help='mdp save file') parser.add_argument('disc_out_file', metavar='FILE', help='discretizer save file') args = parser.parse_args() problem=load(args.problem_in_file) (mdp,disc) = make_uniform_mdp(problem, args.num_states, args.num_actions) dump(mdp,args.mdp_out_file) dump(disc,args.disc_out_file)
from solvers.value_iter import ValueIterator
from solvers import *


def solve_with_value_iter(mdp, thresh, max_iter):
    """Run a ValueIterator on *mdp* and return its value vector.

    The iterator is driven by an IterativeSolver configured with two
    termination conditions (a value-change condition built from ``thresh``
    and a max-iteration condition built from ``max_iter``) and one
    ValueChangeAnnounce notification.

    Args:
        mdp: MDP object handed to ``ValueIterator``.
        thresh: argument for ``ValueChangeTerminationCondition``.
        max_iter: argument for ``MaxIterTerminationCondition``.

    Returns:
        Whatever ``iterator.get_value_vector()`` yields once
        ``solver.solve()`` has returned.
    """
    iterator = ValueIterator(mdp)
    solver = IterativeSolver(iterator)

    solver.termination_conditions.extend([
        ValueChangeTerminationCondition(thresh),
        MaxIterTerminationCondition(max_iter),
    ])
    solver.notifications.extend([ValueChangeAnnounce()])

    solver.solve()
    return iterator.get_value_vector()


if __name__ == '__main__':
    # Script entry point: load an MDP, solve it, and save the solution.
    # The parser text previously read "Generates an MDP from continuous
    # problem", which was copied from the MDP-generation script.
    parser = ArgumentParser(__file__, 'Solves an MDP with value iteration')
    parser.add_argument('mdp_in_file', metavar='FILE', help='mdp file')
    parser.add_argument('sol_out_file', metavar='FILE',
                        help='solution out file')
    args = parser.parse_args()

    mdp = load(args.mdp_in_file)
    # Call the solver defined in this script. The previous call targeted
    # solve_with_kojima, which is not defined here.
    sol = solve_with_value_iter(mdp, 1e-9, 1e4)
    # Save the computed solution. The previous code dumped `p`, a name that
    # is never assigned in this script.
    dump(sol, args.sol_out_file)
from mdp.state_functions import InterpolatedFunction
from mdp.solution_process import *

if __name__ == '__main__':
    # Script entry point: load a solution, a discretizer and an MDP, then
    # save an InterpolatedFunction built from part of the solution.
    cli = ArgumentParser(__file__, 'Form a policy from value function')

    # All four positionals are file paths; only the name and help text differ.
    file_arguments = (
        ('sol_in_file', 'solution in file'),
        ('disc_in_file', 'discretizer in file'),
        ('mdp_in_file', 'mdp in file'),
        ('v_fn_out_file', 'solution out file'),
    )
    for arg_name, arg_help in file_arguments:
        cli.add_argument(arg_name, metavar='FILE', help=arg_help)
    opts = cli.parse_args()

    solution = load(opts.sol_in_file)
    discretizer = load(opts.disc_in_file)
    mdp_obj = load(opts.mdp_in_file)

    # split_solution returns a pair; only its first component is used and is
    # wrapped in an InterpolatedFunction over the discretizer.
    value_part, _ = split_solution(mdp_obj, solution)
    dump(InterpolatedFunction(discretizer, value_part), opts.v_fn_out_file)