# Parameter sweep: for every (episode, k) combination, collect samples from
# the simulator, build a graph and a basis from them, run lspi.lspi, and then
# execute the resulting policy num_tries times. The outcome of every test
# execution is stored in `data` under the key (k, episode).
sim = rooms.Simulator(room_config)
data = {}
final_data = {}

for episode in range(start_episode, end_episode + 1, step_episode):
    for k in range(start_k, end_k + 1, step_k):
        # Single-argument print(...) prints the same text on Python 2 and
        # is also valid syntax on Python 3.
        print("episode: %i k: %i" % (episode, k))

        # Make sure the result list exists even when this combination is
        # skipped below, so skipped combinations show up as empty lists.
        data.setdefault((k, episode), [])

        samples = rooms.collect_samples(sim, maxepisodes=episode,
                                        maxsteps=max_steps)
        graph = pvf.construct_graph(samples, sim.states)

        try:
            basis = pvf.create_basis_function(graph, sim.states,
                                              sim.actions, k)
        except Exception:
            # Best-effort: skip combinations whose basis cannot be built.
            # Catching Exception (not a bare except) keeps Ctrl-C
            # (KeyboardInterrupt) and SystemExit able to stop the sweep.
            print("Couldn't compute basis function for this data")
            continue

        policy = rooms.initialize_policy(0.0, discount, basis)
        # lspi.lspi returns an indexable result; only element 0 is used here.
        final_policy = lspi.lspi(maxiter, epsilon, samples, policy)[0]

        for n in range(num_tries):
            execution_data = rooms.test_execution(sim, final_policy,
                                                  maxsteps=max_steps)
            data[(k, episode)].append(execution_data)
# Single experiment run: collect samples, build a graph and a basis from them,
# run lspi.lspi, then plot the q-values of the resulting policy.

# Single-argument print(...) prints the same text on Python 2 and is also
# valid syntax on Python 3 (the bare `print sim` statement is not).
print(sim)

# Settings: k is passed to pvf.create_basis_function; maxiter and epsilon
# are passed to lspi.lspi.
k = 20
maxiter = 200
epsilon = 10**(-12)

samples = collect_samples(sim)
# Top up with one more batch when the first one has fewer than 5000 entries.
# NOTE(review): a single extra batch does not guarantee 5000 samples --
# confirm that one top-up is what is intended.
if len(samples) < 5000:
    samples += collect_samples(sim)

discount = .8

# construct a graph from the samples
# NOTE(review): states/actions are read from the Simulator class here, not
# from the `sim` instance -- confirm these are class-level attributes.
graph = pvf.construct_graph(samples, Simulator.states)
basis = pvf.create_basis_function(graph, Simulator.states, Simulator.actions, k)

policy = initialize_policy(0.0, discount, basis)
final_policy, all_policies = lspi.lspi(maxiter, epsilon, samples, policy)
value_policy = initialize_value_function_policy(sim)

# Panels 1 and 2 of a 2x2 grid: q-values of the final policy, first with the
# default arguments and then with dim=1.
# NOTE(review): both panels carry the same title -- confirm that is intended.
plt.figure()
plt.subplot(2, 2, 1)
approxV = display_qvalues(sim, final_policy)
plt.title('Estimated Value Function')
plt.subplot(2, 2, 2)
display_qvalues(sim, final_policy, dim=1)
plt.title('Estimated Value Function')
import lspiframework.lspi as lspi
import protovalueframework.pvf as pvf
import pdb

# Single experiment run: collect samples, build a graph and a basis from them,
# run lspi.lspi, then show the final policy next to the first entry of
# all_policies.

# Settings: k is passed to pvf.create_basis_function; maxiter and epsilon
# are passed to lspi.lspi.
k = 10
maxiter = 20
epsilon = 10 ** -5

# Disabled alternative sampler: samples = uniform_samples()
samples = collect_samples()
discount = 0.9

# Build the graph over S from the samples, then a basis over S and A.
graph = pvf.construct_graph(samples, S)
basis = pvf.create_basis_function(graph, S, A, k)

policy = initialize_policy(0, discount, basis)
final_policy, all_policies = lspi.lspi(maxiter, epsilon, samples, policy)

# One figure, two panels side by side: the final policy on the left and
# all_policies[0] on the right.
plt.figure()

plt.subplot(1, 2, 1)
display_policy(final_policy)

plt.subplot(1, 2, 2)
display_policy(all_policies[0])

plt.show()