Esempio n. 1
0
    sim = rooms.Simulator(room_config)

    data = {}
    final_data = {}
    for episode in range(start_episode, end_episode+1, step_episode):
        for k in range(start_k, end_k+1, step_k):
            print "episode: %i k: %i" % (episode, k)

            if (k, episode) not in data:
                data[(k, episode)] = []
            samples = rooms.collect_samples(sim, maxepisodes=episode, maxsteps=max_steps)

            graph = pvf.construct_graph(samples, sim.states)
            try:
                basis = pvf.create_basis_function(graph, sim.states,
                                                      sim.actions, k)
            except:
                print "Couldn't compute basis function for this data"
                continue
                    
            policy = rooms.initialize_policy(0.0, discount, basis)

            final_policy = lspi.lspi(maxiter, epsilon,
                                         samples, policy)[0]

            for n in range(num_tries):                
                execution_data = rooms.test_execution(sim, final_policy, maxsteps=max_steps)
                

                data[(k, episode)].append(execution_data)
Esempio n. 2
0
    # Fragment of a larger routine (its header is not visible here): collects
    # samples from sim, builds a basis via pvf, runs lspi.lspi and plots the
    # output of display_qvalues for the resulting policy.
    print sim
    
    # k is the last argument to pvf.create_basis_function below
    # (presumably the number of basis functions -- TODO confirm).
    k = 20
    # maxiter and epsilon are the first two arguments to lspi.lspi below
    # (presumably iteration cap and convergence tolerance -- TODO confirm).
    maxiter = 200
    epsilon = 10**(-12)
    samples = collect_samples(sim)
    # Top up with one extra batch if the first one has fewer than 5000
    # entries; this is a single retry, not a loop, so the total may still
    # be below 5000.
    if len(samples) < 5000:
        samples += collect_samples(sim)
    # Second argument to initialize_policy below.
    discount = .8

    
    # construct a graph from the samples
    # NOTE(review): states/actions are read from the name Simulator, not from
    # the sim instance used everywhere else -- confirm this is intended.
    graph = pvf.construct_graph(samples, Simulator.states)

    basis = pvf.create_basis_function(graph, Simulator.states,
                                      Simulator.actions, k)

    policy = initialize_policy(0.0, discount, basis)

    # lspi.lspi's result is unpacked into two values here.
    final_policy, all_policies = lspi.lspi(maxiter, epsilon,
                                           samples, policy)

    # value_policy is not used in the lines visible here.
    value_policy = initialize_value_function_policy(sim)
    
    # First two cells of a 2x2 subplot grid: display_qvalues with its default
    # arguments, then again with dim=1. Both cells get the same title.
    plt.figure()
    plt.subplot(2,2,1)
    # approxV is not used in the lines visible here.
    approxV = display_qvalues(sim, final_policy)
    plt.title('Estimated Value Function')
    plt.subplot(2,2,2)
    display_qvalues(sim, final_policy, dim=1)
    plt.title('Estimated Value Function')
Esempio n. 3
0
    # Fragment of a larger routine (its header is not visible here): collects
    # samples, builds a basis via pvf, runs lspi.lspi and shows two policies
    # side by side.
    # Function-scope imports of the lspi and pvf modules used below.
    import lspiframework.lspi as lspi
    import protovalueframework.pvf as pvf

    # pdb is not referenced in the lines visible here.
    import pdb

    # k is the last argument to pvf.create_basis_function below
    # (presumably the number of basis functions -- TODO confirm).
    k = 10
    # maxiter and epsilon are the first two arguments to lspi.lspi below
    # (presumably iteration cap and convergence tolerance -- TODO confirm).
    maxiter = 20
    epsilon = 10**(-5)
    # Alternative sample source, currently disabled:
    #samples = uniform_samples()
    samples = collect_samples()
    # Second argument to initialize_policy below.
    discount = .9

    # construct a graph from the samples
    # S and A are names defined outside this fragment
    # (presumably the state and action sets -- TODO confirm).
    graph = pvf.construct_graph(samples, S)

    basis = pvf.create_basis_function(graph, S, A, k)
    
    policy = initialize_policy(0, discount, basis)

    # lspi.lspi's result is unpacked into two values here.
    final_policy, all_policies = lspi.lspi(maxiter,
                                           epsilon,
                                           samples,
                                           policy)

    # One figure, two panels: final_policy on the left, the first element of
    # all_policies on the right.
    plt.figure()
    plt.subplot(1,2,1)
    display_policy(final_policy)
    plt.subplot(1,2,2)
    display_policy(all_policies[0])
    plt.show()