Ejemplo n.º 1
0
v_f = value_iteration(gamma=0.9, theta=0.01)

graph_vals(v1, 'Sweep 1')
graph_vals(v10, 'Sweep 10')
graph_vals(v_f, 'Optimal V*')

#
# Policy Iteration
#
values = []
V, pi = policy_iteration(gamma=0.9, theta=0.01, value_list=values)

for i, v in enumerate(values):
    graph_vals(v, 'Policy Evaluation %d' % (i + 1))
'''

# Gamma variations: rerun policy_iteration once per gamma value and print
# pi[s] for three hard-coded state tuples after each run.
print 'Generating Gamma Variations'
for g in (0.9, 0.5, 0.3):
    # Fresh list for every run; it is handed to policy_iteration as value_list.
    values = []
    V, pi = policy_iteration(theta=0.01, value_list=values, gamma=g)

    print 'gamma = %f:' % (g)

    # pi is indexed directly by the state tuple.
    for s in ((4, 7, 1), (1, 3, 6), (9, 2, 1)):
        print 'state %s => action %s' % (s, pi[s])
'''    
    for i, v in enumerate(values):
        graph_vals(v, 'Policy Evaluation %d. Gamma: %f' % (i + 1, g), 'gamma_%d_%d' % (int(g*10), i+1))
'''
Ejemplo n.º 2
0
'''
Test policy iteration

Created on 24 Sep 2009

@author: joh
'''

from value_methods import policy_iteration

if __name__ == '__main__':
    # Script entry point: run policy_iteration once, check that the last
    # entry collected in value_list equals the returned V, then print every
    # item of V in ascending order of its value together with pi[key].
    values = []
    V, pi = policy_iteration(gamma=0.9, theta=0.01, value_list=values)

    # The final entry appended to value_list must equal the returned V.
    assert values[-1] == V

    # Sort the (key, value) items of V by value. A key function replaces the
    # former cmp= comparator: cmp()/cmp= no longer exist in Python 3, and both
    # forms are stable sorts, so the resulting order is the same.
    ranked = sorted(V.items(), key=lambda item: item[1])

    for entry in ranked:
        # A single pre-formatted argument prints the same text whether print
        # is parsed as a statement (Python 2) or called as a function.
        print("%s => %s" % (entry, pi[entry[0]]))

    print("%d policy evaluation steps were required." % (len(values)))
    
    
Ejemplo n.º 3
0
graph_vals(v1, 'Sweep 1')
graph_vals(v10, 'Sweep 10')
graph_vals(v_f, 'Optimal V*')

#
# Policy Iteration
#
values = []
V, pi = policy_iteration(gamma=0.9, theta=0.01, value_list=values)

for i, v in enumerate(values):
    graph_vals(v, 'Policy Evaluation %d' % (i + 1))
'''
    
# Gamma variations: rerun policy_iteration once per gamma value and print
# pi[s] for three hard-coded state tuples after each run.
print 'Generating Gamma Variations'
for g in(0.9, 0.5, 0.3):
    # Fresh list for every run; it is handed to policy_iteration as value_list.
    values = []
    V, pi = policy_iteration(theta=0.01, value_list=values, gamma=g)
    
    print 'gamma = %f:' % (g)
    
    # pi is indexed directly by the state tuple.
    for s in ((4,7,1), (1,3,6), (9,2,1)):
        print 'state %s => action %s' % (s, pi[s])
    
'''    
    for i, v in enumerate(values):
        graph_vals(v, 'Policy Evaluation %d. Gamma: %f' % (i + 1, g), 'gamma_%d_%d' % (int(g*10), i+1))
'''