Exemple #1
0
    print "=== {} ===".format(name)
    print "value function:"
    print_parking_value_function(V, mdp)
    print
    print "policy:"
    print_parking_policy(policy, mdp)
    print
    print "============================================================"
    print
    print

### PROBLEM 2
# Driver script: solve the MDP stored in 'MDP1.txt' with both infinite-horizon
# solvers, once per beta value, and report each (V, policy) pair through
# print_helper.

# Create an empty MDP object and populate it from the MDP1 description file.
mdp = MDP()
mdp.load_from_file('MDP1.txt')

# Only handed to value_iteration; presumably its convergence tolerance
# (TODO confirm against InfiniteHorizonPolicyOptimization.value_iteration).
epsilon = 1e-6

# --- beta = 0.1 ---
# beta is presumably the discount factor (TODO confirm with the solver code).
# Value iteration first, then policy iteration; V and policy are rebound by
# each solver call, so every result is printed before the next call runs.
beta = 0.1
V, policy = InfiniteHorizonPolicyOptimization.value_iteration(mdp, beta, epsilon)
print_helper(V, policy, "MDP1 value iteration, beta={}, epsilon={}".format(beta, epsilon))
V, policy = InfiniteHorizonPolicyOptimization.policy_iteration(mdp, beta)
print_helper(V, policy, "MDP1 policy iteration, beta={}".format(beta))

# --- beta = 0.9 ---
# Same two solvers on the same MDP with the larger beta.
beta = 0.9
V, policy = InfiniteHorizonPolicyOptimization.value_iteration(mdp, beta, epsilon)
print_helper(V, policy, "MDP1 value iteration, beta={}, epsilon={}".format(beta, epsilon))
# NOTE(review): no print_helper call follows this last solve in the visible
# code, so its result stays in V / policy without being displayed here.
V, policy = InfiniteHorizonPolicyOptimization.policy_iteration(mdp, beta)
Exemple #2
0
    print "=== {} ===".format(name)
    print "non-stationary value function:"
    print_value_function(V)
    print
    print "policy:"
    print_policy(policy)
    print
    print "============================================================"
    print
    print

# PROBLEM 1
# Driver script: run finite-horizon value iteration on the debug MDP and
# report the resulting (V, policy) pair through print_helper.

# Create an empty MDP object and populate it from the debug description file.
mdp = MDP()
mdp.load_from_file('MDP_debug.txt')

# H is the second argument of finite_horizon_value_iteration; presumably the
# planning horizon in time steps (TODO confirm with MDPOptimization).
H = 10
V, policy = MDPOptimization.finite_horizon_value_iteration(mdp, H)
print_helper(V, policy, "MDP Debug")

# PROBLEM 2
# Same procedure on the custom MDP. A fresh MDP object is created rather than
# reloading into the previous one.
mdp = MDP()
mdp.load_from_file('MDP_custom.txt')

# H is assigned again so this section does not rely on the value set above.
H = 10
# NOTE(review): no print_helper call follows this solve in the visible code,
# so its result stays in V / policy without being displayed here.
V, policy = MDPOptimization.finite_horizon_value_iteration(mdp, H)