def print_helper(V, policy, name):
    """Print a labeled parking-MDP value function and policy.

    Args:
        V: value function, as accepted by print_parking_value_function.
        policy: policy, as accepted by print_parking_policy.
        name: header label printed above the output.

    NOTE: reads the module-level `mdp` global (the MDP most recently
    loaded below), not a parameter — callers must load the MDP first.
    """
    print("=== {} ===".format(name))
    print("value function:")
    print_parking_value_function(V, mdp)
    print()
    print("policy:")
    print_parking_policy(policy, mdp)
    print()
    print("============================================================")
    print()
    print()


### PROBLEM 2

# load MDP1
mdp = MDP()
mdp.load_from_file('MDP1.txt')
epsilon = 0.000001

# Run infinite-horizon value iteration and policy iteration for each
# discount factor. (The original code duplicated this section verbatim
# for beta=0.1 and beta=0.9, and dropped the final print_helper call;
# the loop removes the duplication and prints every result.)
for beta in (0.1, 0.9):
    (V, policy) = InfiniteHorizonPolicyOptimization.value_iteration(mdp, beta, epsilon)
    print_helper(V, policy, "MDP1 value iteration, beta={}, epsilon={}".format(beta, epsilon))

    (V, policy) = InfiniteHorizonPolicyOptimization.policy_iteration(mdp, beta)
    print_helper(V, policy, "MDP1 policy iteration, beta={}".format(beta))
def print_helper(V, policy, name):
    """Print a labeled non-stationary value function and policy.

    Args:
        V: non-stationary value function, as accepted by print_value_function.
        policy: policy, as accepted by print_policy.
        name: header label printed above the output.
    """
    print("=== {} ===".format(name))
    print("non-stationary value function:")
    print_value_function(V)
    print()
    print("policy:")
    print_policy(policy)
    print()
    print("============================================================")
    print()
    print()


# PROBLEM 1

# load MDP debug
mdp = MDP()
mdp.load_from_file('MDP_debug.txt')

# run finite horizon value iteration
H = 10
(V, policy) = MDPOptimization.finite_horizon_value_iteration(mdp, H)
print_helper(V, policy, "MDP Debug")


# PROBLEM 2

# load custom MDP
mdp = MDP()
mdp.load_from_file('MDP_custom.txt')

# run finite horizon value iteration
H = 10
(V, policy) = MDPOptimization.finite_horizon_value_iteration(mdp, H)
# NOTE(review): this result is computed but never printed in the original —
# a trailing print_helper(V, policy, "...") call appears to be missing; confirm.