def run_simulation(MDP, policy):
    """Run one episode of ``MDP`` under ``policy``, logging each step to stdout.

    Until ``MDP.get_parked()`` becomes truthy, the policy is asked for an
    action given the current ``MDP.get_time()``, and that action is applied
    with ``MDP.take_action``. The MDP is mutated in place and nothing is
    returned; callers read the outcome from the MDP's getters afterwards.

    Args:
        MDP: object exposing ``get_parked``, ``get_time``, ``take_action``,
            ``get_state``, ``get_reward``, ``get_spot``, ``get_handicapped``
            and ``get_available``.
        policy: object exposing ``choose_action(time)`` and ``get_name()``.
    """
    # Every print takes a single pre-formatted string: that form emits the
    # same text as the old comma-separated print statement on Python 2 and is
    # also valid on Python 3.
    print("Starting simulation for given MDP")
    while not MDP.get_parked():
        action = policy.choose_action(MDP.get_time())
        print("[TIME %s ]: %s chose action %s"
              % (MDP.get_time(), policy.get_name(), action))
        MDP.take_action(action)
        # The time is read again after the action so the log line shows the
        # MDP's clock as it stands once the action has been applied.
        print("[TIME %s ]: Moved to state %s Current reward %.3f."
              % (MDP.get_time(), MDP.get_state(), MDP.get_reward()))
    print("Exited in (spot, handicapped, available): %s %s %s"
          % (MDP.get_spot(), MDP.get_handicapped(), MDP.get_available()))
def evaluate_policies(policy, MDP, num_sims=10000):
    """Run ``policy`` on ``MDP`` repeatedly and print summary statistics.

    Each iteration runs one simulation via ``run_simulation``, reads the
    outcome from the MDP, and then calls ``MDP.reset()``. When all runs are
    done a single line is printed: the policy name, the mean reward per
    simulation, the number of runs for which ``MDP.get_handicapped()`` was
    truthy, and the number for which ``MDP.get_available()`` was falsy.

    Args:
        policy: object exposing ``get_name()`` plus whatever
            ``run_simulation`` requires of a policy.
        MDP: object exposing ``get_reward``, ``get_handicapped``,
            ``get_available`` and ``reset`` plus whatever ``run_simulation``
            requires of an MDP. It is mutated in place.
        num_sims: number of simulations to run; must be at least 1.

    Raises:
        ValueError: if ``num_sims`` is smaller than 1.
    """
    if num_sims < 1:
        raise ValueError("num_sims must be at least 1, got %r" % (num_sims,))

    total_reward, handicapped, crashed = 0, 0, 0
    for _ in range(num_sims):
        run_simulation(MDP, policy)  # maybe do something fancier
        total_reward += MDP.get_reward()
        if MDP.get_handicapped():
            handicapped += 1
        if not MDP.get_available():
            crashed += 1
        # Reset only after the outcome has been read. Presumably this puts
        # the MDP back into its starting state for the next run -- confirm
        # against the MDP implementation.
        MDP.reset()

    # Force a float divisor: on Python 2 an integer reward total would
    # otherwise be floor-divided and the mean silently truncated.
    mean_reward = total_reward / float(num_sims)
    print("%s %s %s %s" % (policy.get_name(), mean_reward, handicapped, crashed))