예제 #1
0
def run_simulation(MDP, policy):
    """Step ``MDP`` with actions chosen by ``policy`` until it reports parked.

    On every iteration the policy is asked for an action given the MDP's
    current time, the action is applied with ``MDP.take_action``, and a
    progress line is printed before and after the step. The loop ends as
    soon as ``MDP.get_parked()`` returns a truthy value; a final line with
    the spot / handicapped / available values is then printed.

    Args:
        MDP: object providing ``get_parked``, ``get_time``, ``take_action``,
            ``get_state``, ``get_reward``, ``get_spot``, ``get_handicapped``
            and ``get_available``. It is mutated in place by ``take_action``.
        policy: object providing ``choose_action(time)`` and ``get_name()``.

    Returns:
        None. All results are left on ``MDP`` for the caller to inspect.
    """
    # Each message is formatted into one string and passed to print() as a
    # single argument, so the output is identical whether print is the
    # Python 2 statement or the Python 3 function.
    print("Starting simulation for given MDP")

    while not MDP.get_parked():
        action = policy.choose_action(MDP.get_time())
        print("[TIME %s ]: %s chose action %s"
              % (MDP.get_time(), policy.get_name(), action))
        MDP.take_action(action)
        print("[TIME %s ]: Moved to state %s Current reward %.3f."
              % (MDP.get_time(), MDP.get_state(), MDP.get_reward()))
    print("Exited in (spot, handicapped, available): %s %s %s"
          % (MDP.get_spot(), MDP.get_handicapped(), MDP.get_available()))
예제 #2
0
def evaluate_policies(policy, MDP, num_sims=10000):
    """Run ``policy`` on ``MDP`` repeatedly and print summary statistics.

    Each iteration calls ``run_simulation(MDP, policy)``, reads the final
    reward and flags from ``MDP``, then calls ``MDP.reset()`` before the
    next run. After all runs a single line is printed containing the
    policy name, the mean final reward, the number of runs in which
    ``MDP.get_handicapped()`` was truthy, and the number of runs in which
    ``MDP.get_available()`` was falsy.

    Args:
        policy: object accepted by ``run_simulation``; must also provide
            ``get_name()``.
        MDP: object accepted by ``run_simulation``; must also provide
            ``get_reward``, ``get_handicapped``, ``get_available`` and
            ``reset``. It is mutated and reset in place.
        num_sims: number of simulations to run (default 10000).

    Raises:
        ValueError: if ``num_sims`` is less than 1, since the mean reward
            would be undefined.
    """
    if num_sims < 1:
        raise ValueError("num_sims must be a positive integer")

    total_reward, handicapped, crashed = 0, 0, 0
    for _ in range(num_sims):
        run_simulation(MDP, policy)
        # maybe do something fancier
        total_reward += MDP.get_reward()
        if MDP.get_handicapped():
            handicapped += 1
        if not MDP.get_available():
            crashed += 1
        MDP.reset()

    # Divide by a float so integer rewards are not truncated by Python 2's
    # floor division of ints.
    average_reward = total_reward / float(num_sims)
    print("%s %s %s %s"
          % (policy.get_name(), average_reward, handicapped, crashed))