Ejemplo n.º 1
0
def run_simulation(MDP, policy):
    """Run one episode of MDP under policy, printing a trace of every step.

    The loop repeats until ``MDP.get_parked()`` is truthy. On each step the
    policy picks an action from the current ``MDP.get_time()``, the action is
    applied with ``MDP.take_action``, and both the choice and the resulting
    state/reward are printed. A final line reports the spot, handicapped and
    available values read from the MDP.

    Args:
        MDP: simulator object providing get_parked, get_time, take_action,
            get_state, get_reward, get_spot, get_handicapped and
            get_available.
        policy: object providing choose_action(time) and get_name().

    Returns:
        None. All results are reported on standard output.
    """
    # Every message is built as a single string and passed to print() as one
    # parenthesised argument: that form emits the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("Starting simulation for given MDP")

    # `not ...` rather than `== False`, so any falsy "not parked yet" value
    # keeps the simulation running.
    while not MDP.get_parked():
        action = policy.choose_action(MDP.get_time())
        print("[TIME %s ]: %s chose action %s"
              % (MDP.get_time(), policy.get_name(), action))
        MDP.take_action(action)
        print("[TIME %s ]: Moved to state %s Current reward %.3f."
              % (MDP.get_time(), MDP.get_state(), MDP.get_reward()))

    print("Exited in (spot, handicapped, available): %s %s %s"
          % (MDP.get_spot(), MDP.get_handicapped(), MDP.get_available()))
Ejemplo n.º 2
0
def run_training(MDP, policy, horizon):
    """Step the training loop while a counter starting at 0 is below horizon.

    Every step asks the policy for a training action and records a
    (state, action, next_state) triple in the current episode. When the MDP
    reports it is parked, one final triple is recorded (state and next state
    are both read without taking an action), the MDP is reset, and the
    finished episode is passed to ``policy.q_updates``. Otherwise the action
    is applied through ``policy.take_action`` before the next state is read.

    An episode still in progress when the horizon is reached is not passed
    to ``policy.q_updates``. Nothing is returned.
    """
    episode = []
    step = 0
    while step < horizon:
        # Both cases begin with the same three queries, in this order.
        parked = MDP.get_parked()
        chosen = policy.choose_training_action()
        before = MDP.get_state()

        if not parked:
            # Ordinary step: act first, then record the transition.
            policy.take_action(chosen)
            episode.append((before, chosen, MDP.get_state()))
        else:
            # Terminal step: record one extra transition so the last state
            # gets an update too, then restart the simulator.
            episode.append((before, chosen, MDP.get_state()))
            MDP.reset()
            policy.q_updates(episode)
            # Rebind instead of clearing in place, so the list just passed
            # to q_updates is left untouched.
            episode = []

        step += 1
Ejemplo n.º 3
0
def run_simulation(MDP, policy):
    """Step the simulation quietly until ``MDP.get_parked()`` is truthy.

    Each iteration asks ``policy.choose_action`` for an action based on
    ``MDP.get_time()`` and hands that action to ``policy.take_action``.
    The progress messages are commented out, so this loop prints nothing.

    NOTE(review): no call here changes MDP directly; presumably
    ``policy.take_action`` advances the simulator on the policy's behalf.
    Confirm against the policy class.
    """
    # Disabled start-of-run message.
    #print "Starting simulation for given MDP"
    while not MDP.get_parked():
        # The policy decides from the current simulator time only.
        action = policy.choose_action(MDP.get_time())
        # Disabled per-step trace of the chosen action.
        #print "[TIME", MDP.get_time() ,"]:", policy.get_name(), "chose action", action
        policy.take_action(action)