    done = 0
    i = 0
    reward = None
    while not done:
        # For graph
        add2loc_map(env)
        # Look up the recorded human trajectory one step ahead
        s_ = env.get_state(env.t_start + i + 1)
        v = s_[env.num_leading_cars * 3 + 0]  # ego vehicle's recorded velocity
        x = s_[env.num_leading_cars * 3 + 1]  # ego vehicle's recorded position
        a = s_[env.num_leading_cars * 3 + 2]  # ego vehicle's recorded acceleration
        # Replay the recorded human acceleration in the environment
        s, reward, done, info = env.step(a, human=True)
        i = i + 1

    print(reward)
    rewards.append(reward)
    print(i)
print("HUMAN")
print("Average Reward:", np.mean(rewards))
print("Median Reward:", np.median(rewards))
print("SE Reward:", np.std(rewards) / (len(rewards))**0.5)
#print(rewards)
plt.hist(rewards, bins='auto')
plt.show()
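add2loc_map and create_loc_map are helpers defined outside these snippets. Below is a minimal sketch of what they could look like, assuming they accumulate each vehicle's position at every timestep and then plot the resulting trajectories; the module-level loc_map store, the (v, x, a)-per-vehicle state layout, and the plotting details are assumptions, only the helper names and the env.get_state / env.t / env.t_start accessors come from the code above.

import matplotlib.pyplot as plt

loc_map = {}  # hypothetical store: car index -> list of (timestep, position)

def add2loc_map(env):
    """Record every vehicle's current position for later plotting (sketch)."""
    state = env.get_state(env.t_start + env.t)
    num_cars = env.num_leading_cars + 1        # leading cars plus the ego vehicle
    for car in range(num_cars):
        x = state[car * 3 + 1]                 # assumed layout: (v, x, a) per car
        loc_map.setdefault(car, []).append((env.t, x))

def create_loc_map(env):
    """Plot the position-vs-time trajectories collected by add2loc_map (sketch)."""
    for car, points in loc_map.items():
        ts, xs = zip(*points)
        plt.plot(ts, xs, label=f"car {car}")
    plt.xlabel("timestep")
    plt.ylabel("position")
    plt.legend()
    plt.show()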
Example #2
        reward = None
        t = 0
        while True:
            with torch.no_grad():
                # Maintain a sliding window of the most recent observations
                window.appendleft(torch.Tensor(state))
                # Greedy action from the trained agent
                action_probs = agent(deque2state(env)).detach().numpy()
                action = np.argmax(action_probs)
                # Map the discrete action index to a continuous acceleration
                # in [env.a_min, env.a_max]
                a = (env.a_max - env.a_min) * (action / (agent.action_size - 1)) + env.a_min
                next_state, reward, done, _ = env.step(a)
                # For Graph
                add2loc_map(env)
                #
                state = next_state
                t = t + 1
                if done:
                    break
        print(t)
        print(reward)
        rewards.append(reward)

print("Average Reward:", np.mean(rewards))
print("SE Reward:", np.std(rewards) / (len(rewards))**0.5)
quit()
#print(rewards)
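deque2state and the window deque are likewise defined outside these snippets. A minimal sketch follows, assuming the helper simply flattens the windowed observations into a single input tensor for the agent; the window length, padding, and tensor layout are assumptions, only the names window and deque2state and the call pattern agent(deque2state(env)) come from the code above.

from collections import deque

import torch

WINDOW_SIZE = 4                      # assumed history length
window = deque(maxlen=WINDOW_SIZE)   # newest observation first (appendleft)

def deque2state(env):
    """Flatten the windowed observations into one input tensor (sketch).

    env is accepted only to mirror the call sites above; this sketch reads
    the global window deque.
    """
    frames = [f.float() for f in window]
    while len(frames) < WINDOW_SIZE:  # pad a short history at episode start
        frames.append(frames[-1])
    return torch.cat(frames)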
    env.normalize = False
    #

    done = 0
    i = 0
    reward = None
    while not done:
        # For graph
        add2loc_map(env)
        # Acceleration command from the CACC controller
        v, x, a = env.CACC(s, env.num_leading_cars)
        s, reward, done, info = env.step(a, controller="CACC")
        i = i + 1

    #print(reward)
    results.append(env.results)
    #print(i)

# For CACC
#create_loc_map(env)

results_CACC = results

print("DONE with CACC")
#################################
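env.CACC itself is part of the environment and is not shown in these snippets. For illustration only, a constant-time-gap CACC-style car-following law that returns the same (v, x, a) triple has roughly the shape below; the gains, time gap, timestep, and state layout are assumptions, and this is not the environment's actual controller.

def cacc_sketch(state, num_leading_cars, dt=0.1, time_gap=1.0, k_p=0.45, k_d=0.25):
    """Toy constant-time-gap car-following law (illustrative, not env.CACC)."""
    # Assumed layout: (v, x, a) per vehicle, ego vehicle stored last
    lead_v = state[(num_leading_cars - 1) * 3 + 0]
    lead_x = state[(num_leading_cars - 1) * 3 + 1]
    ego_v = state[num_leading_cars * 3 + 0]
    ego_x = state[num_leading_cars * 3 + 1]

    gap = lead_x - ego_x
    desired_gap = time_gap * ego_v
    # Close the spacing error and match the immediate leader's speed
    a = k_p * (gap - desired_gap) + k_d * (lead_v - ego_v)

    v = ego_v + a * dt       # predicted next ego velocity
    x = ego_x + ego_v * dt   # predicted next ego position
    return v, x, a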
Example #4
                # (disabled) agent-selected action as in the evaluation loop above;
                # this snippet replays the recorded human action instead
                #action_probs = agent(deque2state(env)).detach().numpy()
                #action = np.argmax(action_probs)
                #a = (env.a_max - env.a_min) * (action / (agent.action_size - 1)) + env.a_min

                #v, x, a = env.CACC(state, env.num_leading_cars)
                # Recorded human acceleration at the current timestep
                a = env.get_state(env.t_start + env.t)[3 * num_leading_vehicle + 2]
                # Store the (observation window, human action) pair for imitation
                # learning (note: this stores a reference to `window`; a copy may
                # be intended here)
                Replay_Buffer.appendleft([window, a])

                next_state, reward, done, _ = env.step(a, human=True)
                # For Graph
                add2loc_map(env)
                #
                state = next_state
                t = t + 1
                if done:
                    break
        #print(t)
        #print(reward)
        rewards.append(reward)
        # Run an imitation-learning update once enough samples are collected
        if len(Replay_Buffer) > 2048:
            mimic_optimize(env, agent, Replay_Buffer, 2048)

    print("DONE")
    acc = 0
    env.normalize = False
    #

    done = 0
    i = 0
    reward = None
    while not done:
        # For graph
        add2loc_map(env)
        # Acceleration command from the CACC controller
        v, x, a = env.CACC(s, env.num_leading_cars)
        #a = max(-2.0, min(a, 2.0))  # optional clamp of the acceleration to [-2, 2]
        s, reward, done, info = env.step(a, controller="CACC")
        i = i + 1

    print(reward)
    rewards.append(reward)
    print(i)
print("CACC")
print("Average Reward:", np.mean(rewards))
print("Median Reward:", np.median(rewards))
print("SE Reward:", np.std(rewards) / (len(rewards))**0.5)
#print(rewards)
plt.hist(rewards, bins='auto')
plt.show()
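mimic_optimize(env, agent, Replay_Buffer, 2048) is also defined elsewhere. Below is a minimal behavioral-cloning sketch under the assumptions that it samples a batch of stored (window, human acceleration) pairs, inverts the a = (a_max - a_min) * action / (action_size - 1) + a_min discretization used above to obtain a target action index, and fits the agent with a cross-entropy loss on unnormalized scores; the optimizer handling, window flattening, and loss choice are assumptions.

import random

import torch
import torch.nn.functional as F

def mimic_optimize(env, agent, replay_buffer, batch_size, lr=1e-3):
    """One behavioral-cloning update on recorded human actions (sketch)."""
    # In practice the optimizer would be created once and reused
    optimizer = torch.optim.Adam(agent.parameters(), lr=lr)
    batch = random.sample(list(replay_buffer), batch_size)

    # Flatten each stored observation window into a single input vector
    states = torch.stack(
        [torch.cat([f.float() for f in window]) for window, _ in batch])

    # Invert the discretization: continuous acceleration -> nearest action index
    step = (env.a_max - env.a_min) / (agent.action_size - 1)
    targets = []
    for _, a in batch:
        idx = int(round((a - env.a_min) / step))
        targets.append(min(max(idx, 0), agent.action_size - 1))
    targets = torch.tensor(targets, dtype=torch.long)

    scores = agent(states)                 # assumed shape: (batch, action_size)
    loss = F.cross_entropy(scores, targets)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()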