    # Temporarily enable normalization to record the starting displacement.
    env.normalize = True
    start_disp = env.center_state(env.current_states[0])
    env.normalize = False

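    # Replay the recorded human trajectory step by step until the episode ends.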
    done = False
    i = 0
    reward = None
    while not done:
        # env.render()  # optional visualization
        # Record the current vehicle locations for the graph.
        add2loc_map(env)
        # Look up the recorded trajectory one time step ahead; the flattened
        # state holds a (v, x, a) triple per vehicle, so the ego vehicle's
        # values start at index num_leading_cars * 3.
        s_ = env.get_state(env.t_start + i + 1)
        v = s_[env.num_leading_cars * 3 + 0]  # velocity
        x = s_[env.num_leading_cars * 3 + 1]  # position
        a = s_[env.num_leading_cars * 3 + 2]  # acceleration
        # Replay the recorded acceleration as the human action.
        s, reward, done, info = env.step(a, human=True)
        i = i + 1

    print(reward)
    rewards.append(reward)
    print(i)
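Both examples index the flattened state vector by hand (num_leading_cars * 3 + k). A small helper along these lines makes that layout explicit; the offset and the (v, x, a) ordering are taken from the snippets, while the helper itself is only an illustrative sketch.

def ego_vehicle_state(s, num_leading_cars):
    # Hypothetical helper: the flattened state stores one
    # (velocity, position, acceleration) triple per vehicle, with the
    # leading vehicles first, so the ego vehicle's triple starts at
    # offset num_leading_cars * 3.
    base = num_leading_cars * 3
    return s[base + 0], s[base + 1], s[base + 2]

With it, the lookups above would read: v, x, a = ego_vehicle_state(s_, env.num_leading_cars).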
Example #2
        t = 0
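        # Roll out one episode by replaying the recorded human accelerations,
        # storing (observation window, acceleration) pairs in Replay_Buffer.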
        while True:
            with torch.no_grad():
                # env.render()  # optional visualization
                # Push the newest observation into the sliding window.
                window.appendleft(torch.Tensor(state))
                # (An agent-driven alternative, taking the argmax of
                # agent(deque2state(env)) and rescaling it into
                # [env.a_min, env.a_max], is left disabled here; the
                # recorded human action is replayed instead.)

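                # Read the ego vehicle's recorded acceleration from the logged
                # trajectory (a CACC controller, env.CACC, is left disabled)
                # and pair it with the current observation window.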
                a = env.get_state(env.t_start + env.t)[3 * num_leading_vehicle + 2]
                Replay_Buffer.appendleft([window, a])

                # Step the environment with the recorded human action.
                next_state, reward, done, _ = env.step(a, human=True)
                # Record the current vehicle locations for the graph.
                add2loc_map(env)
                state = next_state
                t = t + 1
                if done:
                    break
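        # Episode finished; record its final reward.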
        rewards.append(reward)
        if len(Replay_Buffer) > 2048: