# Replay the logged human driver: at each step, read the human's recorded
# acceleration out of the next state vector and feed it back into the env.
i = 0
reward = None
while not done:
    # For graph
    add2loc_map(env)
    s_ = env.get_state(env.t_start + i + 1)
    v = s_[env.num_leading_cars * 3 + 0]  # ego velocity
    x = s_[env.num_leading_cars * 3 + 1]  # ego position
    a = s_[env.num_leading_cars * 3 + 2]  # ego (human) acceleration
    s, reward, done, info = env.step(a, human=True)
    i = i + 1
print(reward)
rewards.append(reward)
print(i)

print("HUMAN")
print("Average Reward:", np.mean(rewards))
print("Median Reward:", np.median(rewards))
print("SE Reward:", np.std(rewards) / (len(rewards)) ** 0.5)
plt.hist(rewards, bins='auto')
plt.show()
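# For reference: the indexing above assumes the flat state vector stores one
# (v, x, a) triple per leading car, with the ego vehicle's triple last. That
# layout is inferred from the indices used here, not confirmed elsewhere; a
# minimal sketch under that assumption:
def ego_vxa(s, num_leading_cars):
    """Return the ego (velocity, position, acceleration) from a flat state
    assumed to be laid out as [v_0, x_0, a_0, ..., v_ego, x_ego, a_ego]."""
    base = num_leading_cars * 3
    return s[base + 0], s[base + 1], s[base + 2]
# e.g. ego_vxa(np.arange(9.0), 2) -> (6.0, 7.0, 8.0) for two leaders + ego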
# Evaluate the trained agent: run a greedy policy over the discretised
# action space and map the chosen index back onto [a_min, a_max].
reward = None
t = 0
while True:
    with torch.no_grad():
        window.appendleft(torch.Tensor(state))
        action_probs = agent(deque2state(env)).detach().numpy()
    action = np.argmax(action_probs)
    a = (env.a_max - env.a_min) * (action / (agent.action_size - 1)) + env.a_min
    next_state, reward, done, _ = env.step(a)
    # For graph
    add2loc_map(env)
    state = next_state
    t = t + 1
    if done:
        break
print(t)
print(reward)
rewards.append(reward)

print("Average Reward:", np.mean(rewards))
print("SE Reward:", np.std(rewards) / (len(rewards)) ** 0.5)
quit()
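# The greedy index above is mapped to a continuous acceleration by linear
# interpolation over [a_min, a_max]; a standalone check of that mapping
# (the bounds and action count here are illustrative, not the env's values):
def index_to_accel(action, a_min, a_max, action_size):
    """Map a discrete index to an acceleration: 0 -> a_min,
    action_size - 1 -> a_max, uniformly spaced in between."""
    return (a_max - a_min) * (action / (action_size - 1)) + a_min
# e.g. with a_min=-3.0, a_max=3.0, action_size=7:
#   index_to_accel(0, -3.0, 3.0, 7) -> -3.0
#   index_to_accel(3, -3.0, 3.0, 7) ->  0.0
#   index_to_accel(6, -3.0, 3.0, 7) ->  3.0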
# Run the CACC baseline controller over the episode and keep the env's
# per-episode results for the comparison below.
env.normalize = False

done = 0
i = 0
reward = None
while not done:
    # For graph
    add2loc_map(env)
    v, x, a = env.CACC(s, env.num_leading_cars)
    s, reward, done, info = env.step(a, controller="CACC")
    i = i + 1

results.append(env.results)
# For CACC
#create_loc_map(env)
results_CACC = results
print("DONE with CACC")
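# env.CACC is defined elsewhere; a common form for CACC-style car following
# combines spacing-error and relative-speed feedback under a constant
# time-headway policy. A minimal sketch under that assumption -- the gains
# (k_p, k_d), headway, and standstill gap are hypothetical, not the env's
# actual parameters:
def cacc_accel(x_ego, v_ego, x_lead, v_lead,
               headway=1.0, standstill_gap=2.0, k_p=0.45, k_d=0.25):
    """Acceleration command from gap error plus relative-speed feedback."""
    desired_gap = standstill_gap + headway * v_ego  # constant time headway
    gap_error = (x_lead - x_ego) - desired_gap
    return k_p * gap_error + k_d * (v_lead - v_ego)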
#################################

# Demonstration collection (body of the episode loop): record (state window,
# human action) pairs, then fit the agent to them once enough are buffered.

    # Alternative action sources (disabled): the agent's greedy action or
    # the CACC controller.
    #action = np.argmax(agent(deque2state(env)).detach().numpy())
    #a = (env.a_max - env.a_min) * (action / (agent.action_size - 1)) + env.a_min
    #v, x, a = env.CACC(state, env.num_leading_cars)

    # The human driver's logged acceleration for the current step.
    a = env.get_state(env.t_start + env.t)[3 * env.num_leading_cars + 2]
    Replay_Buffer.appendleft([window, a])
    next_state, reward, done, _ = env.step(a, human=True)
    # For graph
    add2loc_map(env)
    state = next_state
    t = t + 1
    if done:
        break

rewards.append(reward)
if len(Replay_Buffer) > 2048:
    mimic_optimize(env, agent, Replay_Buffer, 2048)
print("DONE")
acc = 0
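# mimic_optimize is defined elsewhere; conceptually it performs a supervised
# (behaviour-cloning) update that fits the agent's action head to the
# recorded human accelerations. A minimal sketch under that assumption --
# the buffer sampling, window flattening, target discretisation (inverse of
# the evaluation-time action mapping), and optimizer are all assumptions,
# and the agent is assumed to return unnormalised logits:
import random

import torch
import torch.nn.functional as F

def mimic_step(env, agent, replay_buffer, batch_size, optimizer):
    batch = random.sample(list(replay_buffer), batch_size)
    # Assumes each stored window is a deque of 1-D state tensors.
    states = torch.stack([torch.cat(list(w)) for w, _ in batch])
    # Discretise the human acceleration back into an action index.
    targets = torch.tensor([
        int(round((a - env.a_min) / (env.a_max - env.a_min)
                  * (agent.action_size - 1)))
        for _, a in batch
    ]).clamp(0, agent.action_size - 1)
    loss = F.cross_entropy(agent(states), targets)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()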
# Evaluate the CACC baseline and report reward statistics.
env.normalize = False

done = 0
i = 0
reward = None
while not done:
    # For graph
    add2loc_map(env)
    v, x, a = env.CACC(s, env.num_leading_cars)
    # Optional clamp of the commanded acceleration to [-2, 2]:
    #a = max(-2, min(a, 2))
    s, reward, done, info = env.step(a, controller="CACC")
    i = i + 1
print(reward)
rewards.append(reward)
print(i)

print("CACC")
print("Average Reward:", np.mean(rewards))
print("Median Reward:", np.median(rewards))
print("SE Reward:", np.std(rewards) / (len(rewards)) ** 0.5)
plt.hist(rewards, bins='auto')
plt.show()
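# Note: np.std defaults to the population standard deviation (ddof=0); for a
# standard error of the mean the sample standard deviation is conventional:
#   np.std(rewards, ddof=1) / len(rewards) ** 0.5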