                track_number=track_number)  # tail of the env constructor call (start truncated in this excerpt)
agent = Agent(env)

print("TORCS Experiment Start.")
step = 0
for i in range(episode_count):
    print("Episode : " + str(i))

    if np.mod(i, 3) == 0:
        # Sometimes you need to relaunch TORCS because of the memory leak error
        ob = env.reset(relaunch=True)
    else:
        ob = env.reset()

    total_reward = 0.
    for j in range(max_steps):
        action = agent.act(ob)
        ob, reward, done, _ = env.step(action)
        total_reward += reward
        step += 1
        if done:
            break

    print("TOTAL REWARD @ " + str(i) + " -th Episode : " + str(total_reward))
    print("Total Step: " + str(step))
    print("")

env.end()  # This is for shutting down TORCS
print("Finish.")
print("TORCS Experiment Start.") for i in range(episode_count): print("Episode : " + str(i)) if np.mod(i, 3) == 0: # Sometimes you need to relaunch TORCS because of the memory leak error ob = env.reset(relaunch=True) else: ob = env.reset() # env.reset() return current state including # total_reward = 0. for j in range(max_steps): action = agent.act(ob, reward, done, vision) ob, reward, done, _ = env.step(action) total_reward += reward step += 1 if done: break print("TOTAL REWARD @ " + str(i) + " -th Episode : " + str(total_reward)) print("Total Step: " + str(step)) print("") env.end() # This is for shutting down TORCS print("Finish.")
agent = Agent(1)  # steering only

print("TORCS Experiment Start.")
step = 0
for i in range(episode_count):
    print("Episode : " + str(i))

    if np.mod(i, 3) == 0:
        # Sometimes you need to relaunch TORCS because of the memory leak error
        ob = env.reset(relaunch=True)
    else:
        ob = env.reset()

    reward = 0.
    done = False
    total_reward = 0.
    for j in range(max_steps):
        action = agent.act(ob, reward, done, vision)
        ob, reward, done, _ = env.step(action)
        total_reward += reward
        step += 1
        if done:
            break

    print("TOTAL REWARD @ " + str(i) + " -th Episode : " + str(total_reward))
    print("Total Step: " + str(step))
    print("")

env.end()  # This is for shutting down TORCS
print("Finish.")
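The Agent class used by the loops above is not included in this excerpt. A minimal random-action stand-in matching the act(ob, reward, done, vision) signature might look like the sketch below; the tanh-squashed Gaussian action is an assumption, not necessarily the original policy.

import numpy as np

class Agent(object):
    def __init__(self, dim_action):
        # dim_action: size of the action vector (1 = steering only)
        self.dim_action = dim_action

    def act(self, ob, reward, done, vision):
        # Hypothetical placeholder policy: ignore the observation and
        # return a random action squashed into [-1, 1] per dimension.
        return np.tanh(np.random.randn(self.dim_action))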
import logging
import time

if __name__ == "__main__":
    log_level = logging.INFO
    logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s',
                        level=log_level,
                        datefmt='%Y-%m-%d %H:%M:%S')

    logging.info("simstar env init")
    env = SimstarEnv(synronized_mode=True, speed_up=5, hz=1)
    agent = Agent(dim_action=3)

    logging.info("entering main loop")
    for ee in range(max_episodes):
        episode_total_reward = 0
        logging.info("reset environment. episode number: %d", ee + 1)
        time.sleep(2)
        observation = env.reset()

        reward = 0.0
        done = False
        for ii in range(max_steps):
            action = agent.act(observation, reward, done)
            action[2] = 0.0  # zero the third action component (brake)
            print(ii, action)

            # Debug override: drive straight at full throttle
            debug_action = [0.0, 1.0, 0.0]
            action = debug_action

            observation, reward, done, _ = env.step(action)
            episode_total_reward += reward
            if done:
                break

        logging.info("episode %d total reward: %.2f", ee + 1, episode_total_reward)

    env.clear()
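Because this loop drives an external SimStar process, teardown can be made robust against exceptions. A minimal variant, reusing the env and agent names from above, wraps the episode loop in try/finally so env.clear() always runs:

try:
    for ee in range(max_episodes):
        observation = env.reset()
        reward = 0.0
        done = False
        while not done:
            action = agent.act(observation, reward, done)
            observation, reward, done, _ = env.step(action)
finally:
    env.clear()  # release the SimStar environment even if the loop raises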
ob = env.reset()
reward = 0.
done = False
total_reward = 0.

# Bookkeeping for the trajectory and the policy-gradient estimate
states = np.array([[0, 0, 0]])
partial_delta_theta = np.array([np.zeros((n_action, n_states))])
J = 0
delta_theta = 0
baseline_n = np.array([np.zeros((n_action, n_states))])
baseline_d = np.array([np.zeros((n_action, n_states))])
J_vector = np.array([[0]])

for step in range(max_steps):
    action, av_theta, ob_theta = agent.act(ob, reward, done, vision, theta)
    ob, reward, done, _ = env.step(action)
    total_reward += reward

    # Append the (observation, action, mean action) triple to the trajectory
    states = np.append(states, [[ob_theta, action, av_theta]], axis=0)

    # Update the discounted return J with discount factor 0.99
    J = J + (0.99 ** step) * reward

    # Compute delta_theta up to the current partial sum
    delta_theta = delta_theta + compute_Delta_Theta(states)
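compute_Delta_Theta is defined outside this excerpt. One plausible reading, given the (ob_theta, action, av_theta) triples stored in states, is the summed gradient of the log-likelihood of a Gaussian policy whose mean av_theta depends on the observation. The sketch below follows that reading; the fixed noise sigma and the scalar parameterisation are assumptions, not the original definition.

import numpy as np

def compute_Delta_Theta(states, sigma=0.1):
    # Hypothetical sketch: sum of grad_theta log pi(a|s) over the trajectory
    # for a Gaussian policy N(a; av_theta, sigma^2). sigma=0.1 is an assumed
    # constant exploration noise.
    grad = 0.0
    for ob_theta, action, av_theta in states[1:]:  # skip the all-zero seed row
        grad += (action - av_theta) * ob_theta / sigma ** 2
    return grad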