Example #1
# Assumed setup (the original excerpt starts mid-call): imports and the run
# configuration values below are placeholders, and the environment
# constructor's signature is not shown; only the track_number keyword appears
# in the original excerpt.
import numpy as np

vision = False
episode_count = 10   # assumed values
max_steps = 10000
step = 0
track_number = 1     # assumed; defined above the excerpt in the original

env = TorcsEnv(vision=vision,              # class name assumed (gym_torcs-style)
               track_number=track_number)
agent = Agent(env)

print("TORCS Experiment Start.")
for i in range(episode_count):
    print("Episode : " + str(i))

    if np.mod(i, 3) == 0:
        # Sometimes you need to relaunch TORCS because of the memory leak error
        ob = env.reset(relaunch=True)
    else:
        ob = env.reset()

    total_reward = 0.
    for j in range(max_steps):
        action = agent.act(ob)

        ob, reward, done, _ = env.step(action)
        # print(ob)
        total_reward += reward

        step += 1
        if done:
            break

    print("TOTAL REWARD @ " + str(i) + " -th Episode  :  " + str(total_reward))
    print("Total Step: " + str(step))
    print("")

env.end()  # This is for shutting down TORCS
print("Finish.")
print("TORCS Experiment Start.")
for i in range(episode_count):
    print("Episode : " + str(i))

    if np.mod(i, 3) == 0:
        # Sometimes you need to relaunch TORCS because of the memory leak error
        ob = env.reset(relaunch=True)
    else:
        ob = env.reset()

    # env.reset() returns the current observation (sensor readings; see the
    # note after this example for the typical fields)

    total_reward = 0.
    for j in range(max_steps):
        action = agent.act(ob, reward, done, vision)

        ob, reward, done, _ = env.step(action)

        total_reward += reward

        step += 1
        if done:
            break

    print("TOTAL REWARD @ " + str(i) + " -th Episode  :  " + str(total_reward))
    print("Total Step: " + str(step))
    print("")

env.end()  # This is for shutting down TORCS
print("Finish.")
Example #3
# Assumed setup (not shown in the original excerpt); the loop below also needs
# reward and done before the first agent.act() call.
import numpy as np

vision = False
episode_count = 3    # assumed values
max_steps = 100
reward = 0
done = False
step = 0

env = TorcsEnv(vision=vision, throttle=False)  # class name and kwargs assumed

agent = Agent(1)  # steering only


print("TORCS Experiment Start.")
for i in range(episode_count):
    print("Episode : " + str(i))

    if np.mod(i, 3) == 0:
        # Sometimes you need to relaunch TORCS because of the memory leak error
        ob = env.reset(relaunch=True)
    else:
        ob = env.reset()

    total_reward = 0.
    for j in range(max_steps):
        action = agent.act(ob, reward, done, vision)

        ob, reward, done, _ = env.step(action)
        # print(ob)
        total_reward += reward

        step += 1
        if done:
            break

    print("TOTAL REWARD @ " + str(i) +" -th Episode  :  " + str(total_reward))
    print("Total Step: " + str(step))
    print("")

env.end()  # This is for shutting down TORCS
print("Finish.")
Example #4

import logging
import time

# Assumed setup: SimstarEnv and Agent are imported from the surrounding
# project (not shown in this excerpt); the run configuration below uses
# placeholder values.
max_episodes = 10
max_steps = 1000

if __name__ == "__main__":
    log_level = logging.INFO

    logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s',
                        level=log_level,
                        datefmt='%Y-%m-%d %H:%M:%S')

    logging.info("simstar env init")
    env = SimstarEnv(synronized_mode=True, speed_up=5, hz=1)
    agent = Agent(dim_action=3)

    logging.info("entering main loop")
    for ee in range(max_episodes):

        episode_total_reward = 0
        reward = 0       # must exist before the first agent.act() call
        done = False
        logging.info("reset environment. episode number: %d", ee + 1)
        time.sleep(2)
        observation = env.reset()
        for ii in range(max_steps):
            action = agent.act(observation, reward, done)
            action[2] = 0.0
            print(ii, action)
            debug_action = [0.0, 1.0, 0.0]  # fixed debug action overrides the agent
            action = debug_action
            observation, reward, done, _ = env.step(action)
            episode_total_reward += reward
            # time.sleep(0.5)
            if done:
                break

    env.clear()  # shut down the environment once all episodes are done
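
Example #4 zeroes action[2] and then replaces the whole action with [0.0, 1.0, 0.0], which suggests a [steering, throttle, brake] layout; that ordering is an assumption about SimstarEnv rather than something stated on this page. A small helper makes the debug action explicit:

# Assumed action layout for SimstarEnv: [steering, throttle, brake].
def make_action(steering=0.0, throttle=0.0, brake=0.0):
    return [float(steering), float(throttle), float(brake)]

debug_action = make_action(throttle=1.0)  # straight ahead, full throttle, no brake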
Example #5
    # (The environment/agent setup, the outer episode loop, and the variables
    #  theta, n_action, n_states, reward, done, and vision are defined above
    #  this excerpt and are not shown.)
    ob = env.reset()


    total_reward = 0.
    states = np.array([[0, 0, 0]])
    partial_delta_theta = np.array([np.zeros((n_action, n_states))])
    J = 0
    step = 0
    delta_theta = 0
    baseline_n = np.array([np.zeros((n_action, n_states))])
    baseline_d = np.array([np.zeros((n_action, n_states))])
    J_vector = np.array([[0]])

    for step in range(max_steps):

        action, av_theta, ob_theta = agent.act(ob, reward, done, vision, theta)
        ob, reward, done, _ = env.step(action)

        #print("\n-------------------------------------------------------")
        #print(ob)
        #print("\n-------------------------------------------------------")
        total_reward += reward

        ## update the vector of trajectories
        states = np.append(states, [[ob_theta, action, av_theta]], axis=0)
        ## update the discounted performance
        J = J + (0.99 ** step) * reward

        # compute delta_theta up to the current partial sum
        delta_theta = delta_theta + compute_Delta_Theta(states)
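
Example #5 accumulates a discounted return J and a score-function term delta_theta along the trajectory, which is the shape of a likelihood-ratio (REINFORCE-style) policy-gradient estimator. compute_Delta_Theta itself is not shown on this page; the following self-contained sketch only illustrates the idea for a one-parameter linear-Gaussian policy, and the function name, policy form, sigma, and toy numbers are all assumptions.

import numpy as np

# Hypothetical sketch of the quantity Example #5 appears to accumulate:
# a REINFORCE-style gradient estimate for a policy a ~ N(theta * phi, sigma^2).
def reinforce_gradient(trajectory, sigma=0.1, gamma=0.99):
    """trajectory: iterable of (feature, action, policy_mean, reward) tuples."""
    J = 0.0       # discounted return
    score = 0.0   # sum over steps of d/dtheta log pi(a_t | s_t)
    for t, (phi, a, mu, r) in enumerate(trajectory):
        J += (gamma ** t) * r
        score += ((a - mu) / sigma ** 2) * phi
    return score * J   # single-trajectory estimate; average over episodes in practice

# toy trajectory with made-up numbers
traj = [(1.0, 0.2, 0.1, 1.0), (0.8, -0.1, 0.0, 0.5), (0.5, 0.05, 0.1, 0.2)]
print(reinforce_gradient(traj))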