Ejemplo n.º 1
0
def run(env_name='Ant-v2', num_steps=1000):
    """Run an ``Agent`` in a rendered Gym environment for a fixed step budget.

    The environment is rendered before every step and the reward returned
    by each step is printed. When an episode finishes, the environment is
    reset and stepping continues until ``num_steps`` steps have been taken
    in total (the budget is shared across episodes).

    Args:
        env_name: Environment id passed to ``gym.make``.
        num_steps: Total number of environment steps to execute.

    Returns:
        None.
    """
    env = gym.make(env_name)
    try:
        agent = Agent(env.observation_space, env.action_space)

        state = env.reset()
        reward = None  # no reward exists before the first step
        done = False
        for _ in range(num_steps):
            env.render()
            # act() returns a pair; only the action is used here.
            action, _unused = agent.act(state, reward, done)
            state, reward, done, info = env.step(action)
            print(reward)
            if done:
                # reward/done are not cleared, so the next act() call sees
                # the terminal reward and done flag with the fresh state.
                state = env.reset()
    finally:
        # Release the simulator and render window even if stepping raises
        # or the run is interrupted.
        env.close()
Ejemplo n.º 2
0
# The quadcopter starts at rest on the ground; the target is a height of 150
# above the starting point.
# NOTE(review): the six init_pos values are presumably x, y, z followed by
# three orientation angles -- confirm against Task.
init_pos = np.array([0., 0., 0., 0., 0., 0.])
target_pos = np.array([0., 0., 150.])
task = Task(init_pose=init_pos, target_pos=target_pos, runtime=10.)
agent = Agent(task)

# Per-episode histories, saved for plotting: the agent's score and the
# variance of the last action taken in each episode.
rewards = []
rotor_speeds_var = []

# NOTE(review): num_episodes is not assigned anywhere in this excerpt; it
# must be defined before this loop runs.
for i_episode in range(1, num_episodes + 1):
    state = agent.reset_episode()  # start a new episode
    # Step counter for the current episode (incremented below; not read in
    # the visible part of this script).
    step = 0
    while True:
        step += 1
        # One interaction: choose an action, advance the task, then hand the
        # transition to the agent.
        action = agent.act(state)
        next_state, reward, done = task.step(action)
        agent.step(action, reward, next_state, done)
        state = next_state
        if done:
            # Episode finished: record the agent's score and the variance
            # across the components of the final action.
            rewards.append(agent.score)
            rotor_speeds_var.append(np.var(action))
            print(
                "\r\nEp={:4d}, score={:7.3f} (top={:7.3f}) pos={} {} {} {} {} {} {}"
                .format(i_episode, agent.score, agent.top_score,
                        round(task.sim.pose[:3][0], 2),
                        round(task.sim.pose[:3][1], 2),
                        round(task.sim.pose[:3][2], 2),
                        round(task.sim.pose[3:6][0], 2),
                        round(task.sim.pose[3:6][1], 2),
                        round(task.sim.pose[3:6][2], 2),
    'rotor_speed1', 'rotor_speed2', 'rotor_speed3', 'rotor_speed4'
]
# One empty list per label; filled with per-step telemetry further down.
# (`labels` is the list literal that closes just above; its beginning lies
# outside this excerpt.)
results = {l: [] for l in labels}

num_episodes = 500
target_pos = np.array([0., 0., 10.])
# The task starts at the target position itself (init_pose == target_pos).
# NOTE(review): init_pose here is a 3-element array -- confirm Task accepts
# a pose of that length.
task = Task(init_pose=target_pos, target_pos=target_pos)
agent = Agent(task)
rewards = []

for i_episode in range(1, num_episodes + 1):
    state = agent.reset_episode()  # start a new episode
    # Running reward sum and step count for this episode; the sum is turned
    # into a per-step mean once the episode ends.
    ave_reward = 0
    cnt = 0
    while True:
        # One interaction: choose an action, advance the task, then hand the
        # transition to the agent.
        action = agent.act(state)
        next_state, reward, done = task.step(action)
        agent.step(action, reward, next_state, done)
        state = next_state
        ave_reward += reward
        cnt += 1
        # Log telemetry only during episode 500.
        # NOTE(review): 500 is hard-coded and equals num_episodes as set
        # above; changing num_episodes will not move this check.
        if i_episode == 500:
            # NOTE(review): rotor_speeds is not assigned anywhere in this
            # excerpt -- confirm it is defined (possibly `action` was meant).
            to_write = [task.sim.time] + list(task.sim.pose) + list(
                task.sim.v) + list(task.sim.angular_v) + list(rotor_speeds)
            # Append each value to the list of the label at the same index.
            for ii in range(len(labels)):
                results[labels[ii]].append(to_write[ii])
        if done:
            # Convert the accumulated sum into the mean reward per step.
            ave_reward /= cnt
            print(
                "\rEpisode = {:4d}, score = {:7.3f} (reward = {:7.3f})".format(
                    i_episode, agent.score, ave_reward),