def run(env_name='Ant-v2', num_steps=1000):
    """Drive an ``Agent`` through a rendered Gym environment.

    Creates the environment named ``env_name``, builds an ``Agent`` from its
    observation and action spaces, and then performs ``num_steps`` interaction
    steps. Every step renders the environment, asks the agent for an action,
    applies it, and prints the reward returned by the environment. Whenever
    the environment reports ``done``, it is reset and stepping continues.

    Args:
        env_name: Identifier passed to ``gym.make``.
        num_steps: Total number of environment steps to perform (summed over
            all episodes, not per episode).
    """
    env = gym.make(env_name)
    try:
        agent = Agent(env.observation_space, env.action_space)
        state = env.reset()
        # No reward exists before the first step, so the agent's first call
        # receives ``None``.
        reward = None
        done = False
        for _step in range(num_steps):
            env.render()
            # ``agent.act`` returns a pair; only the action is used here.
            action, _ = agent.act(state, reward, done)
            state, reward, done, _ = env.step(action)
            print(reward)
            if done:
                # Only ``state`` is refreshed: on the next iteration the agent
                # still receives the reward and ``done`` flag of the step that
                # ended the episode, together with the fresh initial state.
                state = env.reset()
    finally:
        # Release the simulator / render window even if the loop raises.
        env.close()
# Quadcopter stands still at the ground and has as target a height of 150 above the starting point init_pos = np.array([0., 0., 0., 0., 0., 0.]) target_pos = np.array([0., 0., 150.]) task = Task(init_pose=init_pos, target_pos=target_pos, runtime=10.) agent = Agent(task) # save rewards for plotting rewards = [] rotor_speeds_var = [] for i_episode in range(1, num_episodes + 1): state = agent.reset_episode() # start a new episode step = 0 while True: step += 1 action = agent.act(state) next_state, reward, done = task.step(action) agent.step(action, reward, next_state, done) state = next_state if done: rewards.append(agent.score) rotor_speeds_var.append(np.var(action)) print( "\r\nEp={:4d}, score={:7.3f} (top={:7.3f}) pos={} {} {} {} {} {} {}" .format(i_episode, agent.score, agent.top_score, round(task.sim.pose[:3][0], 2), round(task.sim.pose[:3][1], 2), round(task.sim.pose[:3][2], 2), round(task.sim.pose[3:6][0], 2), round(task.sim.pose[3:6][1], 2), round(task.sim.pose[3:6][2], 2),
'rotor_speed1', 'rotor_speed2', 'rotor_speed3', 'rotor_speed4' ] results = {l: [] for l in labels} num_episodes = 500 target_pos = np.array([0., 0., 10.]) task = Task(init_pose=target_pos, target_pos=target_pos) agent = Agent(task) rewards = [] for i_episode in range(1, num_episodes + 1): state = agent.reset_episode() # start a new episode ave_reward = 0 cnt = 0 while True: action = agent.act(state) next_state, reward, done = task.step(action) agent.step(action, reward, next_state, done) state = next_state ave_reward += reward cnt += 1 if i_episode == 500: to_write = [task.sim.time] + list(task.sim.pose) + list( task.sim.v) + list(task.sim.angular_v) + list(rotor_speeds) for ii in range(len(labels)): results[labels[ii]].append(to_write[ii]) if done: ave_reward /= cnt print( "\rEpisode = {:4d}, score = {:7.3f} (reward = {:7.3f})".format( i_episode, agent.score, ave_reward),