def interact(env: Env, agent: Agent, start_obs: Arrayable) -> Tuple[array, array, array]:
    """One step interaction between env and agent.

    :args env: environment
    :args agent: agent
    :args start_obs: initial observation
    :return: (next observation, reward, terminal?)
    """
    action = agent.step(start_obs)
    next_obs, reward, done, information = env.step(action)
    # Idiom fix: dict.get replaces the `key in d` ternary — one lookup,
    # same result (the value when present, None otherwise).
    time_limit = information.get('time_limit')
    agent.observe(next_obs, reward, done, time_limit)
    return next_obs, reward, done
# Training loop: run episodes of the task, letting the agent learn from each
# transition, and record per-episode scores and rotor-speed variance for
# later plotting.  (Task, Agent, init_pos, num_episodes are defined elsewhere.)
target_pos = np.array([0., 0., 150.])
task = Task(init_pose=init_pos, target_pos=target_pos, runtime=10.)
agent = Agent(task)

# save rewards for plotting
rewards = []            # final score of each episode
rotor_speeds_var = []   # variance across the four rotor speeds of the last action

for i_episode in range(1, num_episodes + 1):
    state = agent.reset_episode()  # start a new episode
    step = 0
    while True:
        step += 1
        action = agent.act(state)
        next_state, reward, done = task.step(action)
        agent.step(action, reward, next_state, done)
        state = next_state
        if done:
            rewards.append(agent.score)
            rotor_speeds_var.append(np.var(action))
            print(
                "\r\nEp={:4d}, score={:7.3f} (top={:7.3f}) pos={} {} {} {} {} {} {}"
                .format(i_episode, agent.score, agent.top_score,
                        round(task.sim.pose[:3][0], 2),
                        round(task.sim.pose[:3][1], 2),
                        round(task.sim.pose[:3][2], 2),
                        round(task.sim.pose[3:6][0], 2),
                        round(task.sim.pose[3:6][1], 2),
                        round(task.sim.pose[3:6][2], 2),
                        # BUG FIX: ndigits was outside round() — it arrived as
                        # an extra, silently-ignored .format() argument.
                        round(abs(task.sim.v).sum(), 2)),
                end="")  # [debug]
            # BUG FIX: without this break the loop kept stepping an already
            # finished episode forever (the analogous loop below does break).
            break
#Results with the conditions of the quadcopter labels = [ 'time', 'x', 'y', 'z', 'phi', 'theta', 'psi', 'x_velocity', 'y_velocity', 'z_velocity', 'phi_velocity', 'theta_velocity', 'psi_velocity', 'rotor_speed1', 'rotor_speed2', 'rotor_speed3', 'rotor_speed4' ] results = {x: [] for x in labels} num_episodes = 1000 for i_episode in range(1, num_episodes + 1): state = agent.reset_episode() # start a new episode while True: action = agent.act(state) next_state, reward, done = takeoff.step(action) agent.step(reward, done) state = next_state to_write = [takeoff.sim.time] + list(takeoff.sim.pose) + list( takeoff.sim.v) + list(takeoff.sim.angular_v) + list(action) for ii in range(len(labels)): results[labels[ii]].append(to_write[ii]) if done: print( "\rEpisode = {:4d}, score = {:7.3f} (best = {:7.3f}), noise_scale = {}" .format(i_episode, agent.score, agent.best_score, agent.noise_scale), end="") # [debug] break sys.stdout.flush() ''' Shows the results of the control plt.plot(results['time'], results['x'], label='x')