def evaluate_policy(self, total_steps):
    """Evaluate policy.

    Args:
        total_steps (int): Current total steps of training.
    """
    avg_test_return = 0.
    avg_test_steps = 0
    if self._save_test_path:
        replay_buffer = get_replay_buffer(
            self._policy, self._test_env, size=self._episode_max_steps)
    for i in range(self._test_episodes):
        episode_return = 0.
        frames = []
        obs = self._test_env.reset()
        avg_test_steps += 1
        for _ in range(self._episode_max_steps):
            if self._normalize_obs:
                obs = self._obs_normalizer(obs, update=False)
            act, _ = self._policy.get_action(obs, test=True)
            act = (act if is_discrete(self._env.action_space) else
                   np.clip(act, self._env.action_space.low,
                           self._env.action_space.high))
            next_obs, reward, done, _ = self._test_env.step(act)
            avg_test_steps += 1
            if self._save_test_path:
                replay_buffer.add(obs=obs, act=act, next_obs=next_obs,
                                  rew=reward, done=done)
            if self._save_test_movie:
                frames.append(self._test_env.render(mode='rgb_array'))
            elif self._show_test_progress:
                self._test_env.render()
            episode_return += reward
            obs = next_obs
            if done:
                break
        prefix = "step_{0:08d}_epi_{1:02d}_return_{2:010.4f}".format(
            total_steps, i, episode_return)
        if self._save_test_path:
            save_path(replay_buffer.sample(self._episode_max_steps),
                      os.path.join(self._output_dir, prefix + ".pkl"))
            replay_buffer.clear()
        if self._save_test_movie:
            frames_to_gif(frames, prefix, self._output_dir)
        avg_test_return += episode_return
    if self._show_test_images:
        images = tf.cast(
            tf.expand_dims(np.array(obs).transpose(2, 0, 1), axis=3),
            tf.uint8)
        tf.summary.image('train/input_img', images)
    return avg_test_return / self._test_episodes, avg_test_steps / self._test_episodes
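# --- Usage sketch (illustration, not part of the original source) ---
# A minimal, hypothetical example of how a training loop might call
# evaluate_policy periodically. The names `trainer`, `_test_interval`,
# and `logger` are assumptions standing in for the surrounding Trainer
# plumbing and may differ from the real class.
#
#     if total_steps % trainer._test_interval == 0:
#         avg_return, avg_steps = trainer.evaluate_policy(total_steps)
#         trainer.logger.info(
#             "Evaluation: step={} avg_return={:.4f} avg_steps={:.1f}".format(
#                 total_steps, avg_return, avg_steps))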
def evaluate_policy(self, total_steps):
    """Evaluate the current policy on the test environment."""
    tf.summary.experimental.set_step(total_steps)
    if self._normalize_obs:
        # Copy the running normalization statistics from the training env
        # so test observations are normalized identically.
        self._test_env.normalizer.set_params(
            *self._env.normalizer.get_params())
    avg_test_return = 0.
    avg_test_steps = 0
    if self._save_test_path:
        replay_buffer = get_replay_buffer(
            self._policy, self._test_env, size=self._episode_max_steps)
    for i in range(self._test_episodes):
        episode_return = 0.
        frames = []
        obs = self._test_env.reset()
        avg_test_steps += 1
        for _ in range(self._episode_max_steps):
            action = self._policy.get_action(obs, test=True)
            next_obs, reward, done, _ = self._test_env.step(action)
            avg_test_steps += 1
            if self._save_test_path:
                replay_buffer.add(obs=obs, act=action, next_obs=next_obs,
                                  rew=reward, done=done)
            if self._save_test_movie:
                frames.append(self._test_env.render(mode='rgb_array'))
            elif self._show_test_progress:
                self._test_env.render()
            episode_return += reward
            obs = next_obs
            if done:
                break
        prefix = "step_{0:08d}_epi_{1:02d}_return_{2:010.4f}".format(
            total_steps, i, episode_return)
        if self._save_test_path:
            save_path(
                replay_buffer._encode_sample(
                    np.arange(self._episode_max_steps)),
                os.path.join(self._output_dir, prefix + ".pkl"))
            replay_buffer.clear()
        if self._save_test_movie:
            frames_to_gif(frames, prefix, self._output_dir)
        avg_test_return += episode_return
    if self._show_test_images:
        images = tf.cast(
            tf.expand_dims(np.array(obs).transpose(2, 0, 1), axis=3),
            tf.uint8)
        tf.summary.image('train/input_img', images)
    return avg_test_return / self._test_episodes, avg_test_steps / self._test_episodes
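# --- Normalizer sync sketch (assumption, not the library's actual API) ---
# The set_params/get_params calls above assume a running-statistics
# normalizer whose mean/variance can be copied from the training env to
# the test env. A minimal sketch of that contract; the class name and
# field layout here are invented for illustration.
#
#     class RunningObsNormalizer:
#         def __init__(self, shape):
#             self.mean = np.zeros(shape, dtype=np.float32)
#             self.var = np.ones(shape, dtype=np.float32)
#
#         def get_params(self):
#             return self.mean, self.var
#
#         def set_params(self, mean, var):
#             self.mean, self.var = mean, var
#
#         def __call__(self, obs, eps=1e-8):
#             return (obs - self.mean) / np.sqrt(self.var + eps)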
def evaluate_policy(self, total_steps):
    """Evaluate the policy."""
    tf.summary.experimental.set_step(total_steps)
    total_test_return = 0.
    success_traj = 0
    if self._save_test_path:
        replay_buffer = get_replay_buffer(
            self._policy, self._test_env, size=self._episode_max_steps)
    straight_line_episode = 0
    no_straight_line_episode = 0
    success_traj_straight_line = 0
    success_traj_no_straight_line = 0
    for i in range(self._test_episodes):
        episode_return = 0.
        frames = []
        workspace, goal, obs = self._test_env.reset()
        start = obs
        reduced_workspace = self._CAE.evaluate(workspace)
        # Concatenate position observation with goal and reduced workspace.
        obs_full = np.concatenate((obs, goal, reduced_workspace))
        for _ in range(self._episode_max_steps):
            action = self._policy.get_action(obs_full)
            next_obs, reward, done, _ = self._test_env.step(action)
            # Concatenate next position with goal and reduced workspace.
            next_obs_full = np.concatenate((next_obs, goal, reduced_workspace))
            # Add observation to the trajectory storage.
            self.trajectory.append({
                'workspace': workspace, 'position': obs,
                'next_position': next_obs, 'goal': goal,
                'action': action, 'reward': reward, 'done': done})
            if self._save_test_path:
                replay_buffer.add(obs=obs_full, act=action,
                                  next_obs=next_obs_full,
                                  rew=reward, done=done)
            if self._save_test_movie:
                frames.append(self._test_env.render(mode='plot'))
            elif self._show_test_progress:
                self._test_env.render()
            episode_return += reward
            obs = next_obs
            obs_full = next_obs_full
            if done:
                break
        prefix = "step_{0:08d}_epi_{1:02d}_return_{2:010.4f}".format(
            total_steps, i, episode_return)
        if self._save_test_path:
            save_path(
                replay_buffer._encode_sample(
                    np.arange(self._episode_max_steps)),
                os.path.join(self._output_dir, prefix + ".pkl"))
            replay_buffer.clear()
        if self._save_test_movie:
            frames_to_gif(frames, prefix, self._output_dir)
        if self._save_test_path_sep:
            self._save_traj_separately(prefix)
        total_test_return += episode_return
        # Track success rates separately for episodes where a straight line
        # from start to goal is feasible and where it is not.
        if straight_line_feasible(workspace, start, goal, self._test_env):
            straight_line_episode += 1
            if reward == self._test_env.goal_reward:
                success_traj_straight_line += 1
        else:
            no_straight_line_episode += 1
            if reward == self._test_env.goal_reward:
                success_traj_no_straight_line += 1
        if reward == self._test_env.goal_reward:
            success_traj += 1
        # Empty the trajectory storage for the next episode.
        self.trajectory = []
    if self._show_test_images:
        images = tf.cast(
            tf.expand_dims(np.array(obs).transpose(2, 0, 1), axis=3),
            tf.uint8)
        tf.summary.image('train/input_img', images)
    avg_test_return = total_test_return / self._test_episodes
    success_rate = success_traj / self._test_episodes
    if straight_line_episode > 0:
        success_rate_straight_line = \
            success_traj_straight_line / straight_line_episode
    else:
        success_rate_straight_line = 0
    if no_straight_line_episode > 0:
        success_rate_no_straight_line = \
            success_traj_no_straight_line / no_straight_line_episode
    else:
        success_rate_no_straight_line = 0
    ratio_straight_lines = straight_line_episode / self._test_episodes
    return (avg_test_return, success_rate, ratio_straight_lines,
            success_rate_straight_line, success_rate_no_straight_line)
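# --- straight_line_feasible sketch (hypothetical helper) ---
# The success-rate breakdown above relies on straight_line_feasible to
# decide whether the segment from start to goal is collision-free in the
# workspace. A minimal sketch under that assumption; the collision check
# `env.point_in_collision` and the sampling resolution are invented here
# and may not match the project's real helper.
#
#     def straight_line_feasible(workspace, start, goal, env, n_samples=100):
#         for t in np.linspace(0.0, 1.0, n_samples):
#             point = (1.0 - t) * np.asarray(start) + t * np.asarray(goal)
#             if env.point_in_collision(workspace, point):
#                 return False
#         return True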