def get_frames(env, batch_size=30):
    """Roll out one episode with random actions and collect rendered frames.

    Note: `batch_size` is currently unused.
    """
    rgb_frames = []
    observations = env.reset()
    frame = observations_to_image(observations[0], [])
    rgb_frames.append(frame)
    dones = [False]
    while not dones[0]:
        outputs = env.step([env.action_spaces[0].sample()])
        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
        frame = observations_to_image(observations[0], [])
        rgb_frames.append(frame)
    return rgb_frames
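# Usage sketch for `get_frames` (illustrative, not from the original source):
# roll out one random-agent episode and dump the frames to disk as a video.
# Assumes a habitat-style vectorized `env` and habitat's `images_to_video`
# helper; adjust names to your setup.
#
#   from habitat.utils.visualizations.utils import images_to_video
#
#   frames = get_frames(env)
#   images_to_video(frames, output_dir="data/videos", video_name="random_agent")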
def _store_video_frame(
    self,
    obs: Observations,
    action: Optional[Union[np.ndarray, float]] = None,
    info: Optional[Dict[str, Any]] = None,
) -> None:
    new_obs = obs.copy()
    for key in ['image', 'depth']:
        if key not in new_obs:
            continue
        if new_obs[key].shape[0] < 200:
            # upscale image to make the resulting video more viewable
            new_obs[key] = np.repeat(np.repeat(new_obs[key], 4, axis=0), 4, axis=1)
    if action:
        # draw a marker bar ([0, 0, 255]) near the bottom of the frame whose
        # horizontal extent encodes the (scaled) action value
        act = action * 0.9
        img_size = new_obs['image'].shape[0]
        start = img_size / 2
        end = img_size * (1 - act) / 2
        left = round(min(start, end))
        right = round(max(start, end))
        new_obs['image'][
            round(img_size * 0.9):round(img_size * 0.95), left:right
        ] = np.array([0, 0, 255])
    new_obs['rgb'] = new_obs.pop('image')
    self._rgb_frames.append(observations_to_image(new_obs, info or {}))
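# Usage sketch for `_store_video_frame` (illustrative): the owning wrapper
# would call it once per step so `self._rgb_frames` accumulates one annotated
# frame per transition, e.g.
#   self._rgb_frames = []
#   obs = env.reset()
#   self._store_video_frame(obs)
#   while not done:
#       action = policy(obs)
#       obs, reward, done, info = env.step(action)
#       self._store_video_frame(obs, action=action, info=info)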
def test_different_dim_observations_to_image():
    observations = {
        "1_rgb": np.random.rand(512, 512, 3),
        "2_rgb": np.random.rand(418, 418, 3),
        "1_depth": np.random.rand(128, 128, 1),
        "2_depth": np.random.rand(128, 128, 1),
    }
    info = {
        "collisions": {"is_collision": True},
        "top_down_map": {
            "map": np.random.randint(low=0, high=255, size=(300, 300)),
            "fog_of_war_mask": np.random.randint(low=0, high=1, size=(300, 300)),
            "agent_map_coord": (10, 10),
            "agent_angle": np.random.random(),
        },
    }
    image = observations_to_image(observations, info)
    assert image.shape == (
        512,
        1570,
        3,
    ), "Resulting image resolution doesn't match."
def render(self, mode='rgb_array'):
    if mode == "rgb_array":
        frame = observations_to_image(
            self.env._last_full_obs, self.env.unwrapped._env.get_metrics())
    else:
        raise ValueError(f"Render mode {mode} not currently supported.")
    self._viewer.display(frame)
def save_map(observations, info, images):
    im = observations_to_image(observations, info)
    top_down_map = draw_top_down_map(info, im.shape[0])  # computed but currently unused
    output_im = append_text_to_image(im, observations["instruction"]["text"])
    images.append(output_im)
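# Usage sketch for `save_map` (illustrative): collect one annotated frame per
# step of a VLN episode, then write the frames out with a helper such as
# habitat's `images_to_video` (assumed available):
#   images = []
#   observations = env.reset()
#   while not env.episode_over:
#       observations = env.step(agent.act(observations))
#       save_map(observations, env.get_metrics(), images)
#   images_to_video(images, output_dir="data/videos", video_name="vln_episode")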
def render(self, mode: str = "rgb_array") -> np.ndarray:
    frame = None
    if mode == "rgb_array":
        frame = observations_to_image(self._last_obs, self._env._env.get_metrics())
    else:
        raise ValueError(f"Render mode {mode} not currently supported.")
    return frame
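# Usage sketch for the gym-style `render` above (illustrative): grab one frame
# per step and stack them into a (T, H, W, 3) array for inspection or video:
#   frames = []
#   obs = wrapped_env.reset()
#   for _ in range(100):
#       obs, reward, done, info = wrapped_env.step(wrapped_env.action_space.sample())
#       frames.append(wrapped_env.render(mode="rgb_array"))
#       if done:
#           break
#   video = np.stack(frames)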
def following(config, env, keys):
    observation = env.reset()
    frames = list()
    audios = list()
    for key in keys:
        # NOTE: an unrecognized key silently reuses the previous action
        # (and raises NameError if it is the first key)
        if key == 'w':
            action = HabitatSimActions.MOVE_FORWARD
        elif key == 'a':
            action = HabitatSimActions.TURN_LEFT
        elif key == 'd':
            action = HabitatSimActions.TURN_RIGHT
        elif key == 'f':
            action = HabitatSimActions.STOP

        # --- Game logic should go here
        observation, reward, done, info = env.step(**{'action': action})
        if env.get_done(None):
            break

        if config.TASK_CONFIG.SIMULATOR.CONTINUOUS_VIEW_CHANGE and 'intermediate' in observation:
            for obs in observation['intermediate']:
                frame = observations_to_image(obs, info)
                frames.append(frame)
        frame = observations_to_image(observation, info)
        frames.append(frame)
        audio = observation['audiogoal']
        audios.append(audio)
    env.close()

    # write frames and audio into videos
    video_dir = 'data/visualizations/demo'
    video_name = 'demo'
    fps = config.TASK_CONFIG.SIMULATOR.VIEW_CHANGE_FPS \
        if config.TASK_CONFIG.SIMULATOR.CONTINUOUS_VIEW_CHANGE else 1
    images_to_video_with_audio(
        frames, video_dir, video_name, audios,
        sr=config.TASK_CONFIG.SIMULATOR.AUDIO.RIR_SAMPLING_RATE,
        fps=fps)
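# Usage sketch for `following` (illustrative): replay a recorded key sequence,
# e.g. the one printed at the end of the interactive demo below
# ("Keys: w,w,a,d,f"):
#   keys = "w,w,a,d,f".split(",")
#   following(config, env, keys)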
def step(self, *args, **kwargs):
    intermediate_goal = kwargs["action"]
    self._previous_action = intermediate_goal
    goal = self.planner.get_map_coordinates(intermediate_goal)
    stop = int(self._config.TASK_CONFIG.TASK.ACTION_MAP.MAP_SIZE ** 2 // 2) == intermediate_goal
    observation = self._previous_observation
    cumulative_reward = 0
    done = False
    reaching_waypoint = False
    cant_reach_waypoint = False
    if len(self._config.VIDEO_OPTION) > 0:
        rgb_frames = list()
        audios = list()
    for step_count in range(self._config.PREDICTION_INTERVAL):
        if step_count != 0 and not self.planner.check_navigability(goal):
            cant_reach_waypoint = True
            break
        action = self.planner.plan(observation, goal, stop=stop)
        observation, reward, done, info = super().step({"action": action})
        if len(self._config.VIDEO_OPTION) > 0:
            if "rgb" not in observation:
                observation["rgb"] = np.zeros(
                    (self.config.DISPLAY_RESOLUTION,
                     self.config.DISPLAY_RESOLUTION, 3))
            frame = observations_to_image(observation, info)
            rgb_frames.append(frame)
            audios.append(observation['audiogoal'])
        cumulative_reward += reward
        if done:
            self.planner.reset()
            observation = self.reset()
            break
        else:
            self.planner.update_map_and_graph(observation)
            # reaching intermediate goal
            x, y = self.planner.mapper.get_maps_and_agent_pose()[2:4]
            if (x - goal[0]) == (y - goal[1]) == 0:
                reaching_waypoint = True
                break
    if not done:
        self.planner.add_maps_to_observation(observation)
    self._previous_observation = observation
    info['reaching_waypoint'] = done or reaching_waypoint
    info['cant_reach_waypoint'] = cant_reach_waypoint
    if len(self._config.VIDEO_OPTION) > 0:
        assert len(rgb_frames) != 0
        info['rgb_frames'] = rgb_frames
        info['audios'] = audios
    return observation, cumulative_reward, done, info
def interactive_demo(config, env):
    # Set the width and height of the screen [width, height]
    pygame.init()
    size = (728, 256)
    screen = pygame.display.set_mode(size)
    pygame.display.set_caption("Interactive Demo")
    # Loop until the user clicks the close button.
    done = False
    # Used to manage how fast the screen updates
    clock = pygame.time.Clock()
    frames = list()
    audios = list()
    observation = env.reset()
    # initial frame is currently not drawn (blit is commented out below)
    rgb_image = np.swapaxes(observation['rgb'], 0, 1)
    # screen.blit(pygame.surfarray.make_surface(rgb_image), (0, 0))
    pygame.display.flip()

    # -------- Main Program Loop -----------
    keys = []
    while not done:
        # --- Main event loop: block until a mapped key is pressed
        def wait():
            while True:
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        pygame.quit()
                        sys.exit()
                    if event.type == pygame.KEYDOWN:
                        action = None
                        if event.key == pygame.K_w:
                            action = HabitatSimActions.MOVE_FORWARD
                            keys.append('w')
                        elif event.key == pygame.K_a:
                            action = HabitatSimActions.TURN_LEFT
                            keys.append('a')
                        elif event.key == pygame.K_d:
                            action = HabitatSimActions.TURN_RIGHT
                            keys.append('d')
                        elif event.key == pygame.K_f:
                            action = HabitatSimActions.STOP
                            keys.append('f')
                        if action is not None:
                            return action

        action = wait()
        # --- Game logic should go here
        observation, reward, done, info = env.step(**{'action': action})
        if env.get_done(None):
            # observation = env.reset()
            break

        if config.TASK_CONFIG.SIMULATOR.CONTINUOUS_VIEW_CHANGE and 'intermediate' in observation:
            for obs in observation['intermediate']:
                frame = observations_to_image(obs, info)
                frames.append(frame)
        frame = observations_to_image(observation, info)
        frames.append(frame)
        frame = np.swapaxes(frame, 0, 1)
        audio = observation['audiogoal']
        audios.append(audio)

        # Here, we clear the screen to white. Don't put other drawing commands
        # above this, or they will be erased with this command.
        screen.fill((255, 255, 255))
        screen.blit(pygame.surfarray.make_surface(frame), (0, 0))
        # smaller_frame = block_reduce(frame, block_size=(down_sampling, down_sampling, 1), func=np.mean)
        # screen.blit(pygame.surfarray.make_surface(smaller_frame), (0, 0))

        # play sound
        # temp_file = 'data/temp/temp.wav'
        # sr = config.TASK_CONFIG.SIMULATOR.AUDIO.RIR_SAMPLING_RATE
        # audio = np.int16(audio * 32767).T
        # wavfile.write(temp_file, sr, audio)
        # wavfile.write(temp_file, sr, audio.T)
        # pygame.mixer.music.load(temp_file)
        # pygame.mixer.music.play(-1)

        # --- Go ahead and update the screen with what we've drawn.
        pygame.display.flip()

        # --- Limit the frame rate (1 FPS here, one redraw per key press)
        clock.tick(1)

    # Close the window and quit.
    pygame.quit()
    env.close()
    print('Keys: {}'.format(','.join(keys)))

    # write frames and audio into videos
    video_dir = 'data/visualizations/demo'
    video_name = 'demo'
    fps = config.TASK_CONFIG.SIMULATOR.VIEW_CHANGE_FPS \
        if config.TASK_CONFIG.SIMULATOR.CONTINUOUS_VIEW_CHANGE else 1
    images_to_video_with_audio(
        frames, video_dir, video_name, audios,
        sr=config.TASK_CONFIG.SIMULATOR.AUDIO.RIR_SAMPLING_RATE,
        fps=fps)
def _eval_checkpoint(
    self,
    checkpoint_path: str,
    writer: TensorboardWriter,
    checkpoint_index: int = 0,
) -> None:
    r"""Evaluates a single checkpoint.

    Args:
        checkpoint_path: path of checkpoint
        writer: tensorboard writer object for logging to tensorboard
        checkpoint_index: index of cur checkpoint for logging

    Returns:
        None
    """
    ckpt_dict = self.load_checkpoint(checkpoint_path, map_location=self.device)
    config = self._setup_eval_config(ckpt_dict["config"])
    ppo_cfg = config.RL.PPO

    if len(self.config.VIDEO_OPTION) > 0:
        config.defrost()
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
        config.freeze()

    logger.info(f"env config: {config}")
    self.envs = construct_envs(self.config, get_env_class(self.config.ENV_NAME))
    self._setup_actor_critic_agent(ppo_cfg)

    self.agent.load_state_dict(ckpt_dict["state_dict"])
    self.actor_critic = self.agent.actor_critic

    # get name of performance metric, e.g. "spl"
    metric_name = self.config.TASK_CONFIG.TASK.MEASUREMENTS[0]
    metric_cfg = getattr(self.config.TASK_CONFIG.TASK, metric_name)
    measure_type = baseline_registry.get_measure(metric_cfg.TYPE)
    assert measure_type is not None, "invalid measurement type {}".format(
        metric_cfg.TYPE)
    self.metric_uuid = measure_type(None, None)._get_uuid()

    observations = self.envs.reset()
    batch = batch_obs(observations)
    for sensor in batch:
        batch[sensor] = batch[sensor].to(self.device)

    current_episode_reward = torch.zeros(self.envs.num_envs, 1, device=self.device)
    test_recurrent_hidden_states = torch.zeros(
        self.actor_critic.net.num_recurrent_layers,
        self.config.NUM_PROCESSES,
        ppo_cfg.hidden_size,
        device=self.device,
    )
    prev_actions = torch.zeros(self.config.NUM_PROCESSES, 1,
                               device=self.device, dtype=torch.long)
    not_done_masks = torch.zeros(self.config.NUM_PROCESSES, 1, device=self.device)
    stats_episodes = dict()  # dict of dicts that stores stats per episode

    # NOTE: use a comprehension rather than `[[]] * n`, which would alias a
    # single list across all envs
    rgb_frames = [
        [] for _ in range(self.config.NUM_PROCESSES)
    ]  # type: List[List[np.ndarray]]
    if len(self.config.VIDEO_OPTION) > 0:
        os.makedirs(self.config.VIDEO_DIR, exist_ok=True)

    while (len(stats_episodes) < self.config.TEST_EPISODE_COUNT
           and self.envs.num_envs > 0):
        current_episodes = self.envs.current_episodes()

        with torch.no_grad():
            _, actions, _, test_recurrent_hidden_states = self.actor_critic.act(
                batch,
                test_recurrent_hidden_states,
                prev_actions,
                not_done_masks,
                deterministic=False,
            )
            prev_actions.copy_(actions)

        outputs = self.envs.step([a[0].item() for a in actions])
        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
        batch = batch_obs(observations)
        for sensor in batch:
            batch[sensor] = batch[sensor].to(self.device)

        not_done_masks = torch.tensor(
            [[0.0] if done else [1.0] for done in dones],
            dtype=torch.float,
            device=self.device,
        )
        rewards = torch.tensor(rewards, dtype=torch.float,
                               device=self.device).unsqueeze(1)
        current_episode_reward += rewards
        next_episodes = self.envs.current_episodes()
        envs_to_pause = []
        n_envs = self.envs.num_envs
        for i in range(n_envs):
            if (
                next_episodes[i].scene_id,
                next_episodes[i].episode_id,
            ) in stats_episodes:
                envs_to_pause.append(i)

            # episode ended
            if not_done_masks[i].item() == 0:
                episode_stats = dict()
                episode_stats[self.metric_uuid] = infos[i][self.metric_uuid]
                episode_stats["success"] = int(infos[i][self.metric_uuid] > 0)
                episode_stats["reward"] = current_episode_reward[i].item()
                current_episode_reward[i] = 0
                # use scene_id + episode_id as unique id for storing stats
                stats_episodes[(
                    current_episodes[i].scene_id,
                    current_episodes[i].episode_id,
                )] = episode_stats

                if len(self.config.VIDEO_OPTION) > 0:
                    generate_video(
                        video_option=self.config.VIDEO_OPTION,
                        video_dir=self.config.VIDEO_DIR,
                        images=rgb_frames[i],
                        episode_id=current_episodes[i].episode_id,
                        checkpoint_idx=checkpoint_index,
                        metric_name=self.metric_uuid,
                        metric_value=infos[i][self.metric_uuid],
                        tb_writer=writer,
                    )
                    rgb_frames[i] = []

            # episode continues
            elif len(self.config.VIDEO_OPTION) > 0:
                frame = observations_to_image(observations[i], infos[i])
                rgb_frames[i].append(frame)

        # pausing self.envs with no new episode
        if len(envs_to_pause) > 0:
            state_index = list(range(self.envs.num_envs))
            for idx in reversed(envs_to_pause):
                state_index.pop(idx)
                self.envs.pause_at(idx)

            # indexing along the batch dimension (dim 1 of the hidden states;
            # dim 0 is the recurrent layer dimension)
            test_recurrent_hidden_states = test_recurrent_hidden_states[
                :, state_index]
            not_done_masks = not_done_masks[state_index]
            current_episode_reward = current_episode_reward[state_index]
            prev_actions = prev_actions[state_index]

            for k, v in batch.items():
                batch[k] = v[state_index]

            if len(self.config.VIDEO_OPTION) > 0:
                rgb_frames = [rgb_frames[i] for i in state_index]

    aggregated_stats = dict()
    for stat_key in next(iter(stats_episodes.values())).keys():
        aggregated_stats[stat_key] = sum(
            [v[stat_key] for v in stats_episodes.values()])
    num_episodes = len(stats_episodes)

    episode_reward_mean = aggregated_stats["reward"] / num_episodes
    episode_metric_mean = aggregated_stats[self.metric_uuid] / num_episodes
    episode_success_mean = aggregated_stats["success"] / num_episodes

    logger.info(f"Average episode reward: {episode_reward_mean:.6f}")
    logger.info(f"Average episode success: {episode_success_mean:.6f}")
    logger.info(f"Average episode {self.metric_uuid}: {episode_metric_mean:.6f}")

    writer.add_scalars(
        "eval_reward",
        {"average reward": episode_reward_mean},
        checkpoint_index,
    )
    writer.add_scalars(
        f"eval_{self.metric_uuid}",
        {f"average {self.metric_uuid}": episode_metric_mean},
        checkpoint_index,
    )
    writer.add_scalars(
        "eval_success",
        {"average success": episode_success_mean},
        checkpoint_index,
    )

    self.envs.close()
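# Why the frame buffers above use a list comprehension rather than `[[]] * n`:
# multiplying a list of lists copies the *reference*, so every env would share
# one buffer. A minimal, self-contained demonstration of the pitfall:
def _demo_list_aliasing():
    aliased = [[]] * 3
    aliased[0].append("frame")
    assert aliased == [["frame"], ["frame"], ["frame"]]  # all three share one list

    independent = [[] for _ in range(3)]
    independent[0].append("frame")
    assert independent == [["frame"], [], []]  # only env 0 is affected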
def _eval_checkpoint(self,
                     checkpoint_path: str,
                     writer: TensorboardWriter,
                     checkpoint_index: int = 0,
                     log_diagnostics=[],
                     output_dir='.',
                     label='.',
                     num_eval_runs=1) -> None:
    r"""Evaluates a single checkpoint.

    Args:
        checkpoint_path: path of checkpoint
        writer: tensorboard writer object for logging to tensorboard
        checkpoint_index: index of cur checkpoint for logging

    Returns:
        None
    """
    if checkpoint_index == -1:
        ckpt_file = checkpoint_path.split('/')[-1]
        split_info = ckpt_file.split('.')
        checkpoint_index = split_info[1]
    # Map location CPU is almost always better than mapping to a CUDA device.
    ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu")
    if self.config.EVAL.USE_CKPT_CONFIG:
        config = self._setup_eval_config(ckpt_dict["config"])
    else:
        config = self.config.clone()

    ppo_cfg = config.RL.PPO
    task_cfg = config.TASK_CONFIG.TASK

    config.defrost()
    config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT
    config.freeze()

    if len(self.config.VIDEO_OPTION) > 0:
        config.defrost()
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
        config.freeze()

    logger.info(f"env config: {config}")
    self.envs = construct_envs(config, get_env_class(config.ENV_NAME))

    # pass in aux config if we're doing attention
    aux_cfg = self.config.RL.AUX_TASKS
    self._setup_actor_critic_agent(ppo_cfg, task_cfg, aux_cfg)

    # Check if we accidentally recorded `visual_resnet` in our checkpoint
    # and drop it (it's redundant with `visual_encoder`)
    ckpt_dict['state_dict'] = {
        k: v
        for k, v in ckpt_dict['state_dict'].items()
        if 'visual_resnet' not in k
    }
    self.agent.load_state_dict(ckpt_dict["state_dict"])

    logger.info("agent number of trainable parameters: {}".format(
        sum(param.numel() for param in self.agent.parameters()
            if param.requires_grad)))

    self.actor_critic = self.agent.actor_critic

    observations = self.envs.reset()
    batch = batch_obs(observations, device=self.device)

    current_episode_reward = torch.zeros(self.envs.num_envs, 1, device=self.device)

    test_recurrent_hidden_states = torch.zeros(
        self.actor_critic.net.num_recurrent_layers,
        self.config.NUM_PROCESSES,
        ppo_cfg.hidden_size,
        device=self.device,
    )
    _, num_recurrent_memories, _ = self._setup_auxiliary_tasks(
        aux_cfg, ppo_cfg, task_cfg, is_eval=True)
    if self.config.RL.PPO.policy in MULTIPLE_BELIEF_CLASSES:
        aux_tasks = self.config.RL.AUX_TASKS.tasks
        num_recurrent_memories = len(self.config.RL.AUX_TASKS.tasks)
        test_recurrent_hidden_states = test_recurrent_hidden_states.unsqueeze(
            2).repeat(1, 1, num_recurrent_memories, 1)

    prev_actions = torch.zeros(self.config.NUM_PROCESSES, 1,
                               device=self.device, dtype=torch.long)
    not_done_masks = torch.zeros(self.config.NUM_PROCESSES, 1, device=self.device)
    stats_episodes = dict()  # dict of dicts that stores stats per episode

    rgb_frames = [
        [] for _ in range(self.config.NUM_PROCESSES)
    ]  # type: List[List[np.ndarray]]
    if len(self.config.VIDEO_OPTION) > 0:
        os.makedirs(self.config.VIDEO_DIR, exist_ok=True)

    number_of_eval_episodes = self.config.TEST_EPISODE_COUNT
    if number_of_eval_episodes == -1:
        number_of_eval_episodes = sum(self.envs.number_of_episodes)
    else:
        total_num_eps = sum(self.envs.number_of_episodes)
        if total_num_eps < number_of_eval_episodes:
            logger.warn(
                f"Config specified {number_of_eval_episodes} eval episodes"
                f", dataset only has {total_num_eps}."  # second literal needs its own f prefix
            )
            logger.warn(f"Evaluating with {total_num_eps} instead.")
            number_of_eval_episodes = total_num_eps

    videos_cap = 2  # number of videos to generate per checkpoint
    if len(log_diagnostics) > 0:
        videos_cap = 10
    # video_indices = random.sample(range(self.config.TEST_EPISODE_COUNT),
    #                               min(videos_cap, self.config.TEST_EPISODE_COUNT))
    video_indices = range(10)
    print(f"Videos: {video_indices}")

    total_stats = []
    dones_per_ep = dict()

    # Logging more extensive evaluation stats for analysis
    if len(log_diagnostics) > 0:
        d_stats = {}
        for d in log_diagnostics:
            d_stats[d] = [
                [] for _ in range(self.config.NUM_PROCESSES)
            ]  # stored as nested list envs x timesteps x k (# tasks)

    pbar = tqdm.tqdm(total=number_of_eval_episodes * num_eval_runs)
    self.agent.eval()
    while (len(stats_episodes) < number_of_eval_episodes * num_eval_runs
           and self.envs.num_envs > 0):
        current_episodes = self.envs.current_episodes()

        with torch.no_grad():
            weights_output = None
            if self.config.RL.PPO.policy in MULTIPLE_BELIEF_CLASSES:
                weights_output = torch.empty(self.envs.num_envs, len(aux_tasks))
            (
                _,
                actions,
                _,
                test_recurrent_hidden_states,
            ) = self.actor_critic.act(batch,
                                      test_recurrent_hidden_states,
                                      prev_actions,
                                      not_done_masks,
                                      deterministic=False,
                                      weights_output=weights_output)
            prev_actions.copy_(actions)

            for i in range(self.envs.num_envs):
                if Diagnostics.actions in log_diagnostics:
                    d_stats[Diagnostics.actions][i].append(
                        prev_actions[i].item())
                if Diagnostics.weights in log_diagnostics:
                    aux_weights = None if weights_output is None else weights_output[i]
                    if aux_weights is not None:
                        d_stats[Diagnostics.weights][i].append(
                            aux_weights.half().tolist())

        outputs = self.envs.step([a[0].item() for a in actions])

        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
        batch = batch_obs(observations, device=self.device)

        not_done_masks = torch.tensor(
            [[0.0] if done else [1.0] for done in dones],
            dtype=torch.float,
            device=self.device,
        )
        rewards = torch.tensor(rewards, dtype=torch.float,
                               device=self.device).unsqueeze(1)
        current_episode_reward += rewards
        next_episodes = self.envs.current_episodes()
        envs_to_pause = []
        n_envs = self.envs.num_envs
        for i in range(n_envs):
            next_k = (
                next_episodes[i].scene_id,
                next_episodes[i].episode_id,
            )
            if dones_per_ep.get(next_k, 0) == num_eval_runs:
                envs_to_pause.append(i)  # wait for the rest

            if not_done_masks[i].item() == 0:
                episode_stats = dict()
                episode_stats["reward"] = current_episode_reward[i].item()
                episode_stats.update(self._extract_scalars_from_info(infos[i]))
                current_episode_reward[i] = 0
                # use scene_id + episode_id as unique id for storing stats
                k = (
                    current_episodes[i].scene_id,
                    current_episodes[i].episode_id,
                )
                dones_per_ep[k] = dones_per_ep.get(k, 0) + 1

                if dones_per_ep.get(k, 0) == 1 and len(
                        self.config.VIDEO_OPTION) > 0 and len(
                            stats_episodes) in video_indices:
                    logger.info(f"Generating video {len(stats_episodes)}")
                    category = getattr(current_episodes[i],
                                       "object_category", "")
                    if category != "":
                        category += "_"
                    try:
                        generate_video(
                            video_option=self.config.VIDEO_OPTION,
                            video_dir=self.config.VIDEO_DIR,
                            images=rgb_frames[i],
                            episode_id=current_episodes[i].episode_id,
                            checkpoint_idx=checkpoint_index,
                            metrics=self._extract_scalars_from_info(infos[i]),
                            tag=f"{category}{label}",
                            tb_writer=writer,
                        )
                    except Exception as e:
                        logger.warning(str(e))
                    rgb_frames[i] = []

                stats_episodes[(
                    current_episodes[i].scene_id,
                    current_episodes[i].episode_id,
                    dones_per_ep[k],
                )] = episode_stats

                if len(log_diagnostics) > 0:
                    diagnostic_info = dict()
                    for metric in log_diagnostics:
                        diagnostic_info[metric] = d_stats[metric][i]
                        d_stats[metric][i] = []
                    if Diagnostics.top_down_map in log_diagnostics:
                        top_down_map = torch.tensor([])
                        if len(self.config.VIDEO_OPTION) > 0:
                            top_down_map = infos[i]["top_down_map"]["map"]
                            top_down_map = maps.colorize_topdown_map(
                                top_down_map, fog_of_war_mask=None)
                        diagnostic_info.update(dict(top_down_map=top_down_map))
                    total_stats.append(
                        dict(
                            stats=episode_stats,
                            did_stop=bool(prev_actions[i] == 0),
                            episode_info=attr.asdict(current_episodes[i]),
                            info=diagnostic_info,
                        ))
                pbar.update()

            # episode continues
            else:
                if len(self.config.VIDEO_OPTION) > 0:
                    aux_weights = None if weights_output is None else weights_output[i]
                    frame = observations_to_image(
                        observations[i], infos[i],
                        current_episode_reward[i].item(), aux_weights,
                        aux_tasks)
                    rgb_frames[i].append(frame)
                if Diagnostics.gps in log_diagnostics:
                    d_stats[Diagnostics.gps][i].append(
                        observations[i]["gps"].tolist())
                if Diagnostics.heading in log_diagnostics:
                    d_stats[Diagnostics.heading][i].append(
                        observations[i]["heading"].tolist())

        (
            self.envs,
            test_recurrent_hidden_states,
            not_done_masks,
            current_episode_reward,
            prev_actions,
            batch,
            rgb_frames,
        ) = self._pause_envs(
            envs_to_pause,
            self.envs,
            test_recurrent_hidden_states,
            not_done_masks,
            current_episode_reward,
            prev_actions,
            batch,
            rgb_frames,
        )

    num_episodes = len(stats_episodes)
    aggregated_stats = dict()
    for stat_key in next(iter(stats_episodes.values())).keys():
        aggregated_stats[stat_key] = (
            sum([v[stat_key] for v in stats_episodes.values()]) / num_episodes)

    for k, v in aggregated_stats.items():
        logger.info(f"Average episode {k}: {v:.4f}")

    step_id = checkpoint_index
    if "extra_state" in ckpt_dict and "step" in ckpt_dict["extra_state"]:
        step_id = ckpt_dict["extra_state"]["step"]

    writer.add_scalars(
        "eval_reward",
        {"average reward": aggregated_stats["reward"]},
        step_id,
    )
    metrics = {k: v for k, v in aggregated_stats.items() if k != "reward"}
    if len(metrics) > 0:
        writer.add_scalars("eval_metrics", metrics, step_id)
        logger.info("eval_metrics")
        logger.info(metrics)
    if len(log_diagnostics) > 0:
        os.makedirs(output_dir, exist_ok=True)
        eval_fn = f"{label}.json"
        with open(os.path.join(output_dir, eval_fn), 'w',
                  encoding='utf-8') as f:
            json.dump(total_stats, f, ensure_ascii=False, indent=4)
    self.envs.close()
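# The logger.warn call above originally concatenated an f-string with a plain
# string literal, so "{total_num_eps}" was printed verbatim. Implicit string
# concatenation does not inherit the f prefix; each literal needs its own:
def _demo_fstring_concat():
    total_num_eps = 5
    broken = f"dataset " "only has {total_num_eps}."
    fixed = f"dataset " f"only has {total_num_eps}."
    assert broken == "dataset only has {total_num_eps}."
    assert fixed == "dataset only has 5."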
def _eval_checkpoint(self,
                     checkpoint_path: str,
                     writer: TensorboardWriter,
                     checkpoint_index: int = 0) -> Dict:
    r"""Evaluates a single checkpoint.

    Args:
        checkpoint_path: path of checkpoint
        writer: tensorboard writer object for logging to tensorboard
        checkpoint_index: index of cur checkpoint for logging

    Returns:
        None
    """
    random.seed(self.config.SEED)
    np.random.seed(self.config.SEED)
    torch.manual_seed(self.config.SEED)

    # Map location CPU is almost always better than mapping to a CUDA device.
    ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu")

    if self.config.EVAL.USE_CKPT_CONFIG:
        config = self._setup_eval_config(ckpt_dict["config"])
    else:
        config = self.config.clone()

    ppo_cfg = config.RL.PPO

    config.defrost()
    config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT
    if self.config.DISPLAY_RESOLUTION != config.TASK_CONFIG.SIMULATOR.DEPTH_SENSOR.WIDTH:
        model_resolution = config.TASK_CONFIG.SIMULATOR.DEPTH_SENSOR.WIDTH
        config.TASK_CONFIG.SIMULATOR.DEPTH_SENSOR.WIDTH = \
            config.TASK_CONFIG.SIMULATOR.RGB_SENSOR.HEIGHT = \
            config.TASK_CONFIG.SIMULATOR.RGB_SENSOR.WIDTH = \
            config.TASK_CONFIG.SIMULATOR.DEPTH_SENSOR.HEIGHT = \
            self.config.DISPLAY_RESOLUTION
    else:
        model_resolution = self.config.DISPLAY_RESOLUTION
    config.freeze()

    if len(self.config.VIDEO_OPTION) > 0:
        config.defrost()
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
        config.freeze()
    elif "top_down_map" in self.config.VISUALIZATION_OPTION:
        config.defrost()
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
        config.freeze()

    logger.info(f"env config: {config}")
    self.envs = construct_envs(config, get_env_class(config.ENV_NAME))
    if self.config.DISPLAY_RESOLUTION != model_resolution:
        observation_space = self.envs.observation_spaces[0]
        observation_space.spaces['depth'].shape = (model_resolution,
                                                   model_resolution, 1)
        observation_space.spaces['rgb'].shape = (model_resolution,
                                                 model_resolution, 1)
    else:
        observation_space = self.envs.observation_spaces[0]
    self._setup_actor_critic_agent(ppo_cfg, observation_space)

    self.agent.load_state_dict(ckpt_dict["state_dict"])
    self.actor_critic = self.agent.actor_critic

    self.metric_uuids = []
    # get name of performance metric, e.g. "spl"
    for metric_name in self.config.TASK_CONFIG.TASK.MEASUREMENTS:
        metric_cfg = getattr(self.config.TASK_CONFIG.TASK, metric_name)
        measure_type = baseline_registry.get_measure(metric_cfg.TYPE)
        assert measure_type is not None, "invalid measurement type {}".format(
            metric_cfg.TYPE)
        self.metric_uuids.append(
            measure_type(sim=None, task=None, config=None)._get_uuid())

    observations = self.envs.reset()
    if self.config.DISPLAY_RESOLUTION != model_resolution:
        resize_observation(observations, model_resolution)
    batch = batch_obs(observations, self.device)

    current_episode_reward = torch.zeros(self.envs.num_envs, 1, device=self.device)
    test_recurrent_hidden_states = torch.zeros(
        self.actor_critic.net.num_recurrent_layers,
        self.config.NUM_PROCESSES,
        ppo_cfg.hidden_size,
        device=self.device,
    )
    prev_actions = torch.zeros(self.config.NUM_PROCESSES, 1,
                               device=self.device, dtype=torch.long)
    not_done_masks = torch.zeros(self.config.NUM_PROCESSES, 1, device=self.device)
    stats_episodes = dict()  # dict of dicts that stores stats per episode

    rgb_frames = [
        [] for _ in range(self.config.NUM_PROCESSES)
    ]  # type: List[List[np.ndarray]]
    audios = [[] for _ in range(self.config.NUM_PROCESSES)]
    if len(self.config.VIDEO_OPTION) > 0:
        os.makedirs(self.config.VIDEO_DIR, exist_ok=True)

    t = tqdm(total=self.config.TEST_EPISODE_COUNT)
    while (len(stats_episodes) < self.config.TEST_EPISODE_COUNT
           and self.envs.num_envs > 0):
        current_episodes = self.envs.current_episodes()

        with torch.no_grad():
            _, actions, _, test_recurrent_hidden_states = self.actor_critic.act(
                batch,
                test_recurrent_hidden_states,
                prev_actions,
                not_done_masks,
                deterministic=False)
            prev_actions.copy_(actions)

        outputs = self.envs.step([a[0].item() for a in actions])
        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]

        for i in range(self.envs.num_envs):
            if len(self.config.VIDEO_OPTION) > 0:
                if config.TASK_CONFIG.SIMULATOR.CONTINUOUS_VIEW_CHANGE \
                        and 'intermediate' in observations[i]:
                    for observation in observations[i]['intermediate']:
                        frame = observations_to_image(observation, infos[i])
                        rgb_frames[i].append(frame)
                    del observations[i]['intermediate']

                if "rgb" not in observations[i]:
                    observations[i]["rgb"] = np.zeros(
                        (self.config.DISPLAY_RESOLUTION,
                         self.config.DISPLAY_RESOLUTION, 3))
                frame = observations_to_image(observations[i], infos[i])
                rgb_frames[i].append(frame)
                audios[i].append(observations[i]['audiogoal'])

        if config.DISPLAY_RESOLUTION != model_resolution:
            resize_observation(observations, model_resolution)
        batch = batch_obs(observations, self.device)

        not_done_masks = torch.tensor(
            [[0.0] if done else [1.0] for done in dones],
            dtype=torch.float,
            device=self.device,
        )
        rewards = torch.tensor(rewards, dtype=torch.float,
                               device=self.device).unsqueeze(1)
        current_episode_reward += rewards
        next_episodes = self.envs.current_episodes()
        envs_to_pause = []
        for i in range(self.envs.num_envs):
            # pause envs which run out of episodes
            if (
                next_episodes[i].scene_id,
                next_episodes[i].episode_id,
            ) in stats_episodes:
                envs_to_pause.append(i)

            # episode ended
            if not_done_masks[i].item() == 0:
                episode_stats = dict()
                for metric_uuid in self.metric_uuids:
                    episode_stats[metric_uuid] = infos[i][metric_uuid]
                episode_stats["reward"] = current_episode_reward[i].item()
                episode_stats['geodesic_distance'] = current_episodes[
                    i].info['geodesic_distance']
                episode_stats['euclidean_distance'] = norm(
                    np.array(current_episodes[i].goals[0].position)
                    - np.array(current_episodes[i].start_position))
                logging.debug(episode_stats)
                current_episode_reward[i] = 0
                # use scene_id + episode_id as unique id for storing stats
                stats_episodes[(
                    current_episodes[i].scene_id,
                    current_episodes[i].episode_id,
                )] = episode_stats
                t.update()

                if len(self.config.VIDEO_OPTION) > 0:
                    fps = self.config.TASK_CONFIG.SIMULATOR.VIEW_CHANGE_FPS \
                        if self.config.TASK_CONFIG.SIMULATOR.CONTINUOUS_VIEW_CHANGE else 1
                    generate_video(
                        video_option=self.config.VIDEO_OPTION,
                        video_dir=self.config.VIDEO_DIR,
                        images=rgb_frames[i][:-1],
                        scene_name=current_episodes[i].scene_id.split('/')[3],
                        sound=current_episodes[i].info['sound'],
                        sr=self.config.TASK_CONFIG.SIMULATOR.AUDIO.RIR_SAMPLING_RATE,
                        episode_id=current_episodes[i].episode_id,
                        checkpoint_idx=checkpoint_index,
                        metric_name='spl',
                        metric_value=infos[i]['spl'],
                        tb_writer=writer,
                        audios=audios[i][:-1],
                        fps=fps)
                    # observations has been reset but info has not;
                    # to be consistent, do not use the last frame
                    rgb_frames[i] = []
                    audios[i] = []

                if "top_down_map" in self.config.VISUALIZATION_OPTION:
                    top_down_map = plot_top_down_map(
                        infos[i],
                        dataset=self.config.TASK_CONFIG.SIMULATOR.SCENE_DATASET)
                    scene = current_episodes[i].scene_id.split('/')[3]
                    writer.add_image(
                        '{}_{}_{}/{}'.format(
                            config.EVAL.SPLIT, scene,
                            current_episodes[i].episode_id,
                            config.BASE_TASK_CONFIG_PATH.split('/')[-1][:-5]),
                        top_down_map,
                        dataformats='WHC')

        (
            self.envs,
            test_recurrent_hidden_states,
            not_done_masks,
            current_episode_reward,
            prev_actions,
            batch,
            rgb_frames,
        ) = self._pause_envs(
            envs_to_pause,
            self.envs,
            test_recurrent_hidden_states,
            not_done_masks,
            current_episode_reward,
            prev_actions,
            batch,
            rgb_frames,
        )

    aggregated_stats = dict()
    for stat_key in next(iter(stats_episodes.values())).keys():
        aggregated_stats[stat_key] = sum(
            [v[stat_key] for v in stats_episodes.values()])
    num_episodes = len(stats_episodes)

    stats_file = os.path.join(
        config.TENSORBOARD_DIR,
        '{}_stats_{}.json'.format(config.EVAL.SPLIT, config.SEED))
    new_stats_episodes = {
        ','.join(key): value for key, value in stats_episodes.items()
    }
    with open(stats_file, 'w') as fo:
        json.dump(new_stats_episodes, fo)

    episode_reward_mean = aggregated_stats["reward"] / num_episodes
    episode_metrics_mean = {}
    for metric_uuid in self.metric_uuids:
        episode_metrics_mean[metric_uuid] = aggregated_stats[metric_uuid] / num_episodes

    logger.info(f"Average episode reward: {episode_reward_mean:.6f}")
    for metric_uuid in self.metric_uuids:
        logger.info(
            f"Average episode {metric_uuid}: {episode_metrics_mean[metric_uuid]:.6f}")

    if not config.EVAL.SPLIT.startswith('test'):
        writer.add_scalar("{}/reward".format(config.EVAL.SPLIT),
                          episode_reward_mean, checkpoint_index)
        for metric_uuid in self.metric_uuids:
            writer.add_scalar(f"{config.EVAL.SPLIT}/{metric_uuid}",
                              episode_metrics_mean[metric_uuid],
                              checkpoint_index)

    self.envs.close()

    result = {'episode_reward_mean': episode_reward_mean}
    for metric_uuid in self.metric_uuids:
        result['episode_{}_mean'.format(metric_uuid)] = \
            episode_metrics_mean[metric_uuid]
    return result
def local_evaluate(self,
                   agent: Agent,
                   num_episodes: Optional[int] = None,
                   control_period: Optional[float] = 1.0,
                   frame_rate: Optional[int] = 1):
    if num_episodes is None:
        num_episodes = len(self._env._env.episodes)
    else:
        assert num_episodes <= len(self._env._env.episodes), (
            "num_episodes({}) is larger than number of episodes "
            "in environment ({})".format(num_episodes,
                                         len(self._env._env.episodes)))
    assert num_episodes > 0, "num_episodes should be greater than 0"

    agg_metrics: Dict = defaultdict(float)
    writer = TensorboardWriter(
        'tb_benchmark/', flush_secs=30)  # flush_secs from base_trainer.py

    count_episodes = 0
    print("number of episodes: " + str(num_episodes))
    while count_episodes < num_episodes:
        print("working on episode " + str(count_episodes))
        observations_per_episode = []
        agent.reset()
        observations_per_action = self._env._env.reset()

        # initialize physics-enabled sim env. Do this for every
        # episode, since sometimes assets get deallocated
        if self._enable_physics:
            self._env._env.disable_physics()
            self._env._env.enable_physics()

        frame_counter = 0
        # act until one episode is over
        while not self._env._env.episode_over:
            action = agent.act(observations_per_action)
            observations_per_action = reward_per_action = None
            done_per_action = info_per_action = None
            if self._enable_physics is False:
                (observations_per_action, reward_per_action,
                 done_per_action, info_per_action) = self._env.step(action)
            else:
                # step with physics. For now we use a hard-coded time step of
                # 1/60 secs (used in the rigid object tutorial in Habitat Sim)
                (observations_per_action, reward_per_action,
                 done_per_action, info_per_action) = self._env.step_physics(
                     action,
                     time_step=1.0 / 60.0,
                     control_period=control_period)
            # generate an output image for the action. The image includes
            # observations and a top-down map showing the agent's state in the
            # environment. we use frame_rate (num. of frames per action) to
            # reduce computational overhead
            if frame_counter % frame_rate == 0:
                out_im_per_action = observations_to_image(
                    observations_per_action, info_per_action)
                observations_per_episode.append(out_im_per_action)
            frame_counter = frame_counter + 1

        # episode ended
        # get per-episode metrics. for now we only extract
        # distance-to-goal, success, spl
        metrics = self._env._env.get_metrics()
        per_ep_metrics = {
            k: metrics[k] for k in ['distance_to_goal', 'success', 'spl']
        }

        # print distance_to_goal, success and spl
        for k, v in per_ep_metrics.items():
            print(f'{k},{v}')

        # calculate aggregated distance_to_goal, success and spl
        for m, v in per_ep_metrics.items():
            agg_metrics[m] += v
        count_episodes += 1

        # generate video
        generate_video(
            video_option=["disk", "tensorboard"],
            video_dir='video_benchmark_dir',
            images=observations_per_episode,
            episode_id=count_episodes - 1,
            checkpoint_idx=0,
            metrics=per_ep_metrics,
            tb_writer=writer,
        )

    avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}
    return avg_metrics
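# Usage sketch for `local_evaluate` (illustrative): evaluate an agent for a
# couple of episodes. `benchmark` stands for an instance of the class this
# method belongs to, and `ForwardOnlyAgent` is a hypothetical habitat.Agent
# subclass used only for the example:
#   agent = ForwardOnlyAgent()
#   avg_metrics = benchmark.local_evaluate(agent, num_episodes=2, frame_rate=5)
#   print(avg_metrics)  # e.g. {'distance_to_goal': ..., 'success': ..., 'spl': ...}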
def play_env(env, args, config):
    render_steps_limit = None
    if args.no_render:
        render_steps_limit = DEFAULT_RENDER_STEPS_LIMIT

    use_arm_actions = None
    if args.load_actions is not None:
        with open(args.load_actions, "rb") as f:
            use_arm_actions = np.load(f)

    obs = env.reset()

    if not args.no_render:
        obs = env.step({"action": "EMPTY", "action_args": {}})
        draw_obs = observations_to_image(obs, {})
        pygame.init()
        screen = pygame.display.set_mode([draw_obs.shape[1], draw_obs.shape[0]])

    i = 0
    target_fps = 60.0
    prev_time = time.time()
    all_obs = []
    total_reward = 0
    all_arm_actions = []

    while True:
        if render_steps_limit is not None and i > render_steps_limit:
            break
        step_result, arm_action = get_input_vel_ctlr(
            args.no_render,
            use_arm_actions[i] if use_arm_actions is not None else None,
            args,
            obs,
            env,
        )
        if step_result is None:
            break
        all_arm_actions.append(arm_action)
        i += 1
        if use_arm_actions is not None and i >= len(use_arm_actions):
            break

        # obs, reward, done, info = step_result
        obs = step_result
        reward = 0.0
        info = env.get_metrics()

        total_reward += reward
        use_ob = observations_to_image(obs, info)
        use_ob = overlay_frame(use_ob, info)

        draw_ob = use_ob[:]

        if not args.no_render:
            draw_ob = np.transpose(draw_ob, (1, 0, 2))
            # (the original named this surface `draw_obuse_ob`, an apparent
            # merge of `draw_ob` and `use_ob`)
            draw_ob_surface = pygame.surfarray.make_surface(draw_ob)
            screen.blit(draw_ob_surface, (0, 0))
            pygame.display.update()
        if args.save_obs:
            all_obs.append(draw_ob)

        if not args.no_render:
            pygame.event.pump()
        if env.episode_over:
            env.reset()

        # throttle the loop to roughly target_fps
        curr_time = time.time()
        diff = curr_time - prev_time
        delay = max(1.0 / target_fps - diff, 0)
        time.sleep(delay)
        prev_time = curr_time

    if args.save_actions:
        assert len(all_arm_actions) > 200
        all_arm_actions = np.array(all_arm_actions)[:200]
        save_dir = "orp/start_data/"
        if not osp.exists(save_dir):
            os.makedirs(save_dir)
        save_path = osp.join(save_dir, "bench_ac.txt")
        with open(save_path, "wb") as f:
            np.save(f, all_arm_actions)
        raise ValueError("done")
    if args.save_obs:
        all_obs = np.array(all_obs)
        all_obs = np.transpose(all_obs, (0, 2, 1, 3))
        make_video_cv2(all_obs, "interactive_play")
    if not args.no_render:
        pygame.quit()
def _eval_checkpoint(
    self,
    checkpoint_path: str,
    writer: TensorboardWriter,
    checkpoint_index: int = 0,
) -> None:
    r"""Evaluates a single checkpoint.

    Args:
        checkpoint_path: path of checkpoint
        writer: tensorboard writer object for logging to tensorboard
        checkpoint_index: index of cur checkpoint for logging

    Returns:
        None
    """
    self.add_new_based_on_cfg()
    # Map location CPU is almost always better than mapping to a CUDA device.
    ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu")

    # ------------------------------------------------------------------
    # -- Update config for eval
    if self.config.EVAL.USE_CKPT_CONFIG:
        config = self._setup_eval_config(ckpt_dict["config"])
    else:
        config = self.config.clone()

    ppo_cfg = config.RL.PPO

    # # Mostly for visualization
    # config.defrost()
    # config.TASK_CONFIG.SIMULATOR.HABITAT_SIM_V0.GPU_GPU = False
    # config.freeze()

    split = config.TASK_CONFIG.DATASET.SPLIT
    config.defrost()
    config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
    config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
    config.freeze()
    # ------------------------------------------------------------------

    num_procs = self.config.NUM_PROCESSES
    device = self.device
    cfg = self.config

    logger.info(f"env config: {config}")
    self.envs = construct_envs(config, get_env_class(self.config.ENV_NAME))
    num_envs = self.envs.num_envs

    self._setup_actor_critic_agent(ppo_cfg, train=False)
    self.agent.load_state_dict(ckpt_dict["state_dict"])
    self.actor_critic = self.agent.actor_critic
    self.r_policy = self.agent.actor_critic.reachability_policy

    aux_models = self.actor_critic.net.aux_models
    other_losses = dict({
        k: torch.zeros(num_envs, 1, device=device) for k in aux_models.keys()
    })
    other_losses_action = dict({
        k: torch.zeros(num_envs, self.envs.action_spaces[0].n, device=device)
        for k in aux_models.keys()
    })
    num_steps = torch.zeros(num_envs, 1, device=device)

    # Config aux models for eval per item in batch
    for k, maux in aux_models.items():
        maux.set_per_element_loss()

    total_loss = 0
    if config.EVAL_MODE:
        self.agent.eval()
        self.r_policy.eval()

    # get name of performance metric, e.g. "spl"
    metric_name = cfg.TASK_CONFIG.TASK.MEASUREMENTS[0]
    metric_cfg = getattr(cfg.TASK_CONFIG.TASK, metric_name)
    measure_type = baseline_registry.get_measure(metric_cfg.TYPE)
    assert measure_type is not None, "invalid measurement type {}".format(
        metric_cfg.TYPE)
    self.metric_uuid = measure_type(sim=None, task=None, config=None)._get_uuid()

    observations = self.envs.reset()
    batch = batch_obs_augment_aux(observations, self.envs.get_shared_mem())

    info_data_keys = ["discovered", "collisions_wall", "collisions_prox"]
    log_data_keys = [
        "current_episode_reward", "current_episode_go_reward"
    ] + info_data_keys
    log_data = dict({
        k: torch.zeros(num_envs, 1, device=device) for k in log_data_keys
    })
    info_data = dict({k: log_data[k] for k in info_data_keys})

    test_recurrent_hidden_states = torch.zeros(
        self.actor_critic.net.num_recurrent_layers,
        num_procs,
        ppo_cfg.hidden_size,
        device=device,
    )
    prev_actions = torch.zeros(num_procs, 1, device=device, dtype=torch.long)
    not_done_masks = torch.zeros(num_procs, 1, device=device)
    stats_episodes = dict()  # dict of dicts that stores stats per episode
    stats_episodes_scenes = dict()  # number of collected stats from each scene

    max_test_ep_count = cfg.TEST_EPISODE_COUNT
    # TODO this should depend on number of scenes :(
    # TODO But then envs shouldn't be paused but fast-forwarded to the next scene
    # TODO We consider num envs == num scenes
    max_ep_per_env = max_test_ep_count / float(num_envs)

    rgb_frames = [[] for _ in range(num_procs)]  # type: List[List[np.ndarray]]
    if len(cfg.VIDEO_OPTION) > 0:
        os.makedirs(cfg.VIDEO_DIR, exist_ok=True)
    video_log_int = cfg.VIDEO_OPTION_INTERVAL

    num_frames = 0
    plot_pos = -1
    prev_true_pos = []
    prev_pred_pos = []

    while (len(stats_episodes) <= cfg.TEST_EPISODE_COUNT and num_envs > 0):
        current_episodes = self.envs.current_episodes()

        with torch.no_grad():
            prev_hidden = test_recurrent_hidden_states
            _, actions, _, test_recurrent_hidden_states, aux_out = \
                self.actor_critic.act(
                    batch,
                    test_recurrent_hidden_states,
                    prev_actions,
                    not_done_masks,
                    deterministic=False,
                )
            prev_actions.copy_(actions)
            if 'action' in batch:
                prev_actions = batch['action'].unsqueeze(1).to(
                    actions.device).long()

            for k, v in aux_out.items():
                loss = aux_models[k].calc_loss(v, batch, prev_hidden,
                                               prev_actions, not_done_masks,
                                               actions)
                total_loss += loss
                if other_losses[k] is None:
                    other_losses[k] = loss
                else:
                    other_losses[k] += loss.unsqueeze(1)
                if len(prev_actions) == 1:
                    other_losses_action[k][0, prev_actions.item()] += loss.item()

        # ------------------------------------------------------------------
        # - Hacky logs
        if plot_pos >= 0:
            prev_true_pos.append(
                batch["gps_compass_start"][plot_pos].data[:2].cpu().numpy())
            prev_pred_pos.append(
                aux_out["rel_start_pos_reg"][plot_pos].data.cpu().numpy() * 15)
            if num_frames % 10 == 0:
                xx, yy = [], []
                for x, y in prev_true_pos:
                    xx.append(x)
                    yy.append(y)
                plt.scatter(xx, yy, label="true_pos")
                xx, yy = [], []
                for x, y in prev_pred_pos:
                    xx.append(x)
                    yy.append(y)
                plt.scatter(xx, yy, label="pred_pos")
                plt.legend()
                plt.show()
                plt.waitforbuttonpress()
                plt.close()
        # ------------------------------------------------------------------

        num_steps += 1
        outputs = self.envs.step([a[0].item() for a in actions])
        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
        not_done_masks = torch.tensor(
            [[0.0] if done else [1.0] for done in dones],
            dtype=torch.float,
            device=device,
        )

        map_values = self._get_mapping(observations, aux_out)
        batch = batch_obs_augment_aux(
            observations,
            self.envs.get_shared_mem(),
            device=device,
            map_values=map_values,
            masks=not_done_masks,
        )

        valid_map_size = [
            float(ifs["top_down_map"]["valid_map"].sum()) for ifs in infos
        ]
        discovered_factor = [
            infos[ix]["top_down_map"]["explored_map"].sum() / valid_map_size[ix]
            for ix in range(len(infos))
        ]
        seen_factor = [
            infos[ix]["top_down_map"]["ful_fog_of_war_mask"].sum() / valid_map_size[ix]
            for ix in range(len(infos))
        ]

        rewards = torch.tensor(rewards, dtype=torch.float,
                               device=device).unsqueeze(1)
        log_data["current_episode_reward"] += rewards

        # -- Add intrinsic reward
        if self.only_intrinsic_reward:
            rewards.zero_()
        if self.r_enabled:
            ir_rewards = self._add_intrinsic_reward(batch, actions, rewards,
                                                    not_done_masks)
            log_data["current_episode_go_reward"] += ir_rewards
            rewards += ir_rewards

        # Log other info from infos dict
        for iii, info in enumerate(infos):
            for k_info, v_info in info_data.items():
                v_info[iii] += info[k_info]

        next_episodes = self.envs.current_episodes()
        envs_to_pause = []
        n_envs = num_envs
        for i in range(n_envs):
            scene = next_episodes[i].scene_id
            if scene not in stats_episodes_scenes:
                stats_episodes_scenes[scene] = 0
            if stats_episodes_scenes[scene] >= max_ep_per_env:
                envs_to_pause.append(i)

            # episode ended
            if not_done_masks[i].item() == 0:
                episode_stats = dict()
                episode_stats[self.metric_uuid] = infos[i][self.metric_uuid]
                episode_stats["success"] = int(infos[i][self.metric_uuid] > 0)
                for kk, vv in log_data.items():
                    episode_stats[kk] = vv[i].item()
                    vv[i] = 0
                episode_stats["map_discovered"] = discovered_factor[i]
                episode_stats["map_seen"] = seen_factor[i]
                for k, v in other_losses.items():
                    episode_stats[k] = v[i].item() / num_steps[i].item()
                    other_losses_action[k][i].fill_(0)
                    other_losses[k][i] = 0
                num_steps[i] = 0
                # use scene_id + episode_id as unique id for storing stats
                stats_episodes[(current_episodes[i].scene_id,
                                current_episodes[i].episode_id)] = episode_stats
                print(f"Episode {len(stats_episodes)} stats:", episode_stats)
                stats_episodes_scenes[current_episodes[i].scene_id] += 1

                if len(cfg.VIDEO_OPTION) > 0 and \
                        checkpoint_index % video_log_int == 0:
                    generate_video(
                        video_option=cfg.VIDEO_OPTION,
                        video_dir=cfg.VIDEO_DIR,
                        images=rgb_frames[i],
                        episode_id=current_episodes[i].episode_id,
                        checkpoint_idx=checkpoint_index,
                        metric_name=self.metric_uuid,
                        metric_value=infos[i][self.metric_uuid],
                        tb_writer=writer,
                    )
                    rgb_frames[i] = []

            # episode continues
            elif len(cfg.VIDEO_OPTION) > 0:
                for k, v in observations[i].items():
                    if isinstance(v, torch.Tensor):
                        observations[i][k] = v.cpu().numpy()
                frame = observations_to_image(observations[i], infos[i])
                rgb_frames[i].append(frame)

        # Pop done envs:
        if len(envs_to_pause) > 0:
            s_index = list(range(num_envs))
            for idx in reversed(envs_to_pause):
                s_index.pop(idx)
            for k, v in other_losses.items():
                other_losses[k] = other_losses[k][s_index]
            for k, v in log_data.items():
                log_data[k] = log_data[k][s_index]

        (
            self.envs,
            test_recurrent_hidden_states,
            not_done_masks,
            current_episode_reward,
            prev_actions,
            batch,
            rgb_frames,
        ) = self._pause_envs(
            envs_to_pause,
            self.envs,
            test_recurrent_hidden_states,
            not_done_masks,
            None,
            prev_actions,
            batch,
            rgb_frames,
        )

    aggregated_stats = dict()
    for stat_key in next(iter(stats_episodes.values())).keys():
        aggregated_stats[stat_key] = sum(
            [v[stat_key] for v in stats_episodes.values()])
    num_episodes = len(stats_episodes)

    episodes_agg_stats = dict()
    for k, v in aggregated_stats.items():
        episodes_agg_stats[k] = v / num_episodes
        logger.info(f"Average episode {k}: {episodes_agg_stats[k]:.6f}")

    for k, v in episodes_agg_stats.items():
        writer.add_scalars(f"eval_{k}", {f"{split}_average {k}": v},
                           checkpoint_index)
        print(f"[{checkpoint_index}] average {k}", v)

    self.envs.close()
def eval(self, checkpoint_path):
    r"""Evaluates a single checkpoint.

    Args:
        checkpoint_path: path of checkpoint

    Returns:
        None
    """
    self.device = (torch.device("cuda", self.config.TORCH_GPU_ID)
                   if torch.cuda.is_available() else torch.device("cpu"))

    # Map location CPU is almost always better than mapping to a CUDA device.
    ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu")

    if self.config.EVAL.USE_CKPT_CONFIG:
        config = self._setup_eval_config(ckpt_dict["config"])
    else:
        config = self.config.clone()

    ppo_cfg = config.RL.PPO

    config.defrost()
    config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT
    config.freeze()

    if len(self.config.VIDEO_OPTION) > 0:
        config.defrost()
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
        config.freeze()

    self.env = construct_envs(config, get_env_class(config.ENV_NAME))
    self._setup_actor_critic_agent(ppo_cfg)

    self.agent.load_state_dict(ckpt_dict["state_dict"])
    self.actor_critic = self.agent.actor_critic

    # get name of performance metric, e.g. "spl"
    metric_name = self.config.TASK_CONFIG.TASK.MEASUREMENTS[0]
    metric_cfg = getattr(self.config.TASK_CONFIG.TASK, metric_name)
    measure_type = baseline_registry.get_measure(metric_cfg.TYPE)
    assert measure_type is not None, "invalid measurement type {}".format(
        metric_cfg.TYPE)
    self.metric_uuid = measure_type(sim=None, task=None, config=None)._get_uuid()

    observations = self.env.reset()
    batch = batch_obs(observations, self.device)

    current_episode_reward = torch.zeros(self.env.num_envs, 1, device=self.device)
    test_recurrent_hidden_states = torch.zeros(
        self.actor_critic.net.num_recurrent_layers,
        self.config.NUM_PROCESSES,
        ppo_cfg.hidden_size,
        device=self.device,
    )
    prev_actions = torch.zeros(self.config.NUM_PROCESSES, 1,
                               device=self.device, dtype=torch.long)
    not_done_masks = torch.zeros(self.config.NUM_PROCESSES, 1, device=self.device)
    stats_episodes = dict()  # dict of dicts that stores stats per episode

    rgb_frames = [
        [] for _ in range(self.config.NUM_PROCESSES)
    ]  # type: List[List[np.ndarray]]
    if len(self.config.VIDEO_OPTION) > 0:
        os.makedirs(self.config.VIDEO_DIR, exist_ok=True)

    self.actor_critic.eval()
    while (len(stats_episodes) < self.config.TEST_EPISODE_COUNT
           and self.env.num_envs > 0):
        current_episodes = self.env.current_episodes()

        with torch.no_grad():
            (
                _,
                actions,
                _,
                test_recurrent_hidden_states,
            ) = self.actor_critic.act(
                batch,
                test_recurrent_hidden_states,
                prev_actions,
                not_done_masks,
                deterministic=False,
            )
            prev_actions.copy_(actions)

        outputs = self.env.step([a[0].item() for a in actions])
        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
        batch = batch_obs(observations, self.device)

        not_done_masks = torch.tensor(
            [[0.0] if done else [1.0] for done in dones],
            dtype=torch.float,
            device=self.device,
        )
        rewards = torch.tensor(rewards, dtype=torch.float,
                               device=self.device).unsqueeze(1)
        current_episode_reward += rewards
        next_episodes = self.env.current_episodes()
        envs_to_pause = []
        n_envs = self.env.num_envs
        for i in range(n_envs):
            if (
                next_episodes[i].scene_id,
                next_episodes[i].episode_id,
            ) in stats_episodes:
                envs_to_pause.append(i)

            # episode ended
            if not_done_masks[i].item() == 0:
                episode_stats = dict()
                episode_stats[self.metric_uuid] = infos[i][self.metric_uuid]
                episode_stats["success"] = int(infos[i][self.metric_uuid] > 0)
                episode_stats["reward"] = current_episode_reward[i].item()
                current_episode_reward[i] = 0
                # use scene_id + episode_id as unique id for storing stats
                stats_episodes[(
                    current_episodes[i].scene_id,
                    current_episodes[i].episode_id,
                )] = episode_stats

                if len(self.config.VIDEO_OPTION) > 0:
                    generate_video(
                        video_option=self.config.VIDEO_OPTION,
                        video_dir=self.config.VIDEO_DIR,
                        images=rgb_frames[i],
                        episode_id=current_episodes[i].episode_id,
                        checkpoint_idx=0,
                        metric_name=self.metric_uuid,
                        metric_value=infos[i][self.metric_uuid],
                    )
                    rgb_frames[i] = []

            # episode continues
            elif len(self.config.VIDEO_OPTION) > 0:
                frame = observations_to_image(observations[i], infos[i])
                rgb_frames[i].append(frame)

        (
            self.env,
            test_recurrent_hidden_states,
            not_done_masks,
            current_episode_reward,
            prev_actions,
            batch,
            rgb_frames,
        ) = self._pause_envs(
            envs_to_pause,
            self.env,
            test_recurrent_hidden_states,
            not_done_masks,
            current_episode_reward,
            prev_actions,
            batch,
            rgb_frames,
        )

    aggregated_stats = dict()
    for stat_key in next(iter(stats_episodes.values())).keys():
        aggregated_stats[stat_key] = sum(
            [v[stat_key] for v in stats_episodes.values()])
    num_episodes = len(stats_episodes)

    episode_reward_mean = aggregated_stats["reward"] / num_episodes
    episode_metric_mean = aggregated_stats[self.metric_uuid] / num_episodes
    episode_success_mean = aggregated_stats["success"] / num_episodes

    print(f"Average episode reward: {episode_reward_mean:.6f}")
    print(f"Average episode success: {episode_success_mean:.6f}")
    print(f"Average episode {self.metric_uuid}: {episode_metric_mean:.6f}")

    if "extra_state" in ckpt_dict and "step" in ckpt_dict["extra_state"]:
        step_id = ckpt_dict["extra_state"]["step"]  # currently unused below

    print("eval_reward", {"average reward": episode_reward_mean})
    print(
        f"eval_{self.metric_uuid}",
        {f"average {self.metric_uuid}": episode_metric_mean},
    )
    print("eval_success", {"average success": episode_success_mean})

    self.env.close()
def _eval_checkpoint(
    self,
    checkpoint_path: str,
    writer: TensorboardWriter,
    checkpoint_index: int = 0,
) -> None:
    r"""Evaluates a single checkpoint.

    Args:
        checkpoint_path: path of checkpoint
        writer: tensorboard writer object for logging to tensorboard
        checkpoint_index: index of cur checkpoint for logging

    Returns:
        None
    """
    # Map location CPU is almost always better than mapping to a CUDA device.
    ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu")

    if self.config.EVAL.USE_CKPT_CONFIG:
        config = self._setup_eval_config(ckpt_dict["config"])
    else:
        config = self.config.clone()

    ppo_cfg = config.RL.PPO

    config.defrost()
    config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT
    config.freeze()

    if len(self.config.VIDEO_OPTION) > 0:
        config.defrost()
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
        config.freeze()

    logger.info(f"env config: {config}")
    self.envs = construct_envs(config, get_env_class(config.ENV_NAME))
    self._setup_actor_critic_agent(ppo_cfg)
    self.actor_critic.eval()

    if self._static_encoder:
        self._encoder = self.agent.actor_critic.net.visual_encoder

    self.agent.load_state_dict(ckpt_dict["state_dict"])
    self.actor_critic = self.agent.actor_critic

    observations = self.envs.reset()
    batch = batch_obs(observations, device=self.device)
    if self._static_encoder:
        batch["visual_features"] = self._encoder(batch)
        batch["prev_visual_features"] = torch.zeros_like(batch["visual_features"])

    current_episode_reward = torch.zeros(self.envs.num_envs, 1, device=self.device)
    test_recurrent_hidden_states = torch.zeros(
        self.actor_critic.net.num_recurrent_layers,
        self.config.NUM_PROCESSES,
        ppo_cfg.hidden_size,
        device=self.device,
    )
    prev_actions = torch.zeros(self.config.NUM_PROCESSES, 1,
                               device=self.device, dtype=torch.long)
    not_done_masks = torch.zeros(self.config.NUM_PROCESSES, 1, device=self.device)
    stats_episodes = dict()  # dict of dicts that stores stats per episode

    rgb_frames = [
        [] for _ in range(self.config.NUM_PROCESSES)
    ]  # type: List[List[np.ndarray]]
    if len(self.config.VIDEO_OPTION) > 0:
        os.makedirs(self.config.VIDEO_DIR, exist_ok=True)

    number_of_eval_episodes = self.config.TEST_EPISODE_COUNT
    if number_of_eval_episodes == -1:
        number_of_eval_episodes = sum(self.envs.number_of_episodes)
    else:
        total_num_eps = sum(self.envs.number_of_episodes)
        if total_num_eps < number_of_eval_episodes:
            logger.warn(
                f"Config specified {number_of_eval_episodes} eval episodes"
                f", dataset only has {total_num_eps}."  # second literal needs its own f prefix
            )
            logger.warn(f"Evaluating with {total_num_eps} instead.")
            number_of_eval_episodes = total_num_eps

    pbar = tqdm.tqdm(total=number_of_eval_episodes)
    self.actor_critic.eval()
    while (len(stats_episodes) < number_of_eval_episodes
           and self.envs.num_envs > 0):
        current_episodes = self.envs.current_episodes()

        with torch.no_grad():
            step_batch = batch
            (
                _,
                actions,
                _,
                test_recurrent_hidden_states,
            ) = self.actor_critic.act(
                batch,
                test_recurrent_hidden_states,
                prev_actions,
                not_done_masks,
                deterministic=False,
            )
            prev_actions.copy_(actions)

        outputs = self.envs.step([a[0].item() for a in actions])
        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
        batch = batch_obs(observations, device=self.device)
        if self._static_encoder:
            batch["prev_visual_features"] = step_batch["visual_features"]
            batch["visual_features"] = self._encoder(batch)

        not_done_masks = torch.tensor(
            [[0.0] if done else [1.0] for done in dones],
            dtype=torch.float,
            device=self.device,
        )
        rewards = torch.tensor(rewards, dtype=torch.float,
                               device=self.device).unsqueeze(1)
        current_episode_reward += rewards
        next_episodes = self.envs.current_episodes()
        envs_to_pause = []
        n_envs = self.envs.num_envs
        for i in range(n_envs):
            if (
                next_episodes[i].scene_id,
                next_episodes[i].episode_id,
            ) in stats_episodes:
                envs_to_pause.append(i)

            # episode ended
            if not_done_masks[i].item() == 0:
                pbar.update()
                episode_stats = dict()
                episode_stats["reward"] = current_episode_reward[i].item()
                episode_stats.update(self._extract_scalars_from_info(infos[i]))
                current_episode_reward[i] = 0
                # use scene_id + episode_id as unique id for storing stats
                stats_episodes[(
                    current_episodes[i].scene_id,
                    current_episodes[i].episode_id,
                )] = episode_stats

                if len(self.config.VIDEO_OPTION) > 0:
                    generate_video(
                        video_option=self.config.VIDEO_OPTION,
                        video_dir=self.config.VIDEO_DIR,
                        images=rgb_frames[i],
                        episode_id=current_episodes[i].episode_id,
                        checkpoint_idx=checkpoint_index,
                        metrics=self._extract_scalars_from_info(infos[i]),
                        tb_writer=writer,
                    )
                    rgb_frames[i] = []

            # episode continues
            elif len(self.config.VIDEO_OPTION) > 0:
                frame = observations_to_image(observations[i], infos[i])
                rgb_frames[i].append(frame)

        (
            self.envs,
            test_recurrent_hidden_states,
            not_done_masks,
            current_episode_reward,
            prev_actions,
            batch,
            rgb_frames,
        ) = self._pause_envs(
            envs_to_pause,
            self.envs,
            test_recurrent_hidden_states,
            not_done_masks,
            current_episode_reward,
            prev_actions,
            batch,
            rgb_frames,
        )

    num_episodes = len(stats_episodes)
    aggregated_stats = dict()
    for stat_key in next(iter(stats_episodes.values())).keys():
        aggregated_stats[stat_key] = (
            sum([v[stat_key] for v in stats_episodes.values()]) / num_episodes)

    for k, v in aggregated_stats.items():
        logger.info(f"Average episode {k}: {v:.4f}")

    step_id = checkpoint_index
    if "extra_state" in ckpt_dict and "step" in ckpt_dict["extra_state"]:
        step_id = ckpt_dict["extra_state"]["step"]

    writer.add_scalars(
        "eval_reward",
        {"average reward": aggregated_stats["reward"]},
        step_id,
    )
    metrics = {k: v for k, v in aggregated_stats.items() if k != "reward"}
    if len(metrics) > 0:
        writer.add_scalars("eval_metrics", metrics, step_id)

    self.envs.close()
def _eval_checkpoint(
    self,
    checkpoint_path: str,
    writer: TensorboardWriter,
    cur_ckpt_idx: int = 0,
) -> None:
    r"""Evaluates a single checkpoint.

    Args:
        checkpoint_path: path of checkpoint
        writer: tensorboard writer object for logging to tensorboard
        cur_ckpt_idx: index of cur checkpoint for logging

    Returns:
        None
    """
    ckpt_dict = self.load_checkpoint(
        checkpoint_path, map_location=self.device
    )

    ckpt_config = ckpt_dict["config"]
    config = self.config.clone()
    ckpt_cmd_opts = ckpt_config.CMD_TRAILING_OPTS
    eval_cmd_opts = config.CMD_TRAILING_OPTS

    # config merge priority: eval_opts > ckpt_opts > eval_cfg > ckpt_cfg
    # first line for old checkpoint compatibility
    config.merge_from_other_cfg(ckpt_config)
    config.merge_from_other_cfg(self.config)
    config.merge_from_list(ckpt_cmd_opts)
    config.merge_from_list(eval_cmd_opts)

    ppo_cfg = config.TRAINER.RL.PPO

    config.TASK_CONFIG.defrost()
    config.TASK_CONFIG.DATASET.SPLIT = "val"
    agent_sensors = ppo_cfg.sensors.strip().split(",")
    config.TASK_CONFIG.SIMULATOR.AGENT_0.SENSORS = agent_sensors
    if self.video_option:
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
    config.freeze()

    logger.info(f"env config: {config}")
    self.envs = construct_envs(config, NavRLEnv)
    self._setup_actor_critic_agent(ppo_cfg)

    self.agent.load_state_dict(ckpt_dict["state_dict"])
    self.actor_critic = self.agent.actor_critic

    observations = self.envs.reset()
    batch = batch_obs(observations)
    for sensor in batch:
        batch[sensor] = batch[sensor].to(self.device)

    current_episode_reward = torch.zeros(
        self.envs.num_envs, 1, device=self.device
    )

    test_recurrent_hidden_states = torch.zeros(
        ppo_cfg.num_processes, ppo_cfg.hidden_size, device=self.device
    )
    not_done_masks = torch.zeros(
        ppo_cfg.num_processes, 1, device=self.device
    )
    stats_episodes = dict()  # dict of dicts that stores stats per episode

    # [[]] * n would alias one shared list n times; build an independent
    # frame buffer per process instead.
    rgb_frames = [
        [] for _ in range(ppo_cfg.num_processes)
    ]  # type: List[List[np.ndarray]]
    if self.video_option:
        os.makedirs(ppo_cfg.video_dir, exist_ok=True)

    while (
        len(stats_episodes) < ppo_cfg.count_test_episodes
        and self.envs.num_envs > 0
    ):
        current_episodes = self.envs.current_episodes()

        with torch.no_grad():
            (
                _,
                actions,
                _,
                test_recurrent_hidden_states,
            ) = self.actor_critic.act(
                batch,
                test_recurrent_hidden_states,
                not_done_masks,
                deterministic=False,
            )

        outputs = self.envs.step([a[0].item() for a in actions])

        observations, rewards, dones, infos = [
            list(x) for x in zip(*outputs)
        ]
        batch = batch_obs(observations)
        for sensor in batch:
            batch[sensor] = batch[sensor].to(self.device)

        not_done_masks = torch.tensor(
            [[0.0] if done else [1.0] for done in dones],
            dtype=torch.float,
            device=self.device,
        )

        rewards = torch.tensor(
            rewards, dtype=torch.float, device=self.device
        ).unsqueeze(1)
        current_episode_reward += rewards
        next_episodes = self.envs.current_episodes()
        envs_to_pause = []
        n_envs = self.envs.num_envs
        for i in range(n_envs):
            if (
                next_episodes[i].scene_id,
                next_episodes[i].episode_id,
            ) in stats_episodes:
                envs_to_pause.append(i)

            # episode ended
            if not_done_masks[i].item() == 0:
                episode_stats = dict()
                episode_stats["spl"] = infos[i]["spl"]
                episode_stats["success"] = int(infos[i]["spl"] > 0)
                episode_stats["reward"] = current_episode_reward[i].item()
                current_episode_reward[i] = 0
                # use scene_id + episode_id as unique id for storing stats
                stats_episodes[
                    (
                        current_episodes[i].scene_id,
                        current_episodes[i].episode_id,
                    )
                ] = episode_stats
                if self.video_option:
                    generate_video(
                        ppo_cfg,
                        rgb_frames[i],
                        current_episodes[i].episode_id,
                        cur_ckpt_idx,
                        infos[i]["spl"],
                        writer,
                    )
                    rgb_frames[i] = []

            # episode continues
            elif self.video_option:
                frame = observations_to_image(observations[i], infos[i])
                rgb_frames[i].append(frame)

        # pausing self.envs with no new episode
        if len(envs_to_pause) > 0:
            state_index = list(range(self.envs.num_envs))
            for idx in reversed(envs_to_pause):
                state_index.pop(idx)
                self.envs.pause_at(idx)

            # indexing along the batch dimensions
            test_recurrent_hidden_states = test_recurrent_hidden_states[
                state_index
            ]
            not_done_masks = not_done_masks[state_index]
            current_episode_reward = current_episode_reward[state_index]

            for k, v in batch.items():
                batch[k] = v[state_index]

            if self.video_option:
                rgb_frames = [rgb_frames[i] for i in state_index]

    aggregated_stats = dict()
    for stat_key in next(iter(stats_episodes.values())).keys():
        aggregated_stats[stat_key] = sum(
            [v[stat_key] for v in stats_episodes.values()]
        )
    num_episodes = len(stats_episodes)

    episode_reward_mean = aggregated_stats["reward"] / num_episodes
    episode_spl_mean = aggregated_stats["spl"] / num_episodes
    episode_success_mean = aggregated_stats["success"] / num_episodes

    logger.info(
        "Average episode reward: {:.6f}".format(episode_reward_mean)
    )
    logger.info(
        "Average episode success: {:.6f}".format(episode_success_mean)
    )
    logger.info("Average episode SPL: {:.6f}".format(episode_spl_mean))

    writer.add_scalars(
        "eval_reward",
        {"average reward": episode_reward_mean},
        cur_ckpt_idx,
    )
    writer.add_scalars(
        "eval_SPL", {"average SPL": episode_spl_mean}, cur_ckpt_idx
    )
    writer.add_scalars(
        "eval_success",
        {"average success": episode_success_mean},
        cur_ckpt_idx,
    )
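# Note on the rgb_frames fix above: the original `[[]] * num_processes`
# creates N references to one shared list, so frames from every environment
# land in the same buffer. A quick self-contained demonstration of the
# pitfall and the fix:
aliased = [[]] * 3
aliased[0].append("frame")
assert aliased == [["frame"], ["frame"], ["frame"]]  # all three alias one list

independent = [[] for _ in range(3)]
independent[0].append("frame")
assert independent == [["frame"], [], []]  # each env gets its own buffer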
def eval_checkpoint(checkpoint_path, args, writer, cur_ckpt_idx=0):
    env_configs = []
    baseline_configs = []
    device = torch.device("cuda", args.pth_gpu_id)

    for _ in range(args.num_processes):
        config_env = get_config(config_paths=args.task_config)
        config_env.defrost()
        config_env.DATASET.SPLIT = "val"

        agent_sensors = args.sensors.strip().split(",")
        for sensor in agent_sensors:
            assert sensor in ["RGB_SENSOR", "DEPTH_SENSOR"]
        config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors
        if args.video_option:
            config_env.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
            config_env.TASK.MEASUREMENTS.append("COLLISIONS")
        config_env.freeze()
        env_configs.append(config_env)

        config_baseline = cfg_baseline()
        baseline_configs.append(config_baseline)

    assert len(baseline_configs) > 0, "empty list of datasets"

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            tuple(
                zip(env_configs, baseline_configs, range(args.num_processes))
            )
        ),
    )

    ckpt = torch.load(checkpoint_path, map_location=device)

    actor_critic = Policy(
        observation_space=envs.observation_spaces[0],
        action_space=envs.action_spaces[0],
        hidden_size=512,
        goal_sensor_uuid=env_configs[0].TASK.GOAL_SENSOR_UUID,
    )
    actor_critic.to(device)

    ppo = PPO(
        actor_critic=actor_critic,
        clip_param=0.1,
        ppo_epoch=4,
        num_mini_batch=32,
        value_loss_coef=0.5,
        entropy_coef=0.01,
        lr=2.5e-4,
        eps=1e-5,
        max_grad_norm=0.5,
    )

    ppo.load_state_dict(ckpt["state_dict"])

    actor_critic = ppo.actor_critic

    observations = envs.reset()
    batch = batch_obs(observations)
    for sensor in batch:
        batch[sensor] = batch[sensor].to(device)

    episode_rewards = torch.zeros(envs.num_envs, 1, device=device)
    episode_spls = torch.zeros(envs.num_envs, 1, device=device)
    episode_success = torch.zeros(envs.num_envs, 1, device=device)
    episode_counts = torch.zeros(envs.num_envs, 1, device=device)
    current_episode_reward = torch.zeros(envs.num_envs, 1, device=device)

    test_recurrent_hidden_states = torch.zeros(
        args.num_processes, args.hidden_size, device=device
    )
    not_done_masks = torch.zeros(args.num_processes, 1, device=device)
    stats_episodes = set()

    rgb_frames = None
    if args.video_option:
        # [[]] * n would alias one shared list; build independent
        # per-process frame buffers instead.
        rgb_frames = [[] for _ in range(args.num_processes)]
        os.makedirs(args.video_dir, exist_ok=True)

    while episode_counts.sum() < args.count_test_episodes:
        current_episodes = envs.current_episodes()

        with torch.no_grad():
            _, actions, _, test_recurrent_hidden_states = actor_critic.act(
                batch,
                test_recurrent_hidden_states,
                not_done_masks,
                deterministic=False,
            )

        outputs = envs.step([a[0].item() for a in actions])

        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
        batch = batch_obs(observations)
        for sensor in batch:
            batch[sensor] = batch[sensor].to(device)

        not_done_masks = torch.tensor(
            [[0.0] if done else [1.0] for done in dones],
            dtype=torch.float,
            device=device,
        )

        for i in range(not_done_masks.shape[0]):
            if not_done_masks[i].item() == 0:
                episode_spls[i] += infos[i]["spl"]
                if infos[i]["spl"] > 0:
                    episode_success[i] += 1

        rewards = torch.tensor(
            rewards, dtype=torch.float, device=device
        ).unsqueeze(1)
        current_episode_reward += rewards
        episode_rewards += (1 - not_done_masks) * current_episode_reward
        episode_counts += 1 - not_done_masks
        current_episode_reward *= not_done_masks

        next_episodes = envs.current_episodes()
        envs_to_pause = []
        n_envs = envs.num_envs
        for i in range(n_envs):
            if next_episodes[i].episode_id in stats_episodes:
                envs_to_pause.append(i)

            # episode ended
            if not_done_masks[i].item() == 0:
                stats_episodes.add(current_episodes[i].episode_id)
                if args.video_option:
                    generate_video(
                        args,
                        rgb_frames[i],
                        current_episodes[i].episode_id,
                        cur_ckpt_idx,
                        infos[i]["spl"],
                        writer,
                    )
                    rgb_frames[i] = []

            # episode continues
            elif args.video_option:
                frame = observations_to_image(observations[i], infos[i])
                rgb_frames[i].append(frame)

        # stop tracking ended episodes if they exist
        if len(envs_to_pause) > 0:
            state_index = list(range(envs.num_envs))
            for idx in reversed(envs_to_pause):
                state_index.pop(idx)
                envs.pause_at(idx)

            # indexing along the batch dimension; the hidden states are
            # (num_processes, hidden_size) here, so batch is dim 0
            test_recurrent_hidden_states = test_recurrent_hidden_states[
                state_index
            ]
            not_done_masks = not_done_masks[state_index]
            current_episode_reward = current_episode_reward[state_index]
            # the per-env accumulators share the batch dimension and must be
            # sliced as well, otherwise the += above fails after a pause
            episode_rewards = episode_rewards[state_index]
            episode_spls = episode_spls[state_index]
            episode_success = episode_success[state_index]
            episode_counts = episode_counts[state_index]

            for k, v in batch.items():
                batch[k] = v[state_index]

            if args.video_option:
                rgb_frames = [rgb_frames[i] for i in state_index]

    episode_reward_mean = (episode_rewards / episode_counts).mean().item()
    episode_spl_mean = (episode_spls / episode_counts).mean().item()
    episode_success_mean = (episode_success / episode_counts).mean().item()

    logger.info("Average episode reward: {:.6f}".format(episode_reward_mean))
    logger.info("Average episode success: {:.6f}".format(episode_success_mean))
    logger.info("Average episode SPL: {:.6f}".format(episode_spl_mean))

    writer.add_scalars(
        "eval_reward", {"average reward": episode_reward_mean}, cur_ckpt_idx
    )
    writer.add_scalars(
        "eval_SPL", {"average SPL": episode_spl_mean}, cur_ckpt_idx
    )
    writer.add_scalars(
        "eval_success", {"average success": episode_success_mean}, cur_ckpt_idx
    )
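# The inline pausing logic above (and the _pause_envs helper used by the
# class-based variants) boils down to slicing every per-environment tensor
# along its batch dimension. A minimal standalone sketch, assuming recurrent
# hidden states shaped (num_layers, num_envs, hidden_size); the names are
# illustrative, not the habitat-baselines API.
import torch


def pause_envs(envs_to_pause, hidden_states, masks, rewards):
    """Drop paused environments from every per-env tensor."""
    if len(envs_to_pause) == 0:
        return hidden_states, masks, rewards
    keep = [i for i in range(masks.shape[0]) if i not in set(envs_to_pause)]
    # Hidden states carry the batch on dim 1; everything else on dim 0.
    hidden_states = hidden_states[:, keep]
    masks = masks[keep]
    rewards = rewards[keep]
    return hidden_states, masks, rewards


# Example: pausing env 1 of 3 shrinks the batch dimension from 3 to 2.
h, m, r = pause_envs(
    [1], torch.zeros(2, 3, 16), torch.zeros(3, 1), torch.zeros(3, 1)
)
assert h.shape == (2, 2, 16) and m.shape == (2, 1) and r.shape == (2, 1)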
def test_noise_models_rgbd():
    DEMO_MODE = False
    N_STEPS = 100

    config = get_config()
    config.defrost()
    config.SIMULATOR.SCENE = (
        "data/scene_datasets/habitat-test-scenes/skokloster-castle.glb"
    )
    config.SIMULATOR.AGENT_0.SENSORS = ["RGB_SENSOR", "DEPTH_SENSOR"]
    config.freeze()
    if not os.path.exists(config.SIMULATOR.SCENE):
        pytest.skip("Please download Habitat test data to data folder.")

    valid_start_position = [-1.3731, 0.08431, 8.60692]

    expected_pointgoal = [0.1, 0.2, 0.3]
    goal_position = np.add(valid_start_position, expected_pointgoal)

    # starting quaternion is rotated 180 degree along z-axis, which
    # corresponds to simulator using z-negative as forward action
    start_rotation = [0, 0, 0, 1]
    test_episode = NavigationEpisode(
        episode_id="0",
        scene_id=config.SIMULATOR.SCENE,
        start_position=valid_start_position,
        start_rotation=start_rotation,
        goals=[NavigationGoal(position=goal_position)],
    )

    print(f"{test_episode}")
    with habitat.Env(config=config, dataset=None) as env:
        env.episode_iterator = iter([test_episode])

        no_noise_obs = [env.reset()]
        no_noise_states = [env.sim.get_agent_state()]

        actions = [
            sample_non_stop_action(env.action_space) for _ in range(N_STEPS)
        ]
        for action in actions:
            no_noise_obs.append(env.step(action))
            no_noise_states.append(env.sim.get_agent_state())
        env.close()

        config.defrost()

        config.SIMULATOR.RGB_SENSOR.NOISE_MODEL = "GaussianNoiseModel"
        config.SIMULATOR.RGB_SENSOR.NOISE_MODEL_KWARGS = habitat.Config()
        config.SIMULATOR.RGB_SENSOR.NOISE_MODEL_KWARGS.INTENSITY_CONSTANT = 0.5
        config.SIMULATOR.DEPTH_SENSOR.NOISE_MODEL = "RedwoodDepthNoiseModel"

        config.SIMULATOR.ACTION_SPACE_CONFIG = "pyrobotnoisy"
        config.SIMULATOR.NOISE_MODEL = habitat.Config()
        config.SIMULATOR.NOISE_MODEL.ROBOT = "LoCoBot"
        config.SIMULATOR.NOISE_MODEL.CONTROLLER = "Proportional"
        config.SIMULATOR.NOISE_MODEL.NOISE_MULTIPLIER = 0.5

        config.freeze()

        env = habitat.Env(config=config, dataset=None)
        env.episode_iterator = iter([test_episode])

        obs = env.reset()

        # np.float was a deprecated alias for the builtin float; use
        # np.float64 explicitly.
        assert np.linalg.norm(
            obs["rgb"].astype(np.float64)
            - no_noise_obs[0]["rgb"].astype(np.float64)
        ) > 1.5e-2 * np.linalg.norm(
            no_noise_obs[0]["rgb"].astype(np.float64)
        ), "No RGB noise detected."

        assert np.linalg.norm(
            obs["depth"].astype(np.float64)
            - no_noise_obs[0]["depth"].astype(np.float64)
        ) > 1.5e-2 * np.linalg.norm(
            no_noise_obs[0]["depth"].astype(np.float64)
        ), "No Depth noise detected."

        images = []
        state = env.sim.get_agent_state()
        angle_diffs = []
        pos_diffs = []
        for action in actions:
            prev_state = state
            obs = env.step(action)
            state = env.sim.get_agent_state()
            position_change = np.linalg.norm(
                np.array(state.position) - np.array(prev_state.position),
                ord=2,
            )

            if action["action"][:5] == "TURN_":
                angle_diff = abs(
                    angle_between_quaternions(
                        state.rotation, prev_state.rotation
                    )
                    - np.deg2rad(config.SIMULATOR.TURN_ANGLE)
                )
                angle_diffs.append(angle_diff)
            else:
                pos_diffs.append(
                    abs(position_change - config.SIMULATOR.FORWARD_STEP_SIZE)
                )

            if DEMO_MODE:
                images.append(observations_to_image(obs, {}))

        if DEMO_MODE:
            images_to_video(images, "data/video/test_noise", "test_noise")

        assert (
            np.mean(angle_diffs) > 0.025
        ), "No turn action actuation noise detected."
        assert (
            np.mean(pos_diffs) > 0.025
        ), "No forward action actuation noise detected."
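# The noise assertions above compare noisy and noise-free sensor readings by
# relative L2 distance against a 1.5e-2 threshold. A small helper that makes
# that check explicit; this is a sketch of the comparison, not part of the
# habitat test suite.
import numpy as np


def relative_l2_diff(noisy: np.ndarray, clean: np.ndarray) -> float:
    """Return ||noisy - clean|| / ||clean|| over float64 copies."""
    noisy = noisy.astype(np.float64)
    clean = clean.astype(np.float64)
    return float(np.linalg.norm(noisy - clean) / np.linalg.norm(clean))


# Usage mirroring the test: at least 1.5% relative deviation counts as noise.
rng = np.random.default_rng(0)
clean = rng.random((64, 64, 3))
noisy = clean + rng.normal(scale=0.05, size=clean.shape)
assert relative_l2_diff(noisy, clean) > 1.5e-2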
def run(config, env, max_steps):
    r"""Rolls out a random-action agent and logs per-update statistics.

    Returns:
        None
    """
    observations = env.reset()

    episode_rewards = torch.zeros(env.num_envs, 1)
    episode_counts = torch.zeros(env.num_envs, 1)
    episode_dist = torch.zeros(env.num_envs, 1)
    current_episode_reward = torch.zeros(env.num_envs, 1)

    window_episode_reward = deque(maxlen=max_steps)
    window_episode_counts = deque(maxlen=max_steps)
    dist_val = deque(maxlen=max_steps)

    t_start = time.time()
    env_time = 0
    pth_time = 0
    count_steps = 0
    count_checkpoints = 0

    for update in range(max_steps):
        print(update)
        reward_sum = 0
        dist_sum = 0
        step_count = 0
        rgb_frames = []

        if len(config.VIDEO_OPTION) > 0:
            os.makedirs(config.VIDEO_DIR, exist_ok=True)

        # get name of performance metric, e.g. "spl"
        metric_name = config.TASK_CONFIG.TASK.MEASUREMENTS[0]
        metric_cfg = getattr(config.TASK_CONFIG.TASK, metric_name)
        measure_type = baseline_registry.get_measure(metric_cfg.TYPE)

        for step in range(500):
            dones = [False]
            while not dones[0]:
                outputs = env.step([env.action_spaces[0].sample()])
                observations, rewards, dones, infos = [
                    list(x) for x in zip(*outputs)
                ]
                reward_sum += rewards[0]
                dist_sum += observations[0]["pointgoal_with_gps_compass"][0]
                step_count += 1
                frame = observations_to_image(observations[0], [])
                rgb_frames.append(frame)
            observations = env.reset()

        window_episode_reward.append(reward_sum / step_count)
        window_episode_counts.append(step_count)
        dist_val.append(dist_sum / step_count)

        generate_video(
            video_option=config.VIDEO_OPTION,
            video_dir=config.VIDEO_DIR,
            images=np.array(rgb_frames),
            episode_id=update,
            checkpoint_idx=0,
            metric_name="spl",
            metric_value=1.0,
        )
        rgb_frames = []

    np.savetxt(
        "window_episode_reward_ppo.csv", window_episode_reward, delimiter=","
    )
    np.savetxt(
        "window_episode_counts_ppo.csv", window_episode_counts, delimiter=","
    )
    # save the accumulated distance values; episode_dist is never updated
    np.savetxt("episode_dist_ppo.csv", dist_val, delimiter=",")

    env.close()
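# generate_video / images_to_video are habitat utilities not shown in this
# collection. A minimal sketch of what writing the collected rgb_frames to
# disk could look like, using imageio as an assumed backend; the real helpers
# also handle tensorboard logging and metric-based filename conventions.
import imageio
import numpy as np


def frames_to_video(frames, path: str, fps: int = 10) -> None:
    """Write a list of HxWx3 uint8 frames to a video file."""
    writer = imageio.get_writer(path, fps=fps)
    for frame in frames:
        writer.append_data(np.asarray(frame, dtype=np.uint8))
    writer.close()


# Usage (hypothetical path): frames_to_video(rgb_frames, "data/video/ep0.mp4")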
def _eval_checkpoint(
    self,
    checkpoint_path: str,
    writer: TensorboardWriter,
    checkpoint_index: int = 0,
) -> None:
    r"""Evaluates a single checkpoint.

    Args:
        checkpoint_path: path of checkpoint
        writer: tensorboard writer object for logging to tensorboard
        checkpoint_index: index of cur checkpoint for logging

    Returns:
        None
    """
    if self._is_distributed:
        raise RuntimeError("Evaluation does not support distributed mode")

    # Map location CPU is almost always better than mapping to a CUDA device.
    ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu")

    if self.config.EVAL.USE_CKPT_CONFIG:
        config = self._setup_eval_config(ckpt_dict["config"])
    else:
        config = self.config.clone()

    ppo_cfg = config.RL.PPO

    config.defrost()
    config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT
    config.freeze()

    if len(self.config.VIDEO_OPTION) > 0:
        config.defrost()
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
        config.freeze()

    if config.VERBOSE:
        logger.info(f"env config: {config}")

    self._init_envs(config)
    self._setup_actor_critic_agent(ppo_cfg)

    self.agent.load_state_dict(ckpt_dict["state_dict"])
    self.actor_critic = self.agent.actor_critic

    observations = self.envs.reset()
    batch = batch_obs(
        observations, device=self.device, cache=self._obs_batching_cache
    )
    batch = apply_obs_transforms_batch(batch, self.obs_transforms)

    current_episode_reward = torch.zeros(
        self.envs.num_envs, 1, device="cpu"
    )

    test_recurrent_hidden_states = torch.zeros(
        self.config.NUM_ENVIRONMENTS,
        self.actor_critic.net.num_recurrent_layers,
        ppo_cfg.hidden_size,
        device=self.device,
    )
    prev_actions = torch.zeros(
        self.config.NUM_ENVIRONMENTS,
        1,
        device=self.device,
        dtype=torch.long,
    )
    not_done_masks = torch.zeros(
        self.config.NUM_ENVIRONMENTS,
        1,
        device=self.device,
        dtype=torch.bool,
    )
    stats_episodes: Dict[
        Any, Any
    ] = {}  # dict of dicts that stores stats per episode

    rgb_frames = [
        [] for _ in range(self.config.NUM_ENVIRONMENTS)
    ]  # type: List[List[np.ndarray]]
    if len(self.config.VIDEO_OPTION) > 0:
        os.makedirs(self.config.VIDEO_DIR, exist_ok=True)

    number_of_eval_episodes = self.config.TEST_EPISODE_COUNT
    if number_of_eval_episodes == -1:
        number_of_eval_episodes = sum(self.envs.number_of_episodes)
    else:
        total_num_eps = sum(self.envs.number_of_episodes)
        if total_num_eps < number_of_eval_episodes:
            logger.warn(
                f"Config specified {number_of_eval_episodes} eval episodes"
                f", dataset only has {total_num_eps}."
            )
            logger.warn(f"Evaluating with {total_num_eps} instead.")
            number_of_eval_episodes = total_num_eps

    pbar = tqdm.tqdm(total=number_of_eval_episodes)
    self.actor_critic.eval()
    while (
        len(stats_episodes) < number_of_eval_episodes
        and self.envs.num_envs > 0
    ):
        current_episodes = self.envs.current_episodes()

        with torch.no_grad():
            (
                _,
                actions,
                _,
                test_recurrent_hidden_states,
            ) = self.actor_critic.act(
                batch,
                test_recurrent_hidden_states,
                prev_actions,
                not_done_masks,
                deterministic=False,
            )

            prev_actions.copy_(actions)  # type: ignore

        # NB: Move actions to CPU. If CUDA tensors are
        # sent in to env.step(), that will create CUDA contexts
        # in the subprocesses.
        # For backwards compatibility, we also call .item() to convert to
        # an int
        step_data = [a.item() for a in actions.to(device="cpu")]

        outputs = self.envs.step(step_data)

        observations, rewards_l, dones, infos = [
            list(x) for x in zip(*outputs)
        ]
        batch = batch_obs(
            observations,
            device=self.device,
            cache=self._obs_batching_cache,
        )
        batch = apply_obs_transforms_batch(batch, self.obs_transforms)

        not_done_masks = torch.tensor(
            [[not done] for done in dones],
            dtype=torch.bool,
            device="cpu",
        )

        rewards = torch.tensor(
            rewards_l, dtype=torch.float, device="cpu"
        ).unsqueeze(1)
        current_episode_reward += rewards
        next_episodes = self.envs.current_episodes()
        envs_to_pause = []
        n_envs = self.envs.num_envs
        for i in range(n_envs):
            if (
                next_episodes[i].scene_id,
                next_episodes[i].episode_id,
            ) in stats_episodes:
                envs_to_pause.append(i)

            # episode ended
            if not not_done_masks[i].item():
                pbar.update()
                episode_stats = {}
                episode_stats["reward"] = current_episode_reward[i].item()
                episode_stats.update(
                    self._extract_scalars_from_info(infos[i])
                )
                current_episode_reward[i] = 0
                # use scene_id + episode_id as unique id for storing stats
                stats_episodes[
                    (
                        current_episodes[i].scene_id,
                        current_episodes[i].episode_id,
                    )
                ] = episode_stats

                if len(self.config.VIDEO_OPTION) > 0:
                    generate_video(
                        video_option=self.config.VIDEO_OPTION,
                        video_dir=self.config.VIDEO_DIR,
                        images=rgb_frames[i],
                        episode_id=current_episodes[i].episode_id,
                        checkpoint_idx=checkpoint_index,
                        metrics=self._extract_scalars_from_info(infos[i]),
                        tb_writer=writer,
                    )

                    rgb_frames[i] = []

            # episode continues
            elif len(self.config.VIDEO_OPTION) > 0:
                # TODO move normalization / channel changing out of the policy and undo it here
                frame = observations_to_image(
                    {k: v[i] for k, v in batch.items()}, infos[i]
                )
                rgb_frames[i].append(frame)

        not_done_masks = not_done_masks.to(device=self.device)
        (
            self.envs,
            test_recurrent_hidden_states,
            not_done_masks,
            current_episode_reward,
            prev_actions,
            batch,
            rgb_frames,
        ) = self._pause_envs(
            envs_to_pause,
            self.envs,
            test_recurrent_hidden_states,
            not_done_masks,
            current_episode_reward,
            prev_actions,
            batch,
            rgb_frames,
        )

    num_episodes = len(stats_episodes)
    aggregated_stats = {}
    for stat_key in next(iter(stats_episodes.values())).keys():
        aggregated_stats[stat_key] = (
            sum(v[stat_key] for v in stats_episodes.values())
            / num_episodes
        )

    for k, v in aggregated_stats.items():
        logger.info(f"Average episode {k}: {v:.4f}")

    step_id = checkpoint_index
    if "extra_state" in ckpt_dict and "step" in ckpt_dict["extra_state"]:
        step_id = ckpt_dict["extra_state"]["step"]

    writer.add_scalars(
        "eval_reward",
        {"average reward": aggregated_stats["reward"]},
        step_id,
    )

    metrics = {k: v for k, v in aggregated_stats.items() if k != "reward"}
    if len(metrics) > 0:
        writer.add_scalars("eval_metrics", metrics, step_id)

    self.envs.close()
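# This variant switches not_done_masks to torch.bool and keeps them on CPU
# until after the bookkeeping loop. A minimal sketch of the convention and of
# how such masks typically gate recurrent state between steps; the
# broadcasting scheme below is illustrative, not lifted from the policy code.
import torch

dones = [False, True, False]
not_done_masks = torch.tensor([[not d] for d in dones], dtype=torch.bool)

# Zero the hidden state of any env whose episode just ended, assuming hidden
# states shaped (num_layers, num_envs, hidden_size).
hidden = torch.randn(2, 3, 16)
hidden = hidden * not_done_masks.t().unsqueeze(-1)  # broadcast over layers/features
assert torch.all(hidden[:, 1] == 0)  # env 1 finished, so its state is reset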
def _eval_checkpoint(
    self,
    checkpoint_path: str,
    writer: TensorboardWriter,
    checkpoint_index: int = 0,
) -> None:
    r"""Evaluates a single checkpoint.

    Args:
        checkpoint_path: path of checkpoint
        writer: tensorboard writer object for logging to tensorboard
        checkpoint_index: index of cur checkpoint for logging

    Returns:
        None
    """
    # Map location CPU is almost always better than mapping to a CUDA device.
    ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu")

    if self.config.EVAL.USE_CKPT_CONFIG:
        config = self._setup_eval_config(ckpt_dict["config"])
    else:
        config = self.config.clone()

    ppo_cfg = config.RL.PPO

    # config.defrost()
    # config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT
    # config.freeze()

    if len(self.config.VIDEO_OPTION) > 0:
        config.defrost()
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
        # config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
        config.freeze()

    logger.info(f"env config: {config}")
    self.envs = construct_envs(config, training=False)
    self._setup_actor_critic_agent(ppo_cfg)

    self.agent.load_state_dict(ckpt_dict["state_dict"])
    self.actor_critic = self.agent.actor_critic

    # get name of performance metric, e.g. "spl"
    metric_name = self.config.TASK_CONFIG.TASK.MEASUREMENTS[0]
    metric_cfg = getattr(self.config.TASK_CONFIG.TASK, metric_name)
    measure_type = baseline_registry.get_measure(metric_cfg.TYPE)
    assert measure_type is not None, "invalid measurement type {}".format(
        metric_cfg.TYPE
    )
    self.metric_uuid = measure_type(
        sim=None, task=None, config=None
    )._get_uuid()

    observations = self.envs.reset()
    batch = batch_obs(observations, self.device)

    current_episode_reward = torch.zeros(
        self.envs.num_envs, 1, device=self.device
    )

    test_recurrent_hidden_states = torch.zeros(
        self.actor_critic.net.num_recurrent_layers,
        self.config.NUM_PROCESSES,
        ppo_cfg.hidden_size,
        device=self.device,
    )
    prev_actions = torch.zeros(
        self.config.NUM_PROCESSES, 1, device=self.device, dtype=torch.long
    )
    not_done_masks = torch.zeros(
        self.config.NUM_PROCESSES, 1, device=self.device
    )
    stats_episodes = dict()  # dict of dicts that stores stats per episode

    rgb_frames = [
        [] for _ in range(self.config.NUM_PROCESSES)
    ]  # type: List[List[np.ndarray]]
    if len(self.config.VIDEO_OPTION) > 0:
        os.makedirs(self.config.VIDEO_DIR, exist_ok=True)

    self.actor_critic.eval()

    plan_time = 0
    steps = 0
    while (
        len(stats_episodes) < self.config.TEST_EPISODE_COUNT
        and self.envs.num_envs > 0
    ):
        current_episodes = self.envs.current_episodes()

        t0 = time.time()
        with torch.no_grad():
            (
                _,
                actions,
                _,
                test_recurrent_hidden_states,
            ) = self.actor_critic.act(
                batch,
                test_recurrent_hidden_states,
                prev_actions,
                not_done_masks,
                deterministic=False,
            )

            prev_actions.copy_(actions)
        plan_time += time.time() - t0
        steps += 1

        # outputs = self.envs.step([a[0].item() for a in actions])
        outputs = self.envs.step(
            data=[{'action': a[0].item()} for a in actions]
        )

        observations, rewards, dones, infos = [
            list(x) for x in zip(*outputs)
        ]
        batch = batch_obs(observations, self.device)

        not_done_masks = torch.tensor(
            [[0.0] if done else [1.0] for done in dones],
            dtype=torch.float,
            device=self.device,
        )

        rewards = torch.tensor(
            rewards, dtype=torch.float, device=self.device
        ).unsqueeze(1)
        current_episode_reward += rewards
        next_episodes = self.envs.current_episodes()
        envs_to_pause = []
        n_envs = self.envs.num_envs
        for i in range(n_envs):
            if (
                next_episodes[i].scene_id,
                next_episodes[i].episode_id,
            ) in stats_episodes:
                envs_to_pause.append(i)

            # episode ended
            if not_done_masks[i].item() == 0:
                episode_stats = infos[i].copy()
                del episode_stats['scene']
                del episode_stats['taken_action']
                episode_stats['reward'] = current_episode_reward[i].item()
                current_episode_reward[i] = 0
                # use scene_id + episode_id as unique id for storing stats
                stats_episodes[
                    (
                        current_episodes[i].scene_id,
                        current_episodes[i].episode_id,
                    )
                ] = episode_stats

                if len(self.config.VIDEO_OPTION) > 0:
                    generate_video(
                        video_option=self.config.VIDEO_OPTION,
                        video_dir=self.config.VIDEO_DIR,
                        images=rgb_frames[i],
                        scene_id=current_episodes[i]
                        .scene_id.split('/')[-1]
                        .split('.')[0],
                        episode_id=current_episodes[i].episode_id,
                        checkpoint_idx=checkpoint_index,
                        metric_name=self.metric_uuid,
                        metric_value=infos[i][self.metric_uuid],
                        tb_writer=writer,
                    )

                    rgb_frames[i] = []

            # episode continues
            elif len(self.config.VIDEO_OPTION) > 0:
                infos[i].pop('collisions')
                frame = observations_to_image(
                    {'rgb': observations[i]['image']}, infos[i]
                )
                rgb_frames[i].append(frame)

        (
            self.envs,
            test_recurrent_hidden_states,
            not_done_masks,
            current_episode_reward,
            prev_actions,
            batch,
            rgb_frames,
        ) = self._pause_envs(
            envs_to_pause,
            self.envs,
            test_recurrent_hidden_states,
            not_done_masks,
            current_episode_reward,
            prev_actions,
            batch,
            rgb_frames,
        )

    step_id = checkpoint_index
    if "extra_state" in ckpt_dict and "step" in ckpt_dict["extra_state"]:
        step_id = ckpt_dict["extra_state"]["step"]

    num_episodes = len(stats_episodes)
    mean_stats = dict()
    for stat_key in next(iter(stats_episodes.values())).keys():
        mean_stats[stat_key] = (
            sum([v[stat_key] for v in stats_episodes.values()])
            / num_episodes
        )
    mean_stats['plan_time'] = plan_time / steps

    for name, val in mean_stats.items():
        logger.info(f"Average episode {name}: {val:.6f}")
        writer.add_scalars(f'eval_{name}', {f'average {name}': val}, step_id)

    self.envs.close()
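# The aggregation pattern shared by all the eval variants above: average each
# per-episode scalar across episodes. A standalone sketch using a defaultdict
# instead of iterating the keys of the first entry; the names are
# illustrative, and the behavior matches when every episode reports the same
# keys.
from collections import defaultdict
from typing import Dict, Tuple


def aggregate_episode_stats(
    stats_episodes: Dict[Tuple[str, str], Dict[str, float]]
) -> Dict[str, float]:
    """Average each scalar metric over all recorded episodes."""
    totals: Dict[str, float] = defaultdict(float)
    for episode_stats in stats_episodes.values():
        for key, value in episode_stats.items():
            totals[key] += value
    return {k: v / len(stats_episodes) for k, v in totals.items()}


# Example: two episodes -> mean reward 1.5, mean spl 0.45.
stats = {
    ("scene_a", "0"): {"reward": 1.0, "spl": 0.4},
    ("scene_a", "1"): {"reward": 2.0, "spl": 0.5},
}
assert aggregate_episode_stats(stats) == {"reward": 1.5, "spl": 0.45}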