Example #1
def get_frames(env, batch_size=30):
    rgb_frames = []
    observations = env.reset()

    frame = observations_to_image(observations[0], [])
    rgb_frames.append(frame)
    dones = [False]
    while not dones[0]:
        outputs = env.step([env.action_spaces[0].sample()])
        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
        frame = observations_to_image(observations[0], [])
        rgb_frames.append(frame)
    return rgb_frames
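A minimal driver for get_frames might look like the following sketch; it assumes `env` is an already-constructed habitat-lab VectorEnv and uses habitat's images_to_video utility (the output paths are illustrative):

from habitat.utils.visualizations.utils import images_to_video

# Collect one episode of frames and write them to disk as a video.
frames = get_frames(env)
images_to_video(frames, output_dir="data/videos", video_name="episode_0")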
Example #2
    def _store_video_frame(
        self,
        obs: Observations,
        action: Optional[Union[np.ndarray, float]] = None,
        info: Optional[Dict[str, Any]] = None,
    ) -> None:
        new_obs = obs.copy()

        for key in ['image', 'depth']:
            if key not in new_obs:
                continue
            if new_obs[key].shape[0] < 200:
                # upscale image to make the resulting video more viewable
                new_obs[key] = np.repeat(np.repeat(new_obs[key], 4, axis=0),
                                         4,
                                         axis=1)
        if action is not None:  # `if action:` mishandles ndarray and zero-valued actions
            act = action * 0.9
            img_size = new_obs['image'].shape[0]
            start = img_size / 2
            end = img_size * (1 - act) / 2
            left = round(min(start, end))
            right = round(max(start, end))
            new_obs['image'][round(img_size * 0.9):round(img_size * 0.95),
                             round(left):round(right)] = np.array([0, 0, 255])

        new_obs['rgb'] = new_obs.pop('image')
        self._rgb_frames.append(observations_to_image(new_obs, info or {}))
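The bar-drawing arithmetic above can be isolated into a self-contained sketch (the function name and dummy frame are illustrative, not from the source):

import numpy as np

def draw_action_bar(image: np.ndarray, action: float) -> np.ndarray:
    # Shrink the action slightly, then map it to a horizontal span:
    # action = 0 yields an empty span at the midline; larger actions
    # extend the blue bar further from the centre column.
    act = action * 0.9
    img_size = image.shape[0]
    start = img_size / 2
    end = img_size * (1 - act) / 2
    left, right = round(min(start, end)), round(max(start, end))
    image[round(img_size * 0.9):round(img_size * 0.95), left:right] = (0, 0, 255)
    return image

frame = draw_action_bar(np.zeros((256, 256, 3), dtype=np.uint8), action=0.5)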
Example #3
def test_different_dim_observations_to_image():
    observations = {
        "1_rgb": np.random.rand(512, 512, 3),
        "2_rgb": np.random.rand(418, 418, 3),
        "1_depth": np.random.rand(128, 128, 1),
        "2_depth": np.random.rand(128, 128, 1),
    }
    info = {
        "collisions": {
            "is_collision": True
        },
        "top_down_map": {
            "map": np.random.randint(low=0, high=255, size=(300, 300)),
            "fog_of_war_mask": np.random.randint(low=0,
                                                 high=1,
                                                 size=(300, 300)),
            "agent_map_coord": (10, 10),
            "agent_angle": np.random.random(),
        },
    }
    image = observations_to_image(observations, info)
    assert image.shape == (
        512,
        1570,
        3,
    ), "Resulted image resolution doesn't match."
Example #4
    def render(self, mode='rgb_array'):
        if mode == "rgb_array":
            frame = observations_to_image(
                self.env._last_full_obs, self.env.unwrapped._env.get_metrics())
        else:
            raise ValueError(f"Render mode {mode} not currently supported.")

        self._viewer.display(frame)
Example #5
def save_map(observations, info, images):
    im = observations_to_image(observations, info)
    top_down_map = draw_top_down_map(info, im.shape[0])
    output_im = im
    output_im = append_text_to_image(output_im,
                                     observations["instruction"]["text"])
    images.append(output_im)
Example #6
    def render(self, mode: str = "rgb_array") -> np.ndarray:
        frame = None
        if mode == "rgb_array":
            frame = observations_to_image(self._last_obs,
                                          self._env._env.get_metrics())
        else:
            raise ValueError(f"Render mode {mode} not currently supported.")

        return frame
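For context, a render() wrapper like the two above is usually driven by a rollout loop along these lines (a sketch; the env construction and action sampling are assumptions):

# Sketch: roll out one episode and collect rendered frames.
obs = env.reset()
done = False
frames = []
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
    frames.append(env.render(mode="rgb_array"))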
Example #7
def following(config, env, keys):
    observation = env.reset()
    frames = list()
    audios = list()
    for key in keys:
        if key == 'w':
            action = HabitatSimActions.MOVE_FORWARD
        elif key == 'a':
            action = HabitatSimActions.TURN_LEFT
        elif key == 'd':
            action = HabitatSimActions.TURN_RIGHT
        elif key == 'f':
            action = HabitatSimActions.STOP
        else:
            continue  # skip unrecognized keys instead of reusing a stale action

        # --- Game logic should go here
        observation, reward, done, info = env.step(action=action)
        if env.get_done(None):
            break

        if config.TASK_CONFIG.SIMULATOR.CONTINUOUS_VIEW_CHANGE and 'intermediate' in observation:
            for obs in observation['intermediate']:
                frame = observations_to_image(obs, info)
                frames.append(frame)
        frame = observations_to_image(observation, info)
        frames.append(frame)
        audio = observation['audiogoal']
        audios.append(audio)

    env.close()

    # write frames and audio into videos
    video_dir = 'data/visualizations/demo'
    video_name = 'demo'
    fps = config.TASK_CONFIG.SIMULATOR.VIEW_CHANGE_FPS \
        if config.TASK_CONFIG.SIMULATOR.CONTINUOUS_VIEW_CHANGE else 1
    images_to_video_with_audio(
        frames,
        video_dir,
        video_name,
        audios,
        sr=config.TASK_CONFIG.SIMULATOR.AUDIO.RIR_SAMPLING_RATE,
        fps=fps)
Example #8
    def step(self, *args, **kwargs):
        intermediate_goal = kwargs["action"]
        self._previous_action = intermediate_goal
        goal = self.planner.get_map_coordinates(intermediate_goal)
        stop = int(self._config.TASK_CONFIG.TASK.ACTION_MAP.MAP_SIZE**2 //
                   2) == intermediate_goal
        observation = self._previous_observation
        cumulative_reward = 0
        done = False
        reaching_waypoint = False
        cant_reach_waypoint = False
        if len(self._config.VIDEO_OPTION) > 0:
            rgb_frames = list()
            audios = list()

        for step_count in range(self._config.PREDICTION_INTERVAL):
            if step_count != 0 and not self.planner.check_navigability(goal):
                cant_reach_waypoint = True
                break
            action = self.planner.plan(observation, goal, stop=stop)
            observation, reward, done, info = super().step({"action": action})
            if len(self._config.VIDEO_OPTION) > 0:
                if "rgb" not in observation:
                    observation["rgb"] = np.zeros(
                        (self.config.DISPLAY_RESOLUTION,
                         self.config.DISPLAY_RESOLUTION, 3))
                frame = observations_to_image(observation, info)
                rgb_frames.append(frame)
                audios.append(observation['audiogoal'])
            cumulative_reward += reward
            if done:
                self.planner.reset()
                observation = self.reset()
                break
            else:
                self.planner.update_map_and_graph(observation)
                # reaching intermediate goal
                x, y = self.planner.mapper.get_maps_and_agent_pose()[2:4]
                if (x - goal[0]) == (y - goal[1]) == 0:
                    reaching_waypoint = True
                    break

        if not done:
            self.planner.add_maps_to_observation(observation)
        self._previous_observation = observation
        info['reaching_waypoint'] = done or reaching_waypoint
        info['cant_reach_waypoint'] = cant_reach_waypoint
        if len(self._config.VIDEO_OPTION) > 0:
            assert len(rgb_frames) != 0
            info['rgb_frames'] = rgb_frames
            info['audios'] = audios

        return observation, cumulative_reward, done, info
Example #9
def interactive_demo(config, env):
    # Set the width and height of the screen [width, height]
    pygame.init()
    size = (728, 256)
    screen = pygame.display.set_mode(size)
    pygame.display.set_caption("Interactive Demo")
    # Loop until the user clicks the close button.
    done = False
    # Used to manage how fast the screen updates
    clock = pygame.time.Clock()

    frames = list()
    audios = list()
    observation = env.reset()
    rgb_image = np.swapaxes(observation['rgb'], 0, 1)
    # screen.blit(pygame.surfarray.make_surface(rgb_image), (0, 0))
    pygame.display.flip()
    # -------- Main Program Loop -----------
    keys = []
    while not done:
        # --- Main event loop
        def wait():
            while True:
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        pygame.quit()
                        sys.exit()
                    if event.type == pygame.KEYDOWN:
                        action = None
                        if event.key == pygame.K_w:  # w
                            action = HabitatSimActions.MOVE_FORWARD
                            keys.append('w')
                        elif event.key == pygame.K_a:  # a
                            action = HabitatSimActions.TURN_LEFT
                            keys.append('a')
                        elif event.key == pygame.K_d:  # d
                            action = HabitatSimActions.TURN_RIGHT
                            keys.append('d')
                        elif event.key == pygame.K_f:  # f
                            action = HabitatSimActions.STOP
                            keys.append('f')
                        if action is not None:
                            return action

        action = wait()
        # --- Game logic should go here
        observation, reward, done, info = env.step(action=action)
        if env.get_done(None):
            # observation = env.reset()
            break

        if config.TASK_CONFIG.SIMULATOR.CONTINUOUS_VIEW_CHANGE and 'intermediate' in observation:
            for obs in observation['intermediate']:
                frame = observations_to_image(obs, info)
                frames.append(frame)
        frame = observations_to_image(observation, info)
        frames.append(frame)
        frame = np.swapaxes(frame, 0, 1)
        audio = observation['audiogoal']
        audios.append(audio)

        # Here, we clear the screen to white. Don't put other drawing commands
        # above this, or they will be erased with this command.
        screen.fill((255, 255, 255))
        screen.blit(pygame.surfarray.make_surface(frame), (0, 0))
        # smaller_frame = block_reduce(frame, block_size=(down_sampling, down_sampling, 1), func=np.mean)
        # screen.blit(pygame.surfarray.make_surface(smaller_frame), (0, 0))

        # play sound
        # temp_file = 'data/temp/temp.wav'
        # sr = config.TASK_CONFIG.SIMULATOR.AUDIO.RIR_SAMPLING_RATE
        # audio = np.int16(audio * 32767).T
        # wavfile.write(temp_file, sr, audio)
        # wavfile.write(temp_file, sr, audio.T)
        # pygame.mixer.music.load(temp_file)
        # pygame.mixer.music.play(-1)

        # --- Go ahead and update the screen with what we've drawn.
        pygame.display.flip()

        # --- Limit the loop to 1 frame per second
        clock.tick(1)

    # Close the window and quit.
    pygame.quit()
    env.close()
    print('Keys: {}'.format(','.join(keys)))

    # write frames and audio into videos
    video_dir = 'data/visualizations/demo'
    video_name = 'demo'
    fps = config.TASK_CONFIG.SIMULATOR.VIEW_CHANGE_FPS \
        if config.TASK_CONFIG.SIMULATOR.CONTINUOUS_VIEW_CHANGE else 1
    images_to_video_with_audio(
        frames,
        video_dir,
        video_name,
        audios,
        sr=config.TASK_CONFIG.SIMULATOR.AUDIO.RIR_SAMPLING_RATE,
        fps=fps)
Example #10
    def _eval_checkpoint(
        self,
        checkpoint_path: str,
        writer: TensorboardWriter,
        checkpoint_index: int = 0,
    ) -> None:
        r"""Evaluates a single checkpoint.

        Args:
            checkpoint_path: path of checkpoint
            writer: tensorboard writer object for logging to tensorboard
            checkpoint_index: index of cur checkpoint for logging

        Returns:
            None
        """
        ckpt_dict = self.load_checkpoint(checkpoint_path,
                                         map_location=self.device)

        config = self._setup_eval_config(ckpt_dict["config"])
        ppo_cfg = config.RL.PPO

        if len(self.config.VIDEO_OPTION) > 0:
            config.defrost()
            config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
            config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
            config.freeze()

        logger.info(f"env config: {config}")
        self.envs = construct_envs(self.config,
                                   get_env_class(self.config.ENV_NAME))
        self._setup_actor_critic_agent(ppo_cfg)

        self.agent.load_state_dict(ckpt_dict["state_dict"])
        self.actor_critic = self.agent.actor_critic

        # get name of performance metric, e.g. "spl"
        metric_name = self.config.TASK_CONFIG.TASK.MEASUREMENTS[0]
        metric_cfg = getattr(self.config.TASK_CONFIG.TASK, metric_name)
        measure_type = baseline_registry.get_measure(metric_cfg.TYPE)
        assert measure_type is not None, "invalid measurement type {}".format(
            metric_cfg.TYPE)
        self.metric_uuid = measure_type(None, None)._get_uuid()

        observations = self.envs.reset()
        batch = batch_obs(observations)
        for sensor in batch:
            batch[sensor] = batch[sensor].to(self.device)

        current_episode_reward = torch.zeros(self.envs.num_envs,
                                             1,
                                             device=self.device)

        test_recurrent_hidden_states = torch.zeros(
            self.actor_critic.net.num_recurrent_layers,
            self.config.NUM_PROCESSES,
            ppo_cfg.hidden_size,
            device=self.device,
        )
        prev_actions = torch.zeros(self.config.NUM_PROCESSES,
                                   1,
                                   device=self.device,
                                   dtype=torch.long)
        not_done_masks = torch.zeros(self.config.NUM_PROCESSES,
                                     1,
                                     device=self.device)
        stats_episodes = dict()  # dict of dicts that stores stats per episode

        # `[[]] * n` would alias one shared list across all envs; build
        # independent per-env frame buffers instead.
        rgb_frames = [
            [] for _ in range(self.config.NUM_PROCESSES)
        ]  # type: List[List[np.ndarray]]
        if len(self.config.VIDEO_OPTION) > 0:
            os.makedirs(self.config.VIDEO_DIR, exist_ok=True)

        while (len(stats_episodes) < self.config.TEST_EPISODE_COUNT
               and self.envs.num_envs > 0):
            current_episodes = self.envs.current_episodes()

            with torch.no_grad():
                _, actions, _, test_recurrent_hidden_states = self.actor_critic.act(
                    batch,
                    test_recurrent_hidden_states,
                    prev_actions,
                    not_done_masks,
                    deterministic=False,
                )

                prev_actions.copy_(actions)

            outputs = self.envs.step([a[0].item() for a in actions])

            observations, rewards, dones, infos = [
                list(x) for x in zip(*outputs)
            ]
            batch = batch_obs(observations)
            for sensor in batch:
                batch[sensor] = batch[sensor].to(self.device)

            not_done_masks = torch.tensor(
                [[0.0] if done else [1.0] for done in dones],
                dtype=torch.float,
                device=self.device,
            )

            rewards = torch.tensor(rewards,
                                   dtype=torch.float,
                                   device=self.device).unsqueeze(1)
            current_episode_reward += rewards
            next_episodes = self.envs.current_episodes()
            envs_to_pause = []
            n_envs = self.envs.num_envs
            for i in range(n_envs):
                if (
                        next_episodes[i].scene_id,
                        next_episodes[i].episode_id,
                ) in stats_episodes:
                    envs_to_pause.append(i)

                # episode ended
                if not_done_masks[i].item() == 0:
                    episode_stats = dict()
                    episode_stats[self.metric_uuid] = infos[i][
                        self.metric_uuid]
                    episode_stats["success"] = int(
                        infos[i][self.metric_uuid] > 0)
                    episode_stats["reward"] = current_episode_reward[i].item()
                    current_episode_reward[i] = 0
                    # use scene_id + episode_id as unique id for storing stats
                    stats_episodes[(
                        current_episodes[i].scene_id,
                        current_episodes[i].episode_id,
                    )] = episode_stats

                    if len(self.config.VIDEO_OPTION) > 0:
                        generate_video(
                            video_option=self.config.VIDEO_OPTION,
                            video_dir=self.config.VIDEO_DIR,
                            images=rgb_frames[i],
                            episode_id=current_episodes[i].episode_id,
                            checkpoint_idx=checkpoint_index,
                            metric_name=self.metric_uuid,
                            metric_value=infos[i][self.metric_uuid],
                            tb_writer=writer,
                        )

                        rgb_frames[i] = []

                # episode continues
                elif len(self.config.VIDEO_OPTION) > 0:
                    frame = observations_to_image(observations[i], infos[i])
                    rgb_frames[i].append(frame)

            # pausing self.envs with no new episode
            if len(envs_to_pause) > 0:
                state_index = list(range(self.envs.num_envs))
                for idx in reversed(envs_to_pause):
                    state_index.pop(idx)
                    self.envs.pause_at(idx)

                # indexing along the batch dimensions
                test_recurrent_hidden_states = test_recurrent_hidden_states[
                    state_index]
                not_done_masks = not_done_masks[state_index]
                current_episode_reward = current_episode_reward[state_index]
                prev_actions = prev_actions[state_index]

                for k, v in batch.items():
                    batch[k] = v[state_index]

                if len(self.config.VIDEO_OPTION) > 0:
                    rgb_frames = [rgb_frames[i] for i in state_index]

        aggregated_stats = dict()
        for stat_key in next(iter(stats_episodes.values())).keys():
            aggregated_stats[stat_key] = sum(
                [v[stat_key] for v in stats_episodes.values()])
        num_episodes = len(stats_episodes)

        episode_reward_mean = aggregated_stats["reward"] / num_episodes
        episode_metric_mean = aggregated_stats[self.metric_uuid] / num_episodes
        episode_success_mean = aggregated_stats["success"] / num_episodes

        logger.info(f"Average episode reward: {episode_reward_mean:.6f}")
        logger.info(f"Average episode success: {episode_success_mean:.6f}")
        logger.info(
            f"Average episode {self.metric_uuid}: {episode_metric_mean:.6f}")

        writer.add_scalars(
            "eval_reward",
            {"average reward": episode_reward_mean},
            checkpoint_index,
        )
        writer.add_scalars(
            f"eval_{self.metric_uuid}",
            {f"average {self.metric_uuid}": episode_metric_mean},
            checkpoint_index,
        )
        writer.add_scalars(
            "eval_success",
            {"average success": episode_success_mean},
            checkpoint_index,
        )

        self.envs.close()
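In habitat_baselines this method is normally invoked by the trainer's outer evaluation loop; a manual call might look like this sketch (the trainer instance, log dir, and checkpoint path are assumptions):

from habitat_baselines.common.tensorboard_utils import TensorboardWriter

with TensorboardWriter("tb/eval", flush_secs=30) as writer:
    trainer._eval_checkpoint("data/checkpoints/ckpt.0.pth", writer,
                             checkpoint_index=0)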
Example #11
    def _eval_checkpoint(self,
                         checkpoint_path: str,
                         writer: TensorboardWriter,
                         checkpoint_index: int = 0,
                         log_diagnostics=[],
                         output_dir='.',
                         label='.',
                         num_eval_runs=1) -> None:
        r"""Evaluates a single checkpoint.

        Args:
            checkpoint_path: path of checkpoint
            writer: tensorboard writer object for logging to tensorboard
            checkpoint_index: index of cur checkpoint for logging

        Returns:
            None
        """
        if checkpoint_index == -1:
            ckpt_file = checkpoint_path.split('/')[-1]
            split_info = ckpt_file.split('.')
            checkpoint_index = split_info[1]
        # Map location CPU is almost always better than mapping to a CUDA device.
        ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu")

        if self.config.EVAL.USE_CKPT_CONFIG:
            config = self._setup_eval_config(ckpt_dict["config"])
        else:
            config = self.config.clone()

        ppo_cfg = config.RL.PPO
        task_cfg = config.TASK_CONFIG.TASK

        config.defrost()
        config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT
        config.freeze()

        if len(self.config.VIDEO_OPTION) > 0:
            config.defrost()
            config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
            config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
            config.freeze()

        logger.info(f"env config: {config}")
        self.envs = construct_envs(config, get_env_class(config.ENV_NAME))
        # pass in aux config if we're doing attention
        aux_cfg = self.config.RL.AUX_TASKS
        self._setup_actor_critic_agent(ppo_cfg, task_cfg, aux_cfg)

        # Check if we accidentally recorded `visual_resnet` in our checkpoint and drop it (it's redundant with `visual_encoder`)
        ckpt_dict['state_dict'] = {
            k: v
            for k, v in ckpt_dict['state_dict'].items()
            if 'visual_resnet' not in k
        }
        self.agent.load_state_dict(ckpt_dict["state_dict"])

        logger.info("agent number of trainable parameters: {}".format(
            sum(param.numel() for param in self.agent.parameters()
                if param.requires_grad)))

        self.actor_critic = self.agent.actor_critic

        observations = self.envs.reset()
        batch = batch_obs(observations, device=self.device)

        current_episode_reward = torch.zeros(self.envs.num_envs,
                                             1,
                                             device=self.device)

        test_recurrent_hidden_states = torch.zeros(
            self.actor_critic.net.num_recurrent_layers,
            self.config.NUM_PROCESSES,
            ppo_cfg.hidden_size,
            device=self.device,
        )
        _, num_recurrent_memories, _ = self._setup_auxiliary_tasks(
            aux_cfg, ppo_cfg, task_cfg, is_eval=True)
        if self.config.RL.PPO.policy in MULTIPLE_BELIEF_CLASSES:
            aux_tasks = self.config.RL.AUX_TASKS.tasks
            num_recurrent_memories = len(self.config.RL.AUX_TASKS.tasks)
            test_recurrent_hidden_states = test_recurrent_hidden_states.unsqueeze(
                2).repeat(1, 1, num_recurrent_memories, 1)

        prev_actions = torch.zeros(self.config.NUM_PROCESSES,
                                   1,
                                   device=self.device,
                                   dtype=torch.long)

        not_done_masks = torch.zeros(self.config.NUM_PROCESSES,
                                     1,
                                     device=self.device)
        stats_episodes = dict()  # dict of dicts that stores stats per episode

        rgb_frames = [[] for _ in range(self.config.NUM_PROCESSES)
                      ]  # type: List[List[np.ndarray]]

        if len(self.config.VIDEO_OPTION) > 0:
            os.makedirs(self.config.VIDEO_DIR, exist_ok=True)

        number_of_eval_episodes = self.config.TEST_EPISODE_COUNT
        if number_of_eval_episodes == -1:
            number_of_eval_episodes = sum(self.envs.number_of_episodes)
        else:
            total_num_eps = sum(self.envs.number_of_episodes)
            if total_num_eps < number_of_eval_episodes:
                logger.warn(
                    f"Config specified {number_of_eval_episodes} eval episodes"
                    f", dataset only has {total_num_eps}.")
                logger.warn(f"Evaluating with {total_num_eps} instead.")
                number_of_eval_episodes = total_num_eps

        videos_cap = 2  # number of videos to generate per checkpoint
        if len(log_diagnostics) > 0:
            videos_cap = 10
        # video_indices = random.sample(range(self.config.TEST_EPISODE_COUNT),
        # min(videos_cap, self.config.TEST_EPISODE_COUNT))
        video_indices = range(10)
        print(f"Videos: {video_indices}")

        total_stats = []
        dones_per_ep = dict()

        # Logging more extensive evaluation stats for analysis
        if len(log_diagnostics) > 0:
            d_stats = {}
            for d in log_diagnostics:
                d_stats[d] = [
                    [] for _ in range(self.config.NUM_PROCESSES)
                ]  # stored as nested list envs x timesteps x k (# tasks)

        pbar = tqdm.tqdm(total=number_of_eval_episodes * num_eval_runs)
        self.agent.eval()
        while (len(stats_episodes) < number_of_eval_episodes * num_eval_runs
               and self.envs.num_envs > 0):
            current_episodes = self.envs.current_episodes()
            with torch.no_grad():
                weights_output = None
                if self.config.RL.PPO.policy in MULTIPLE_BELIEF_CLASSES:
                    weights_output = torch.empty(self.envs.num_envs,
                                                 len(aux_tasks))
                (
                    _,
                    actions,
                    _,
                    test_recurrent_hidden_states,
                ) = self.actor_critic.act(batch,
                                          test_recurrent_hidden_states,
                                          prev_actions,
                                          not_done_masks,
                                          deterministic=False,
                                          weights_output=weights_output)
                prev_actions.copy_(actions)

                for i in range(self.envs.num_envs):
                    if Diagnostics.actions in log_diagnostics:
                        d_stats[Diagnostics.actions][i].append(
                            prev_actions[i].item())
                    if Diagnostics.weights in log_diagnostics:
                        aux_weights = None if weights_output is None else weights_output[
                            i]
                        if aux_weights is not None:
                            d_stats[Diagnostics.weights][i].append(
                                aux_weights.half().tolist())

            outputs = self.envs.step([a[0].item() for a in actions])

            observations, rewards, dones, infos = [
                list(x) for x in zip(*outputs)
            ]
            batch = batch_obs(observations, device=self.device)

            not_done_masks = torch.tensor(
                [[0.0] if done else [1.0] for done in dones],
                dtype=torch.float,
                device=self.device,
            )

            rewards = torch.tensor(rewards,
                                   dtype=torch.float,
                                   device=self.device).unsqueeze(1)
            current_episode_reward += rewards
            next_episodes = self.envs.current_episodes()
            envs_to_pause = []
            n_envs = self.envs.num_envs
            for i in range(n_envs):
                next_k = (
                    next_episodes[i].scene_id,
                    next_episodes[i].episode_id,
                )
                if dones_per_ep.get(next_k, 0) == num_eval_runs:
                    envs_to_pause.append(i)  # wait for the rest

                if not_done_masks[i].item() == 0:
                    episode_stats = dict()

                    episode_stats["reward"] = current_episode_reward[i].item()
                    episode_stats.update(
                        self._extract_scalars_from_info(infos[i]))

                    current_episode_reward[i] = 0
                    # use scene_id + episode_id as unique id for storing stats

                    k = (
                        current_episodes[i].scene_id,
                        current_episodes[i].episode_id,
                    )
                    dones_per_ep[k] = dones_per_ep.get(k, 0) + 1

                    if dones_per_ep.get(k, 0) == 1 and len(
                            self.config.VIDEO_OPTION) > 0 and len(
                                stats_episodes) in video_indices:
                        logger.info(f"Generating video {len(stats_episodes)}")
                        category = getattr(current_episodes[i],
                                           "object_category", "")
                        if category != "":
                            category += "_"
                        try:
                            generate_video(
                                video_option=self.config.VIDEO_OPTION,
                                video_dir=self.config.VIDEO_DIR,
                                images=rgb_frames[i],
                                episode_id=current_episodes[i].episode_id,
                                checkpoint_idx=checkpoint_index,
                                metrics=self._extract_scalars_from_info(
                                    infos[i]),
                                tag=f"{category}{label}",
                                tb_writer=writer,
                            )
                        except Exception as e:
                            logger.warning(str(e))
                    rgb_frames[i] = []

                    stats_episodes[(
                        current_episodes[i].scene_id,
                        current_episodes[i].episode_id,
                        dones_per_ep[k],
                    )] = episode_stats

                    if len(log_diagnostics) > 0:
                        diagnostic_info = dict()
                        for metric in log_diagnostics:
                            diagnostic_info[metric] = d_stats[metric][i]
                            d_stats[metric][i] = []
                        if Diagnostics.top_down_map in log_diagnostics:
                            top_down_map = torch.tensor([])
                            if len(self.config.VIDEO_OPTION) > 0:
                                top_down_map = infos[i]["top_down_map"]["map"]
                                top_down_map = maps.colorize_topdown_map(
                                    top_down_map, fog_of_war_mask=None)
                            diagnostic_info.update(
                                dict(top_down_map=top_down_map))
                        total_stats.append(
                            dict(
                                stats=episode_stats,
                                did_stop=bool(prev_actions[i] == 0),
                                episode_info=attr.asdict(current_episodes[i]),
                                info=diagnostic_info,
                            ))
                    pbar.update()

                # episode continues
                else:
                    if len(self.config.VIDEO_OPTION) > 0:
                        aux_weights = None if weights_output is None else weights_output[
                            i]
                        frame = observations_to_image(
                            observations[i], infos[i],
                            current_episode_reward[i].item(), aux_weights,
                            aux_tasks)
                        rgb_frames[i].append(frame)
                    if Diagnostics.gps in log_diagnostics:
                        d_stats[Diagnostics.gps][i].append(
                            observations[i]["gps"].tolist())
                    if Diagnostics.heading in log_diagnostics:
                        d_stats[Diagnostics.heading][i].append(
                            observations[i]["heading"].tolist())

            (
                self.envs,
                test_recurrent_hidden_states,
                not_done_masks,
                current_episode_reward,
                prev_actions,
                batch,
                rgb_frames,
            ) = self._pause_envs(
                envs_to_pause,
                self.envs,
                test_recurrent_hidden_states,
                not_done_masks,
                current_episode_reward,
                prev_actions,
                batch,
                rgb_frames,
            )

        num_episodes = len(stats_episodes)
        aggregated_stats = dict()
        for stat_key in next(iter(stats_episodes.values())).keys():
            aggregated_stats[stat_key] = (
                sum([v[stat_key]
                     for v in stats_episodes.values()]) / num_episodes)

        for k, v in aggregated_stats.items():
            logger.info(f"Average episode {k}: {v:.4f}")

        step_id = checkpoint_index
        if "extra_state" in ckpt_dict and "step" in ckpt_dict["extra_state"]:
            step_id = ckpt_dict["extra_state"]["step"]

        writer.add_scalars(
            "eval_reward",
            {"average reward": aggregated_stats["reward"]},
            step_id,
        )

        metrics = {k: v for k, v in aggregated_stats.items() if k != "reward"}
        if len(metrics) > 0:
            writer.add_scalars("eval_metrics", metrics, step_id)
            logger.info("eval_metrics")
            logger.info(metrics)
        if len(log_diagnostics) > 0:
            os.makedirs(output_dir, exist_ok=True)
            eval_fn = f"{label}.json"
            with open(os.path.join(output_dir, eval_fn), 'w',
                      encoding='utf-8') as f:
                json.dump(total_stats, f, ensure_ascii=False, indent=4)
        self.envs.close()
Example #12
    def _eval_checkpoint(self,
                         checkpoint_path: str,
                         writer: TensorboardWriter,
                         checkpoint_index: int = 0) -> Dict:
        r"""Evaluates a single checkpoint.

        Args:
            checkpoint_path: path of checkpoint
            writer: tensorboard writer object for logging to tensorboard
            checkpoint_index: index of cur checkpoint for logging

        Returns:
            None
        """
        random.seed(self.config.SEED)
        np.random.seed(self.config.SEED)
        torch.manual_seed(self.config.SEED)

        # Map location CPU is almost always better than mapping to a CUDA device.
        ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu")

        if self.config.EVAL.USE_CKPT_CONFIG:
            config = self._setup_eval_config(ckpt_dict["config"])
        else:
            config = self.config.clone()

        ppo_cfg = config.RL.PPO

        config.defrost()
        config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT
        if self.config.DISPLAY_RESOLUTION != config.TASK_CONFIG.SIMULATOR.DEPTH_SENSOR.WIDTH:
            model_resolution = config.TASK_CONFIG.SIMULATOR.DEPTH_SENSOR.WIDTH
            config.TASK_CONFIG.SIMULATOR.DEPTH_SENSOR.WIDTH = config.TASK_CONFIG.SIMULATOR.RGB_SENSOR.HEIGHT = \
                config.TASK_CONFIG.SIMULATOR.RGB_SENSOR.WIDTH = config.TASK_CONFIG.SIMULATOR.DEPTH_SENSOR.HEIGHT = \
                self.config.DISPLAY_RESOLUTION
        else:
            model_resolution = self.config.DISPLAY_RESOLUTION
        config.freeze()

        if len(self.config.VIDEO_OPTION) > 0:
            config.defrost()
            config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
            config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
            config.freeze()
        elif "top_down_map" in self.config.VISUALIZATION_OPTION:
            config.defrost()
            config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
            config.freeze()

        logger.info(f"env config: {config}")
        self.envs = construct_envs(config, get_env_class(config.ENV_NAME))
        if self.config.DISPLAY_RESOLUTION != model_resolution:
            observation_space = self.envs.observation_spaces[0]
            observation_space.spaces['depth'].shape = (model_resolution,
                                                       model_resolution, 1)
            observation_space.spaces['rgb'].shape = (model_resolution,
                                                     model_resolution, 3)  # RGB has 3 channels
        else:
            observation_space = self.envs.observation_spaces[0]
        self._setup_actor_critic_agent(ppo_cfg, observation_space)

        self.agent.load_state_dict(ckpt_dict["state_dict"])
        self.actor_critic = self.agent.actor_critic

        self.metric_uuids = []
        # get name of performance metric, e.g. "spl"
        for metric_name in self.config.TASK_CONFIG.TASK.MEASUREMENTS:
            metric_cfg = getattr(self.config.TASK_CONFIG.TASK, metric_name)
            measure_type = baseline_registry.get_measure(metric_cfg.TYPE)
            assert measure_type is not None, "invalid measurement type {}".format(
                metric_cfg.TYPE)
            self.metric_uuids.append(
                measure_type(sim=None, task=None, config=None)._get_uuid())

        observations = self.envs.reset()
        if self.config.DISPLAY_RESOLUTION != model_resolution:
            resize_observation(observations, model_resolution)
        batch = batch_obs(observations, self.device)

        current_episode_reward = torch.zeros(self.envs.num_envs,
                                             1,
                                             device=self.device)

        test_recurrent_hidden_states = torch.zeros(
            self.actor_critic.net.num_recurrent_layers,
            self.config.NUM_PROCESSES,
            ppo_cfg.hidden_size,
            device=self.device,
        )
        prev_actions = torch.zeros(self.config.NUM_PROCESSES,
                                   1,
                                   device=self.device,
                                   dtype=torch.long)
        not_done_masks = torch.zeros(self.config.NUM_PROCESSES,
                                     1,
                                     device=self.device)
        stats_episodes = dict()  # dict of dicts that stores stats per episode

        rgb_frames = [[] for _ in range(self.config.NUM_PROCESSES)
                      ]  # type: List[List[np.ndarray]]
        audios = [[] for _ in range(self.config.NUM_PROCESSES)]
        if len(self.config.VIDEO_OPTION) > 0:
            os.makedirs(self.config.VIDEO_DIR, exist_ok=True)

        t = tqdm(total=self.config.TEST_EPISODE_COUNT)
        while (len(stats_episodes) < self.config.TEST_EPISODE_COUNT
               and self.envs.num_envs > 0):
            current_episodes = self.envs.current_episodes()

            with torch.no_grad():
                _, actions, _, test_recurrent_hidden_states = self.actor_critic.act(
                    batch,
                    test_recurrent_hidden_states,
                    prev_actions,
                    not_done_masks,
                    deterministic=False)

                prev_actions.copy_(actions)

            outputs = self.envs.step([a[0].item() for a in actions])

            observations, rewards, dones, infos = [
                list(x) for x in zip(*outputs)
            ]
            for i in range(self.envs.num_envs):
                if len(self.config.VIDEO_OPTION) > 0:
                    if config.TASK_CONFIG.SIMULATOR.CONTINUOUS_VIEW_CHANGE and 'intermediate' in observations[
                            i]:
                        for observation in observations[i]['intermediate']:
                            frame = observations_to_image(
                                observation, infos[i])
                            rgb_frames[i].append(frame)
                        del observations[i]['intermediate']

                    if "rgb" not in observations[i]:
                        observations[i]["rgb"] = np.zeros(
                            (self.config.DISPLAY_RESOLUTION,
                             self.config.DISPLAY_RESOLUTION, 3))
                    frame = observations_to_image(observations[i], infos[i])
                    rgb_frames[i].append(frame)
                    audios[i].append(observations[i]['audiogoal'])

            if config.DISPLAY_RESOLUTION != model_resolution:
                resize_observation(observations, model_resolution)
            batch = batch_obs(observations, self.device)

            not_done_masks = torch.tensor(
                [[0.0] if done else [1.0] for done in dones],
                dtype=torch.float,
                device=self.device,
            )

            rewards = torch.tensor(rewards,
                                   dtype=torch.float,
                                   device=self.device).unsqueeze(1)
            current_episode_reward += rewards
            next_episodes = self.envs.current_episodes()
            envs_to_pause = []
            for i in range(self.envs.num_envs):
                # pause envs which runs out of episodes
                if (
                        next_episodes[i].scene_id,
                        next_episodes[i].episode_id,
                ) in stats_episodes:
                    envs_to_pause.append(i)

                # episode ended
                if not_done_masks[i].item() == 0:
                    episode_stats = dict()
                    for metric_uuid in self.metric_uuids:
                        episode_stats[metric_uuid] = infos[i][metric_uuid]
                    episode_stats["reward"] = current_episode_reward[i].item()
                    episode_stats['geodesic_distance'] = current_episodes[
                        i].info['geodesic_distance']
                    episode_stats['euclidean_distance'] = norm(
                        np.array(current_episodes[i].goals[0].position) -
                        np.array(current_episodes[i].start_position))
                    logging.debug(episode_stats)
                    current_episode_reward[i] = 0
                    # use scene_id + episode_id as unique id for storing stats
                    stats_episodes[(
                        current_episodes[i].scene_id,
                        current_episodes[i].episode_id,
                    )] = episode_stats
                    t.update()

                    if len(self.config.VIDEO_OPTION) > 0:
                        fps = self.config.TASK_CONFIG.SIMULATOR.VIEW_CHANGE_FPS \
                                    if self.config.TASK_CONFIG.SIMULATOR.CONTINUOUS_VIEW_CHANGE else 1
                        generate_video(
                            video_option=self.config.VIDEO_OPTION,
                            video_dir=self.config.VIDEO_DIR,
                            images=rgb_frames[i][:-1],
                            scene_name=current_episodes[i].scene_id.split(
                                '/')[3],
                            sound=current_episodes[i].info['sound'],
                            sr=self.config.TASK_CONFIG.SIMULATOR.AUDIO.
                            RIR_SAMPLING_RATE,
                            episode_id=current_episodes[i].episode_id,
                            checkpoint_idx=checkpoint_index,
                            metric_name='spl',
                            metric_value=infos[i]['spl'],
                            tb_writer=writer,
                            audios=audios[i][:-1],
                            fps=fps)

                        # observations has been reset but info has not
                        # to be consistent, do not use the last frame
                        rgb_frames[i] = []
                        audios[i] = []

                    if "top_down_map" in self.config.VISUALIZATION_OPTION:
                        top_down_map = plot_top_down_map(
                            infos[i],
                            dataset=self.config.TASK_CONFIG.SIMULATOR.
                            SCENE_DATASET)
                        scene = current_episodes[i].scene_id.split('/')[3]
                        writer.add_image('{}_{}_{}/{}'.format(
                            config.EVAL.SPLIT, scene,
                            current_episodes[i].episode_id,
                            config.BASE_TASK_CONFIG_PATH.split('/')[-1][:-5]),
                                         top_down_map,
                                         dataformats='WHC')

            (
                self.envs,
                test_recurrent_hidden_states,
                not_done_masks,
                current_episode_reward,
                prev_actions,
                batch,
                rgb_frames,
            ) = self._pause_envs(
                envs_to_pause,
                self.envs,
                test_recurrent_hidden_states,
                not_done_masks,
                current_episode_reward,
                prev_actions,
                batch,
                rgb_frames,
            )

        aggregated_stats = dict()
        for stat_key in next(iter(stats_episodes.values())).keys():
            aggregated_stats[stat_key] = sum(
                [v[stat_key] for v in stats_episodes.values()])
        num_episodes = len(stats_episodes)

        stats_file = os.path.join(
            config.TENSORBOARD_DIR,
            '{}_stats_{}.json'.format(config.EVAL.SPLIT, config.SEED))
        new_stats_episodes = {
            ','.join(key): value
            for key, value in stats_episodes.items()
        }
        with open(stats_file, 'w') as fo:
            json.dump(new_stats_episodes, fo)

        episode_reward_mean = aggregated_stats["reward"] / num_episodes
        episode_metrics_mean = {}
        for metric_uuid in self.metric_uuids:
            episode_metrics_mean[
                metric_uuid] = aggregated_stats[metric_uuid] / num_episodes

        logger.info(f"Average episode reward: {episode_reward_mean:.6f}")
        for metric_uuid in self.metric_uuids:
            logger.info(
                f"Average episode {metric_uuid}: {episode_metrics_mean[metric_uuid]:.6f}"
            )

        if not config.EVAL.SPLIT.startswith('test'):
            writer.add_scalar("{}/reward".format(config.EVAL.SPLIT),
                              episode_reward_mean, checkpoint_index)
            for metric_uuid in self.metric_uuids:
                writer.add_scalar(f"{config.EVAL.SPLIT}/{metric_uuid}",
                                  episode_metrics_mean[metric_uuid],
                                  checkpoint_index)

        self.envs.close()

        result = {'episode_reward_mean': episode_reward_mean}
        for metric_uuid in self.metric_uuids:
            result['episode_{}_mean'.format(
                metric_uuid)] = episode_metrics_mean[metric_uuid]

        return result
Example #13
    def local_evaluate(self,
                       agent: Agent,
                       num_episodes: Optional[int] = None,
                       control_period: Optional[float] = 1.0,
                       frame_rate: Optional[int] = 1):
        if num_episodes is None:
            num_episodes = len(self._env._env.episodes)
        else:
            assert num_episodes <= len(self._env._env.episodes), (
                "num_episodes({}) is larger than number of episodes "
                "in environment ({})".format(num_episodes,
                                             len(self._env._env.episodes)))

        assert num_episodes > 0, "num_episodes should be greater than 0"

        agg_metrics: Dict = defaultdict(float)

        writer = TensorboardWriter(
            'tb_benchmark/', flush_secs=30)  # flush_secs from base_trainer.py

        count_episodes = 0
        print("number of episodes: " + str(num_episodes))
        while count_episodes < num_episodes:
            print("working on episode " + str(count_episodes))
            observations_per_episode = []
            agent.reset()
            observations_per_action = self._env._env.reset()
            # initialize a physics-enabled sim env. Do this for every
            # episode, since sometimes assets get deallocated
            if self._enable_physics:
                self._env._env.disable_physics()
                self._env._env.enable_physics()

            frame_counter = 0
            # act until one episode is over
            while not self._env._env.episode_over:
                action = agent.act(observations_per_action)
                observations_per_action = reward_per_action = done_per_action = info_per_action = None
                if not self._enable_physics:
                    (observations_per_action, reward_per_action,
                     done_per_action, info_per_action) = self._env.step(action)
                else:
                    # step with physics. For now we use hard-coded time step of 1/60 secs
                    # (used in the rigid object tutorial in Habitat Sim)
                    (observations_per_action, reward_per_action,
                     done_per_action,
                     info_per_action) = self._env.step_physics(
                         action,
                         time_step=1.0 / 60.0,
                         control_period=control_period)
                # generate an output image for the action. The image includes observations
                # and a top-down map showing the agent's state in the environment
                # we use frame_rate (num. of frames per action) to reduce computational overhead
                if frame_counter % frame_rate == 0:
                    out_im_per_action = observations_to_image(
                        observations_per_action, info_per_action)
                    observations_per_episode.append(out_im_per_action)
                frame_counter = frame_counter + 1

            # episode ended
            # get per-episode metrics. for now we only extract
            # distance-to-goal, success, spl
            metrics = self._env._env.get_metrics()
            per_ep_metrics = {
                k: metrics[k]
                for k in ['distance_to_goal', 'success', 'spl']
            }
            # print distance_to_goal, success and spl
            for k, v in per_ep_metrics.items():
                print(f'{k},{v}')
            # calculate aggregated distance_to_goal, success and spl
            for m, v in per_ep_metrics.items():
                agg_metrics[m] += v
            count_episodes += 1
            # generate video
            generate_video(
                video_option=["disk", "tensorboard"],
                video_dir='video_benchmark_dir',
                images=observations_per_episode,
                episode_id=count_episodes - 1,
                checkpoint_idx=0,
                metrics=per_ep_metrics,
                tb_writer=writer,
            )

        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}

        return avg_metrics
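Driver code for this evaluator could look like the following sketch; the agent and evaluator class names are placeholders, not from the source:

agent = MyAgent()                     # any habitat.Agent implementation
evaluator = PhysicsChallenge(config)  # hypothetical class owning local_evaluate()
avg_metrics = evaluator.local_evaluate(agent, num_episodes=10, frame_rate=5)
for name, value in avg_metrics.items():
    print(f"{name}: {value:.4f}")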
Example #14
def play_env(env, args, config):
    render_steps_limit = None
    if args.no_render:
        render_steps_limit = DEFAULT_RENDER_STEPS_LIMIT

    use_arm_actions = None
    if args.load_actions is not None:
        with open(args.load_actions, "rb") as f:
            use_arm_actions = np.load(f)

    obs = env.reset()

    if not args.no_render:
        obs = env.step({"action": "EMPTY", "action_args": {}})
        draw_obs = observations_to_image(obs, {})
        pygame.init()
        screen = pygame.display.set_mode(
            [draw_obs.shape[1], draw_obs.shape[0]])

    i = 0
    target_fps = 60.0
    prev_time = time.time()
    all_obs = []
    total_reward = 0
    all_arm_actions = []

    while True:
        if render_steps_limit is not None and i > render_steps_limit:
            break
        step_result, arm_action = get_input_vel_ctlr(
            args.no_render,
            use_arm_actions[i] if use_arm_actions is not None else None,
            args,
            obs,
            env,
        )
        if step_result is None:
            break
        all_arm_actions.append(arm_action)
        i += 1
        if use_arm_actions is not None and i >= len(use_arm_actions):
            break

        # obs, reward, done, info = step_result
        obs = step_result
        reward = 0.0
        info = env.get_metrics()

        total_reward += reward

        use_ob = observations_to_image(obs, info)
        use_ob = overlay_frame(use_ob, info)

        draw_ob = use_ob[:]

        if not args.no_render:
            draw_ob = np.transpose(draw_ob, (1, 0, 2))
            draw_ob_surface = pygame.surfarray.make_surface(draw_ob)
            screen.blit(draw_ob_surface, (0, 0))
            pygame.display.update()
        if args.save_obs:
            all_obs.append(draw_ob)

        if not args.no_render:
            pygame.event.pump()
        if env.episode_over:
            env.reset()

        curr_time = time.time()
        diff = curr_time - prev_time
        delay = max(1.0 / target_fps - diff, 0)
        time.sleep(delay)
        prev_time = curr_time

    if args.save_actions:
        assert len(all_arm_actions) > 200
        all_arm_actions = np.array(all_arm_actions)[:200]
        save_dir = "orp/start_data/"
        if not osp.exists(save_dir):
            os.makedirs(save_dir)
        save_path = osp.join(save_dir, "bench_ac.txt")
        with open(save_path, "wb") as f:
            np.save(f, all_arm_actions)
        raise ValueError("done")

    if args.save_obs:
        all_obs = np.array(all_obs)
        all_obs = np.transpose(all_obs, (0, 2, 1, 3))
        make_video_cv2(all_obs, "interactive_play")
    if not args.no_render:
        pygame.quit()
Example #15
    def _eval_checkpoint(
        self,
        checkpoint_path: str,
        writer: TensorboardWriter,
        checkpoint_index: int = 0,
    ) -> None:
        r"""Evaluates a single checkpoint.

        Args:
            checkpoint_path: path of checkpoint
            writer: tensorboard writer object for logging to tensorboard
            checkpoint_index: index of cur checkpoint for logging

        Returns:
            None
        """
        self.add_new_based_on_cfg()

        # Map location CPU is almost always better than mapping to a CUDA device.
        ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu")

        # ==========================================================================================
        # -- Update config for eval
        if self.config.EVAL.USE_CKPT_CONFIG:
            config = self._setup_eval_config(ckpt_dict["config"])
        else:
            config = self.config.clone()

        ppo_cfg = config.RL.PPO

        # # Mostly for visualization
        # config.defrost()
        # config.TASK_CONFIG.SIMULATOR.HABITAT_SIM_V0.GPU_GPU = False
        # config.freeze()

        split = config.TASK_CONFIG.DATASET.SPLIT

        config.defrost()
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
        config.freeze()
        # ==========================================================================================

        num_procs = self.config.NUM_PROCESSES
        device = self.device
        cfg = self.config

        logger.info(f"env config: {config}")
        self.envs = construct_envs(config, get_env_class(self.config.ENV_NAME))
        num_envs = self.envs.num_envs

        self._setup_actor_critic_agent(ppo_cfg, train=False)

        self.agent.load_state_dict(ckpt_dict["state_dict"])
        self.actor_critic = self.agent.actor_critic
        self.r_policy = self.agent.actor_critic.reachability_policy

        aux_models = self.actor_critic.net.aux_models

        other_losses = dict({
            k: torch.zeros(num_envs, 1, device=device)
            for k in aux_models.keys()
        })
        other_losses_action = dict({
            k: torch.zeros(num_envs,
                           self.envs.action_spaces[0].n,
                           device=device)
            for k in aux_models.keys()
        })

        num_steps = torch.zeros(num_envs, 1, device=device)

        # Config aux models for eval per item in batch
        for k, maux in aux_models.items():
            maux.set_per_element_loss()

        total_loss = 0

        if config.EVAL_MODE:
            self.agent.eval()
            self.r_policy.eval()

        # get name of performance metric, e.g. "spl"
        metric_name = cfg.TASK_CONFIG.TASK.MEASUREMENTS[0]
        metric_cfg = getattr(cfg.TASK_CONFIG.TASK, metric_name)
        measure_type = baseline_registry.get_measure(metric_cfg.TYPE)
        assert measure_type is not None, "invalid measurement type {}".format(
            metric_cfg.TYPE)

        self.metric_uuid = measure_type(sim=None, task=None,
                                        config=None)._get_uuid()

        observations = self.envs.reset()
        batch = batch_obs_augment_aux(observations, self.envs.get_shared_mem())

        info_data_keys = ["discovered", "collisions_wall", "collisions_prox"]
        log_data_keys = [
            "current_episode_reward", "current_episode_go_reward"
        ] + info_data_keys
        log_data = {
            k: torch.zeros(num_envs, 1, device=device)
            for k in log_data_keys
        }
        info_data = {k: log_data[k] for k in info_data_keys}

        test_recurrent_hidden_states = torch.zeros(
            self.actor_critic.net.num_recurrent_layers,
            num_procs,
            ppo_cfg.hidden_size,
            device=device,
        )
        prev_actions = torch.zeros(num_procs,
                                   1,
                                   device=device,
                                   dtype=torch.long)
        not_done_masks = torch.zeros(num_procs, 1, device=device)

        stats_episodes = dict()  # dict of dicts that stores stats per episode
        stats_episodes_scenes = dict()  # number of collected stats per scene
        max_test_ep_count = cfg.TEST_EPISODE_COUNT

        # TODO this should depend on number of scenes :(
        # TODO But then envs shouldn't be paused but fast-fwd to next scene
        # TODO We consider num envs == num scenes
        max_ep_per_env = max_test_ep_count / float(num_envs)

        rgb_frames = [
            [] for _ in range(num_procs)
        ]  # type: List[List[np.ndarray]]

        if len(cfg.VIDEO_OPTION) > 0:
            os.makedirs(cfg.VIDEO_DIR, exist_ok=True)

        video_log_int = cfg.VIDEO_OPTION_INTERVAL
        num_frames = 0

        plot_pos = -1
        prev_true_pos = []
        prev_pred_pos = []

        while (len(stats_episodes) <= cfg.TEST_EPISODE_COUNT and num_envs > 0):
            current_episodes = self.envs.current_episodes()

            with torch.no_grad():
                prev_hidden = test_recurrent_hidden_states
                _, actions, _, test_recurrent_hidden_states, aux_out \
                    = self.actor_critic.act(
                        batch,
                        test_recurrent_hidden_states,
                        prev_actions,
                        not_done_masks,
                        deterministic=False
                    )

                prev_actions.copy_(actions)

                if 'action' in batch:
                    prev_actions = batch['action'].unsqueeze(1).to(
                        actions.device).long()

                for k, v in aux_out.items():
                    loss = aux_models[k].calc_loss(v, batch, prev_hidden,
                                                   prev_actions,
                                                   not_done_masks, actions)
                    total_loss += loss

                    if other_losses[k] is None:
                        other_losses[k] = loss
                    else:
                        other_losses[k] += loss.unsqueeze(1)
                    if len(prev_actions) == 1:
                        other_losses_action[k][0, prev_actions.item()] += \
                            loss.item()

                # ==================================================================================
                # - Hacky logs

                if plot_pos >= 0:
                    prev_true_pos.append(batch["gps_compass_start"]
                                         [plot_pos].data[:2].cpu().numpy())
                    prev_pred_pos.append(aux_out["rel_start_pos_reg"]
                                         [plot_pos].data.cpu().numpy() * 15)
                    if num_frames % 10 == 0:
                        xx, yy = [], []
                        for x, y in prev_true_pos:
                            xx.append(x)
                            yy.append(y)
                        plt.scatter(xx, yy, label="true_pos")
                        xx, yy = [], []
                        for x, y in prev_pred_pos:
                            xx.append(x)
                            yy.append(y)
                        plt.scatter(xx, yy, label="pred_pos")
                        plt.legend()
                        plt.show()
                        plt.waitforbuttonpress()
                        plt.close()
                # ==================================================================================

            num_steps += 1
            outputs = self.envs.step([a[0].item() for a in actions])

            observations, rewards, dones, infos = [
                list(x) for x in zip(*outputs)
            ]

            not_done_masks = torch.tensor(
                [[0.0] if done else [1.0] for done in dones],
                dtype=torch.float,
                device=device,
            )

            map_values = self._get_mapping(observations, aux_out)
            batch = batch_obs_augment_aux(observations,
                                          self.envs.get_shared_mem(),
                                          device=device,
                                          map_values=map_values,
                                          masks=not_done_masks)

            valid_map_size = [
                float(ifs["top_down_map"]["valid_map"].sum()) for ifs in infos
            ]
            discovered_factor = [
                infos[ix]["top_down_map"]["explored_map"].sum() /
                valid_map_size[ix] for ix in range(len(infos))
            ]

            seen_factor = [
                infos[ix]["top_down_map"]["ful_fog_of_war_mask"].sum() /
                valid_map_size[ix] for ix in range(len(infos))
            ]

            rewards = torch.tensor(rewards, dtype=torch.float,
                                   device=device).unsqueeze(1)

            log_data["current_episode_reward"] += rewards

            # -- Add intrinsic Reward
            if self.only_intrinsic_reward:
                rewards.zero_()

            if self.r_enabled:
                ir_rewards = self._add_intrinsic_reward(
                    batch, actions, rewards, not_done_masks)
                log_data["current_episode_go_reward"] += ir_rewards

                rewards += ir_rewards

            # Log other info from infos dict
            for iii, info in enumerate(infos):
                for k_info, v_info in info_data.items():
                    v_info[iii] += info[k_info]

            next_episodes = self.envs.current_episodes()

            envs_to_pause = []
            n_envs = num_envs

            for i in range(n_envs):
                scene = next_episodes[i].scene_id

                if scene not in stats_episodes_scenes:
                    stats_episodes_scenes[scene] = 0

                if stats_episodes_scenes[scene] >= max_ep_per_env:
                    envs_to_pause.append(i)

                # episode ended
                if not_done_masks[i].item() == 0:
                    episode_stats = dict()
                    episode_stats[self.metric_uuid] = infos[i][
                        self.metric_uuid]
                    episode_stats["success"] = int(
                        infos[i][self.metric_uuid] > 0)

                    for kk, vv in log_data.items():
                        episode_stats[kk] = vv[i].item()
                        vv[i] = 0

                    episode_stats["map_discovered"] = discovered_factor[i]
                    episode_stats["map_seen"] = seen_factor[i]

                    for k, v in other_losses.items():
                        episode_stats[k] = v[i].item() / num_steps[i].item()
                        other_losses_action[k][i].fill_(0)
                        other_losses[k][i] = 0

                    num_steps[i] = 0

                    # use scene_id + episode_id as unique id for storing stats
                    stats_episodes[(current_episodes[i].scene_id, current_episodes[i].episode_id)] \
                        = episode_stats

                    print(f"Episode {len(stats_episodes)} stats:",
                          episode_stats)

                    stats_episodes_scenes[current_episodes[i].scene_id] += 1

                    if (len(cfg.VIDEO_OPTION) > 0
                            and checkpoint_index % video_log_int == 0):
                        generate_video(
                            video_option=cfg.VIDEO_OPTION,
                            video_dir=cfg.VIDEO_DIR,
                            images=rgb_frames[i],
                            episode_id=current_episodes[i].episode_id,
                            checkpoint_idx=checkpoint_index,
                            metric_name=self.metric_uuid,
                            metric_value=infos[i][self.metric_uuid],
                            tb_writer=writer,
                        )

                        rgb_frames[i] = []

                # episode continues
                elif len(cfg.VIDEO_OPTION) > 0:
                    for k, v in observations[i].items():
                        if isinstance(v, torch.Tensor):
                            observations[i][k] = v.cpu().numpy()
                    frame = observations_to_image(observations[i], infos[i])
                    rgb_frames[i].append(frame)

            # Pop done envs:
            if len(envs_to_pause) > 0:
                s_index = list(range(num_envs))
                for idx in reversed(envs_to_pause):
                    s_index.pop(idx)

                for k, v in other_losses.items():
                    other_losses[k] = other_losses[k][s_index]

                for k, v in log_data.items():
                    log_data[k] = log_data[k][s_index]

            (
                self.envs,
                test_recurrent_hidden_states,
                not_done_masks,
                current_episode_reward,
                prev_actions,
                batch,
                rgb_frames,
            ) = self._pause_envs(
                envs_to_pause,
                self.envs,
                test_recurrent_hidden_states,
                not_done_masks,
                None,
                prev_actions,
                batch,
                rgb_frames,
            )

        aggregated_stats = dict()
        for stat_key in next(iter(stats_episodes.values())).keys():
            aggregated_stats[stat_key] = sum(
                [v[stat_key] for v in stats_episodes.values()])
        num_episodes = len(stats_episodes)

        episodes_agg_stats = dict()
        for k, v in aggregated_stats.items():
            episodes_agg_stats[k] = v / num_episodes
            logger.info(f"Average episode {k}: {episodes_agg_stats[k]:.6f}")

        for k, v in episodes_agg_stats.items():
            writer.add_scalars(f"eval_{k}", {f"{split}_average {k}": v},
                               checkpoint_index)
            print(f"[{checkpoint_index}] average {k}", v)

        self.envs.close()
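
The aggregation loop at the end of this example averages every per-episode scalar over all recorded episodes. A standalone sketch of the same pattern (the stats values are illustrative, not from the code above):

from typing import Dict, Tuple

def aggregate_episode_stats(
    stats_episodes: Dict[Tuple[str, str], Dict[str, float]]
) -> Dict[str, float]:
    # Average each scalar stat over all recorded episodes.
    num_episodes = len(stats_episodes)
    stat_keys = next(iter(stats_episodes.values())).keys()
    return {
        k: sum(ep[k] for ep in stats_episodes.values()) / num_episodes
        for k in stat_keys
    }

stats = {
    ("scene_a", "0"): {"spl": 0.8, "success": 1.0},
    ("scene_a", "1"): {"spl": 0.2, "success": 0.0},
}
print(aggregate_episode_stats(stats))  # {'spl': 0.5, 'success': 0.5}
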
Example #16
    def eval(self, checkpoint_path):
        r"""Evaluates a single checkpoint.

        Args:
            checkpoint_path: path of checkpoint

        Returns:
            None
        """
        self.device = (torch.device("cuda", self.config.TORCH_GPU_ID)
                       if torch.cuda.is_available() else torch.device("cpu"))
        # Map location CPU is almost always better than mapping to a CUDA device.
        ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu")

        if self.config.EVAL.USE_CKPT_CONFIG:
            config = self._setup_eval_config(ckpt_dict["config"])
        else:
            config = self.config.clone()

        ppo_cfg = config.RL.PPO

        config.defrost()
        config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT
        config.freeze()

        if len(self.config.VIDEO_OPTION) > 0:
            config.defrost()
            config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
            config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
            config.freeze()

        self.env = construct_envs(config, get_env_class(config.ENV_NAME))
        self._setup_actor_critic_agent(ppo_cfg)

        self.agent.load_state_dict(ckpt_dict["state_dict"])
        self.actor_critic = self.agent.actor_critic

        # get name of performance metric, e.g. "spl"
        metric_name = self.config.TASK_CONFIG.TASK.MEASUREMENTS[0]
        metric_cfg = getattr(self.config.TASK_CONFIG.TASK, metric_name)
        measure_type = baseline_registry.get_measure(metric_cfg.TYPE)
        assert measure_type is not None, "invalid measurement type {}".format(
            metric_cfg.TYPE)
        self.metric_uuid = measure_type(sim=None, task=None,
                                        config=None)._get_uuid()

        observations = self.env.reset()
        batch = batch_obs(observations, self.device)

        current_episode_reward = torch.zeros(self.env.num_envs,
                                             1,
                                             device=self.device)

        test_recurrent_hidden_states = torch.zeros(
            self.actor_critic.net.num_recurrent_layers,
            self.config.NUM_PROCESSES,
            ppo_cfg.hidden_size,
            device=self.device,
        )
        prev_actions = torch.zeros(self.config.NUM_PROCESSES,
                                   1,
                                   device=self.device,
                                   dtype=torch.long)
        not_done_masks = torch.zeros(self.config.NUM_PROCESSES,
                                     1,
                                     device=self.device)
        stats_episodes = dict()  # dict of dicts that stores stats per episode

        rgb_frames = [
            [] for _ in range(self.config.NUM_PROCESSES)
        ]  # type: List[List[np.ndarray]]
        if len(self.config.VIDEO_OPTION) > 0:
            os.makedirs(self.config.VIDEO_DIR, exist_ok=True)

        self.actor_critic.eval()
        while (len(stats_episodes) < self.config.TEST_EPISODE_COUNT
               and self.env.num_envs > 0):
            current_episodes = self.env.current_episodes()

            with torch.no_grad():
                (
                    _,
                    actions,
                    _,
                    test_recurrent_hidden_states,
                ) = self.actor_critic.act(
                    batch,
                    test_recurrent_hidden_states,
                    prev_actions,
                    not_done_masks,
                    deterministic=False,
                )

                prev_actions.copy_(actions)

            outputs = self.env.step([a[0].item() for a in actions])

            observations, rewards, dones, infos = [
                list(x) for x in zip(*outputs)
            ]
            batch = batch_obs(observations, self.device)

            not_done_masks = torch.tensor(
                [[0.0] if done else [1.0] for done in dones],
                dtype=torch.float,
                device=self.device,
            )

            rewards = torch.tensor(rewards,
                                   dtype=torch.float,
                                   device=self.device).unsqueeze(1)
            current_episode_reward += rewards
            next_episodes = self.env.current_episodes()
            envs_to_pause = []
            n_envs = self.env.num_envs
            for i in range(n_envs):
                if (
                        next_episodes[i].scene_id,
                        next_episodes[i].episode_id,
                ) in stats_episodes:
                    envs_to_pause.append(i)

                # episode ended
                if not_done_masks[i].item() == 0:
                    episode_stats = dict()
                    episode_stats[self.metric_uuid] = infos[i][
                        self.metric_uuid]
                    episode_stats["success"] = int(
                        infos[i][self.metric_uuid] > 0)
                    episode_stats["reward"] = current_episode_reward[i].item()
                    current_episode_reward[i] = 0
                    # use scene_id + episode_id as unique id for storing stats
                    stats_episodes[(
                        current_episodes[i].scene_id,
                        current_episodes[i].episode_id,
                    )] = episode_stats

                    if len(self.config.VIDEO_OPTION) > 0:
                        generate_video(
                            video_option=self.config.VIDEO_OPTION,
                            video_dir=self.config.VIDEO_DIR,
                            images=rgb_frames[i],
                            episode_id=current_episodes[i].episode_id,
                            checkpoint_idx=0,
                            metric_name=self.metric_uuid,
                            metric_value=infos[i][self.metric_uuid],
                        )

                        rgb_frames[i] = []

                # episode continues
                elif len(self.config.VIDEO_OPTION) > 0:
                    frame = observations_to_image(observations[i], infos[i])
                    rgb_frames[i].append(frame)

            (
                self.env,
                test_recurrent_hidden_states,
                not_done_masks,
                current_episode_reward,
                prev_actions,
                batch,
                rgb_frames,
            ) = self._pause_envs(
                envs_to_pause,
                self.env,
                test_recurrent_hidden_states,
                not_done_masks,
                current_episode_reward,
                prev_actions,
                batch,
                rgb_frames,
            )

        aggregated_stats = dict()
        for stat_key in next(iter(stats_episodes.values())).keys():
            aggregated_stats[stat_key] = sum(
                [v[stat_key] for v in stats_episodes.values()])
        num_episodes = len(stats_episodes)

        episode_reward_mean = aggregated_stats["reward"] / num_episodes
        episode_metric_mean = aggregated_stats[self.metric_uuid] / num_episodes
        episode_success_mean = aggregated_stats["success"] / num_episodes

        print(f"Average episode reward: {episode_reward_mean:.6f}")
        print(f"Average episode success: {episode_success_mean:.6f}")
        print(f"Average episode {self.metric_uuid}: {episode_metric_mean:.6f}")

        if "extra_state" in ckpt_dict and "step" in ckpt_dict["extra_state"]:
            step_id = ckpt_dict["extra_state"]["step"]

        print("eval_reward", {"average reward": episode_reward_mean})
        print(
            f"eval_{self.metric_uuid}",
            {f"average {self.metric_uuid}": episode_metric_mean},
        )
        print("eval_success", {"average success": episode_success_mean})

        self.env.close()
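
Several of these examples construct not_done_masks the same way: 0.0 marks an environment whose episode just ended, 1.0 one that continues, so multiplying the recurrent hidden state by the mask resets it at episode boundaries. A minimal sketch of the convention:

import torch

dones = [False, True, False]
# 0.0 where the episode just ended, 1.0 where it continues.
not_done_masks = torch.tensor(
    [[0.0] if done else [1.0] for done in dones], dtype=torch.float
)
print(not_done_masks.squeeze(1))  # tensor([1., 0., 1.])
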
Example #17
    def _eval_checkpoint(
        self,
        checkpoint_path: str,
        writer: TensorboardWriter,
        checkpoint_index: int = 0,
    ) -> None:
        r"""Evaluates a single checkpoint.

        Args:
            checkpoint_path: path of checkpoint
            writer: tensorboard writer object for logging to tensorboard
            checkpoint_index: index of cur checkpoint for logging

        Returns:
            None
        """
        # Map location CPU is almost always better than mapping to a CUDA device.
        ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu")

        if self.config.EVAL.USE_CKPT_CONFIG:
            config = self._setup_eval_config(ckpt_dict["config"])
        else:
            config = self.config.clone()

        ppo_cfg = config.RL.PPO

        config.defrost()
        config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT
        config.freeze()

        if len(self.config.VIDEO_OPTION) > 0:
            config.defrost()
            config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
            config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
            config.freeze()

        logger.info(f"env config: {config}")
        self.envs = construct_envs(config, get_env_class(config.ENV_NAME))
        self._setup_actor_critic_agent(ppo_cfg)
        self.actor_critic.eval()

        if self._static_encoder:
            self._encoder = self.agent.actor_critic.net.visual_encoder

        self.agent.load_state_dict(ckpt_dict["state_dict"])
        self.actor_critic = self.agent.actor_critic

        observations = self.envs.reset()
        batch = batch_obs(observations, device=self.device)

        if self._static_encoder:
            batch["visual_features"] = self._encoder(batch)
            batch["prev_visual_features"] = torch.zeros_like(
                batch["visual_features"])

        current_episode_reward = torch.zeros(self.envs.num_envs,
                                             1,
                                             device=self.device)

        test_recurrent_hidden_states = torch.zeros(
            self.actor_critic.net.num_recurrent_layers,
            self.config.NUM_PROCESSES,
            ppo_cfg.hidden_size,
            device=self.device,
        )
        prev_actions = torch.zeros(self.config.NUM_PROCESSES,
                                   1,
                                   device=self.device,
                                   dtype=torch.long)
        not_done_masks = torch.zeros(self.config.NUM_PROCESSES,
                                     1,
                                     device=self.device)
        stats_episodes = dict()  # dict of dicts that stores stats per episode

        rgb_frames = [
            [] for _ in range(self.config.NUM_PROCESSES)
        ]  # type: List[List[np.ndarray]]
        if len(self.config.VIDEO_OPTION) > 0:
            os.makedirs(self.config.VIDEO_DIR, exist_ok=True)

        number_of_eval_episodes = self.config.TEST_EPISODE_COUNT
        if number_of_eval_episodes == -1:
            number_of_eval_episodes = sum(self.envs.number_of_episodes)
        else:
            total_num_eps = sum(self.envs.number_of_episodes)
            if total_num_eps < number_of_eval_episodes:
                logger.warn(
                    f"Config specified {number_of_eval_episodes} eval episodes"
                    f", dataset only has {total_num_eps}.")
                logger.warn(f"Evaluating with {total_num_eps} instead.")
                number_of_eval_episodes = total_num_eps

        pbar = tqdm.tqdm(total=number_of_eval_episodes)
        self.actor_critic.eval()
        while (len(stats_episodes) < number_of_eval_episodes
               and self.envs.num_envs > 0):
            current_episodes = self.envs.current_episodes()

            with torch.no_grad():
                step_batch = batch
                (
                    _,
                    actions,
                    _,
                    test_recurrent_hidden_states,
                ) = self.actor_critic.act(
                    batch,
                    test_recurrent_hidden_states,
                    prev_actions,
                    not_done_masks,
                    deterministic=False,
                )

                prev_actions.copy_(actions)

            outputs = self.envs.step([a[0].item() for a in actions])

            observations, rewards, dones, infos = [
                list(x) for x in zip(*outputs)
            ]
            batch = batch_obs(observations, device=self.device)

            if self._static_encoder:
                batch["prev_visual_features"] = step_batch["visual_features"]
                batch["visual_features"] = self._encoder(batch)

            not_done_masks = torch.tensor(
                [[0.0] if done else [1.0] for done in dones],
                dtype=torch.float,
                device=self.device,
            )

            rewards = torch.tensor(rewards,
                                   dtype=torch.float,
                                   device=self.device).unsqueeze(1)
            current_episode_reward += rewards
            next_episodes = self.envs.current_episodes()
            envs_to_pause = []
            n_envs = self.envs.num_envs
            for i in range(n_envs):
                if (
                        next_episodes[i].scene_id,
                        next_episodes[i].episode_id,
                ) in stats_episodes:
                    envs_to_pause.append(i)

                # episode ended
                if not_done_masks[i].item() == 0:
                    pbar.update()
                    episode_stats = dict()
                    episode_stats["reward"] = current_episode_reward[i].item()
                    episode_stats.update(
                        self._extract_scalars_from_info(infos[i]))
                    current_episode_reward[i] = 0
                    # use scene_id + episode_id as unique id for storing stats
                    stats_episodes[(
                        current_episodes[i].scene_id,
                        current_episodes[i].episode_id,
                    )] = episode_stats

                    if len(self.config.VIDEO_OPTION) > 0:
                        generate_video(
                            video_option=self.config.VIDEO_OPTION,
                            video_dir=self.config.VIDEO_DIR,
                            images=rgb_frames[i],
                            episode_id=current_episodes[i].episode_id,
                            checkpoint_idx=checkpoint_index,
                            metrics=self._extract_scalars_from_info(infos[i]),
                            tb_writer=writer,
                        )

                        rgb_frames[i] = []

                # episode continues
                elif len(self.config.VIDEO_OPTION) > 0:
                    frame = observations_to_image(observations[i], infos[i])
                    rgb_frames[i].append(frame)

            (
                self.envs,
                test_recurrent_hidden_states,
                not_done_masks,
                current_episode_reward,
                prev_actions,
                batch,
                rgb_frames,
            ) = self._pause_envs(
                envs_to_pause,
                self.envs,
                test_recurrent_hidden_states,
                not_done_masks,
                current_episode_reward,
                prev_actions,
                batch,
                rgb_frames,
            )

        num_episodes = len(stats_episodes)
        aggregated_stats = dict()
        for stat_key in next(iter(stats_episodes.values())).keys():
            aggregated_stats[stat_key] = (
                sum([v[stat_key]
                     for v in stats_episodes.values()]) / num_episodes)

        for k, v in aggregated_stats.items():
            logger.info(f"Average episode {k}: {v:.4f}")

        step_id = checkpoint_index
        if "extra_state" in ckpt_dict and "step" in ckpt_dict["extra_state"]:
            step_id = ckpt_dict["extra_state"]["step"]

        writer.add_scalars(
            "eval_reward",
            {"average reward": aggregated_stats["reward"]},
            step_id,
        )

        metrics = {k: v for k, v in aggregated_stats.items() if k != "reward"}
        if len(metrics) > 0:
            writer.add_scalars("eval_metrics", metrics, step_id)

        self.envs.close()
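
The helper self._extract_scalars_from_info used above is not shown in this example. A rough sketch of what such a helper plausibly does (flatten a nested info dict and keep only numeric leaves) follows; the name, the "." separator, and the filtering rules are assumptions, not the actual habitat-baselines implementation:

import numbers
from typing import Any, Dict

def extract_scalars_from_info(info: Dict[str, Any]) -> Dict[str, float]:
    # Flatten nested dicts, keeping only numeric leaves.
    scalars: Dict[str, float] = {}
    for k, v in info.items():
        if isinstance(v, dict):
            for sub_k, sub_v in extract_scalars_from_info(v).items():
                scalars[f"{k}.{sub_k}"] = sub_v
        elif isinstance(v, numbers.Number):
            scalars[k] = float(v)
    return scalars

info = {"spl": 0.7, "collisions": {"count": 3, "is_collision": True}}
print(extract_scalars_from_info(info))
# {'spl': 0.7, 'collisions.count': 3.0, 'collisions.is_collision': 1.0}
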
Example #18
    def _eval_checkpoint(
        self,
        checkpoint_path: str,
        writer: TensorboardWriter,
        cur_ckpt_idx: int = 0,
    ) -> None:
        r"""
        Evaluates a single checkpoint
        Args:
            checkpoint_path: path of checkpoint
            writer: tensorboard writer object for logging to tensorboard
            cur_ckpt_idx: index of cur checkpoint for logging

        Returns:
            None
        """
        ckpt_dict = self.load_checkpoint(checkpoint_path,
                                         map_location=self.device)

        ckpt_config = ckpt_dict["config"]
        config = self.config.clone()
        ckpt_cmd_opts = ckpt_config.CMD_TRAILING_OPTS
        eval_cmd_opts = config.CMD_TRAILING_OPTS

        # config merge priority: eval_opts > ckpt_opts > eval_cfg > ckpt_cfg
        # first line for old checkpoint compatibility
        config.merge_from_other_cfg(ckpt_config)
        config.merge_from_other_cfg(self.config)
        config.merge_from_list(ckpt_cmd_opts)
        config.merge_from_list(eval_cmd_opts)

        ppo_cfg = config.TRAINER.RL.PPO
        config.TASK_CONFIG.defrost()
        config.TASK_CONFIG.DATASET.SPLIT = "val"
        agent_sensors = ppo_cfg.sensors.strip().split(",")
        config.TASK_CONFIG.SIMULATOR.AGENT_0.SENSORS = agent_sensors
        if self.video_option:
            config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
            config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
        config.freeze()

        logger.info(f"env config: {config}")
        self.envs = construct_envs(config, NavRLEnv)
        self._setup_actor_critic_agent(ppo_cfg)

        self.agent.load_state_dict(ckpt_dict["state_dict"])
        self.actor_critic = self.agent.actor_critic

        observations = self.envs.reset()
        batch = batch_obs(observations)
        for sensor in batch:
            batch[sensor] = batch[sensor].to(self.device)

        current_episode_reward = torch.zeros(self.envs.num_envs,
                                             1,
                                             device=self.device)

        test_recurrent_hidden_states = torch.zeros(ppo_cfg.num_processes,
                                                   ppo_cfg.hidden_size,
                                                   device=self.device)
        not_done_masks = torch.zeros(ppo_cfg.num_processes,
                                     1,
                                     device=self.device)
        stats_episodes = dict()  # dict of dicts that stores stats per episode

        rgb_frames = [
            [] for _ in range(ppo_cfg.num_processes)
        ]  # type: List[List[np.ndarray]]
        if self.video_option:
            os.makedirs(ppo_cfg.video_dir, exist_ok=True)

        while (len(stats_episodes) < ppo_cfg.count_test_episodes
               and self.envs.num_envs > 0):
            current_episodes = self.envs.current_episodes()

            with torch.no_grad():
                _, actions, _, test_recurrent_hidden_states = self.actor_critic.act(
                    batch,
                    test_recurrent_hidden_states,
                    not_done_masks,
                    deterministic=False,
                )

            outputs = self.envs.step([a[0].item() for a in actions])

            observations, rewards, dones, infos = [
                list(x) for x in zip(*outputs)
            ]
            batch = batch_obs(observations)
            for sensor in batch:
                batch[sensor] = batch[sensor].to(self.device)

            not_done_masks = torch.tensor(
                [[0.0] if done else [1.0] for done in dones],
                dtype=torch.float,
                device=self.device,
            )

            rewards = torch.tensor(rewards,
                                   dtype=torch.float,
                                   device=self.device).unsqueeze(1)
            current_episode_reward += rewards
            next_episodes = self.envs.current_episodes()
            envs_to_pause = []
            n_envs = self.envs.num_envs
            for i in range(n_envs):
                if (
                        next_episodes[i].scene_id,
                        next_episodes[i].episode_id,
                ) in stats_episodes:
                    envs_to_pause.append(i)

                # episode ended
                if not_done_masks[i].item() == 0:
                    episode_stats = dict()
                    episode_stats["spl"] = infos[i]["spl"]
                    episode_stats["success"] = int(infos[i]["spl"] > 0)
                    episode_stats["reward"] = current_episode_reward[i].item()
                    current_episode_reward[i] = 0
                    # use scene_id + episode_id as unique id for storing stats
                    stats_episodes[(
                        current_episodes[i].scene_id,
                        current_episodes[i].episode_id,
                    )] = episode_stats
                    if self.video_option:
                        generate_video(
                            ppo_cfg,
                            rgb_frames[i],
                            current_episodes[i].episode_id,
                            cur_ckpt_idx,
                            infos[i]["spl"],
                            writer,
                        )
                        rgb_frames[i] = []

                # episode continues
                elif self.video_option:
                    frame = observations_to_image(observations[i], infos[i])
                    rgb_frames[i].append(frame)

            # pausing self.envs with no new episode
            if len(envs_to_pause) > 0:
                state_index = list(range(self.envs.num_envs))
                for idx in reversed(envs_to_pause):
                    state_index.pop(idx)
                    self.envs.pause_at(idx)

                # indexing along the batch dimensions
                test_recurrent_hidden_states = test_recurrent_hidden_states[
                    state_index]
                not_done_masks = not_done_masks[state_index]
                current_episode_reward = current_episode_reward[state_index]

                for k, v in batch.items():
                    batch[k] = v[state_index]

                if self.video_option:
                    rgb_frames = [rgb_frames[i] for i in state_index]

        aggregated_stats = dict()
        for stat_key in next(iter(stats_episodes.values())).keys():
            aggregated_stats[stat_key] = sum(
                [v[stat_key] for v in stats_episodes.values()])
        num_episodes = len(stats_episodes)

        episode_reward_mean = aggregated_stats["reward"] / num_episodes
        episode_spl_mean = aggregated_stats["spl"] / num_episodes
        episode_success_mean = aggregated_stats["success"] / num_episodes

        logger.info(
            "Average episode reward: {:.6f}".format(episode_reward_mean))
        logger.info(
            "Average episode success: {:.6f}".format(episode_success_mean))
        logger.info("Average episode SPL: {:.6f}".format(episode_spl_mean))

        writer.add_scalars(
            "eval_reward",
            {"average reward": episode_reward_mean},
            cur_ckpt_idx,
        )
        writer.add_scalars("eval_SPL", {"average SPL": episode_spl_mean},
                           cur_ckpt_idx)
        writer.add_scalars(
            "eval_success",
            {"average success": episode_success_mean},
            cur_ckpt_idx,
        )
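
The pause-and-reindex block above is a recurring pattern in these examples: when an environment has no new episode, drop its row from every batched tensor so the remaining rollout state stays aligned with the still-active environments. A standalone sketch with plain tensors:

import torch

num_envs = 4
hidden = torch.randn(1, num_envs, 512)  # (layers, envs, hidden)
masks = torch.ones(num_envs, 1)

envs_to_pause = [1, 3]
state_index = list(range(num_envs))
for idx in reversed(envs_to_pause):
    state_index.pop(idx)  # envs.pause_at(idx) would go here

# Index the env dimension so the remaining rows stay aligned.
hidden = hidden[:, state_index]
masks = masks[state_index]
print(hidden.shape, masks.shape)  # torch.Size([1, 2, 512]) torch.Size([2, 1])
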
Example #19
def eval_checkpoint(checkpoint_path, args, writer, cur_ckpt_idx=0):
    env_configs = []
    baseline_configs = []
    device = torch.device("cuda", args.pth_gpu_id)

    for _ in range(args.num_processes):
        config_env = get_config(config_paths=args.task_config)
        config_env.defrost()
        config_env.DATASET.SPLIT = "val"

        agent_sensors = args.sensors.strip().split(",")
        for sensor in agent_sensors:
            assert sensor in ["RGB_SENSOR", "DEPTH_SENSOR"]
        config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors
        if args.video_option:
            config_env.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
            config_env.TASK.MEASUREMENTS.append("COLLISIONS")
        config_env.freeze()
        env_configs.append(config_env)

        config_baseline = cfg_baseline()
        baseline_configs.append(config_baseline)

    assert len(baseline_configs) > 0, "empty list of datasets"

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            tuple(
                zip(env_configs, baseline_configs, range(args.num_processes))
            )
        ),
    )

    ckpt = torch.load(checkpoint_path, map_location=device)

    actor_critic = Policy(
        observation_space=envs.observation_spaces[0],
        action_space=envs.action_spaces[0],
        hidden_size=512,
        goal_sensor_uuid=env_configs[0].TASK.GOAL_SENSOR_UUID,
    )
    actor_critic.to(device)

    ppo = PPO(
        actor_critic=actor_critic,
        clip_param=0.1,
        ppo_epoch=4,
        num_mini_batch=32,
        value_loss_coef=0.5,
        entropy_coef=0.01,
        lr=2.5e-4,
        eps=1e-5,
        max_grad_norm=0.5,
    )

    ppo.load_state_dict(ckpt["state_dict"])

    actor_critic = ppo.actor_critic

    observations = envs.reset()
    batch = batch_obs(observations)
    for sensor in batch:
        batch[sensor] = batch[sensor].to(device)

    episode_rewards = torch.zeros(envs.num_envs, 1, device=device)
    episode_spls = torch.zeros(envs.num_envs, 1, device=device)
    episode_success = torch.zeros(envs.num_envs, 1, device=device)
    episode_counts = torch.zeros(envs.num_envs, 1, device=device)
    current_episode_reward = torch.zeros(envs.num_envs, 1, device=device)

    test_recurrent_hidden_states = torch.zeros(
        args.num_processes, args.hidden_size, device=device
    )
    not_done_masks = torch.zeros(args.num_processes, 1, device=device)
    stats_episodes = set()

    rgb_frames = None
    if args.video_option:
        rgb_frames = [[] for _ in range(args.num_processes)]
        os.makedirs(args.video_dir, exist_ok=True)

    while episode_counts.sum() < args.count_test_episodes:
        current_episodes = envs.current_episodes()

        with torch.no_grad():
            _, actions, _, test_recurrent_hidden_states = actor_critic.act(
                batch,
                test_recurrent_hidden_states,
                not_done_masks,
                deterministic=False,
            )

        outputs = envs.step([a[0].item() for a in actions])

        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
        batch = batch_obs(observations)
        for sensor in batch:
            batch[sensor] = batch[sensor].to(device)

        not_done_masks = torch.tensor(
            [[0.0] if done else [1.0] for done in dones],
            dtype=torch.float,
            device=device,
        )

        for i in range(not_done_masks.shape[0]):
            if not_done_masks[i].item() == 0:
                episode_spls[i] += infos[i]["spl"]
                if infos[i]["spl"] > 0:
                    episode_success[i] += 1

        rewards = torch.tensor(
            rewards, dtype=torch.float, device=device
        ).unsqueeze(1)
        current_episode_reward += rewards
        episode_rewards += (1 - not_done_masks) * current_episode_reward
        episode_counts += 1 - not_done_masks
        current_episode_reward *= not_done_masks

        next_episodes = envs.current_episodes()
        envs_to_pause = []
        n_envs = envs.num_envs
        for i in range(n_envs):
            if next_episodes[i].episode_id in stats_episodes:
                envs_to_pause.append(i)

            # episode ended
            if not_done_masks[i].item() == 0:
                stats_episodes.add(current_episodes[i].episode_id)
                if args.video_option:
                    generate_video(
                        args,
                        rgb_frames[i],
                        current_episodes[i].episode_id,
                        cur_ckpt_idx,
                        infos[i]["spl"],
                        writer,
                    )
                    rgb_frames[i] = []

            # episode continues
            elif args.video_option:
                frame = observations_to_image(observations[i], infos[i])
                rgb_frames[i].append(frame)

        # stop tracking ended episodes if they exist
        if len(envs_to_pause) > 0:
            state_index = list(range(envs.num_envs))
            for idx in reversed(envs_to_pause):
                state_index.pop(idx)
                envs.pause_at(idx)

            # indexing along the batch dimensions
            test_recurrent_hidden_states = test_recurrent_hidden_states[
                :, state_index
            ]
            not_done_masks = not_done_masks[state_index]
            current_episode_reward = current_episode_reward[state_index]

            for k, v in batch.items():
                batch[k] = v[state_index]

            if args.video_option:
                rgb_frames = [rgb_frames[i] for i in state_index]

    episode_reward_mean = (episode_rewards / episode_counts).mean().item()
    episode_spl_mean = (episode_spls / episode_counts).mean().item()
    episode_success_mean = (episode_success / episode_counts).mean().item()

    logger.info("Average episode reward: {:.6f}".format(episode_reward_mean))
    logger.info("Average episode success: {:.6f}".format(episode_success_mean))
    logger.info("Average episode SPL: {:.6f}".format(episode_spl_mean))

    writer.add_scalars(
        "eval_reward", {"average reward": episode_reward_mean}, cur_ckpt_idx
    )
    writer.add_scalars(
        "eval_SPL", {"average SPL": episode_spl_mean}, cur_ckpt_idx
    )
    writer.add_scalars(
        "eval_success", {"average success": episode_success_mean}, cur_ckpt_idx
    )
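
eval_checkpoint above accumulates per-episode returns without any branching by folding the done signal into arithmetic on the masks. A minimal sketch of that accounting for two environments, one of which just finished:

import torch

current = torch.tensor([[2.0], [5.0]])   # running return per env
not_done = torch.tensor([[1.0], [0.0]])  # env 1 just finished

episode_rewards = torch.zeros(2, 1)
episode_counts = torch.zeros(2, 1)

# Finished envs contribute their running return and are reset to zero.
episode_rewards += (1 - not_done) * current
episode_counts += 1 - not_done
current = current * not_done
print(episode_rewards.squeeze(1), current.squeeze(1))
# tensor([0., 5.]) tensor([2., 0.])
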
Example #20
def test_noise_models_rgbd():
    DEMO_MODE = False
    N_STEPS = 100

    config = get_config()
    config.defrost()
    config.SIMULATOR.SCENE = (
        "data/scene_datasets/habitat-test-scenes/skokloster-castle.glb")
    config.SIMULATOR.AGENT_0.SENSORS = ["RGB_SENSOR", "DEPTH_SENSOR"]
    config.freeze()
    if not os.path.exists(config.SIMULATOR.SCENE):
        pytest.skip("Please download Habitat test data to data folder.")

    valid_start_position = [-1.3731, 0.08431, 8.60692]

    expected_pointgoal = [0.1, 0.2, 0.3]
    goal_position = np.add(valid_start_position, expected_pointgoal)

    # the starting quaternion is rotated 180 degrees about the z-axis, which
    # corresponds to the simulator using negative z as the forward direction
    start_rotation = [0, 0, 0, 1]
    test_episode = NavigationEpisode(
        episode_id="0",
        scene_id=config.SIMULATOR.SCENE,
        start_position=valid_start_position,
        start_rotation=start_rotation,
        goals=[NavigationGoal(position=goal_position)],
    )

    print(f"{test_episode}")
    with habitat.Env(config=config, dataset=None) as env:

        env.episode_iterator = iter([test_episode])
        no_noise_obs = [env.reset()]
        no_noise_states = [env.sim.get_agent_state()]

        actions = [
            sample_non_stop_action(env.action_space) for _ in range(N_STEPS)
        ]
        for action in actions:
            no_noise_obs.append(env.step(action))
            no_noise_states.append(env.sim.get_agent_state())
        env.close()

        config.defrost()

        config.SIMULATOR.RGB_SENSOR.NOISE_MODEL = "GaussianNoiseModel"
        config.SIMULATOR.RGB_SENSOR.NOISE_MODEL_KWARGS = habitat.Config()
        config.SIMULATOR.RGB_SENSOR.NOISE_MODEL_KWARGS.INTENSITY_CONSTANT = 0.5
        config.SIMULATOR.DEPTH_SENSOR.NOISE_MODEL = "RedwoodDepthNoiseModel"

        config.SIMULATOR.ACTION_SPACE_CONFIG = "pyrobotnoisy"
        config.SIMULATOR.NOISE_MODEL = habitat.Config()
        config.SIMULATOR.NOISE_MODEL.ROBOT = "LoCoBot"
        config.SIMULATOR.NOISE_MODEL.CONTROLLER = "Proportional"
        config.SIMULATOR.NOISE_MODEL.NOISE_MULTIPLIER = 0.5

        config.freeze()

        env = habitat.Env(config=config, dataset=None)

        env.episode_iterator = iter([test_episode])

        obs = env.reset()
        # np.float was removed from recent NumPy releases; cast explicitly.
        assert np.linalg.norm(
            obs["rgb"].astype(np.float64) -
            no_noise_obs[0]["rgb"].astype(np.float64)
        ) > 1.5e-2 * np.linalg.norm(
            no_noise_obs[0]["rgb"].astype(np.float64)
        ), "No RGB noise detected."

        assert np.linalg.norm(
            obs["depth"].astype(np.float64) -
            no_noise_obs[0]["depth"].astype(np.float64)
        ) > 1.5e-2 * np.linalg.norm(
            no_noise_obs[0]["depth"].astype(np.float64)
        ), "No Depth noise detected."

        images = []
        state = env.sim.get_agent_state()
        angle_diffs = []
        pos_diffs = []
        for action in actions:
            prev_state = state
            obs = env.step(action)
            state = env.sim.get_agent_state()
            position_change = np.linalg.norm(np.array(state.position) -
                                             np.array(prev_state.position),
                                             ord=2)

            if action["action"][:5] == "TURN_":
                angle_diff = abs(
                    angle_between_quaternions(state.rotation,
                                              prev_state.rotation) -
                    np.deg2rad(config.SIMULATOR.TURN_ANGLE))
                angle_diffs.append(angle_diff)
            else:
                pos_diffs.append(
                    abs(position_change - config.SIMULATOR.FORWARD_STEP_SIZE))

            if DEMO_MODE:
                images.append(observations_to_image(obs, {}))

        if DEMO_MODE:
            images_to_video(images, "data/video/test_noise", "test_noise")

        assert (np.mean(angle_diffs) >
                0.025), "No turn action actuation noise detected."
        assert (np.mean(pos_diffs) >
                0.025), "No forward action actuation noise detected."
Example #21
def run(config, env, max_steps):
    r"""Rolls out random actions and records reward/distance statistics.

    Returns:
        None
    """

    observations = env.reset()
    batch = batch_obs(observations)

    batch = None
    observations = None

    episode_rewards = torch.zeros(env.num_envs, 1)
    episode_counts = torch.zeros(env.num_envs, 1)
    episode_dist = torch.zeros(env.num_envs, 1)
    current_episode_reward = torch.zeros(env.num_envs, 1)

    window_episode_reward = deque(maxlen=max_steps)
    window_episode_counts = deque(maxlen=max_steps)
    dist_val = deque(maxlen=max_steps)

    t_start = time.time()
    env_time = 0
    pth_time = 0
    count_steps = 0
    count_checkpoints = 0

    for update in range(max_steps):
        print(update)
        reward_sum = 0
        dist_sum = 0
        step_count = 0  # env steps taken this update
        rgb_frames = []
        if len(config.VIDEO_OPTION) > 0:
            os.makedirs(config.VIDEO_DIR, exist_ok=True)

        # get name of performance metric, e.g. "spl"
        metric_name = config.TASK_CONFIG.TASK.MEASUREMENTS[0]
        metric_cfg = getattr(config.TASK_CONFIG.TASK, metric_name)
        measure_type = baseline_registry.get_measure(metric_cfg.TYPE)

        for step in range(500):
            dones = [False]
            while not dones[0]:
                outputs = env.step([env.action_spaces[0].sample()])
                observations, rewards, dones, infos = [
                    list(x) for x in zip(*outputs)
                ]
                reward_sum += rewards[0]
                dist_sum += observations[0]['pointgoal_with_gps_compass'][0]
                step_count += 1

                frame = observations_to_image(observations[0], [])
                rgb_frames.append(frame)

        observations = env.reset()
        window_episode_reward.append(reward_sum / step_count)
        window_episode_counts.append(step_count)
        dist_val.append(dist_sum / step_count)

        generate_video(
            video_option=config.VIDEO_OPTION,
            video_dir=config.VIDEO_DIR,
            images=np.array(rgb_frames),
            episode_id=update,
            checkpoint_idx=0,
            metric_name="spl",
            metric_value=1.0,
        )

        rgb_frames = []

    np.savetxt("window_episode_reward_ppo.csv",
               window_episode_reward,
               delimiter=",")
    np.savetxt("window_episode_counts_ppo.csv",
               window_episode_counts,
               delimiter=",")
    np.savetxt("episode_dist_ppo.csv", episode_dist, delimiter=",")

    env.close()
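
generate_video above comes from habitat_baselines and handles disk and tensorboard output. If only the raw frames are at hand, a minimal alternative is to write them directly with imageio; this assumes the imageio-ffmpeg backend is installed and is not what the code above uses:

import imageio
import numpy as np

frames = [np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
          for _ in range(30)]
# Requires the imageio-ffmpeg plugin for .mp4 output.
imageio.mimwrite("random_rollout.mp4", frames, fps=10)
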
Example #22
    def _eval_checkpoint(
        self,
        checkpoint_path: str,
        writer: TensorboardWriter,
        checkpoint_index: int = 0,
    ) -> None:
        r"""Evaluates a single checkpoint.

        Args:
            checkpoint_path: path of checkpoint
            writer: tensorboard writer object for logging to tensorboard
            checkpoint_index: index of cur checkpoint for logging

        Returns:
            None
        """
        if self._is_distributed:
            raise RuntimeError("Evaluation does not support distributed mode")

        # Map location CPU is almost always better than mapping to a CUDA device.
        ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu")

        if self.config.EVAL.USE_CKPT_CONFIG:
            config = self._setup_eval_config(ckpt_dict["config"])
        else:
            config = self.config.clone()

        ppo_cfg = config.RL.PPO

        config.defrost()
        config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT
        config.freeze()

        if len(self.config.VIDEO_OPTION) > 0:
            config.defrost()
            config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
            config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
            config.freeze()

        if config.VERBOSE:
            logger.info(f"env config: {config}")

        self._init_envs(config)
        self._setup_actor_critic_agent(ppo_cfg)

        self.agent.load_state_dict(ckpt_dict["state_dict"])
        self.actor_critic = self.agent.actor_critic

        observations = self.envs.reset()
        batch = batch_obs(
            observations, device=self.device, cache=self._obs_batching_cache
        )
        batch = apply_obs_transforms_batch(batch, self.obs_transforms)

        current_episode_reward = torch.zeros(
            self.envs.num_envs, 1, device="cpu"
        )

        test_recurrent_hidden_states = torch.zeros(
            self.config.NUM_ENVIRONMENTS,
            self.actor_critic.net.num_recurrent_layers,
            ppo_cfg.hidden_size,
            device=self.device,
        )
        prev_actions = torch.zeros(
            self.config.NUM_ENVIRONMENTS,
            1,
            device=self.device,
            dtype=torch.long,
        )
        not_done_masks = torch.zeros(
            self.config.NUM_ENVIRONMENTS,
            1,
            device=self.device,
            dtype=torch.bool,
        )
        stats_episodes: Dict[
            Any, Any
        ] = {}  # dict of dicts that stores stats per episode

        rgb_frames = [
            [] for _ in range(self.config.NUM_ENVIRONMENTS)
        ]  # type: List[List[np.ndarray]]
        if len(self.config.VIDEO_OPTION) > 0:
            os.makedirs(self.config.VIDEO_DIR, exist_ok=True)

        number_of_eval_episodes = self.config.TEST_EPISODE_COUNT
        if number_of_eval_episodes == -1:
            number_of_eval_episodes = sum(self.envs.number_of_episodes)
        else:
            total_num_eps = sum(self.envs.number_of_episodes)
            if total_num_eps < number_of_eval_episodes:
                logger.warn(
                    f"Config specified {number_of_eval_episodes} eval episodes"
                    f", dataset only has {total_num_eps}."
                )
                logger.warn(f"Evaluating with {total_num_eps} instead.")
                number_of_eval_episodes = total_num_eps

        pbar = tqdm.tqdm(total=number_of_eval_episodes)
        self.actor_critic.eval()
        while (
            len(stats_episodes) < number_of_eval_episodes
            and self.envs.num_envs > 0
        ):
            current_episodes = self.envs.current_episodes()

            with torch.no_grad():
                (
                    _,
                    actions,
                    _,
                    test_recurrent_hidden_states,
                ) = self.actor_critic.act(
                    batch,
                    test_recurrent_hidden_states,
                    prev_actions,
                    not_done_masks,
                    deterministic=False,
                )

                prev_actions.copy_(actions)  # type: ignore

            # NB: Move actions to CPU.  If CUDA tensors are
            # sent in to env.step(), that will create CUDA contexts
            # in the subprocesses.
            # For backwards compatibility, we also call .item() to convert to
            # an int
            step_data = [a.item() for a in actions.to(device="cpu")]

            outputs = self.envs.step(step_data)

            observations, rewards_l, dones, infos = [
                list(x) for x in zip(*outputs)
            ]
            batch = batch_obs(
                observations,
                device=self.device,
                cache=self._obs_batching_cache,
            )
            batch = apply_obs_transforms_batch(batch, self.obs_transforms)

            not_done_masks = torch.tensor(
                [[not done] for done in dones],
                dtype=torch.bool,
                device="cpu",
            )

            rewards = torch.tensor(
                rewards_l, dtype=torch.float, device="cpu"
            ).unsqueeze(1)
            current_episode_reward += rewards
            next_episodes = self.envs.current_episodes()
            envs_to_pause = []
            n_envs = self.envs.num_envs
            for i in range(n_envs):
                if (
                    next_episodes[i].scene_id,
                    next_episodes[i].episode_id,
                ) in stats_episodes:
                    envs_to_pause.append(i)

                # episode ended
                if not not_done_masks[i].item():
                    pbar.update()
                    episode_stats = {}
                    episode_stats["reward"] = current_episode_reward[i].item()
                    episode_stats.update(
                        self._extract_scalars_from_info(infos[i])
                    )
                    current_episode_reward[i] = 0
                    # use scene_id + episode_id as unique id for storing stats
                    stats_episodes[
                        (
                            current_episodes[i].scene_id,
                            current_episodes[i].episode_id,
                        )
                    ] = episode_stats

                    if len(self.config.VIDEO_OPTION) > 0:
                        generate_video(
                            video_option=self.config.VIDEO_OPTION,
                            video_dir=self.config.VIDEO_DIR,
                            images=rgb_frames[i],
                            episode_id=current_episodes[i].episode_id,
                            checkpoint_idx=checkpoint_index,
                            metrics=self._extract_scalars_from_info(infos[i]),
                            tb_writer=writer,
                        )

                        rgb_frames[i] = []

                # episode continues
                elif len(self.config.VIDEO_OPTION) > 0:
                    # TODO move normalization / channel changing out of the policy and undo it here
                    frame = observations_to_image(
                        {k: v[i] for k, v in batch.items()}, infos[i]
                    )
                    rgb_frames[i].append(frame)

            not_done_masks = not_done_masks.to(device=self.device)
            (
                self.envs,
                test_recurrent_hidden_states,
                not_done_masks,
                current_episode_reward,
                prev_actions,
                batch,
                rgb_frames,
            ) = self._pause_envs(
                envs_to_pause,
                self.envs,
                test_recurrent_hidden_states,
                not_done_masks,
                current_episode_reward,
                prev_actions,
                batch,
                rgb_frames,
            )

        num_episodes = len(stats_episodes)
        aggregated_stats = {}
        for stat_key in next(iter(stats_episodes.values())).keys():
            aggregated_stats[stat_key] = (
                sum(v[stat_key] for v in stats_episodes.values())
                / num_episodes
            )

        for k, v in aggregated_stats.items():
            logger.info(f"Average episode {k}: {v:.4f}")

        step_id = checkpoint_index
        if "extra_state" in ckpt_dict and "step" in ckpt_dict["extra_state"]:
            step_id = ckpt_dict["extra_state"]["step"]

        writer.add_scalars(
            "eval_reward",
            {"average reward": aggregated_stats["reward"]},
            step_id,
        )

        metrics = {k: v for k, v in aggregated_stats.items() if k != "reward"}
        if len(metrics) > 0:
            writer.add_scalars("eval_metrics", metrics, step_id)

        self.envs.close()
Example #23
    def _eval_checkpoint(
        self,
        checkpoint_path: str,
        writer: TensorboardWriter,
        checkpoint_index: int = 0,
    ) -> None:
        r"""Evaluates a single checkpoint.

        Args:
            checkpoint_path: path of the checkpoint to evaluate
            writer: tensorboard writer object for logging to tensorboard
            checkpoint_index: index of the current checkpoint, used for logging

        Returns:
            None
        """
        # Map location CPU is almost always better than mapping to a CUDA device.
        ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu")

        if self.config.EVAL.USE_CKPT_CONFIG:
            config = self._setup_eval_config(ckpt_dict["config"])
        else:
            config = self.config.clone()

        ppo_cfg = config.RL.PPO

        # config.defrost()
        # config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT
        # config.freeze()

        if len(self.config.VIDEO_OPTION) > 0:
            config.defrost()
            config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
            # config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
            config.freeze()

        logger.info(f"env config: {config}")
        self.envs = construct_envs(config, training=False)
        self._setup_actor_critic_agent(ppo_cfg)

        self.agent.load_state_dict(ckpt_dict["state_dict"])
        self.actor_critic = self.agent.actor_critic

        # get name of performance metric, e.g. "spl"
        metric_name = self.config.TASK_CONFIG.TASK.MEASUREMENTS[0]
        metric_cfg = getattr(self.config.TASK_CONFIG.TASK, metric_name)
        measure_type = baseline_registry.get_measure(metric_cfg.TYPE)
        assert measure_type is not None, "invalid measurement type {}".format(
            metric_cfg.TYPE
        )
        self.metric_uuid = measure_type(
            sim=None, task=None, config=None
        )._get_uuid()

        observations = self.envs.reset()
        batch = batch_obs(observations, self.device)

        current_episode_reward = torch.zeros(
            self.envs.num_envs, 1, device=self.device
        )

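        # fresh recurrent state, previous actions and masks for the eval rollout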
        test_recurrent_hidden_states = torch.zeros(
            self.actor_critic.net.num_recurrent_layers,
            self.config.NUM_PROCESSES,
            ppo_cfg.hidden_size,
            device=self.device,
        )
        prev_actions = torch.zeros(
            self.config.NUM_PROCESSES, 1, device=self.device, dtype=torch.long
        )
        not_done_masks = torch.zeros(
            self.config.NUM_PROCESSES, 1, device=self.device
        )
        stats_episodes = dict()  # dict of dicts that stores stats per episode

        rgb_frames = [
            [] for _ in range(self.config.NUM_PROCESSES)
        ]  # type: List[List[np.ndarray]]
        if len(self.config.VIDEO_OPTION) > 0:
            os.makedirs(self.config.VIDEO_DIR, exist_ok=True)

        self.actor_critic.eval()

        plan_time = 0
        steps = 0
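
        # roll out until TEST_EPISODE_COUNT episodes finish or every env is paused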
        while (
            len(stats_episodes) < self.config.TEST_EPISODE_COUNT
            and self.envs.num_envs > 0
        ):
            current_episodes = self.envs.current_episodes()

            t0 = time.time()
            with torch.no_grad():
                (
                    _,
                    actions,
                    _,
                    test_recurrent_hidden_states,
                ) = self.actor_critic.act(
                    batch,
                    test_recurrent_hidden_states,
                    prev_actions,
                    not_done_masks,
                    deterministic=False,
                )

                prev_actions.copy_(actions)

            plan_time += time.time() - t0
            steps += 1

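            # step every env; this vector-env wrapper takes one action dict per env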
            # outputs = self.envs.step([a[0].item() for a in actions])
            outputs = self.envs.step(
                data=[{'action': a[0].item()} for a in actions]
            )

            observations, rewards, dones, infos = [
                list(x) for x in zip(*outputs)
            ]
            batch = batch_obs(observations, self.device)

            not_done_masks = torch.tensor(
                [[0.0] if done else [1.0] for done in dones],
                dtype=torch.float,
                device=self.device,
            )

            rewards = torch.tensor(
                rewards, dtype=torch.float, device=self.device
            ).unsqueeze(1)
            current_episode_reward += rewards
            next_episodes = self.envs.current_episodes()
            envs_to_pause = []
            n_envs = self.envs.num_envs
            for i in range(n_envs):
                if (
                    next_episodes[i].scene_id,
                    next_episodes[i].episode_id,
                ) in stats_episodes:
                    envs_to_pause.append(i)

                # episode ended
                if not_done_masks[i].item() == 0:
                    episode_stats = infos[i].copy()
                    del episode_stats['scene']
                    del episode_stats['taken_action']
                    episode_stats['reward'] = current_episode_reward[i].item()
                    current_episode_reward[i] = 0
                    # use scene_id + episode_id as unique id for storing stats
                    stats_episodes[
                        (
                            current_episodes[i].scene_id,
                            current_episodes[i].episode_id,
                        )
                    ] = episode_stats

                    if len(self.config.VIDEO_OPTION) > 0:
                        generate_video(
                            video_option=self.config.VIDEO_OPTION,
                            video_dir=self.config.VIDEO_DIR,
                            images=rgb_frames[i],
                            scene_id=current_episodes[i].scene_id.split('/')[-1].split('.')[0],
                            episode_id=current_episodes[i].episode_id,
                            checkpoint_idx=checkpoint_index,
                            metric_name=self.metric_uuid,
                            metric_value=infos[i][self.metric_uuid],
                            tb_writer=writer,
                        )

                        rgb_frames[i] = []

                # episode continues
                elif len(self.config.VIDEO_OPTION) > 0:
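                    # drop collision info so it is not drawn onto the saved frame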
                    infos[i].pop('collisions')
                    frame = observations_to_image({'rgb': observations[i]['image']}, infos[i])
                    rgb_frames[i].append(frame)

            (
                self.envs,
                test_recurrent_hidden_states,
                not_done_masks,
                current_episode_reward,
                prev_actions,
                batch,
                rgb_frames,
            ) = self._pause_envs(
                envs_to_pause,
                self.envs,
                test_recurrent_hidden_states,
                not_done_masks,
                current_episode_reward,
                prev_actions,
                batch,
                rgb_frames,
            )

        step_id = checkpoint_index
        if "extra_state" in ckpt_dict and "step" in ckpt_dict["extra_state"]:
            step_id = ckpt_dict["extra_state"]["step"]

        num_episodes = len(stats_episodes)
        mean_stats = dict()
        for stat_key in next(iter(stats_episodes.values())).keys():
            mean_stats[stat_key] = (
                sum(v[stat_key] for v in stats_episodes.values())
                / num_episodes
            )
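        # report average policy inference time per environment step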
        mean_stats['plan_time'] = plan_time / steps

        for name, val in mean_stats.items():
            logger.info(f"Average episode {name}: {val:.6f}")
            writer.add_scalars(f'eval_{name}', {f'average {name}': val}, step_id)

        self.envs.close()