def generate_video(
    args, images, episode_id, checkpoint_idx, spl, tb_writer, fps=10
) -> None:
    r"""Generate video according to specified information.

    Args:
        args: contains args.video_option and args.video_dir.
        images: list of images to be converted to video.
        episode_id: episode id for video naming.
        checkpoint_idx: checkpoint index for video naming.
        spl: SPL for this episode for video naming.
        tb_writer: tensorboard writer object for uploading video
        fps: fps for generated video

    Returns:
        None
    """
    if args.video_option and len(images) > 0:
        video_name = f"episode{episode_id}_ckpt{checkpoint_idx}_spl{spl:.2f}"
        if "disk" in args.video_option:
            images_to_video(images, args.video_dir, video_name)
        if "tensorboard" in args.video_option:
            tb_writer.add_video_from_np_images(
                f"episode{episode_id}", checkpoint_idx, images, fps=fps
            )
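# Every example on this page calls a shared images_to_video helper (in
# habitat-lab it lives in habitat.utils.visualizations.utils). As a rough,
# self-contained sketch of what that helper does, assuming imageio with the
# ffmpeg plugin is installed (the real helper also accepts a quality argument):
import os
from typing import List

import imageio
import numpy as np


def images_to_video_sketch(
    images: List[np.ndarray], output_dir: str, video_name: str, fps: int = 10
) -> None:
    # Write the frames to <output_dir>/<video_name>.mp4 at the requested fps.
    os.makedirs(output_dir, exist_ok=True)
    writer = imageio.get_writer(
        os.path.join(output_dir, video_name + ".mp4"), fps=fps
    )
    for frame in images:
        writer.append_data(frame)
    writer.close()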
Example #2
def generate_video(
    video_option: List[str],
    video_dir: Optional[str],
    images: List[np.ndarray],
    episode_id: int,
    checkpoint_idx: int,
    metric_name: str,
    metric_value: float,
    fps: int = 10,
) -> None:
    r"""Generate video according to specified information.

    Args:
        video_option: string list of "tensorboard" or "disk" or both.
        video_dir: path to target video directory.
        images: list of images to be converted to video.
        episode_id: episode id for video naming.
        checkpoint_idx: checkpoint index for video naming.
        metric_name: name of the performance metric, e.g. "spl".
        metric_value: value of metric.
        fps: fps for generated video.
    Returns:
        None
    """
    if len(images) < 1:
        return

    video_name = f"episode{episode_id}_ckpt{checkpoint_idx}_{metric_name}{metric_value:.2f}"
    if "disk" in video_option:
        assert video_dir is not None
        images_to_video(images, video_dir, video_name)
Example #3
def shortest_path_example(mode):
    config = habitat.get_config(config_paths="configs/tasks/pointnav.yaml")
    config.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
    config.TASK.SENSORS.append("HEADING_SENSOR")
    env = SimpleRLEnv(config=config)
    goal_radius = env.episodes[0].goals[0].radius
    if goal_radius is None:
        goal_radius = config.SIMULATOR.FORWARD_STEP_SIZE
    follower = ShortestPathFollower(env.habitat_env.sim, goal_radius, False)
    follower.mode = mode

    print("Environment creation successful")
    for episode in range(3):
        env.reset()
        dirname = os.path.join(
            IMAGE_DIR, "shortest_path_example", mode, "%02d" % episode
        )
        if os.path.exists(dirname):
            shutil.rmtree(dirname)
        os.makedirs(dirname)
        print("Agent stepping around inside environment.")
        images = []
        while not env.habitat_env.episode_over:
            best_action = follower.get_next_action(
                env.habitat_env.current_episode.goals[0].position
            )
            observations, reward, done, info = env.step(best_action.value)
            im = observations["rgb"]
            top_down_map = draw_top_down_map(
                info, observations["heading"], im.shape[0]
            )
            output_im = np.concatenate((im, top_down_map), axis=1)
            images.append(output_im)
        images_to_video(images, dirname, "trajectory")
        print("Episode finished")
Example #4
def generate_video(
    config: Config,
    images: List[np.ndarray],
    episode_id: int,
    checkpoint_idx: int,
    spl: float,
    tb_writer: Union[DummyWriter, TensorboardWriter],
    fps: int = 10,
) -> None:
    r"""
    Generate video according to specified information.
    Args:
        config: config object that contains video_option and video_dir.
        images: list of images to be converted to video.
        episode_id: episode id for video naming.
        checkpoint_idx: checkpoint index for video naming.
        spl: SPL for this episode for video naming.
        tb_writer: tensorboard writer object for uploading video
        fps: fps for generated video
    Returns:
        None
    """
    if config.video_option and len(images) > 0:
        video_name = f"episode{episode_id}_ckpt{checkpoint_idx}_spl{spl:.2f}"
        if "disk" in config.video_option:
            images_to_video(images, config.video_dir, video_name)
        if "tensorboard" in config.video_option:
            tb_writer.add_video_from_np_images(
                f"episode{episode_id}", checkpoint_idx, images, fps=fps
            )
Example #5
def reference_path_example(mode):
    """
    Saves a video of a shortest path follower agent navigating from a start
    position to a goal. Agent follows the ground truth reference path by
    navigating to intermediate viewpoints en route to goal.
    Args:
        mode: 'geodesic_path' or 'greedy'
    """
    config = habitat.get_config(
        config_paths="configs/test/habitat_r2r_vln_test.yaml")
    config.defrost()
    config.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
    config.TASK.SENSORS.append("HEADING_SENSOR")
    config.freeze()
    with SimpleRLEnv(config=config) as env:
        follower = ShortestPathFollower(env.habitat_env.sim,
                                        goal_radius=0.5,
                                        return_one_hot=False)
        follower.mode = mode
        print("Environment creation successful")

        for episode in range(3):
            env.reset()

            print(env.habitat_env.current_episode)
            episode_id = env.habitat_env.current_episode.episode_id
            print(
                f"Agent stepping around inside environment. Episode id: {episode_id}"
            )

            dirname = os.path.join(IMAGE_DIR, "vln_reference_path_example",
                                   mode, "%02d" % episode)
            if os.path.exists(dirname):
                shutil.rmtree(dirname)
            os.makedirs(dirname)

            images = []
            steps = 0
            reference_path = env.habitat_env.current_episode.reference_path + [
                env.habitat_env.current_episode.goals[0].position
            ]
            for point in reference_path:
                done = False
                while not done:
                    best_action = follower.get_next_action(point)
                    print(best_action)
                    if best_action is None or best_action == 0:
                        break
                    observations, reward, done, info = env.step(best_action)
                    save_map(observations, info, images)
                    steps += 1

            print(f"Navigated to goal in {steps} steps.")
            images_to_video(images, dirname, str(episode_id))
            images = []
Example #6
def generate_video(video_option: List[str],
                   video_dir: Optional[str],
                   images: List[np.ndarray],
                   scene_name: str,
                   sound: str,
                   sr: int,
                   episode_id: int,
                   checkpoint_idx: int,
                   metric_name: str,
                   metric_value: float,
                   tb_writer: TensorboardWriter,
                   fps: int = 10,
                   audios: Optional[List[str]] = None) -> None:
    r"""Generate video according to specified information.

    Args:
        video_option: string list of "tensorboard" or "disk" or both.
        video_dir: path to target video directory.
        images: list of images to be converted to video.
        scene_name: scene name for video naming.
        sound: name of the sound source for video naming.
        sr: sample rate of the audio.
        episode_id: episode id for video naming.
        checkpoint_idx: checkpoint index for video naming.
        metric_name: name of the performance metric, e.g. "spl".
        metric_value: value of metric.
        tb_writer: tensorboard writer object for uploading video.
        fps: fps for generated video.
        audios: raw audio data to write alongside the video frames.
    Returns:
        None
    """
    if len(images) < 1:
        return

    video_name = f"{scene_name}_{episode_id}_{sound}_{metric_name}{metric_value:.2f}"
    if "disk" in video_option:
        assert video_dir is not None
        if audios is None:
            images_to_video(images, video_dir, video_name)
        else:
            images_to_video_with_audio(images,
                                       video_dir,
                                       video_name,
                                       audios,
                                       sr,
                                       fps=fps)
    if "tensorboard" in video_option:
        tb_writer.add_video_from_np_images(f"episode{episode_id}",
                                           checkpoint_idx,
                                           images,
                                           fps=fps)
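# A usage sketch for the audio-aware variant above, with synthetic placeholder
# data. It assumes this generate_video and sound-spaces' helpers are
# importable; the directory, scene, and sound names are made up.
import numpy as np

sr = 16000  # audio sample rate
fps = 10
frames = [np.zeros((128, 128, 3), dtype=np.uint8) for _ in range(fps * 2)]
# one chunk of raw audio samples per video frame
audios = [np.zeros(sr // fps, dtype=np.float32) for _ in frames]

generate_video(
    video_option=["disk"],
    video_dir="data/videos",   # hypothetical output directory
    images=frames,
    scene_name="apartment_0",  # hypothetical scene name
    sound="telephone",         # hypothetical sound name
    sr=sr,
    episode_id=0,
    checkpoint_idx=0,
    metric_name="spl",
    metric_value=0.87,
    tb_writer=None,            # unused when only "disk" is requested
    fps=fps,
    audios=audios,
)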
Example #7
def generate_video(
    video_option: List[str],
    video_dir: Optional[str],
    images: List[np.ndarray],
    episode_id: Union[int, str],
    checkpoint_idx: int,
    metrics: Dict[str, float],
    tb_writer: TensorboardWriter,
    fps: int = 10,
) -> Optional[str]:
    r"""Generate video according to specified information.

    Args:
        video_option: string list of "tensorboard" or "disk" or both.
        video_dir: path to target video directory.
        images: list of images to be converted to video.
        episode_id: episode id for video naming.
        checkpoint_idx: checkpoint index for video naming.
        metrics: dictionary of metric names and values used for video naming.
        tb_writer: tensorboard writer object for uploading video.
        fps: fps for generated video.
    Returns:
        The generated video name, or None if no images were provided.
    """
    if len(images) < 1:
        return

    metric_strs = []
    for k, v in metrics.items():
        if isinstance(v, str):
            metric_strs.append(f"{k}={v}")
        else:
            metric_strs.append(f"{k}={v:.2f}")

    video_name = f"episode={episode_id}-ckpt={checkpoint_idx}-" + "-".join(
        metric_strs)
    if "disk" in video_option:
        assert video_dir is not None
        images_to_video(images, video_dir, video_name, fps=fps)
    if "tensorboard" in video_option:
        tb_writer.add_video_from_np_images(f"episode{episode_id}",
                                           checkpoint_idx,
                                           images,
                                           fps=fps)
    return video_name
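# A usage sketch for the metrics-dict variant above (all values are made up).
# With these arguments the function writes the video to disk and returns a
# name like "episode=12-ckpt=3-spl=0.81-success=1.00".
import numpy as np

frames = [np.zeros((256, 256, 3), dtype=np.uint8) for _ in range(30)]
name = generate_video(
    video_option=["disk"],
    video_dir="data/videos",  # hypothetical output directory
    images=frames,
    episode_id=12,
    checkpoint_idx=3,
    metrics={"spl": 0.81, "success": 1.0},
    tb_writer=None,           # unused when only "disk" is requested
    fps=10,
)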
Example #8
def generate_video(
    video_option: List[str],
    video_dir: Optional[str],
    images: List[np.ndarray],
    episode_id: int,
    checkpoint_idx: int,
    tag: str,
    metrics: Dict[str, float],
    tb_writer: TensorboardWriter,
    fps: int = 10,
) -> None:
    r"""Generate video according to specified information.

    Args:
        video_option: string list of "tensorboard" or "disk" or both.
        video_dir: path to target video directory.
        images: list of images to be converted to video.
        episode_id: episode id for video naming.
        checkpoint_idx: checkpoint index for video naming.
        tag: additional tag for video naming.
        metrics: dictionary of metric names and values used for video naming.
        tb_writer: tensorboard writer object for uploading video.
        fps: fps for generated video.
    Returns:
        None
    """
    print(len(images))
    if len(images) < 1:
        return

    metric_strs = []
    for k, v in metrics.items():
        metric_strs.append(f"{k}={v:.2f}")

    video_name = f"{tag}_episode={episode_id}-ckpt={checkpoint_idx}-" + "-".join(
        metric_strs
    )
    if "disk" in video_option:
        assert video_dir is not None
        images_to_video(images, video_dir, video_name)
    if "tensorboard" in video_option:
        tb_writer.add_video_from_np_images(
            f"episode{episode_id}", checkpoint_idx, images, fps=fps
        )
Example #9
    def save_video(self, file: Union[str, Path], fps: int = 10) -> None:
        assert self._capture_video, 'Not capturing video; nothing to save.'
        if len(self._rgb_frames) == 0:
            return
        first_shape = self._rgb_frames[0].shape
        next_shape = self._rgb_frames[1].shape
        if first_shape != next_shape:
            assert first_shape[0] == next_shape[0]
            # First frame is missing the top-down map, so we copy it from the next one
            td_map = self._rgb_frames[1][:, first_shape[1]:, :]
            self._rgb_frames[0] = np.concatenate(
                (self._rgb_frames[0], td_map), 1)
        file = Path(file)
        with capture_output('save_video'):
            images_to_video(self._rgb_frames,
                            str(file.parent),
                            file.name,
                            fps=fps,
                            quality=5)
    def reset(self):
        # reset hidden state and set done
        self.test_recurrent_hidden_states = torch.zeros(
            1, self.hidden_size
        ).cuda()
        self.not_done_masks = torch.zeros(1, 1).cuda()

        # reset observation storage (and verify)
        z = torch.zeros(1, 2).cuda()
        mask_out_done = { name: z for name in self.current_obs.sensor_names }
        if 'global_pos' in self.current_obs.sensor_names:
            mask_out_done['global_pos'] = torch.zeros(1,1).cuda()
        self.current_obs.clear_done(mask_out_done)
        for value in self.current_obs.peek().values():
            assert torch.sum(value.peek()).item() < 1e-6, 'did not clear the current_obs properly'

        # log everything
        if len(self.episode_pgs) != 0:
            # log video (and save to log_dir)
            if self.save_eval_videos:
                images_to_video(images=self.episode_rgbs, output_dir=self.log_dir, video_name=f'test_{self.episode_num}')
                self.mlog.add_meter(f'diagnostics/rollout_{self.episode_num}', tnt.meter.SingletonMeter(), ptype='video')
                if self.use_visdom:
                    vid_path = os.path.join(self.log_dir, f'test_{self.episode_num}.mp4')
                    self.mlog.update_meter(vid_path, meters={f'diagnostics/rollout_{self.episode_num}'}, phase='val')
                else:
                    print('Video support for TensorBoard is weak; not recommended.')
                    rgb_tensor = torch.Tensor(self.episode_rgbs).unsqueeze(dim=0)
                    self.mlog.update_meter(rgb_tensor, meters={f'diagnostics/rollout_{self.episode_num}'}, phase='val')

            # reset log
            self.mlog.reset_meter(self.episode_num, mode='val')

            # reset episode logs
            self.episode_rgbs = []
            self.episode_pgs = []
            self.episode_values = []
            self.episode_entropy = []
            self.episode_lengths.append(self.t)
            self.episode_num += 1
            self.t = 0
            self.last_action = None
def video_from_dir(vidDir, images_type="png"):
    """
    Images will be ordered alpha-numeric order for video
    vidDir: str directory path that has images

    Output video is called 'video.mp4' and placed in same directory
    """
    assert os.path.isdir(vidDir), "vidDir must be a valid directory"
    images = sorted(os.listdir(vidDir), key=natural_sort_key)

    imArray = []
    for i in images:
        if not i.endswith('.' + images_type):
            continue
        imArray.append(imread(os.path.join(vidDir, i)))
    images_to_video(imArray, vidDir, "video", fps=2)
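# video_from_dir relies on a natural_sort_key helper (and an imread function,
# e.g. from imageio) that are not shown here. A common natural-sort key,
# offered as an assumption rather than the original implementation: split the
# filename into digit and non-digit runs so "frame2.png" sorts before
# "frame10.png".
import re


def natural_sort_key(s):
    return [int(part) if part.isdigit() else part.lower()
            for part in re.split(r"(\d+)", s)]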
Example #12
def generate_video(
    video_option: List[str],
    video_dir: Optional[str],
    images: List[np.ndarray],
    episode_id: int,
    checkpoint_idx: int,
    spl: float,
    tb_writer: TensorboardWriter,
    fps: int = 10,
) -> None:
    r"""Generate video according to specified information.

    Args:
        video_option: string list of "tensorboard" or "disk" or both.
        video_dir: path to target video directory.
        images: list of images to be converted to video.
        episode_id: episode id for video naming.
        checkpoint_idx: checkpoint index for video naming.
        spl: SPL for this episode for video naming.
        tb_writer: tensorboard writer object for uploading video.
        fps: fps for generated video.
    Returns:
        None
    """
    if len(images) < 1:
        return

    video_name = f"episode{episode_id}_ckpt{checkpoint_idx}_spl{spl:.2f}"
    if "disk" in video_option:
        assert video_dir is not None
        images_to_video(images, video_dir, video_name)
    if "tensorboard" in video_option:
        tb_writer.add_video_from_np_images(f"episode{episode_id}",
                                           checkpoint_idx,
                                           images,
                                           fps=fps)
Example #13
def reference_path_example(mode):
    """
    Saves a video of a shortest path follower agent navigating from a start
    position to a goal. Agent follows the ground truth reference path by
    navigating to intermediate viewpoints en route to goal.
    Args:
        mode: 'geodesic_path' or 'greedy'
    """
    config = habitat.get_config(
        config_paths="configs/test/habitat_r2r_vln_test.yaml")
    config.defrost()
    config.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
    config.TASK.SENSORS.append("HEADING_SENSOR")
    config.freeze()
    with SimpleRLEnv(config=config) as env:
        print("Environment creation successful")
        sim_time = 30  # @param {type:"integer"}
        continuous_nav = True  # @param {type:"boolean"}
        if continuous_nav:
            control_frequency = 10  # @param {type:"slider", min:1, max:30, step:1}
            frame_skip = 6  # @param {type:"slider", min:1, max:30, step:1}
        fps = control_frequency * frame_skip
        print("fps = " + str(fps))
        control_sequence = []
        for action in range(int(sim_time * control_frequency)):
            if continuous_nav:
                # allow forward velocity and y rotation to vary
                control_sequence.append({
                    "forward_velocity":
                    random.random() * 2.0,  # [0,2)
                    "rotation_velocity":
                    (random.random() - 0.5) * 2.0,  # [-1,1)
                })
            else:
                control_sequence.append(random.choice(action_names))

        # create and configure a new VelocityControl structure
        vel_control = habitat_sim.physics.VelocityControl()
        vel_control.controlling_lin_vel = True
        vel_control.lin_vel_is_local = True
        vel_control.controlling_ang_vel = True
        vel_control.ang_vel_is_local = True

        for episode in range(6):
            env.reset()
            print(env.habitat_env.current_episode)
            episode_id = env.habitat_env.current_episode.episode_id
            print(
                f"Agent stepping around inside environment. Episode id: {episode_id}"
            )

            dirname = os.path.join(IMAGE_DIR, "vln_reference_path_example",
                                   mode, "%02d" % episode)
            if os.path.exists(dirname):
                shutil.rmtree(dirname)
            os.makedirs(dirname)

            images = []
            steps = 0
            reference_path = env.habitat_env.current_episode.reference_path + [
                env.habitat_env.current_episode.goals[0].position
            ]

            # manually control the object's kinematic state via velocity integration
            time_step = 1.0 / (frame_skip * control_frequency)
            print("time_step = " + str(time_step))
            for action in control_sequence:
                # apply actions
                if continuous_nav:
                    # update the velocity control
                    # local forward is -z
                    vel_control.linear_velocity = np.array(
                        [0, 0, -action["forward_velocity"]])
                    # local up is y
                    vel_control.angular_velocity = np.array(
                        [0, action["rotation_velocity"], 0])

                    observations, reward, done, info = env.step(vel_control)
                    save_map(observations, info, images)
                    steps += 1

            print(f"Navigated to goal in {steps} steps.")
            images_to_video(images,
                            dirname,
                            str(episode_id),
                            fps=int(1.0 / time_step))
            images = []
Example #14
def test_noise_models_rgbd():
    DEMO_MODE = False
    N_STEPS = 100

    config = get_config()
    config.defrost()
    config.SIMULATOR.SCENE = (
        "data/scene_datasets/habitat-test-scenes/skokloster-castle.glb")
    config.SIMULATOR.AGENT_0.SENSORS = ["RGB_SENSOR", "DEPTH_SENSOR"]
    config.freeze()
    if not os.path.exists(config.SIMULATOR.SCENE):
        pytest.skip("Please download Habitat test data to data folder.")

    valid_start_position = [-1.3731, 0.08431, 8.60692]

    expected_pointgoal = [0.1, 0.2, 0.3]
    goal_position = np.add(valid_start_position, expected_pointgoal)

    # starting quaternion is rotated 180 degree along z-axis, which
    # corresponds to simulator using z-negative as forward action
    start_rotation = [0, 0, 0, 1]
    test_episode = NavigationEpisode(
        episode_id="0",
        scene_id=config.SIMULATOR.SCENE,
        start_position=valid_start_position,
        start_rotation=start_rotation,
        goals=[NavigationGoal(position=goal_position)],
    )

    print(f"{test_episode}")
    with habitat.Env(config=config, dataset=None) as env:

        env.episode_iterator = iter([test_episode])
        no_noise_obs = [env.reset()]
        no_noise_states = [env.sim.get_agent_state()]

        actions = [
            sample_non_stop_action(env.action_space) for _ in range(N_STEPS)
        ]
        for action in actions:
            no_noise_obs.append(env.step(action))
            no_noise_states.append(env.sim.get_agent_state())
        env.close()

        config.defrost()

        config.SIMULATOR.RGB_SENSOR.NOISE_MODEL = "GaussianNoiseModel"
        config.SIMULATOR.RGB_SENSOR.NOISE_MODEL_KWARGS = habitat.Config()
        config.SIMULATOR.RGB_SENSOR.NOISE_MODEL_KWARGS.INTENSITY_CONSTANT = 0.5
        config.SIMULATOR.DEPTH_SENSOR.NOISE_MODEL = "RedwoodDepthNoiseModel"

        config.SIMULATOR.ACTION_SPACE_CONFIG = "pyrobotnoisy"
        config.SIMULATOR.NOISE_MODEL = habitat.Config()
        config.SIMULATOR.NOISE_MODEL.ROBOT = "LoCoBot"
        config.SIMULATOR.NOISE_MODEL.CONTROLLER = "Proportional"
        config.SIMULATOR.NOISE_MODEL.NOISE_MULTIPLIER = 0.5

        config.freeze()

        env = habitat.Env(config=config, dataset=None)

        env.episode_iterator = iter([test_episode])

        obs = env.reset()
        assert np.linalg.norm(
            obs["rgb"].astype(float) - no_noise_obs[0]["rgb"].astype(float)
        ) > 1.5e-2 * np.linalg.norm(
            no_noise_obs[0]["rgb"].astype(float)
        ), "No RGB noise detected."

        assert np.linalg.norm(
            obs["depth"].astype(float) - no_noise_obs[0]["depth"].astype(float)
        ) > 1.5e-2 * np.linalg.norm(
            no_noise_obs[0]["depth"].astype(float)
        ), "No Depth noise detected."

        images = []
        state = env.sim.get_agent_state()
        angle_diffs = []
        pos_diffs = []
        for action in actions:
            prev_state = state
            obs = env.step(action)
            state = env.sim.get_agent_state()
            position_change = np.linalg.norm(np.array(state.position) -
                                             np.array(prev_state.position),
                                             ord=2)

            if action["action"][:5] == "TURN_":
                angle_diff = abs(
                    angle_between_quaternions(state.rotation,
                                              prev_state.rotation) -
                    np.deg2rad(config.SIMULATOR.TURN_ANGLE))
                angle_diffs.append(angle_diff)
            else:
                pos_diffs.append(
                    abs(position_change - config.SIMULATOR.FORWARD_STEP_SIZE))

            if DEMO_MODE:
                images.append(observations_to_image(obs, {}))

        if DEMO_MODE:
            images_to_video(images, "data/video/test_noise", "test_noise")

        assert (np.mean(angle_diffs) >
                0.025), "No turn action actuation noise detected."
        assert (np.mean(pos_diffs) >
                0.025), "No forward action actuation noise detected."
    for maps_chunk in torch.split(maps, batch_size):
        maps_chunk = map_transform(maps_chunk)
        maps_lst.append(maps_chunk.to('cpu', non_blocking=True))
    maps = torch.cat(maps_lst).numpy()

    for (s, _), mapp in zip(traj, maps):
        s['map'] = mapp

    for task, taskonomy_data in zip(tasks, all_taskonomys):
        for (s, _), taskonomy_frame in zip(traj, taskonomy_data):
            s[f'taskonomy_{task}'] = taskonomy_frame
            if task == 'curvature':  # for backwards support
                s['taskonomy'] = taskonomy_frame


    # write trajectory
    cur_scene_name = env._env.current_episode.scene_id.split('/')[-1].split('.')[0]
    episode_id = env._env.current_episode.episode_id
    single_traj_dir = os.path.join(TRAJ_DIR, cur_scene_name, f'episode_{episode_id}')
    os.makedirs(single_traj_dir, exist_ok=True)

    for i, (obs, act) in enumerate(traj):
        Image.fromarray(obs['rgb_filled']).save(os.path.join(single_traj_dir, f'rgb_filled_{i:03d}.png'))
        del obs['rgb_filled']
        np.savez_compressed(os.path.join(single_traj_dir, f'action_{i:03d}.npz'), act)
        for k, v in obs.items():
            np.savez_compressed(os.path.join(single_traj_dir, f'{k}_{i:03d}.npz'), v)

    if SAVE_VIDEO:
        images_to_video(images, IMAGE_DIR, str(episode))
def reference_path_example(mode):
    """
	Saves a video of a shortest path follower agent navigating from a start
	position to a goal. Agent follows the ground truth reference path by
	navigating to intermediate viewpoints en route to goal.
	Args:
		mode: 'geodesic_path' or 'greedy'
	"""
    config = habitat.get_config(
        config_paths="configs/test/habitat_r2r_vln_test_continuous.yaml")
    config.defrost()
    config.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
    config.TASK.SENSORS.append("HEADING_SENSOR")
    config.freeze()
    with SimpleRLEnv(config=config) as env:
        print("Environment creation successful")
        sim_time = 30  # @param {type:"integer"}
        continuous_nav = True  # @param {type:"boolean"}
        if continuous_nav:
            control_frequency = 10  # @param {type:"slider", min:1, max:30, step:1}
            frame_skip = 6  # @param {type:"slider", min:1, max:30, step:1}
        fps = control_frequency * frame_skip
        print("fps = " + str(fps))
        control_sequence = []
        # create and configure a new VelocityControl structure
        vel_control = habitat_sim.physics.VelocityControl()
        vel_control.controlling_lin_vel = True
        vel_control.lin_vel_is_local = True
        vel_control.controlling_ang_vel = True
        vel_control.ang_vel_is_local = True

        for episode in range(6):
            env.reset()
            print(env.habitat_env.current_episode)
            episode_id = env.habitat_env.current_episode.episode_id
            print(
                f"Agent stepping around inside environment. Episode id: {episode_id}"
            )

            dirname = os.path.join(IMAGE_DIR, "vln_reference_path_example",
                                   mode, "%02d" % episode)
            if os.path.exists(dirname):
                shutil.rmtree(dirname)
            os.makedirs(dirname)

            images = []
            steps = 0

            reference_path = env.habitat_env.current_episode.reference_path

            waypoints = np.array(reference_path)

            for i in range(waypoints.shape[1] - 1):
                if np.abs(np.linalg.norm(waypoints[i + 1, :] -
                                         waypoints[i, :])) < 5:
                    waypoints = np.delete(waypoints, (i + 1), axis=0)

            waypoints = waypoints[0::10]
            x_wp = waypoints[:, 0]
            y_wp = -waypoints[:, 2]
            z_wp = waypoints[:, 1]

            ax = x_wp
            ay = y_wp

            cx, cy, cyaw, ck, s_profile, s = cubic_spline_planner.calc_spline_course(
                ax, ay, ds=0.01)

            goal = [cx[-1], cy[-1], cyaw[-1]]

            agent_forward = quat_to_magnum(
                env.habitat_env._sim.get_agent_state(
                ).rotation).transform_vector(mn.Vector3(0, 0, -1.0))

            angle = quaternion.as_euler_angles(
                env.habitat_env._sim.get_agent_state().rotation)
            init_x = env.habitat_env._sim.get_agent_state().position[0]
            init_y = -env.habitat_env._sim.get_agent_state().position[2]
            # init_yaw = angle[1]

            init_yaw = math.atan2(agent_forward[0], agent_forward[2])

            state = State(x=init_x, y=init_y, yaw=init_yaw)

            time_step = 1.0 / (30)
            T = 500
            t, x, y, yaw, vel, omeg, images = closed_loop_prediction(
                env, state, cx, cy, cyaw, ck, s_profile, goal, images,
                vel_control, time_step)
            images_to_video(images,
                            dirname,
                            str(episode_id),
                            fps=int(1.0 / time_step))
            images = []
Example #17
    def train_model(self):
        episode_rewards = deque(maxlen=10)
        current_episode_rewards = np.zeros(self.shell_args.num_processes)
        episode_lengths = deque(maxlen=10)
        current_episode_lengths = np.zeros(self.shell_args.num_processes)
        current_rewards = np.zeros(self.shell_args.num_processes)

        total_num_steps = self.start_iter
        fps_timer = [time.time(), total_num_steps]
        timers = np.zeros(3)
        egomotion_loss = 0

        video_frames = []
        num_episodes = 0
        # self.evaluate_model()

        obs = self.envs.reset()
        if self.compute_surface_normals:
            obs["surface_normals"] = pt_util.depth_to_surface_normals(
                obs["depth"].to(self.device))
        obs["prev_action_one_hot"] = obs[
            "prev_action_one_hot"][:, ACTION_SPACE].to(torch.float32)
        if self.shell_args.algo == "supervised":
            obs["best_next_action"] = pt_util.from_numpy(
                obs["best_next_action"][:, ACTION_SPACE])
        self.rollouts.copy_obs(obs, 0)
        distances = pt_util.to_numpy_array(obs["goal_geodesic_distance"])
        self.train_stats["start_geodesic_distance"][:] = distances
        previous_visual_features = None
        egomotion_pred = None
        prev_action = None
        prev_action_probs = None
        num_updates = (int(self.shell_args.num_env_steps) //
                       self.shell_args.num_forward_rollout_steps
                       ) // self.shell_args.num_processes

        try:
            for iter_count in range(num_updates):
                if self.shell_args.tensorboard:
                    if iter_count % 500 == 0:
                        print("Logging conv summaries")
                        self.logger.network_conv_summary(
                            self.agent, total_num_steps)
                    elif iter_count % 100 == 0:
                        print("Logging variable summaries")
                        self.logger.network_variable_summary(
                            self.agent, total_num_steps)

                if self.shell_args.use_linear_lr_decay:
                    # decrease learning rate linearly
                    update_linear_schedule(self.optimizer.optimizer,
                                           iter_count, num_updates,
                                           self.shell_args.lr)

                if self.shell_args.algo == "ppo" and self.shell_args.use_linear_clip_decay:
                    self.optimizer.clip_param = self.shell_args.clip_param * (
                        1 - iter_count / float(num_updates))

                if hasattr(self.agent.base, "enable_decoder"):
                    if self.shell_args.record_video:
                        self.agent.base.enable_decoder()
                    else:
                        self.agent.base.disable_decoder()

                for step in range(self.shell_args.num_forward_rollout_steps):
                    with torch.no_grad():
                        start_t = time.time()
                        value, action, action_log_prob, recurrent_hidden_states = self.agent.act(
                            {
                                "images":
                                self.rollouts.obs[step],
                                "target_vector":
                                self.rollouts.additional_observations_dict[
                                    "pointgoal"][step],
                                "prev_action_one_hot":
                                self.rollouts.additional_observations_dict[
                                    "prev_action_one_hot"][step],
                            },
                            self.rollouts.recurrent_hidden_states[step],
                            self.rollouts.masks[step],
                        )
                        action_cpu = pt_util.to_numpy_array(action.squeeze(1))
                        translated_action_space = ACTION_SPACE[action_cpu]
                        if not self.shell_args.end_to_end:
                            self.rollouts.additional_observations_dict[
                                "visual_encoder_features"][
                                    self.rollouts.step].copy_(
                                        self.agent.base.visual_encoder_features
                                    )

                        if self.shell_args.use_motion_loss:
                            if self.shell_args.record_video:
                                if previous_visual_features is not None:
                                    egomotion_pred = self.agent.base.predict_egomotion(
                                        self.agent.base.visual_features,
                                        previous_visual_features)
                            previous_visual_features = self.agent.base.visual_features.detach(
                            )

                        timers[1] += time.time() - start_t

                        if self.shell_args.record_video:
                            # Copy so we don't mess with obs itself
                            draw_obs = OrderedDict()
                            for key, val in obs.items():
                                draw_obs[key] = pt_util.to_numpy_array(
                                    val).copy()
                            best_next_action = draw_obs.pop(
                                "best_next_action", None)

                            if prev_action is not None:
                                draw_obs[
                                    "action_taken"] = pt_util.to_numpy_array(
                                        self.agent.last_dist.probs).copy()
                                draw_obs["action_taken"][:] = 0
                                draw_obs["action_taken"][
                                    np.arange(self.shell_args.num_processes),
                                    prev_action] = 1
                                draw_obs[
                                    "action_taken_name"] = SIM_ACTION_TO_NAME[
                                        ACTION_SPACE_TO_SIM_ACTION[
                                            ACTION_SPACE[
                                                prev_action.squeeze()]]]
                                draw_obs[
                                    "action_prob"] = pt_util.to_numpy_array(
                                        prev_action_probs).copy()
                            else:
                                draw_obs["action_taken"] = None
                                draw_obs[
                                    "action_taken_name"] = SIM_ACTION_TO_NAME[
                                        SimulatorActions.STOP]
                                draw_obs["action_prob"] = None
                            prev_action = action_cpu
                            prev_action_probs = self.agent.last_dist.probs.detach(
                            )
                            if (hasattr(self.agent.base, "decoder_outputs")
                                    and self.agent.base.decoder_outputs
                                    is not None):
                                min_channel = 0
                                for key, num_channels in self.agent.base.decoder_output_info:
                                    outputs = self.agent.base.decoder_outputs[:,
                                                                              min_channel:
                                                                              min_channel
                                                                              +
                                                                              num_channels,
                                                                              ...]
                                    draw_obs["output_" +
                                             key] = pt_util.to_numpy_array(
                                                 outputs).copy()
                                    min_channel += num_channels
                            draw_obs["rewards"] = current_rewards.copy()
                            draw_obs["step"] = current_episode_lengths.copy()
                            draw_obs["method"] = self.shell_args.method_name
                            if best_next_action is not None:
                                draw_obs["best_next_action"] = best_next_action
                            if self.shell_args.use_motion_loss:
                                if egomotion_pred is not None:
                                    draw_obs[
                                        "egomotion_pred"] = pt_util.to_numpy_array(
                                            F.softmax(egomotion_pred,
                                                      dim=1)).copy()
                                else:
                                    draw_obs["egomotion_pred"] = None
                            images, titles, normalize = draw_outputs.obs_to_images(
                                draw_obs)
                            if self.shell_args.algo == "supervised":
                                im_inds = [0, 2, 3, 1, 9, 6, 7, 8, 5, 4]
                            else:
                                im_inds = [0, 2, 3, 1, 6, 7, 8, 5]
                            height, width = images[0].shape[:2]
                            subplot_image = drawing.subplot(
                                images,
                                2,
                                5,
                                titles=titles,
                                normalize=normalize,
                                order=im_inds,
                                output_width=max(width, 320),
                                output_height=max(height, 320),
                            )
                            video_frames.append(subplot_image)

                        # save dists from previous step or else on reset they will be overwritten
                        distances = pt_util.to_numpy_array(
                            obs["goal_geodesic_distance"])

                        start_t = time.time()
                        obs, rewards, dones, infos = self.envs.step(
                            translated_action_space)
                        timers[0] += time.time() - start_t
                        obs["reward"] = rewards
                        if self.shell_args.algo == "supervised":
                            obs["best_next_action"] = pt_util.from_numpy(
                                obs["best_next_action"][:, ACTION_SPACE]).to(
                                    torch.float32)
                        obs["prev_action_one_hot"] = obs[
                            "prev_action_one_hot"][:, ACTION_SPACE].to(
                                torch.float32)
                        rewards *= REWARD_SCALAR
                        rewards = np.clip(rewards, -10, 10)

                        if self.shell_args.record_video and not dones[0]:
                            obs["top_down_map"] = infos[0]["top_down_map"]

                        if self.compute_surface_normals:
                            obs["surface_normals"] = pt_util.depth_to_surface_normals(
                                obs["depth"].to(self.device))

                        current_rewards = pt_util.to_numpy_array(rewards)
                        current_episode_rewards += pt_util.to_numpy_array(
                            rewards).squeeze()
                        current_episode_lengths += 1
                        for ii, done_e in enumerate(dones):
                            if done_e:
                                num_episodes += 1
                                if self.shell_args.record_video:
                                    final_rgb = draw_obs["rgb"].transpose(
                                        0, 2, 3, 1).squeeze(0)
                                    if self.shell_args.task == "pointnav":
                                        if infos[ii]["spl"] > 0:
                                            draw_obs[
                                                "action_taken_name"] = "Stop. Success"
                                            draw_obs["reward"] = [
                                                self.configs[0].TASK.
                                                SUCCESS_REWARD
                                            ]
                                            final_rgb[:] = final_rgb * np.float32(
                                                0.5) + np.tile(
                                                    np.array([0, 128, 0],
                                                             dtype=np.uint8),
                                                    (final_rgb.shape[0],
                                                     final_rgb.shape[1], 1),
                                                )
                                        else:
                                            draw_obs[
                                                "action_taken_name"] = "Timeout. Failed"
                                            final_rgb[:] = final_rgb * np.float32(
                                                0.5) + np.tile(
                                                    np.array([128, 0, 0],
                                                             dtype=np.uint8),
                                                    (final_rgb.shape[0],
                                                     final_rgb.shape[1], 1),
                                                )
                                    elif self.shell_args.task == "exploration" or self.shell_args.task == "flee":
                                        draw_obs[
                                            "action_taken_name"] = "End of episode."
                                    final_rgb = final_rgb[np.newaxis,
                                                          ...].transpose(
                                                              0, 3, 1, 2)
                                    draw_obs["rgb"] = final_rgb

                                    images, titles, normalize = draw_outputs.obs_to_images(
                                        draw_obs)
                                    im_inds = [0, 2, 3, 1, 6, 7, 8, 5]
                                    height, width = images[0].shape[:2]
                                    subplot_image = drawing.subplot(
                                        images,
                                        2,
                                        5,
                                        titles=titles,
                                        normalize=normalize,
                                        order=im_inds,
                                        output_width=max(width, 320),
                                        output_height=max(height, 320),
                                    )
                                    video_frames.extend(
                                        [subplot_image] *
                                        (self.configs[0].ENVIRONMENT.
                                         MAX_EPISODE_STEPS + 30 -
                                         len(video_frames)))

                                    if "top_down_map" in infos[0]:
                                        video_dir = os.path.join(
                                            self.shell_args.log_prefix,
                                            "videos")
                                        if not os.path.exists(video_dir):
                                            os.makedirs(video_dir)
                                        im_path = os.path.join(
                                            self.shell_args.log_prefix,
                                            "videos", "total_steps_%d.png" %
                                            total_num_steps)
                                        from habitat.utils.visualizations import maps
                                        import imageio

                                        top_down_map = maps.colorize_topdown_map(
                                            infos[0]["top_down_map"]["map"])
                                        imageio.imsave(im_path, top_down_map)

                                    images_to_video(
                                        video_frames,
                                        os.path.join(
                                            self.shell_args.log_prefix,
                                            "videos"),
                                        "total_steps_%d" % total_num_steps,
                                    )
                                    video_frames = []

                                if self.shell_args.task == "pointnav":
                                    print(
                                        "FINISHED EPISODE %d Length %d Reward %.3f SPL %.4f"
                                        % (
                                            num_episodes,
                                            current_episode_lengths[ii],
                                            current_episode_rewards[ii],
                                            infos[ii]["spl"],
                                        ))
                                    self.train_stats["spl"][ii] = infos[ii][
                                        "spl"]
                                    self.train_stats["success"][
                                        ii] = self.train_stats["spl"][ii] > 0
                                    self.train_stats["end_geodesic_distance"][
                                        ii] = (distances[ii] - self.configs[0].
                                               SIMULATOR.FORWARD_STEP_SIZE)
                                    self.train_stats[
                                        "delta_geodesic_distance"][ii] = (
                                            self.train_stats[
                                                "start_geodesic_distance"][ii]
                                            - self.train_stats[
                                                "end_geodesic_distance"][ii])
                                    self.train_stats["num_steps"][
                                        ii] = current_episode_lengths[ii]
                                elif self.shell_args.task == "exploration":
                                    print(
                                        "FINISHED EPISODE %d Reward %.3f States Visited %d"
                                        % (num_episodes,
                                           current_episode_rewards[ii],
                                           infos[ii]["visited_states"]))
                                    self.train_stats["visited_states"][
                                        ii] = infos[ii]["visited_states"]
                                elif self.shell_args.task == "flee":
                                    print(
                                        "FINISHED EPISODE %d Reward %.3f Distance from start %.4f"
                                        % (num_episodes,
                                           current_episode_rewards[ii],
                                           infos[ii]["distance_from_start"]))
                                    self.train_stats["distance_from_start"][
                                        ii] = infos[ii]["distance_from_start"]

                                self.train_stats["num_episodes"][ii] += 1
                                self.train_stats["reward"][
                                    ii] = current_episode_rewards[ii]

                                if self.shell_args.tensorboard:
                                    log_dict = {
                                        "single_episode/reward":
                                        self.train_stats["reward"][ii]
                                    }
                                    if self.shell_args.task == "pointnav":
                                        log_dict.update({
                                            "single_episode/num_steps":
                                            self.train_stats["num_steps"][ii],
                                            "single_episode/spl":
                                            self.train_stats["spl"][ii],
                                            "single_episode/success":
                                            self.train_stats["success"][ii],
                                            "single_episode/start_geodesic_distance":
                                            self.train_stats[
                                                "start_geodesic_distance"][ii],
                                            "single_episode/end_geodesic_distance":
                                            self.train_stats[
                                                "end_geodesic_distance"][ii],
                                            "single_episode/delta_geodesic_distance":
                                            self.train_stats[
                                                "delta_geodesic_distance"][ii],
                                        })
                                    elif self.shell_args.task == "exploration":
                                        log_dict[
                                            "single_episode/visited_states"] = self.train_stats[
                                                "visited_states"][ii]
                                    elif self.shell_args.task == "flee":
                                        log_dict[
                                            "single_episode/distance_from_start"] = self.train_stats[
                                                "distance_from_start"][ii]
                                    self.logger.dict_log(
                                        log_dict,
                                        step=(total_num_steps +
                                              self.shell_args.num_processes *
                                              step + ii))

                                episode_rewards.append(
                                    current_episode_rewards[ii])
                                current_episode_rewards[ii] = 0
                                episode_lengths.append(
                                    current_episode_lengths[ii])
                                current_episode_lengths[ii] = 0
                                self.train_stats["start_geodesic_distance"][
                                    ii] = obs["goal_geodesic_distance"][ii]

                        # If done then clean the history of observations.
                        masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                                   for done_ in dones])
                        bad_masks = torch.FloatTensor(
                            [[0.0]
                             if "bad_transition" in info.keys() else [1.0]
                             for info in infos])

                        self.rollouts.insert(obs, recurrent_hidden_states,
                                             action, action_log_prob, value,
                                             rewards, masks, bad_masks)

                with torch.no_grad():
                    start_t = time.time()
                    next_value = self.agent.get_value(
                        {
                            "images":
                            self.rollouts.obs[-1],
                            "target_vector":
                            self.rollouts.
                            additional_observations_dict["pointgoal"][-1],
                            "prev_action_one_hot":
                            self.rollouts.additional_observations_dict[
                                "prev_action_one_hot"][-1],
                        },
                        self.rollouts.recurrent_hidden_states[-1],
                        self.rollouts.masks[-1],
                    ).detach()
                    timers[1] += time.time() - start_t

                self.rollouts.compute_returns(next_value,
                                              self.shell_args.use_gae,
                                              self.shell_args.gamma,
                                              self.shell_args.tau)

                if not self.shell_args.no_weight_update:
                    start_t = time.time()
                    if self.shell_args.algo == "supervised":
                        (
                            total_loss,
                            action_loss,
                            visual_loss_total,
                            visual_loss_dict,
                            egomotion_loss,
                            forward_model_loss,
                        ) = self.optimizer.update(self.rollouts,
                                                  self.shell_args)
                    else:
                        (
                            total_loss,
                            value_loss,
                            action_loss,
                            dist_entropy,
                            visual_loss_total,
                            visual_loss_dict,
                            egomotion_loss,
                            forward_model_loss,
                        ) = self.optimizer.update(self.rollouts,
                                                  self.shell_args)

                    timers[2] += time.time() - start_t

                self.rollouts.after_update()

                # save for every interval-th episode or for the last epoch
                if iter_count % self.shell_args.save_interval == 0 or iter_count == num_updates - 1:
                    self.save_checkpoint(5, total_num_steps)

                total_num_steps += self.shell_args.num_processes * self.shell_args.num_forward_rollout_steps

                if not self.shell_args.no_weight_update and iter_count % self.shell_args.log_interval == 0:
                    log_dict = {}
                    if len(episode_rewards) > 1:
                        end = time.time()
                        nsteps = total_num_steps - fps_timer[1]
                        fps = int((total_num_steps - fps_timer[1]) /
                                  (end - fps_timer[0]))
                        timers /= nsteps
                        env_spf = timers[0]
                        forward_spf = timers[1]
                        backward_spf = timers[2]
                        print((
                            "{} Updates {}, num timesteps {}, FPS {}, Env FPS "
                            "{}, \n Last {} training episodes: mean/median reward "
                            "{:.3f}/{:.3f}, min/max reward {:.3f}/{:.3f}\n"
                        ).format(
                            datetime.datetime.now(),
                            iter_count,
                            total_num_steps,
                            fps,
                            int(1.0 / env_spf),
                            len(episode_rewards),
                            np.mean(episode_rewards),
                            np.median(episode_rewards),
                            np.min(episode_rewards),
                            np.max(episode_rewards),
                        ))

                        if self.shell_args.tensorboard:
                            log_dict.update({
                                "stats/full_spf":
                                1.0 / (fps + 1e-10),
                                "stats/env_spf":
                                env_spf,
                                "stats/forward_spf":
                                forward_spf,
                                "stats/backward_spf":
                                backward_spf,
                                "stats/full_fps":
                                fps,
                                "stats/env_fps":
                                1.0 / (env_spf + 1e-10),
                                "stats/forward_fps":
                                1.0 / (forward_spf + 1e-10),
                                "stats/backward_fps":
                                1.0 / (backward_spf + 1e-10),
                                "episode/mean_rewards":
                                np.mean(episode_rewards),
                                "episode/median_rewards":
                                np.median(episode_rewards),
                                "episode/min_rewards":
                                np.min(episode_rewards),
                                "episode/max_rewards":
                                np.max(episode_rewards),
                                "episode/mean_lengths":
                                np.mean(episode_lengths),
                                "episode/median_lengths":
                                np.median(episode_lengths),
                                "episode/min_lengths":
                                np.min(episode_lengths),
                                "episode/max_lengths":
                                np.max(episode_lengths),
                            })
                        fps_timer[0] = time.time()
                        fps_timer[1] = total_num_steps
                        timers[:] = 0
                    if self.shell_args.tensorboard:
                        log_dict.update({
                            "loss/action":
                            action_loss,
                            "loss/0_total":
                            total_loss,
                            "loss/visual/0_total":
                            visual_loss_total,
                            "loss/exploration/egomotion":
                            egomotion_loss,
                            "loss/exploration/forward_model":
                            forward_model_loss,
                        })
                        if self.shell_args.algo != "supervised":
                            log_dict.update({
                                "loss/entropy": dist_entropy,
                                "loss/value": value_loss
                            })
                        for key, val in visual_loss_dict.items():
                            log_dict["loss/visual/" + key] = val
                        self.logger.dict_log(log_dict, step=total_num_steps)

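                # Evaluate roughly every eval_interval steps: the step counter grows in chunks,
                # so trigger as soon as it has just crossed a multiple of eval_interval.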
                if self.shell_args.eval_interval is not None and total_num_steps % self.shell_args.eval_interval < (
                        self.shell_args.num_processes *
                        self.shell_args.num_forward_rollout_steps):
                    self.save_checkpoint(-1, total_num_steps)
                    self.set_log_iter(total_num_steps)
                    self.evaluate_model()
                    # reset the env datasets
                    self.envs.unwrapped.call(
                        ["switch_dataset"] * self.shell_args.num_processes,
                        [("train", )] * self.shell_args.num_processes)
                    obs = self.envs.reset()
                    if self.compute_surface_normals:
                        obs["surface_normals"] = pt_util.depth_to_surface_normals(
                            obs["depth"].to(self.device))
                    obs["prev_action_one_hot"] = obs[
                        "prev_action_one_hot"][:,
                                               ACTION_SPACE].to(torch.float32)
                    if self.shell_args.algo == "supervised":
                        obs["best_next_action"] = pt_util.from_numpy(
                            obs["best_next_action"][:, ACTION_SPACE])
                    self.rollouts.copy_obs(obs, 0)
                    distances = pt_util.to_numpy_array(
                        obs["goal_geodesic_distance"])
                    self.train_stats["start_geodesic_distance"][:] = distances
                    previous_visual_features = None
                    egomotion_pred = None
                    prev_action = None
                    prev_action_probs = None
        except:
            # Catch all exceptions so a final save can be performed
            import traceback

            traceback.print_exc()
        finally:
            self.save_checkpoint(-1, total_num_steps)
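
A minimal sketch of the GAE recursion that a call like
self.rollouts.compute_returns(next_value, use_gae, gamma, tau) typically
performs; the function name, argument shapes, and mask convention below are
illustrative assumptions, not the project's RolloutStorage API:

import torch


def compute_gae_returns(rewards, values, masks, next_value, gamma, tau):
    # rewards, values: [T, N, 1]; masks: [T + 1, N, 1] with 0 at episode ends;
    # next_value: [N, 1] bootstrap value for the observation after the rollout.
    values = torch.cat([values, next_value.unsqueeze(0)], dim=0)
    returns = torch.zeros_like(rewards)
    gae = torch.zeros_like(next_value)
    for t in reversed(range(rewards.shape[0])):
        # TD residual; the mask zeroes the bootstrap across episode boundaries.
        delta = rewards[t] + gamma * values[t + 1] * masks[t + 1] - values[t]
        gae = delta + gamma * tau * masks[t + 1] * gae
        returns[t] = gae + values[t]
    return returns
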
Beispiel #18
0
def evaluate_agent(config: Config):
    split = config.EVAL.SPLIT
    config.defrost()
    config.TASK_CONFIG.DATASET.SPLIT = split
    config.TASK_CONFIG.TASK.NDTW.SPLIT = split
    config.TASK_CONFIG.TASK.SDTW.SPLIT = split
    config.TASK_CONFIG.ENVIRONMENT.ITERATOR_OPTIONS.SHUFFLE = True
    config.TASK_CONFIG.ENVIRONMENT.ITERATOR_OPTIONS.MAX_SCENE_REPEAT_STEPS = -1
    config.freeze()
    logger.info(config)

    env = construct_env(config)

    gt_path = config.TASK_CONFIG.TASK.NDTW.GT_PATH.format(
        split=config.TASK_CONFIG.DATASET.SPLIT)
    with gzip.open(gt_path, "rt") as f:
        gt_json = json.load(f)

    assert config.EVAL.NONLEARNING.AGENT in [
        "RandomAgent",
        "HandcraftedAgent",
    ], "EVAL.NONLEARNING.AGENT must be either RandomAgent or HandcraftedAgent."

    if config.EVAL.NONLEARNING.AGENT == "RandomAgent":
        agent = RandomContinuousAgent()
    else:
        agent = HandcraftedAgent()
    obs = env.reset()
    agent.reset()
    steps = 0
    is_done = False
    stats_episodes = {}  # dict of dicts that stores stats per episode
    ep_count = 0
    locations = []

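    # Drive the agent with continuous (linear, angular) velocity commands via
    # habitat_sim's VelocityControl, expressed in the agent's local frame.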
    vel_control = habitat_sim.physics.VelocityControl()
    vel_control.controlling_lin_vel = True
    vel_control.lin_vel_is_local = True
    vel_control.controlling_ang_vel = True
    vel_control.ang_vel_is_local = True
    images = []
    IMAGE_DIR = os.path.join("examples", "images")
    if not os.path.exists(IMAGE_DIR):
        os.makedirs(IMAGE_DIR)

    while len(stats_episodes) < config.EVAL.EPISODE_COUNT:
        current_episode = env.habitat_env.current_episode
        actions = agent.act()
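        # habitat-sim agents face -z with y up, so forward speed is applied
        # along -z and the turn rate about the y (yaw) axis.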
        vel_control.linear_velocity = np.array([0, 0, -actions[0]])
        vel_control.angular_velocity = np.array([0, actions[1], 0])
        observations, _, done, info = env.step(vel_control)
        episode_over, success = done
        episode_success = success and (actions[0] < 0.25)
        is_done = episode_over or episode_success
        steps += 1
        locations.append(
            env.habitat_env._sim.get_agent_state().position.tolist())
        save_map(observations, info, images)

        dirname = os.path.join(
            IMAGE_DIR, "icra_video",
            "%02d" % env.habitat_env.current_episode.episode_id)
        if os.path.exists(dirname):
            shutil.rmtree(dirname)
        os.makedirs(dirname)

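        # At episode end, score the executed path against the ground truth:
        # nDTW = exp(-DTW(path, gt) / (len(gt) * success_distance)).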
        if is_done or steps == config.TASK_CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS:
            gt_locations = gt_json[str(
                current_episode.episode_id)]["locations"]
            dtw_distance = fastdtw(locations,
                                   gt_locations,
                                   dist=euclidean_distance)[0]
            nDTW = np.exp(-dtw_distance /
                          (len(gt_locations) *
                           config.TASK_CONFIG.TASK.NDTW.SUCCESS_DISTANCE))

            locations = []
            is_done = False
            ep_count += 1
            steps = 0
            print("dones:", done)
            stats_episodes[current_episode.episode_id] = info
            stats_episodes[current_episode.episode_id]['ndtw'] = nDTW
            print("len stats episodes", len(stats_episodes))
            print("Current episode ID:", current_episode.episode_id)
            print("Episode Completed:", ep_count)
            print(" Episode done---------------------------------------------")
            obs = env.reset()
            print(stats_episodes[current_episode.episode_id])
            time_step = 1.0 / 30
            images_to_video(images,
                            dirname,
                            str(current_episode.episode_id),
                            fps=int(1.0 / time_step))
            images = []

    env.close()

    aggregated_stats = {}
    num_episodes = len(stats_episodes)
    for stat_key in next(iter(stats_episodes.values())).keys():
        aggregated_stats[stat_key] = (
            sum([v[stat_key] for v in stats_episodes.values()]) / num_episodes)
    with open(f"stats_complete_{config.EVAL.NONLEARNING.AGENT}_{split}.json",
              "w") as f:
        json.dump(aggregated_stats, f, indent=4)
Beispiel #19
0
    def evaluate_model(self):
        self.envs.unwrapped.call(["switch_dataset"] *
                                 self.shell_args.num_processes,
                                 [("val", )] * self.shell_args.num_processes)

        if not os.path.exists(self.eval_dir):
            os.makedirs(self.eval_dir)
        try:
            eval_net_file_name = sorted(
                glob.glob(
                    os.path.join(self.shell_args.log_prefix,
                                 self.shell_args.checkpoint_dirname, "*") +
                    "/*.pt"),
                key=os.path.getmtime,
            )[-1]
            eval_net_file_name = (
                self.shell_args.log_prefix.replace(os.sep, "_") + "_" +
                "_".join(eval_net_file_name.split(os.sep)[-2:])[:-3])
        except IndexError:
            print("Warning, no weights found")
            eval_net_file_name = "random_weights"
        eval_output_file = open(
            os.path.join(self.eval_dir, eval_net_file_name + ".csv"), "w")
        print("Writing results to", eval_output_file.name)

        # Save the evaled net for posterity
        if self.shell_args.save_checkpoints:
            save_model = self.agent
            pt_util.save(
                save_model,
                os.path.join(self.shell_args.log_prefix,
                             self.shell_args.checkpoint_dirname,
                             "eval_weights"),
                num_to_keep=-1,
                iteration=self.log_iter,
            )
            print("Wrote model to file for safe keeping")

        obs = self.envs.reset()
        if self.compute_surface_normals:
            obs["surface_normals"] = pt_util.depth_to_surface_normals(
                obs["depth"].to(self.device))
        obs["prev_action_one_hot"] = obs[
            "prev_action_one_hot"][:, ACTION_SPACE].to(torch.float32)
        recurrent_hidden_states = torch.zeros(
            self.shell_args.num_processes,
            self.agent.recurrent_hidden_state_size,
            dtype=torch.float32,
            device=self.device,
        )
        masks = torch.ones(self.shell_args.num_processes,
                           1,
                           dtype=torch.float32,
                           device=self.device)

        episode_rewards = deque(maxlen=10)
        current_episode_rewards = np.zeros(self.shell_args.num_processes)
        episode_lengths = deque(maxlen=10)
        current_episode_lengths = np.zeros(self.shell_args.num_processes)

        total_num_steps = self.log_iter
        fps_timer = [time.time(), total_num_steps]
        timers = np.zeros(3)

        num_episodes = 0

        print("Config\n", self.configs[0])

        # Initialize every time eval is run rather than just at the start
        dataset_sizes = np.array(
            [len(dataset.episodes) for dataset in self.eval_datasets])

        eval_stats = dict(
            episode_ids=[None for _ in range(self.shell_args.num_processes)],
            num_episodes=np.zeros(self.shell_args.num_processes,
                                  dtype=np.int32),
            num_steps=np.zeros(self.shell_args.num_processes, dtype=np.int32),
            reward=np.zeros(self.shell_args.num_processes, dtype=np.float32),
            spl=np.zeros(self.shell_args.num_processes, dtype=np.float32),
            visited_states=np.zeros(self.shell_args.num_processes,
                                    dtype=np.int32),
            success=np.zeros(self.shell_args.num_processes, dtype=np.int32),
            end_geodesic_distance=np.zeros(self.shell_args.num_processes,
                                           dtype=np.float32),
            start_geodesic_distance=np.zeros(self.shell_args.num_processes,
                                             dtype=np.float32),
            delta_geodesic_distance=np.zeros(self.shell_args.num_processes,
                                             dtype=np.float32),
            distance_from_start=np.zeros(self.shell_args.num_processes,
                                         dtype=np.float32),
        )
        eval_stats_means = dict(
            num_episodes=0,
            num_steps=0,
            reward=0,
            spl=0,
            visited_states=0,
            success=0,
            end_geodesic_distance=0,
            start_geodesic_distance=0,
            delta_geodesic_distance=0,
            distance_from_start=0,
        )
        eval_output_file.write("name,%s,iter,%d\n\n" %
                               (eval_net_file_name, self.log_iter))
        if self.shell_args.task == "pointnav":
            eval_output_file.write((
                "episode_id,num_steps,reward,spl,success,start_geodesic_distance,"
                "end_geodesic_distance,delta_geodesic_distance\n"))
        elif self.shell_args.task == "exploration":
            eval_output_file.write("episode_id,reward,visited_states\n")
        elif self.shell_args.task == "flee":
            eval_output_file.write("episode_id,reward,distance_from_start\n")
        distances = pt_util.to_numpy(obs["goal_geodesic_distance"])
        eval_stats["start_geodesic_distance"][:] = distances
        progress_bar = tqdm.tqdm(total=self.num_eval_episodes_total)
        all_done = False
        iter_count = 0
        video_frames = []
        previous_visual_features = None
        egomotion_pred = None
        prev_action = None
        prev_action_probs = None
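        # Only run the visual decoder when videos are recorded; its outputs
        # are used just for the visualization frames.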
        if hasattr(self.agent.base, "enable_decoder"):
            if self.shell_args.record_video:
                self.agent.base.enable_decoder()
            else:
                self.agent.base.disable_decoder()
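        # Step all remaining workers until every evaluation episode has been processed.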
        while not all_done:
            with torch.no_grad():
                start_t = time.time()
                value, action, action_log_prob, recurrent_hidden_states = self.agent.act(
                    {
                        "images":
                        obs["rgb"].to(self.device),
                        "target_vector":
                        obs["pointgoal"].to(self.device),
                        "prev_action_one_hot":
                        obs["prev_action_one_hot"].to(self.device),
                    },
                    recurrent_hidden_states,
                    masks,
                )
                action_cpu = pt_util.to_numpy(action.squeeze(1))
                translated_action_space = ACTION_SPACE[action_cpu]

                timers[1] += time.time() - start_t

                if self.shell_args.record_video:
                    if self.shell_args.use_motion_loss:
                        if previous_visual_features is not None:
                            egomotion_pred = self.agent.base.predict_egomotion(
                                self.agent.base.visual_features,
                                previous_visual_features)
                        previous_visual_features = (
                            self.agent.base.visual_features.detach())

                    # Copy so we don't mess with obs itself
                    draw_obs = OrderedDict()
                    for key, val in obs.items():
                        draw_obs[key] = pt_util.to_numpy(val).copy()
                    best_next_action = draw_obs.pop("best_next_action", None)

                    if prev_action is not None:
                        draw_obs["action_taken"] = pt_util.to_numpy(
                            self.agent.last_dist.probs).copy()
                        draw_obs["action_taken"][:] = 0
                        draw_obs["action_taken"][
                            np.arange(self.shell_args.num_processes),
                            prev_action] = 1
                        draw_obs["action_taken_name"] = SIM_ACTION_TO_NAME[
                            draw_obs['prev_action'].item()]
                        draw_obs["action_prob"] = pt_util.to_numpy(
                            prev_action_probs).copy()
                    else:
                        draw_obs["action_taken"] = None
                        draw_obs["action_taken_name"] = SIM_ACTION_TO_NAME[
                            SimulatorActions.STOP]
                        draw_obs["action_prob"] = None
                    prev_action = action_cpu
                    prev_action_probs = self.agent.last_dist.probs.detach()
                    if hasattr(
                            self.agent.base, "decoder_outputs"
                    ) and self.agent.base.decoder_outputs is not None:
                        min_channel = 0
                        for key, num_channels in self.agent.base.decoder_output_info:
                            outputs = self.agent.base.decoder_outputs[
                                :, min_channel:min_channel + num_channels, ...]
                            draw_obs["output_" + key] = pt_util.to_numpy(
                                outputs).copy()
                            min_channel += num_channels
                    draw_obs["rewards"] = eval_stats["reward"]
                    draw_obs["step"] = current_episode_lengths.copy()
                    draw_obs["method"] = self.shell_args.method_name
                    if best_next_action is not None:
                        draw_obs["best_next_action"] = best_next_action
                    if self.shell_args.use_motion_loss:
                        if egomotion_pred is not None:
                            draw_obs["egomotion_pred"] = pt_util.to_numpy(
                                F.softmax(egomotion_pred, dim=1)).copy()
                        else:
                            draw_obs["egomotion_pred"] = None
                    images, titles, normalize = draw_outputs.obs_to_images(
                        draw_obs)
                    im_inds = [0, 2, 3, 1, 6, 7, 8, 5]
                    height, width = images[0].shape[:2]
                    subplot_image = drawing.subplot(
                        images,
                        2,
                        4,
                        titles=titles,
                        normalize=normalize,
                        output_width=max(width, 320),
                        output_height=max(height, 320),
                        order=im_inds,
                        fancy_text=True,
                    )
                    video_frames.append(subplot_image)

                # Save distances from the previous step, otherwise they are overwritten by the reset observation
                distances = pt_util.to_numpy(obs["goal_geodesic_distance"])

                start_t = time.time()
                obs, rewards, dones, infos = self.envs.step(
                    translated_action_space)
                timers[0] += time.time() - start_t
                obs["prev_action_one_hot"] = obs[
                    "prev_action_one_hot"][:, ACTION_SPACE].to(torch.float32)
                rewards *= REWARD_SCALAR
                rewards = np.clip(rewards, -10, 10)

                if self.shell_args.record_video and not dones[0]:
                    obs["top_down_map"] = infos[0]["top_down_map"]

                if self.compute_surface_normals:
                    obs["surface_normals"] = pt_util.depth_to_surface_normals(
                        obs["depth"].to(self.device))

                current_episode_rewards += pt_util.to_numpy(rewards).squeeze()
                current_episode_lengths += 1
                to_pause = []
                for ii, done_e in enumerate(dones):
                    if done_e:
                        num_episodes += 1

                        if self.shell_args.record_video:
                            if "top_down_map" in infos[ii]:
                                video_dir = os.path.join(
                                    self.shell_args.log_prefix, "videos")
                                if not os.path.exists(video_dir):
                                    os.makedirs(video_dir)
                                im_path = os.path.join(
                                    self.shell_args.log_prefix, "videos",
                                    "total_steps_%d.png" % total_num_steps)
                                top_down_map = maps.colorize_topdown_map(
                                    infos[ii]["top_down_map"]["map"])
                                imageio.imsave(im_path, top_down_map)

                            images_to_video(
                                video_frames,
                                os.path.join(self.shell_args.log_prefix,
                                             "videos"),
                                "total_steps_%d" % total_num_steps,
                            )
                            video_frames = []

                        eval_stats["episode_ids"][ii] = infos[ii]["episode_id"]

                        if self.shell_args.task == "pointnav":
                            print(
                                "FINISHED EPISODE %d Length %d Reward %.3f SPL %.4f"
                                % (
                                    num_episodes,
                                    current_episode_lengths[ii],
                                    current_episode_rewards[ii],
                                    infos[ii]["spl"],
                                ))
                            eval_stats["spl"][ii] = infos[ii]["spl"]
                            eval_stats["success"][
                                ii] = eval_stats["spl"][ii] > 0
                            eval_stats["num_steps"][
                                ii] = current_episode_lengths[ii]
                            eval_stats["end_geodesic_distance"][ii] = (
                                infos[ii]["final_distance"] if
                                eval_stats["success"][ii] else distances[ii])
                            eval_stats["delta_geodesic_distance"][ii] = (
                                eval_stats["start_geodesic_distance"][ii] -
                                eval_stats["end_geodesic_distance"][ii])
                        elif self.shell_args.task == "exploration":
                            print(
                                "FINISHED EPISODE %d Reward %.3f States Visited %d"
                                % (num_episodes, current_episode_rewards[ii],
                                   infos[ii]["visited_states"]))
                            eval_stats["visited_states"][ii] = infos[ii][
                                "visited_states"]
                        elif self.shell_args.task == "flee":
                            print(
                                "FINISHED EPISODE %d Reward %.3f Distance from start %.4f"
                                % (num_episodes, current_episode_rewards[ii],
                                   infos[ii]["distance_from_start"]))
                            eval_stats["distance_from_start"][ii] = infos[ii][
                                "distance_from_start"]

                        eval_stats["num_episodes"][ii] += 1
                        eval_stats["reward"][ii] = current_episode_rewards[ii]

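                        # Only record an episode's stats while the worker is still within its
                        # share of the eval split; once it has finished all of its episodes
                        # it is queued for pausing below.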
                        if eval_stats["num_episodes"][ii] <= dataset_sizes[ii]:
                            progress_bar.update(1)
                            eval_stats_means["num_episodes"] += 1
                            eval_stats_means["reward"] += eval_stats["reward"][
                                ii]
                            if self.shell_args.task == "pointnav":
                                eval_output_file.write(
                                    "%s,%d,%f,%f,%d,%f,%f,%f\n" % (
                                        eval_stats["episode_ids"][ii],
                                        eval_stats["num_steps"][ii],
                                        eval_stats["reward"][ii],
                                        eval_stats["spl"][ii],
                                        eval_stats["success"][ii],
                                        eval_stats["start_geodesic_distance"]
                                        [ii],
                                        eval_stats["end_geodesic_distance"]
                                        [ii],
                                        eval_stats["delta_geodesic_distance"]
                                        [ii],
                                    ))
                                eval_stats_means["num_steps"] += eval_stats[
                                    "num_steps"][ii]
                                eval_stats_means["spl"] += eval_stats["spl"][
                                    ii]
                                eval_stats_means["success"] += eval_stats[
                                    "success"][ii]
                                eval_stats_means[
                                    "start_geodesic_distance"] += eval_stats[
                                        "start_geodesic_distance"][ii]
                                eval_stats_means[
                                    "end_geodesic_distance"] += eval_stats[
                                        "end_geodesic_distance"][ii]
                                eval_stats_means[
                                    "delta_geodesic_distance"] += eval_stats[
                                        "delta_geodesic_distance"][ii]
                            elif self.shell_args.task == "exploration":
                                eval_output_file.write("%s,%f,%d\n" % (
                                    eval_stats["episode_ids"][ii],
                                    eval_stats["reward"][ii],
                                    eval_stats["visited_states"][ii],
                                ))
                                eval_stats_means[
                                    "visited_states"] += eval_stats[
                                        "visited_states"][ii]
                            elif self.shell_args.task == "flee":
                                eval_output_file.write("%s,%f,%f\n" % (
                                    eval_stats["episode_ids"][ii],
                                    eval_stats["reward"][ii],
                                    eval_stats["distance_from_start"][ii],
                                ))
                                eval_stats_means[
                                    "distance_from_start"] += eval_stats[
                                        "distance_from_start"][ii]
                            eval_output_file.flush()
                            if eval_stats["num_episodes"][ii] == dataset_sizes[
                                    ii]:
                                to_pause.append(ii)

                        episode_rewards.append(current_episode_rewards[ii])
                        current_episode_rewards[ii] = 0
                        episode_lengths.append(current_episode_lengths[ii])
                        current_episode_lengths[ii] = 0
                        eval_stats["start_geodesic_distance"][ii] = obs[
                            "goal_geodesic_distance"][ii]

                # If done then clean the history of observations.
                masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                           for done_ in dones]).to(self.device)

                # Remove indices from highest to lowest so earlier indices stay valid when multiple envs pause at once.
                to_pause.reverse()
                for ii in to_pause:
                    # Pause the environments that are done from the vectorenv.
                    print("Pausing env", ii)
                    self.envs.unwrapped.pause_at(ii)
                    current_episode_rewards = np.concatenate(
                        (current_episode_rewards[:ii],
                         current_episode_rewards[ii + 1:]))
                    current_episode_lengths = np.concatenate(
                        (current_episode_lengths[:ii],
                         current_episode_lengths[ii + 1:]))
                    for key in eval_stats:
                        eval_stats[key] = np.concatenate(
                            (eval_stats[key][:ii], eval_stats[key][ii + 1:]))
                    dataset_sizes = np.concatenate(
                        (dataset_sizes[:ii], dataset_sizes[ii + 1:]))

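                    # Also drop the paused worker's slot from the observation tensors and
                    # recurrent state so indices stay aligned with the envs still running.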
                    for key in obs:
                        if type(obs[key]) == torch.Tensor:
                            obs[key] = torch.cat(
                                (obs[key][:ii], obs[key][ii + 1:]), dim=0)
                        else:
                            obs[key] = np.concatenate(
                                (obs[key][:ii], obs[key][ii + 1:]), axis=0)

                    recurrent_hidden_states = torch.cat(
                        (recurrent_hidden_states[:ii],
                         recurrent_hidden_states[ii + 1:]),
                        dim=0)
                    masks = torch.cat((masks[:ii], masks[ii + 1:]), dim=0)

                if len(dataset_sizes) == 0:
                    progress_bar.close()
                    all_done = True

            total_num_steps += self.shell_args.num_processes

            if iter_count % (self.shell_args.log_interval * 100) == 0:
                log_dict = {}
                if len(episode_rewards) > 1:
                    end = time.time()
                    nsteps = total_num_steps - fps_timer[1]
                    fps = int((total_num_steps - fps_timer[1]) /
                              (end - fps_timer[0]))
                    timers /= nsteps
                    env_spf = timers[0]
                    forward_spf = timers[1]
                    print((
                        "{} Updates {}, num timesteps {}, FPS {}, Env FPS {}, "
                        "\n Last {} training episodes: mean/median reward {:.3f}/{:.3f}, "
                        "min/max reward {:.3f}/{:.3f}\n").format(
                            datetime.datetime.now(),
                            iter_count,
                            total_num_steps,
                            fps,
                            int(1.0 / env_spf),
                            len(episode_rewards),
                            np.mean(episode_rewards),
                            np.median(episode_rewards),
                            np.min(episode_rewards),
                            np.max(episode_rewards),
                        ))

                    if self.shell_args.tensorboard:
                        log_dict.update({
                            "stats/full_spf":
                            1.0 / (fps + 1e-10),
                            "stats/env_spf":
                            env_spf,
                            "stats/forward_spf":
                            forward_spf,
                            "stats/full_fps":
                            fps,
                            "stats/env_fps":
                            1.0 / (env_spf + 1e-10),
                            "stats/forward_fps":
                            1.0 / (forward_spf + 1e-10),
                            "episode/mean_rewards":
                            np.mean(episode_rewards),
                            "episode/median_rewards":
                            np.median(episode_rewards),
                            "episode/min_rewards":
                            np.min(episode_rewards),
                            "episode/max_rewards":
                            np.max(episode_rewards),
                            "episode/mean_lengths":
                            np.mean(episode_lengths),
                            "episode/median_lengths":
                            np.median(episode_lengths),
                            "episode/min_lengths":
                            np.min(episode_lengths),
                            "episode/max_lengths":
                            np.max(episode_lengths),
                        })
                        self.eval_logger.dict_log(log_dict, step=self.log_iter)
                    fps_timer[0] = time.time()
                    fps_timer[1] = total_num_steps
                    timers[:] = 0
            iter_count += 1
        print("Finished testing")
        print("Wrote results to", eval_output_file.name)

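        # Turn the accumulated sums into per-episode means
        # (note: "num_episodes" is divided by itself and ends up as 1).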
        eval_stats_means = {
            key: val / eval_stats_means["num_episodes"]
            for key, val in eval_stats_means.items()
        }
        if self.shell_args.tensorboard:
            log_dict = {"single_episode/reward": eval_stats_means["reward"]}
            if self.shell_args.task == "pointnav":
                log_dict.update({
                    "single_episode/num_steps":
                    eval_stats_means["num_steps"],
                    "single_episode/spl":
                    eval_stats_means["spl"],
                    "single_episode/success":
                    eval_stats_means["success"],
                    "single_episode/start_geodesic_distance":
                    eval_stats_means["start_geodesic_distance"],
                    "single_episode/end_geodesic_distance":
                    eval_stats_means["end_geodesic_distance"],
                    "single_episode/delta_geodesic_distance":
                    eval_stats_means["delta_geodesic_distance"],
                })
            elif self.shell_args.task == "exploration":
                log_dict["single_episode/visited_states"] = eval_stats_means[
                    "visited_states"]
            elif self.shell_args.task == "flee":
                log_dict["single_episode/distance_from_start"] = (
                    eval_stats_means["distance_from_start"])
            self.eval_logger.dict_log(log_dict, step=self.log_iter)
        self.envs.unwrapped.resume_all()