コード例 #1
0
def get_eval_dataset(shell_args, data_subset="val"):
    """Build the evaluation dataset selected by ``shell_args.dataset``.

    Args:
        shell_args: Object exposing ``dataset`` (``"mp3d"`` or ``"gibson"``)
            and ``max_episode_length``.
        data_subset: Split name forwarded to ``get_dataset_config``.

    Returns:
        The object returned by ``make_dataset`` for the resolved config.

    Raises:
        NotImplementedError: If ``shell_args.dataset`` is neither ``"mp3d"``
            nor ``"gibson"``.
        AssertionError: If the created dataset has no episodes.
    """
    dataset_name = shell_args.dataset
    # Reject unsupported names up front so the happy path below stays flat.
    if dataset_name != "mp3d" and dataset_name != "gibson":
        raise NotImplementedError("No rule for this dataset.")

    data_path = (
        "data/datasets/pointnav/mp3d/v1/{split}/{split}.json.gz"
        if dataset_name == "mp3d"
        else "data/datasets/pointnav/gibson/v1/{split}/{split}.json.gz"
    )

    # GPU id 0 and no extra task/agent sensors are requested for evaluation.
    eval_config = get_dataset_config(
        data_path,
        data_subset,
        shell_args.max_episode_length,
        0,
        [],
        [],
    )
    eval_dataset = make_dataset(
        eval_config.DATASET.TYPE, config=eval_config.DATASET
    )

    assert len(eval_dataset.episodes) > 0, "empty datasets"
    return eval_dataset
コード例 #2
0
    def setup(self, create_decoder):
        """Build per-process configs, the policy network and its optimizer.

        Reads its settings from ``self.shell_args`` and assigns
        ``self.configs``, ``self.env_types``, ``self.shell_args.cuda``,
        ``self.gym_action_space``, ``self.agent``, ``self.time_str``,
        ``self.optimizer``, ``self.observation_space``,
        ``self.compute_surface_normals``, ``self.start_iter`` and
        ``self.checkpoint_dir``. A random dummy batch is pushed through the
        agent before returning.

        Args:
            create_decoder: Forwarded unchanged to the network base as its
                ``create_decoder`` keyword argument.

        Raises:
            NotImplementedError: If the dataset, task, encoder network type
                or algorithm named in ``self.shell_args`` is not handled.
        """
        # NOTE(review): self.device and self.torch_devices are read below and
        # are presumably assigned by setup_device() -- confirm.
        self.setup_device()
        render_gpus = [
            int(gpu_id.strip())
            for gpu_id in self.shell_args.render_gpu_ids.split(",")
        ]
        self.configs = []
        self.env_types = []
        for proc in range(self.shell_args.num_processes):
            extra_task_sensors = set()

            extra_agent_sensors = set()
            if self.shell_args.record_video or self.shell_args.update_encoder_features:
                extra_agent_sensors.add("DEPTH_SENSOR")

            if "SEMANTIC_SENSOR" in extra_agent_sensors:
                # Sets have no ``append`` method; ``add`` is the set equivalent.
                extra_task_sensors.add("CLASS_SEGMENTATION_SENSOR")

            if self.shell_args.dataset == "suncg":
                data_path = "data/datasets/pointnav/suncg/v1/{split}/{split}.json.gz"
            elif self.shell_args.dataset == "mp3d":
                data_path = "data/datasets/pointnav/mp3d/v1/{split}/{split}.json.gz"
            elif self.shell_args.dataset == "gibson":
                data_path = "data/datasets/pointnav/gibson/v1/{split}/{split}.json.gz"
            else:
                raise NotImplementedError("No rule for this dataset.")

            config = get_dataset_config(
                data_path,
                self.shell_args.data_subset,
                self.shell_args.max_episode_length,
                # Assign render GPUs to processes round-robin.
                render_gpus[proc % len(render_gpus)],
                list(extra_task_sensors),
                list(extra_agent_sensors),
            )
            config.TASK.NUM_EPISODES_BEFORE_JUMP = self.shell_args.num_processes

            if self.shell_args.blind and not self.shell_args.record_video:
                # A blind agent that is not recording video gets a 2x2 RGB
                # sensor (presumably to keep rendering cheap -- confirm).
                config.SIMULATOR.RGB_SENSOR.HEIGHT = 2
                config.SIMULATOR.RGB_SENSOR.WIDTH = 2

            if self.shell_args.task == "pointnav":
                config.TASK.SUCCESS_REWARD = 2
                config.TASK.SUCCESS_DISTANCE = 0.2
                config.TASK.COLLISION_REWARD = 0
                config.TASK.ENABLE_STOP_ACTION = False
                self.env_types.append(PointnavRLEnv)
            elif self.shell_args.task == "exploration":
                config.TASK.GRID_SIZE = 1
                assert config.TASK.GRID_SIZE >= config.SIMULATOR.FORWARD_STEP_SIZE
                config.TASK.NEW_GRID_CELL_REWARD = 0.1
                config.TASK.COLLISION_REWARD = 0  # -0.1
                config.TASK.RETURN_VISITED_GRID = self.shell_args.record_video
                config.ENVIRONMENT.MAX_EPISODE_STEPS = 250
                config.TASK.TOP_DOWN_MAP.DRAW_SOURCE_AND_TARGET = False
                self.env_types.append(ExplorationRLEnv)
                # The same value is used for every dataset.
                config.TASK.NUM_EPISODES_BEFORE_JUMP = 5
            elif self.shell_args.task == "flee":
                config.TASK.COLLISION_REWARD = 0  # -0.1
                config.ENVIRONMENT.MAX_EPISODE_STEPS = 250
                config.TASK.TOP_DOWN_MAP.DRAW_SOURCE_AND_TARGET = False
                self.env_types.append(RunAwayRLEnv)
                # The same value is used for every dataset.
                config.TASK.NUM_EPISODES_BEFORE_JUMP = 5
            else:
                raise NotImplementedError("Unknown task type")

            if self.shell_args.record_video:
                # Video recording overrides the task-specific jump setting.
                config.TASK.NUM_EPISODES_BEFORE_JUMP = -1
                config.TASK.STEP_SIZE = config.SIMULATOR.FORWARD_STEP_SIZE
                config.TASK.TOP_DOWN_MAP.MAX_EPISODE_STEPS = config.ENVIRONMENT.MAX_EPISODE_STEPS
                config.TASK.TOP_DOWN_MAP.MAP_RESOLUTION = 1250

            config.TASK.OBSERVE_BEST_NEXT_ACTION = self.shell_args.algo == "supervised"

            self.configs.append(config)
        if self.shell_args.debug:
            print("Config\n", self.configs[0])

        self.shell_args.cuda = (
            not self.shell_args.no_cuda and torch.cuda.is_available()
        )

        if self.shell_args.blind:
            decoder_output_info = []
        else:
            decoder_output_info = [("reconstruction", 3), ("depth", 1),
                                   ("surface_normals", 3)]

        if self.shell_args.encoder_network_type == "ShallowVisualEncoder":
            encoder_type = networks.ShallowVisualEncoder
        elif self.shell_args.encoder_network_type == "ResNetEncoder":
            encoder_type = networks.ResNetEncoder
        else:
            raise NotImplementedError("Unknown network type.")

        self.gym_action_space = gym.spaces.discrete.Discrete(len(ACTION_SPACE))
        target_vector_size = None
        if self.shell_args.task == "pointnav":
            target_vector_size = 2
        elif self.shell_args.task in ("exploration", "flee"):
            target_vector_size = 0
        self.agent = VisualPolicy(
            self.gym_action_space,
            base=networks.RLBaseWithVisualEncoder,
            base_kwargs=dict(
                encoder_type=encoder_type,
                decoder_output_info=decoder_output_info,
                recurrent=True,
                end_to_end=self.shell_args.end_to_end,
                hidden_size=256,
                target_vector_size=target_vector_size,
                action_size=len(ACTION_SPACE),
                gpu_ids=self.torch_devices,
                create_decoder=create_decoder,
                blind=self.shell_args.blind,
            ),
        )

        if self.shell_args.debug:
            print("actor critic", self.agent)
        self.agent.to(self.device)
        self.time_str = misc_util.get_time_str()

        # Freeze the parameter groups selected by the freeze_* flags.
        visual_layers = self.agent.base.visual_encoder.module
        if self.shell_args.freeze_encoder_features:
            # Not necessary, but probably lets pytorch be more space efficient.
            for param in visual_layers.encoder.parameters():
                param.requires_grad = False

        if self.shell_args.freeze_visual_decoder_features:
            # These sub-modules are optional on the encoder, hence hasattr.
            if hasattr(visual_layers, "bridge"):
                for param in visual_layers.bridge.parameters():
                    param.requires_grad = False
            if hasattr(visual_layers, "decoder"):
                for param in visual_layers.decoder.parameters():
                    param.requires_grad = False
            if hasattr(visual_layers, "out"):
                for param in visual_layers.out.parameters():
                    param.requires_grad = False
            if hasattr(visual_layers, "class_pred_layer"):
                if visual_layers.class_pred_layer is not None:
                    for param in visual_layers.class_pred_layer.parameters():
                        param.requires_grad = False

        # The visual projection is frozen only when both the motion and the
        # policy decoders are frozen.
        if self.shell_args.freeze_motion_decoder_features and self.shell_args.freeze_policy_decoder_features:
            for param in self.agent.base.visual_projection.parameters():
                param.requires_grad = False

        if self.shell_args.freeze_motion_decoder_features:
            for param in self.agent.base.egomotion_layer.parameters():
                param.requires_grad = False
            for param in self.agent.base.motion_model_layer.parameters():
                param.requires_grad = False

        if self.shell_args.freeze_policy_decoder_features:
            for param in self.agent.base.gru.parameters():
                param.requires_grad = False
            for param in self.agent.base.rl_layers.parameters():
                param.requires_grad = False
            for param in self.agent.base.critic_linear.parameters():
                param.requires_grad = False
            for param in self.agent.dist.parameters():
                param.requires_grad = False

        if self.shell_args.algo == "ppo":
            self.optimizer = optimizers.VisualPPO(
                self.agent,
                self.shell_args.clip_param,
                self.shell_args.ppo_epoch,
                self.shell_args.num_mini_batch,
                self.shell_args.value_loss_coef,
                self.shell_args.entropy_coef,
                lr=self.shell_args.lr,
                eps=self.shell_args.eps,
                max_grad_norm=self.shell_args.max_grad_norm,
            )
        elif self.shell_args.algo == "supervised":
            self.optimizer = optimizers.BehavioralCloningOptimizer(
                self.agent,
                self.shell_args.clip_param,
                self.shell_args.ppo_epoch,
                self.shell_args.num_mini_batch,
                self.shell_args.value_loss_coef,
                self.shell_args.entropy_coef,
                lr=self.shell_args.lr,
                eps=self.shell_args.eps,
            )
        else:
            raise NotImplementedError("No such algorithm")

        # Observation space entries are (shape, dtype) pairs keyed by name.
        height = self.configs[0].SIMULATOR.RGB_SENSOR.HEIGHT
        width = self.configs[0].SIMULATOR.RGB_SENSOR.WIDTH
        self.observation_space = {
            "pointgoal": ((2, ), np.dtype(np.float32)),
            "prev_action_one_hot":
            ((len(ACTION_SPACE), ), np.dtype(np.float32)),
        }
        self.compute_surface_normals = self.shell_args.record_video or self.shell_args.update_encoder_features
        if self.shell_args.algo == "supervised":
            self.observation_space["best_next_action"] = ((
                len(ACTION_SPACE), ), np.dtype(np.float32))
        if self.shell_args.update_encoder_features:
            self.observation_space["depth"] = ((1, height, width),
                                               np.dtype(np.float32))
            if self.compute_surface_normals:
                self.observation_space["surface_normals"] = (
                    (3, height, width),
                    np.dtype(np.float32),
                )
        if not self.shell_args.end_to_end:
            self.observation_space["visual_encoder_features"] = (
                (self.agent.base.num_output_channels, 256 // 2**5,
                 256 // 2**5),
                np.dtype(np.float32),
            )

        # Send dummy batch through to allocate memory before vecenv
        print("Feeding dummy batch")
        dummy_start = time.time()

        num_processes = self.shell_args.num_processes
        self.agent.act(
            {
                "images":
                torch.rand((num_processes, 3, height,
                            width)).to(self.device),
                "target_vector":
                torch.rand(num_processes,
                           target_vector_size).to(self.device),
                "prev_action_one_hot":
                torch.rand(num_processes,
                           self.gym_action_space.n).to(self.device),
            },
            torch.rand(num_processes,
                       self.agent.recurrent_hidden_state_size).to(self.device),
            torch.rand(num_processes, 1).to(self.device),
        )
        print("Done feeding dummy batch %.3f" % (time.time() - dummy_start))
        self.start_iter = 0
        self.checkpoint_dir = os.path.join(self.shell_args.log_prefix,
                                           self.shell_args.checkpoint_dirname,
                                           self.time_str)
コード例 #3
0
ファイル: flee_test.py プロジェクト: CAVED123/SPLITNET
        top_down_map,
        map_agent_pos,
        heading - np.pi / 2,
        agent_radius_px=top_down_map.shape[0] / 40,
    )
    return top_down_map


# Only the MP3D and Gibson PointNav episode files are handled by this script.
if dataset not in ("mp3d", "gibson"):
    raise NotImplementedError("No rule for this dataset.")
data_path = (
    "data/datasets/pointnav/mp3d/v1/{split}/{split}.json.gz"
    if dataset == "mp3d"
    else "data/datasets/pointnav/gibson/v1/{split}/{split}.json.gz"
)

# GPU id 0 and no extra task/agent sensors are requested.
config = get_dataset_config(
    data_path,
    data_subset,
    max_episode_length,
    0,
    [],
    [],
)

# The config is unlocked for the overrides below and re-frozen afterwards.
config.defrost()

# Every agent sensor is rendered at a square RESOLUTION x RESOLUTION size.
for sensor in config.SIMULATOR.AGENT_0.SENSORS:
    config.SIMULATOR[sensor].HEIGHT = config.SIMULATOR[sensor].WIDTH = RESOLUTION

# Environment-level override.
config.ENVIRONMENT.MAX_EPISODE_STEPS = 250

# Task-level overrides.
config.TASK.COLLISION_REWARD = 0  # -0.1
config.TASK.NUM_EPISODES_BEFORE_JUMP = -1
config.TASK.GRID_SIZE = 1
config.TASK.NEW_GRID_CELL_REWARD = 1
config.TASK.RETURN_VISITED_GRID = False
config.TASK.TOP_DOWN_MAP.DRAW_SOURCE_AND_TARGET = False

config.freeze()

# NOTE: this rebinds ``dataset`` from the dataset name compared above to the
# object returned by make_dataset.
dataset = make_dataset(config.DATASET.TYPE, config=config.DATASET)