def _setup_actor_critic_agent(
    self, config: Config, load_from_ckpt: bool, ckpt_path: str
) -> None:
    r"""Sets up actor critic and agent.

    Args:
        config: MODEL config
        load_from_ckpt: whether to load model weights from a checkpoint
        ckpt_path: path to the checkpoint to load, used when
            load_from_ckpt is True

    Returns:
        None
    """
    config.defrost()
    config.TORCH_GPU_ID = self.config.TORCH_GPU_ID
    config.freeze()

    if config.CMA.use:
        self.actor_critic = CMAPolicy(
            observation_space=self.envs.observation_spaces[0],
            action_space=self.envs.action_spaces[0],
            model_config=config,
        )
    else:
        self.actor_critic = Seq2SeqPolicy(
            observation_space=self.envs.observation_spaces[0],
            action_space=self.envs.action_spaces[0],
            model_config=config,
        )
    self.actor_critic.to(self.device)

    self.optimizer = torch.optim.Adam(
        self.actor_critic.parameters(), lr=self.config.DAGGER.LR
    )
    if load_from_ckpt:
        ckpt_dict = self.load_checkpoint(ckpt_path, map_location="cpu")
        self.actor_critic.load_state_dict(ckpt_dict["state_dict"])
        logger.info(f"Loaded weights from checkpoint: {ckpt_path}")
    logger.info("Finished setting up actor critic model.")
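# ---------------------------------------------------------------------------
# Hedged sketch (not part of the source): the checkpoint restore above is the
# standard torch pattern of loading a state dict saved under a "state_dict"
# key. The model and file name below are made up for illustration only.
# ---------------------------------------------------------------------------
import torch
import torch.nn as nn

def _demo_checkpoint_roundtrip() -> None:
    model = nn.Linear(4, 2)  # stand-in for the actor-critic policy
    torch.save({"state_dict": model.state_dict()}, "demo_ckpt.pth")

    restored = nn.Linear(4, 2)
    # map_location="cpu" keeps the load device-agnostic, as in the trainer
    ckpt_dict = torch.load("demo_ckpt.pth", map_location="cpu")
    restored.load_state_dict(ckpt_dict["state_dict"])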
def construct_envs(config: Config, training: bool) -> VectorEnv:
    r"""Create VectorEnv object with specified config.
    To allow better performance, the dataset is split into smaller subsets,
    one per individual env, grouped by scenes.

    Args:
        config: configs that contain num_processes as well as information
            necessary to create individual environments.
        training: whether the envs are constructed for training or evaluation.

    Returns:
        VectorEnv object created according to specification.
    """
    num_processes = config.NUM_PROCESSES
    dataset = make_dataset(config.TASK_CONFIG.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(config.TASK_CONFIG.DATASET)

    if len(scenes) > 0:
        random.shuffle(scenes)

        assert len(scenes) >= num_processes, (
            "reduce the number of processes as there "
            "aren't enough number of scenes"
        )

    # Split scenes round-robin so each env gets a roughly equal share.
    scene_splits = [[] for _ in range(num_processes)]
    for idx, scene in enumerate(scenes):
        scene_splits[idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    task = "habitat_train_task" if training else "habitat_eval_task"
    max_duration = gin.query_parameter(f"{task}.max_length")
    wrappers = [
        w.scoped_configurable_fn()
        for w in gin.query_parameter(f"{task}.wrappers")
    ]
    kwargs = get_config(training=training, max_steps=max_duration * 3)
    kwargs["max_duration"] = max_duration
    kwargs["action_repeat"] = 1
    kwargs["wrappers"] = [
        (wrapper, kwarg_fn(kwargs)) for wrapper, kwarg_fn in wrappers
    ]

    env_kwargs = []
    for split in scene_splits:
        kw = kwargs.copy()
        env_config = kw["config"].clone()
        if len(split) > 0:
            env_config.defrost()
            env_config.DATASET.CONTENT_SCENES = split
            env_config.freeze()
        kw["config"] = env_config
        env_kwargs.append(kw)

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(zip(env_kwargs, range(num_processes))),
    )
    return envs
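# ---------------------------------------------------------------------------
# Hedged sketch (not part of the source): how gin.query_parameter reads the
# task settings queried above. The binding name mirrors the one used in
# construct_envs; requires the gin-config package.
# ---------------------------------------------------------------------------
import gin

@gin.configurable
def habitat_train_task(max_length=100, wrappers=()):
    return max_length, wrappers

# Bind a value in config, then query it the way construct_envs does.
gin.parse_config("habitat_train_task.max_length = 500")
assert gin.query_parameter("habitat_train_task.max_length") == 500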
def construct_envs(
    config: Config, env_class: Type[Union[Env, RLEnv]]
) -> VectorEnv:
    r"""Create VectorEnv object with specified config and env class type.
    To allow better performance, the dataset is split into smaller subsets,
    one per individual env, grouped by scenes.

    Args:
        config: configs that contain num_processes as well as information
            necessary to create individual environments.
        env_class: class type of the envs to be created.

    Returns:
        VectorEnv object created according to specification.
    """
    num_processes = config.NUM_PROCESSES
    configs = []
    env_classes = [env_class for _ in range(num_processes)]
    dataset = make_dataset(config.TASK_CONFIG.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(config.TASK_CONFIG.DATASET)

    if len(scenes) > 0:
        random.shuffle(scenes)

        assert len(scenes) >= num_processes, (
            "reduce the number of processes as there "
            "aren't enough number of scenes"
        )

    scene_splits = [[] for _ in range(num_processes)]
    for idx, scene in enumerate(scenes):
        scene_splits[idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    for i in range(num_processes):
        task_config = config.TASK_CONFIG.clone()
        task_config.defrost()
        if len(scenes) > 0:
            task_config.DATASET.CONTENT_SCENES = scene_splits[i]

        task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = (
            config.SIMULATOR_GPU_ID
        )
        task_config.SIMULATOR.AGENT_0.SENSORS = config.SENSORS
        task_config.freeze()

        config.defrost()
        config.TASK_CONFIG = task_config
        config.freeze()
        configs.append(config.clone())

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(zip(configs, env_classes, range(num_processes))),
    )
    return envs
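# ---------------------------------------------------------------------------
# Hedged sketch (not part of the source): the round-robin scene split used by
# every construct_envs variant in this section, pulled out as a standalone
# helper so its behavior is easy to verify in isolation.
# ---------------------------------------------------------------------------
from typing import List

def round_robin_split(scenes: List[str], num_splits: int) -> List[List[str]]:
    """Deal scenes one at a time across num_splits buckets."""
    splits: List[List[str]] = [[] for _ in range(num_splits)]
    for idx, scene in enumerate(scenes):
        splits[idx % num_splits].append(scene)
    assert sum(map(len, splits)) == len(scenes)
    return splits

# e.g. round_robin_split(["a", "b", "c", "d", "e"], 2)
# -> [['a', 'c', 'e'], ['b', 'd']]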
def __init__(
    self, observation_space: Space, num_actions: int, model_config: Config
):
    super().__init__()
    self.model_config = model_config
    model_config.defrost()
    model_config.INSTRUCTION_ENCODER.final_state_only = False
    model_config.freeze()

    # Init the instruction encoder
    self.instruction_encoder = InstructionEncoder(
        model_config.INSTRUCTION_ENCODER
    )

    # Init the depth encoder
    assert model_config.DEPTH_ENCODER.cnn_type in [
        "VlnResnetDepthEncoder"
    ], "DEPTH_ENCODER.cnn_type must be VlnResnetDepthEncoder"
    self.depth_encoder = VlnResnetDepthEncoder(
        observation_space,
        output_size=model_config.DEPTH_ENCODER.output_size,
        checkpoint=model_config.DEPTH_ENCODER.ddppo_checkpoint,
        backbone=model_config.DEPTH_ENCODER.backbone,
        spatial_output=True,
    )

    # Init the RGB encoder
    assert model_config.RGB_ENCODER.cnn_type in [
        "TorchVisionResNet50"
    ], "RGB_ENCODER.cnn_type must be TorchVisionResNet50"
    device = (
        torch.device("cuda", model_config.TORCH_GPU_ID)
        if torch.cuda.is_available()
        else torch.device("cpu")
    )
    self.rgb_encoder = TorchVisionResNet50(
        observation_space,
        model_config.RGB_ENCODER.output_size,
        model_config.RGB_ENCODER.resnet_output_size,
        device,
        spatial_output=True,
    )

    if model_config.CMA.use_prev_action:
        self.prev_action_embedding = nn.Embedding(num_actions + 1, 32)

    self.rcm_state_encoder = model_config.CMA.rcm_state_encoder
    hidden_size = model_config.STATE_ENCODER.hidden_size
    self._hidden_size = hidden_size

    if self.rcm_state_encoder:
        self.state_encoder = RCMStateEncoder(
            self.rgb_encoder.output_shape[0],
            self.depth_encoder.output_shape[0],
            model_config.STATE_ENCODER.hidden_size,
            self.prev_action_embedding.embedding_dim,
        )
    else:
        self.rgb_linear = nn.Sequential(
            nn.AdaptiveAvgPool1d(1),
            nn.Flatten(),
            nn.Linear(
                self.rgb_encoder.output_shape[0],
                model_config.RGB_ENCODER.output_size,
            ),
            nn.ReLU(True),
        )
        self.depth_linear = nn.Sequential(
            nn.Flatten(),
            nn.Linear(
                np.prod(self.depth_encoder.output_shape),
                model_config.DEPTH_ENCODER.output_size,
            ),
            nn.ReLU(True),
        )

        # Init the RNN state decoder
        rnn_input_size = model_config.DEPTH_ENCODER.output_size
        rnn_input_size += model_config.RGB_ENCODER.output_size
        if model_config.CMA.use_prev_action:
            rnn_input_size += self.prev_action_embedding.embedding_dim

        self.state_encoder = RNNStateEncoder(
            input_size=rnn_input_size,
            hidden_size=model_config.STATE_ENCODER.hidden_size,
            num_layers=1,
            rnn_type=model_config.STATE_ENCODER.rnn_type,
        )

    self._output_size = (
        model_config.STATE_ENCODER.hidden_size
        + model_config.RGB_ENCODER.output_size
        + model_config.DEPTH_ENCODER.output_size
        + self.instruction_encoder.output_size
    )

    self.rgb_kv = nn.Conv1d(
        self.rgb_encoder.output_shape[0],
        hidden_size // 2 + model_config.RGB_ENCODER.output_size,
        1,
    )
    self.depth_kv = nn.Conv1d(
        self.depth_encoder.output_shape[0],
        hidden_size // 2 + model_config.DEPTH_ENCODER.output_size,
        1,
    )

    self.state_q = nn.Linear(hidden_size, hidden_size // 2)
    self.text_k = nn.Conv1d(
        self.instruction_encoder.output_size, hidden_size // 2, 1
    )
    self.text_q = nn.Linear(
        self.instruction_encoder.output_size, hidden_size // 2
    )

    self.register_buffer(
        "_scale", torch.tensor(1.0 / ((hidden_size // 2) ** 0.5))
    )

    if model_config.CMA.use_prev_action:
        self.second_state_compress = nn.Sequential(
            nn.Linear(
                self._output_size + self.prev_action_embedding.embedding_dim,
                self._hidden_size,
            ),
            nn.ReLU(True),
        )
    else:
        self.second_state_compress = nn.Sequential(
            nn.Linear(self._output_size, self._hidden_size),
            nn.ReLU(True),
        )
    self.second_state_encoder = RNNStateEncoder(
        input_size=self._hidden_size,
        hidden_size=self._hidden_size,
        num_layers=1,
        rnn_type=model_config.STATE_ENCODER.rnn_type,
    )
    self._output_size = model_config.STATE_ENCODER.hidden_size

    self.progress_monitor = nn.Linear(self.output_size, 1)

    self.linear = nn.Linear(
        self.model_config.STATE_ENCODER.hidden_size, num_actions
    )
    self.stop_linear = nn.Linear(
        self.model_config.STATE_ENCODER.hidden_size, 1
    )

    self._init_layers()
    self.train()
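# ---------------------------------------------------------------------------
# Hedged sketch (not part of the source): the state_q / text_k / _scale
# modules above set up scaled dot-product attention over instruction
# features. A minimal, self-contained version of that computation, with
# made-up tensor sizes and without the padding mask the full model would use:
# ---------------------------------------------------------------------------
import torch
import torch.nn.functional as F

def attend_text(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
                scale: float) -> torch.Tensor:
    # q: [batch, d]; k, v: [batch, d, seq_len] (Conv1d-style layout)
    logits = torch.einsum("bd,bds->bs", q, k) * scale
    weights = F.softmax(logits, dim=1)
    # weighted sum over the sequence dimension -> [batch, d]
    return torch.einsum("bs,bds->bd", weights, v)

batch, d, seq_len = 2, 128, 16
scale = 1.0 / (d ** 0.5)  # matches the registered _scale buffer
out = attend_text(
    torch.randn(batch, d),
    torch.randn(batch, d, seq_len),
    torch.randn(batch, d, seq_len),
    scale,
)
assert out.shape == (batch, d)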
def construct_envs(
    config: Config, env_class: Type[Union[Env, RLEnv]]
) -> VectorEnv:
    r"""Create VectorEnv object with specified config and env class type.
    To allow better performance, the dataset is split into smaller subsets,
    one per individual env, grouped by scenes.

    Args:
        config: configs that contain num_processes as well as information
            necessary to create individual environments.
        env_class: class type of the envs to be created.

    Returns:
        VectorEnv object created according to specification.
    """
    num_processes = config.NUM_PROCESSES
    configs = []
    env_classes = [env_class for _ in range(num_processes)]
    dataset = make_dataset(config.TASK_CONFIG.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(config.TASK_CONFIG.DATASET)

    # Rearrange scenes in the order given by SCENES (by scene size), since
    # there is a severe imbalance of data size across scenes.
    if "replica" in config.TASK_CONFIG.DATASET.SCENES_DIR:
        scenes_new = list()
        for scene in SCENES:
            if scene in scenes:
                scenes_new.append(scene)
        scenes = scenes_new

    if len(scenes) > 0:
        # Scenes are deliberately not shuffled here, so the size-based
        # ordering above is preserved.
        assert len(scenes) >= num_processes, (
            "reduce the number of processes as there "
            "aren't enough number of scenes"
        )

    scene_splits = [[] for _ in range(num_processes)]
    for idx, scene in enumerate(scenes):
        scene_splits[idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    for i in range(num_processes):
        task_config = config.TASK_CONFIG.clone()
        task_config.defrost()
        if len(scenes) > 0:
            task_config.DATASET.CONTENT_SCENES = scene_splits[i]
            logging.debug("All scenes: {}".format(",".join(scene_splits[i])))

        # Overwrite the task config with values from the top-level config.
        task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = (
            config.SIMULATOR_GPU_ID
        )
        task_config.SIMULATOR.AGENT_0.SENSORS = config.SENSORS
        task_config.freeze()

        config.defrost()
        config.TASK_CONFIG = task_config
        config.freeze()
        configs.append(config.clone())

    # Use VectorEnv for the best performance; SyncVectorEnv and
    # ThreadedVectorEnv are easier to debug.
    if config.USE_SYNC_VECENV:
        env_launcher = SyncVectorEnv
        logging.info("Using SyncVectorEnv")
    elif config.USE_VECENV:
        env_launcher = habitat.VectorEnv
        logging.info("Using VectorEnv")
    else:
        env_launcher = habitat.ThreadedVectorEnv
        logging.info("Using ThreadedVectorEnv")

    envs = env_launcher(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(zip(configs, env_classes, range(num_processes))),
    )
    return envs
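# ---------------------------------------------------------------------------
# Hedged sketch (not part of the source): the Replica branch above filters a
# discovered scene list down to a fixed, size-ordered SCENES list. The scene
# names below are placeholders, not the real ordering.
# ---------------------------------------------------------------------------
from typing import List

CANONICAL_ORDER = ["large_scene", "medium_scene", "small_scene"]  # placeholder

def reorder_scenes(found: List[str],
                   canonical: List[str] = CANONICAL_ORDER) -> List[str]:
    # Keep only scenes present in both lists, in canonical (size) order,
    # mirroring the loop over SCENES in construct_envs.
    return [scene for scene in canonical if scene in found]

# e.g. reorder_scenes(["small_scene", "large_scene"])
# -> ['large_scene', 'small_scene']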