def _setup_actor_critic_agent(
    self,
    config: Config,
    load_from_ckpt: bool,
    ckpt_path: str,
) -> None:
    r"""Build the policy and its optimizer, optionally restoring weights.

    Args:
        config: MODEL config. It is modified in place: ``TORCH_GPU_ID`` is
            overwritten with ``self.config.TORCH_GPU_ID`` and the config is
            left frozen.
        load_from_ckpt: when true, the policy weights are loaded from
            ``ckpt_path``.
        ckpt_path: checkpoint location; only read when ``load_from_ckpt``
            is true.

    Returns:
        None
    """
    # Copy the trainer-level GPU id into the model config.
    config.defrost()
    config.TORCH_GPU_ID = self.config.TORCH_GPU_ID
    config.freeze()

    # Both policy classes are constructed with the same keyword arguments.
    policy_cls = CMAPolicy if config.CMA.use else Seq2SeqPolicy
    self.actor_critic = policy_cls(
        observation_space=self.envs.observation_spaces[0],
        action_space=self.envs.action_spaces[0],
        model_config=config,
    )
    self.actor_critic.to(self.device)

    self.optimizer = torch.optim.Adam(
        self.actor_critic.parameters(),
        lr=self.config.DAGGER.LR,
    )

    if load_from_ckpt:
        # The checkpoint is mapped to CPU before its weights are copied
        # into the policy.
        checkpoint = self.load_checkpoint(ckpt_path, map_location="cpu")
        self.actor_critic.load_state_dict(checkpoint["state_dict"])
        logger.info(f"Loaded weights from checkpoint: {ckpt_path}")

    logger.info("Finished setting up actor critic model.")
def construct_envs(
    config: Config, training: bool
) -> VectorEnv:
    r"""Create a VectorEnv whose workers each get their own group of scenes.

    The dataset's scenes are shuffled and dealt round-robin to
    ``config.NUM_PROCESSES`` workers. Every worker receives a copy of the
    keyword arguments returned by ``get_config`` (extended with the gin
    parameters of the selected task) together with its own clone of the
    ``'config'`` entry.

    Args:
        config: config providing NUM_PROCESSES and TASK_CONFIG.DATASET,
            which are used to list and split the scenes.
        training: selects the ``habitat_train_task`` gin parameters when
            true and ``habitat_eval_task`` otherwise; also forwarded to
            ``get_config``.

    Returns:
        VectorEnv object created according to specification.
    """
    num_processes = config.NUM_PROCESSES

    dataset = make_dataset(config.TASK_CONFIG.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(config.TASK_CONFIG.DATASET)
    if len(scenes) > 0:
        random.shuffle(scenes)
        assert len(scenes) >= num_processes, (
            "reduce the number of processes as there "
            "aren't enough number of scenes"
        )

    # Deal the scenes round-robin over the workers.
    scene_splits = [[] for _ in range(num_processes)]
    for scene_idx, scene_name in enumerate(scenes):
        scene_splits[scene_idx % len(scene_splits)].append(scene_name)
    assert sum(map(len, scene_splits)) == len(scenes)

    if training:
        task = 'habitat_train_task'
    else:
        task = 'habitat_eval_task'
    max_duration = gin.query_parameter(f'{task}.max_length')
    wrapper_refs = gin.query_parameter(f'{task}.wrappers')
    wrappers = [ref.scoped_configurable_fn() for ref in wrapper_refs]

    shared_kwargs = get_config(training=training, max_steps=max_duration*3)
    shared_kwargs['max_duration'] = max_duration
    shared_kwargs['action_repeat'] = 1
    # Each wrapper comes with a function that derives its kwargs from the
    # shared keyword arguments.
    shared_kwargs['wrappers'] = [
        (wrapper, kwarg_fn(shared_kwargs)) for wrapper, kwarg_fn in wrappers
    ]

    env_kwargs = []
    for worker_scenes in scene_splits:
        worker_kwargs = shared_kwargs.copy()
        worker_config = worker_kwargs['config'].clone()
        if len(worker_scenes) > 0:
            worker_config.defrost()
            worker_config.DATASET.CONTENT_SCENES = worker_scenes
            worker_config.freeze()
        worker_kwargs['config'] = worker_config
        env_kwargs.append(worker_kwargs)

    return habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(zip(env_kwargs, range(num_processes))),
    )
def construct_envs(config: Config, env_class: Type[Union[Env, RLEnv]]) -> VectorEnv:
    r"""Create a VectorEnv with one environment per process.

    The dataset's scenes are shuffled and dealt round-robin to the
    processes, so that each environment only loads its own group of scenes.

    Note that ``config`` is modified in place: its ``TASK_CONFIG`` is
    replaced on every iteration (ending with the last process's task
    config) and it is left frozen.

    Args:
        config: configs that contain num_processes as well as information
            necessary to create individual environments.
        env_class: class type of the envs to be created.

    Returns:
        VectorEnv object created according to specification.
    """
    num_processes = config.NUM_PROCESSES

    dataset = make_dataset(config.TASK_CONFIG.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(config.TASK_CONFIG.DATASET)
    has_scenes = len(scenes) > 0
    if has_scenes:
        random.shuffle(scenes)
        assert len(scenes) >= num_processes, (
            "reduce the number of processes as there "
            "aren't enough number of scenes")

    # Deal the scenes round-robin over the processes.
    scene_splits = [[] for _ in range(num_processes)]
    for scene_idx, scene_name in enumerate(scenes):
        scene_splits[scene_idx % len(scene_splits)].append(scene_name)
    assert sum(map(len, scene_splits)) == len(scenes)

    configs = []
    for proc_scenes in scene_splits:
        task_config = config.TASK_CONFIG.clone()
        task_config.defrost()
        if has_scenes:
            task_config.DATASET.CONTENT_SCENES = proc_scenes
        # Top-level simulator settings override the task config.
        task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = config.SIMULATOR_GPU_ID
        task_config.SIMULATOR.AGENT_0.SENSORS = config.SENSORS
        task_config.freeze()

        # Install the per-process task config, then snapshot the result.
        config.defrost()
        config.TASK_CONFIG = task_config
        config.freeze()
        configs.append(config.clone())

    env_classes = [env_class] * num_processes
    return habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(zip(configs, env_classes, range(num_processes))),
    )
def __init__(self, task_config: Optional[Config] = None) -> None:
    r"""..

    :param task_config: config to be used for creating the environment.
        The signature defaults it to ``None`` but a real config is
        required, because the dataset is built from
        ``task_config.DATASET``.
    :raises ValueError: if ``task_config`` is ``None``.
    """
    # Fail early with a clear message instead of an AttributeError on
    # ``None.DATASET`` further down.
    if task_config is None:
        raise ValueError(
            "task_config must be provided: the dataset is created from "
            "task_config.DATASET"
        )

    # Wrap the task config in a config carrying fixed RL reward settings,
    # which is what NavRLEnv is constructed with below.
    dummy_config = Config()
    dummy_config.RL = Config()
    dummy_config.RL.SLACK_REWARD = -0.01
    dummy_config.RL.SUCCESS_REWARD = 10
    dummy_config.RL.WITH_TIME_PENALTY = True
    dummy_config.RL.DISTANCE_REWARD_SCALE = 1
    dummy_config.RL.WITH_DISTANCE_REWARD = True
    # NOTE(review): no freeze() call precedes this defrost(), so it looks
    # redundant - confirm before removing.
    dummy_config.RL.defrost()
    dummy_config.TASK_CONFIG = task_config
    dummy_config.freeze()

    dataset = make_dataset(
        id_dataset=task_config.DATASET.TYPE, config=task_config.DATASET
    )
    self._env = NavRLEnv(config=dummy_config, dataset=dataset)
def construct_env_configs_mp3d(config: Config) -> List[Config]:
    r"""Create list of Habitat Configs for training on multiple processes,
    using a hard-coded assignment of scene ids to processes.

    With a single process the only scene is ``"pRbA3pwrgk9"``. Otherwise two
    hard-coded lists of scene ids are handed to ``distribute`` (defined
    elsewhere), which presumably fills ``scene_splits`` in place - its
    return value is not used here.

    ``config`` is frozen as a side effect.

    Args:
        config: configs that contain num_processes as well as information
            necessary to create individual environments.

    Returns:
        List of Configs, one for each process
    """
    config.freeze()
    num_processes = config.NUM_PROCESSES

    if num_processes == 1:
        scene_splits = [["pRbA3pwrgk9"]]
    else:
        # NOTE(review): six ids ("XcA2TqTSSAj" ... "Uxmj2M2itWa") appear
        # twice in this list - confirm that is intended.
        small_scenes = [
            "rPc6DW4iMge", "e9zR4mvMWw7", "uNb9QFRL6hY", "qoiz87JEwZ2",
            "sKLMLpTHeUy", "s8pcmisQ38h", "759xd9YjKW5", "XcA2TqTSSAj",
            "SN83YJsR3w2", "8WUmhLawc2A", "JeFG25nYj2p", "17DRP5sb8fy",
            "Uxmj2M2itWa", "XcA2TqTSSAj", "SN83YJsR3w2", "8WUmhLawc2A",
            "JeFG25nYj2p", "17DRP5sb8fy", "Uxmj2M2itWa", "D7N2EKCX4Sj",
            "b8cTxDM8gDG", "sT4fr6TAbpF", "S9hNv5qa7GM", "82sE5b5pLXE",
            "pRbA3pwrgk9", "aayBHfsNo7d", "cV4RVeZvu5T", "i5noydFURQK",
            "YmJkqBEsHnH", "jh4fc5c5qoQ", "VVfe2KiqLaN", "29hnd4uzFmX",
            "Pm6F8kyY3z2", "JF19kD82Mey", "GdvgFV5R1Z5", "HxpKQynjfin",
            "vyrNrziPKCB",
        ]
        medium_scenes = [
            "V2XKFyX4ASd", "VFuaQ6m2Qom", "ZMojNkEp431", "5LpN3gDmAk7",
            "r47D5H71a5s", "ULsKaCPVFJR", "E9uDoFAP3SH", "kEZ7cmS4wCh",
            "ac26ZMwG7aT", "dhjEzFoUFzH", "mJXqzFtmKg4", "p5wJjkQkbXX",
            "Vvot9Ly1tCj", "EDJbREhghzL", "VzqfbhrpDEA", "7y3sRwLe3Va",
        ]

        scene_splits = [[] for _ in range(config.NUM_PROCESSES)]
        distribute(
            small_scenes,
            scene_splits,
            num_gpus=8,
            procs_per_gpu=3,
            proc_offset=1,
            scenes_per_process=2,
        )
        distribute(
            medium_scenes,
            scene_splits,
            num_gpus=8,
            procs_per_gpu=3,
            proc_offset=0,
            scenes_per_process=1,
        )

    configs = []
    for proc_idx in range(num_processes):
        proc_config = config.clone()
        proc_config.defrost()
        proc_config.DATASET.CONTENT_SCENES = scene_splits[proc_idx]
        # Simulator GPUs are assigned to processes cyclically.
        gpu_ids = config.SIMULATOR_GPU_IDS
        proc_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = gpu_ids[
            proc_idx % len(gpu_ids)
        ]
        proc_config.freeze()
        configs.append(proc_config.clone())

    return configs
def construct_env_configs(
    config: Config, allow_scene_repeat: bool = False,
) -> List[Config]:
    """Build one Habitat Config per training process.

    The dataset's scenes are dealt round-robin to the processes so that
    each environment only works with its own group of scenes. `config` is
    frozen as a side effect.

    # Parameters

    config : configs that contain num_processes as well as information
        necessary to create individual environments.
    allow_scene_repeat: if `True` and the dataset has fewer distinct scenes
        than there are processes, the scene list is repeated (and cut to
        the number of processes) so that scenes are shared across
        processes. If `False`, that situation raises a `RuntimeError`.

    # Returns

    List of Configs, one for each process.
    """
    config.freeze()
    num_processes = config.NUM_PROCESSES

    dataset = habitat.make_dataset(config.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(config.DATASET)

    if 0 < len(scenes) < num_processes:
        if not allow_scene_repeat:
            raise RuntimeError(
                "reduce the number of processes as there aren't enough number of scenes."
            )
        # Repeat the scene list often enough, then keep one scene per process.
        repeats = 1 + (num_processes // len(scenes))
        scenes = (scenes * repeats)[:num_processes]

    scene_splits: List[List] = [[] for _ in range(num_processes)]
    for scene_idx, scene_name in enumerate(scenes):
        scene_splits[scene_idx % len(scene_splits)].append(scene_name)
    assert sum(map(len, scene_splits)) == len(scenes)

    has_scenes = len(scenes) > 0
    configs = []
    for proc_idx in range(num_processes):
        proc_config = config.clone()
        proc_config.defrost()
        if has_scenes:
            proc_config.DATASET.CONTENT_SCENES = scene_splits[proc_idx]

        # GPU ids are handed out cyclically; -1 is used when none are listed.
        gpu_ids = config.SIMULATOR_GPU_IDS
        if len(gpu_ids) == 0:
            device_id = -1
        else:
            device_id = gpu_ids[proc_idx % len(gpu_ids)]
        proc_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = device_id

        proc_config.freeze()
        configs.append(proc_config.clone())

    return configs
def __init__(self, observation_space: Space, num_actions: int, model_config: Config) -> None:
    """Build the instruction/depth/RGB encoders, state encoders,
    attention projections and output heads of the network.

    Args:
        observation_space: passed to the depth and RGB encoders.
        num_actions: output size of the action head; also sizes the
            previous-action embedding (``num_actions + 1`` entries).
        model_config: model configuration. It is modified in place:
            ``INSTRUCTION_ENCODER.final_state_only`` is forced to ``False``
            and the config is left frozen.

    Raises:
        AssertionError: if ``DEPTH_ENCODER.cnn_type`` or
            ``RGB_ENCODER.cnn_type`` is not the single supported value.
    """
    super().__init__()
    self.model_config = model_config
    # Force the instruction encoder setting regardless of what was passed in.
    model_config.defrost()
    model_config.INSTRUCTION_ENCODER.final_state_only = False
    model_config.freeze()

    # Init the instruction encoder
    self.instruction_encoder = InstructionEncoder(
        model_config.INSTRUCTION_ENCODER)

    # Init the depth encoder
    assert model_config.DEPTH_ENCODER.cnn_type in [
        "VlnResnetDepthEncoder"
    ], "DEPTH_ENCODER.cnn_type must be VlnResnetDepthEncoder"
    self.depth_encoder = VlnResnetDepthEncoder(
        observation_space,
        output_size=model_config.DEPTH_ENCODER.output_size,
        checkpoint=model_config.DEPTH_ENCODER.ddppo_checkpoint,
        backbone=model_config.DEPTH_ENCODER.backbone,
        spatial_output=True,
    )

    # Init the RGB encoder
    assert model_config.RGB_ENCODER.cnn_type in [
        "TorchVisionResNet50"
    ], "RGB_ENCODER.cnn_type must be TorchVisionResNet50'."

    # The RGB encoder is given an explicit device: the configured GPU when
    # CUDA is available, otherwise the CPU.
    device = (torch.device("cuda", model_config.TORCH_GPU_ID)
              if torch.cuda.is_available() else torch.device("cpu"))
    self.rgb_encoder = TorchVisionResNet50(
        observation_space,
        model_config.RGB_ENCODER.output_size,
        model_config.RGB_ENCODER.resnet_output_size,
        device,
        spatial_output=True,
    )

    # Optional embedding of the previous action (embedding size 32).
    if model_config.CMA.use_prev_action:
        self.prev_action_embedding = nn.Embedding(num_actions + 1, 32)

    self.rcm_state_encoder = model_config.CMA.rcm_state_encoder

    hidden_size = model_config.STATE_ENCODER.hidden_size
    self._hidden_size = hidden_size

    if self.rcm_state_encoder:
        # NOTE(review): this branch reads self.prev_action_embedding, which
        # this method only assigns when CMA.use_prev_action is true. Unless
        # it is defined elsewhere, rcm_state_encoder=True with
        # use_prev_action=False raises AttributeError here - confirm.
        self.state_encoder = RCMStateEncoder(
            self.rgb_encoder.output_shape[0],
            self.depth_encoder.output_shape[0],
            model_config.STATE_ENCODER.hidden_size,
            self.prev_action_embedding.embedding_dim,
        )
    else:
        # Projections of the encoder outputs down to the configured
        # RGB / depth output sizes.
        self.rgb_linear = nn.Sequential(
            nn.AdaptiveAvgPool1d(1),
            nn.Flatten(),
            nn.Linear(
                self.rgb_encoder.output_shape[0],
                model_config.RGB_ENCODER.output_size,
            ),
            nn.ReLU(True),
        )
        self.depth_linear = nn.Sequential(
            nn.Flatten(),
            nn.Linear(
                np.prod(self.depth_encoder.output_shape),
                model_config.DEPTH_ENCODER.output_size,
            ),
            nn.ReLU(True),
        )

        # Init the RNN state decoder
        # Its input size is the depth and RGB output sizes, plus the
        # previous-action embedding size when that embedding is enabled.
        rnn_input_size = model_config.DEPTH_ENCODER.output_size
        rnn_input_size += model_config.RGB_ENCODER.output_size
        if model_config.CMA.use_prev_action:
            rnn_input_size += self.prev_action_embedding.embedding_dim

        self.state_encoder = RNNStateEncoder(
            input_size=rnn_input_size,
            hidden_size=model_config.STATE_ENCODER.hidden_size,
            num_layers=1,
            rnn_type=model_config.STATE_ENCODER.rnn_type,
        )

    # Interim value: it sizes second_state_compress below and is
    # overwritten with the hidden size afterwards.
    self._output_size = (model_config.STATE_ENCODER.hidden_size +
                         model_config.RGB_ENCODER.output_size +
                         model_config.DEPTH_ENCODER.output_size +
                         self.instruction_encoder.output_size)

    # 1x1 convolutions over the RGB / depth encoder channels; the output
    # channel count is hidden_size // 2 plus the modality's output size.
    self.rgb_kv = nn.Conv1d(
        self.rgb_encoder.output_shape[0],
        hidden_size // 2 + model_config.RGB_ENCODER.output_size,
        1,
    )

    self.depth_kv = nn.Conv1d(
        self.depth_encoder.output_shape[0],
        hidden_size // 2 + model_config.DEPTH_ENCODER.output_size,
        1,
    )

    # self.depth_kv = nn.Conv1d(
    #     self.depth_encoder.output_shape[0],
    #     hidden_size,
    #     1,
    # )

    # Projections to hidden_size // 2 for the state and the instruction
    # features.
    self.state_q = nn.Linear(hidden_size, hidden_size // 2)
    self.text_k = nn.Conv1d(self.instruction_encoder.output_size,
                            hidden_size // 2, 1)
    self.text_q = nn.Linear(self.instruction_encoder.output_size,
                            hidden_size // 2)

    # Constant 1 / sqrt(hidden_size // 2), registered as a buffer.
    self.register_buffer("_scale",
                         torch.tensor(1.0 / ((hidden_size // 2)**0.5)))

    # Compress the interim-size features (plus the previous-action
    # embedding when enabled) down to the hidden size.
    if model_config.CMA.use_prev_action:
        self.second_state_compress = nn.Sequential(
            nn.Linear(
                self._output_size + self.prev_action_embedding.embedding_dim,
                self._hidden_size,
            ),
            nn.ReLU(True),
        )
    else:
        self.second_state_compress = nn.Sequential(
            nn.Linear(
                self._output_size,
                self._hidden_size,
            ),
            nn.ReLU(True),
        )

    self.second_state_encoder = RNNStateEncoder(
        input_size=self._hidden_size,
        hidden_size=self._hidden_size,
        num_layers=1,
        rnn_type=model_config.STATE_ENCODER.rnn_type,
    )
    self._output_size = model_config.STATE_ENCODER.hidden_size

    # NOTE(review): self.output_size is not assigned in this method;
    # presumably a property backed by _output_size - confirm.
    self.progress_monitor = nn.Linear(self.output_size, 1)

    # Output heads: one over num_actions and a separate single-unit stop head.
    self.linear = nn.Linear(self.model_config.STATE_ENCODER.hidden_size,
                            num_actions)
    self.stop_linear = nn.Linear(
        self.model_config.STATE_ENCODER.hidden_size, 1)

    self._init_layers()

    self.train()
def construct_envs(config: Config, env_class: Type[Union[Env, RLEnv]]) -> VectorEnv:
    r"""Create a vectorized env with one environment per process.

    The dataset's scenes are dealt round-robin to the processes (without
    shuffling), so that each environment only loads its own group of
    scenes. The launcher class is picked from ``config.USE_SYNC_VECENV``
    and ``config.USE_VECENV``.

    Note that ``config`` is modified in place: its ``TASK_CONFIG`` is
    replaced on every iteration and it is left frozen.

    Args:
        config: configs that contain num_processes as well as information
            necessary to create individual environments.
        env_class: class type of the envs to be created

    Returns:
        VectorEnv object created according to specification.
    """
    num_processes = config.NUM_PROCESSES

    dataset = make_dataset(config.TASK_CONFIG.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(config.TASK_CONFIG.DATASET)

    # rearrange scenes in the order of scene size since there is a severe
    # imbalance of data size
    if "replica" in config.TASK_CONFIG.DATASET.SCENES_DIR:
        scenes = [name for name in SCENES if name in scenes]

    has_scenes = len(scenes) > 0
    if has_scenes:
        assert len(scenes) >= num_processes, (
            "reduce the number of processes as there "
            "aren't enough number of scenes")

    # Deal the scenes round-robin over the processes.
    scene_splits = [[] for _ in range(num_processes)]
    for scene_idx, scene_name in enumerate(scenes):
        scene_splits[scene_idx % len(scene_splits)].append(scene_name)
    assert sum(map(len, scene_splits)) == len(scenes)

    configs = []
    for proc_scenes in scene_splits:
        task_config = config.TASK_CONFIG.clone()
        task_config.defrost()
        if has_scenes:
            task_config.DATASET.CONTENT_SCENES = proc_scenes
            logging.debug('All scenes: {}'.format(','.join(proc_scenes)))

        # overwrite the task config with top-level config file
        task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = config.SIMULATOR_GPU_ID
        task_config.SIMULATOR.AGENT_0.SENSORS = config.SENSORS
        task_config.freeze()

        # Install the per-process task config, then snapshot the result.
        config.defrost()
        config.TASK_CONFIG = task_config
        config.freeze()
        configs.append(config.clone())

    # use VectorEnv for the best performance and ThreadedVectorEnv for debugging
    if config.USE_SYNC_VECENV:
        env_launcher = SyncVectorEnv
        logging.info('Using SyncVectorEnv')
    elif config.USE_VECENV:
        env_launcher = habitat.VectorEnv
        logging.info('Using VectorEnv')
    else:
        env_launcher = habitat.ThreadedVectorEnv
        logging.info('Using ThreadedVectorEnv')

    env_classes = [env_class] * num_processes
    return env_launcher(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(zip(configs, env_classes, range(num_processes))),
    )
def construct_env_configs(config: Config) -> List[Config]:
    """Create list of Habitat Configs for training on multiple processes

    To allow better performance, dataset are split into small ones for
    each individual env, grouped by scenes. Scenes keep the order returned
    by the dataset (no shuffling) and are dealt round-robin to the
    processes. `config` is frozen as a side effect.

    # Parameters

    config : configs that contain num_processes as well as information
        necessary to create individual environments.

    # Returns

    List of Configs, one for each process.

    # Raises

    AssertionError : if the dataset lists scenes but fewer than there are
        processes.
    """
    config.freeze()
    num_processes = config.NUM_PROCESSES
    configs = []

    dataset = habitat.make_dataset(config.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(config.DATASET)

    if len(scenes) > 0:
        assert len(scenes) >= num_processes, (
            "reduce the number of processes as there "
            "aren't enough number of scenes")

    scene_splits: List[List] = [[] for _ in range(num_processes)]
    for idx, scene in enumerate(scenes):
        scene_splits[idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    for i in range(num_processes):
        task_config = config.clone()
        task_config.defrost()
        if len(scenes) > 0:
            task_config.DATASET.CONTENT_SCENES = scene_splits[i]

        gpu_ids = config.SIMULATOR_GPU_IDS
        if len(gpu_ids) == 0:
            # An empty GPU list used to crash with ZeroDivisionError in the
            # modulo below. NOTE(review): -1 is presumably the simulator's
            # "no GPU" device id - confirm.
            task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = -1
        else:
            # GPU ids are handed out to the processes cyclically.
            task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = gpu_ids[
                i % len(gpu_ids)]

        task_config.freeze()
        configs.append(task_config.clone())

    return configs