def test_rl_vectorized_envs():
    configs, datasets = _load_test_data()
    num_envs = len(configs)
    env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
    envs = habitat.VectorEnv(make_env_fn=make_rl_env, env_fn_args=env_fn_args)
    envs.reset()

    non_stop_actions = [
        v
        for v in range(len(SimulatorActions))
        if v != SimulatorActions.STOP.value
    ]
    for i in range(2 * configs[0].ENVIRONMENT.MAX_EPISODE_STEPS):
        outputs = envs.step(np.random.choice(non_stop_actions, num_envs))
        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
        assert len(observations) == num_envs
        assert len(rewards) == num_envs
        assert len(dones) == num_envs
        assert len(infos) == num_envs

        tiled_img = envs.render(mode="rgb_array")
        new_height = int(np.ceil(np.sqrt(num_envs)))
        new_width = int(np.ceil(float(num_envs) / new_height))
        h, w, c = observations[0]["rgb"].shape
        assert tiled_img.shape == (
            h * new_height,
            w * new_width,
            c,
        ), "vector env render is broken"

        if (i + 1) % configs[0].ENVIRONMENT.MAX_EPISODE_STEPS == 0:
            assert all(dones), "dones should be true after max_episode steps"

    envs.close()
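# The render() shape assertion above depends on how VectorEnv tiles the
# per-env frames into a single image: ceil(sqrt(n)) rows and ceil(n / rows)
# columns. A minimal, self-contained sketch of that layout for equal-sized
# frames -- an illustration only, not habitat's actual tile_images helper:
import numpy as np

def tile_frames(frames):
    """Arrange n HxWxC frames into a ceil(sqrt(n))-row grid."""
    n = len(frames)
    rows = int(np.ceil(np.sqrt(n)))
    cols = int(np.ceil(n / rows))
    h, w, c = frames[0].shape
    canvas = np.zeros((rows * h, cols * w, c), dtype=frames[0].dtype)
    for i, frame in enumerate(frames):
        r, col = divmod(i, cols)
        canvas[r * h : (r + 1) * h, col * w : (col + 1) * w] = frame
    return canvas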
def test_number_of_episodes():
    configs, datasets = _load_test_data()
    num_envs = len(configs)
    env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
    with habitat.VectorEnv(
        env_fn_args=env_fn_args,
        multiprocessing_start_method="forkserver",
    ) as envs:
        assert envs.number_of_episodes == [10000, 10000, 10000, 10000]
def construct_envs(config: Config, training: bool) -> VectorEnv:
    r"""Create VectorEnv object with specified config and env class type.

    To allow better performance, the dataset is split into smaller ones,
    one for each individual env, grouped by scenes.

    Args:
        config: config that contains NUM_PROCESSES as well as information
            necessary to create individual environments.
        training: whether to build the training or the evaluation task.

    Returns:
        VectorEnv object created according to specification.
    """
    num_processes = config.NUM_PROCESSES

    dataset = make_dataset(config.TASK_CONFIG.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(config.TASK_CONFIG.DATASET)

    if len(scenes) > 0:
        random.shuffle(scenes)
        assert len(scenes) >= num_processes, (
            "reduce the number of processes as there "
            "aren't enough scenes"
        )

    scene_splits = [[] for _ in range(num_processes)]
    for idx, scene in enumerate(scenes):
        scene_splits[idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    task = "habitat_train_task" if training else "habitat_eval_task"
    max_duration = gin.query_parameter(f"{task}.max_length")
    wrappers = [
        w.scoped_configurable_fn()
        for w in gin.query_parameter(f"{task}.wrappers")
    ]
    kwargs = get_config(training=training, max_steps=max_duration * 3)
    kwargs["max_duration"] = max_duration
    kwargs["action_repeat"] = 1
    kwargs["wrappers"] = [
        (wrapper, kwarg_fn(kwargs)) for wrapper, kwarg_fn in wrappers
    ]

    env_kwargs = []
    for scene_split in scene_splits:
        kw = kwargs.copy()
        env_config = kw["config"].clone()
        if len(scene_split) > 0:
            env_config.defrost()
            env_config.DATASET.CONTENT_SCENES = scene_split
            env_config.freeze()
        kw["config"] = env_config
        env_kwargs.append(kw)

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(zip(env_kwargs, range(num_processes))),
    )
    return envs
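# Nearly every construct_envs variant in this section uses the same
# round-robin scene split. A self-contained sketch of just that step
# (the helper name is illustrative, not part of the habitat API):
def split_scenes(scenes, num_workers):
    """Distribute scenes round-robin so each worker loads only its share."""
    splits = [[] for _ in range(num_workers)]
    for idx, scene in enumerate(scenes):
        splits[idx % num_workers].append(scene)
    assert sum(map(len, splits)) == len(scenes)
    return splits

# split_scenes(["a", "b", "c", "d", "e"], 2) -> [["a", "c", "e"], ["b", "d"]]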
def construct_envs(
    config: Config, env_class: Type[Union[Env, RLEnv]]
) -> VectorEnv:
    r"""Create VectorEnv object with specified config and env class type.

    To allow better performance, the dataset is split into smaller ones,
    one for each individual env, grouped by scenes.

    Args:
        config: configs that contain num_processes as well as information
            necessary to create individual environments.
        env_class: class type of the envs to be created.

    Returns:
        VectorEnv object created according to specification.
    """
    num_processes = config.NUM_PROCESSES
    configs = []
    env_classes = [env_class for _ in range(num_processes)]
    dataset = make_dataset(config.TASK_CONFIG.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(config.TASK_CONFIG.DATASET)

    if len(scenes) > 0:
        random.shuffle(scenes)
        assert len(scenes) >= num_processes, (
            "reduce the number of processes as there "
            "aren't enough scenes"
        )

    scene_splits = [[] for _ in range(num_processes)]
    for idx, scene in enumerate(scenes):
        scene_splits[idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    for i in range(num_processes):
        task_config = config.TASK_CONFIG.clone()
        task_config.defrost()
        if len(scenes) > 0:
            task_config.DATASET.CONTENT_SCENES = scene_splits[i]

        task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = (
            config.SIMULATOR_GPU_ID
        )
        task_config.SIMULATOR.AGENT_0.SENSORS = config.SENSORS
        task_config.freeze()

        config.defrost()
        config.TASK_CONFIG = task_config
        config.freeze()

        configs.append(config.clone())

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(zip(configs, env_classes, range(num_processes))),
    )
    return envs
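# The construct_envs variants here all hand a make_env_fn to
# habitat.VectorEnv, which calls it once per worker with one tuple from
# env_fn_args. A hedged sketch of one plausible shape for it, matching the
# (config, env_class, rank) tuples built above -- the real baselines helper
# also constructs the dataset explicitly; this is a simplified assumption:
def make_env_fn(config, env_class, rank=0):
    """Construct and seed a single env inside a VectorEnv worker."""
    env = env_class(config=config)  # assumes env_class accepts config=
    env.seed(rank)
    return env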
def test_with_scope():
    configs, datasets = _load_test_data()
    num_envs = len(configs)
    env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
    with habitat.VectorEnv(
        env_fn_args=env_fn_args,
        multiprocessing_start_method="forkserver",
    ) as envs:
        envs.reset()

    assert envs._is_closed
def construct_envs(args):
    env_configs = []
    baseline_configs = []

    basic_config = cfg_env(config_paths=args.task_config, opts=args.opts)
    dataset = make_dataset(basic_config.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(basic_config.DATASET)

    if len(scenes) > 0:
        random.shuffle(scenes)
        assert len(scenes) >= args.num_processes, (
            "reduce the number of processes as there "
            "aren't enough scenes"
        )

    scene_splits = [[] for _ in range(args.num_processes)]
    for j, s in enumerate(scenes):
        scene_splits[j % len(scene_splits)].append(s)

    assert sum(map(len, scene_splits)) == len(scenes)

    for i in range(args.num_processes):
        config_env = cfg_env(config_paths=args.task_config, opts=args.opts)
        config_env.defrost()

        if len(scenes) > 0:
            config_env.DATASET.CONTENT_SCENES = scene_splits[i]

        config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = args.sim_gpu_id

        agent_sensors = args.sensors.strip().split(",")
        for sensor in agent_sensors:
            assert sensor in ["RGB_SENSOR", "DEPTH_SENSOR"]
        config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors
        config_env.freeze()
        env_configs.append(config_env)

        config_baseline = cfg_baseline()
        baseline_configs.append(config_baseline)

        logger.info("config_env: {}".format(config_env))

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(env_configs, baseline_configs, range(args.num_processes))
        ),
    )
    return envs
def _vec_env_test_fn(configs, datasets, multiprocessing_start_method):
    num_envs = len(configs)
    env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
    envs = habitat.VectorEnv(
        env_fn_args=env_fn_args,
        multiprocessing_start_method=multiprocessing_start_method,
    )
    envs.reset()

    non_stop_actions = [
        v
        for v in range(len(SimulatorActions))
        if v != SimulatorActions.STOP.value
    ]
    for _ in range(2 * configs[0].ENVIRONMENT.MAX_EPISODE_STEPS):
        observations = envs.step(np.random.choice(non_stop_actions, num_envs))
        assert len(observations) == num_envs
def construct_envs(args):
    env_configs = []
    baseline_configs = []

    basic_config = cfg_env(config_file=args.task_config)
    scenes = PointNavDatasetV1.get_scenes_to_load(basic_config.DATASET)

    if len(scenes) > 0:
        random.shuffle(scenes)
        assert len(scenes) >= args.num_processes, (
            "reduce the number of processes as there "
            "aren't enough scenes"
        )

    # Contiguous chunks of floor(len(scenes) / num_processes) scenes each;
    # any remainder scenes beyond num_processes * scene_split_size go unused.
    scene_split_size = int(np.floor(len(scenes) / args.num_processes))

    for i in range(args.num_processes):
        config_env = cfg_env(config_file=args.task_config)
        config_env.defrost()

        if len(scenes) > 0:
            config_env.DATASET.POINTNAVV1.CONTENT_SCENES = scenes[
                i * scene_split_size : (i + 1) * scene_split_size
            ]

        config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = args.sim_gpu_id

        agent_sensors = args.sensors.strip().split(",")
        for sensor in agent_sensors:
            assert sensor in ["RGB_SENSOR", "DEPTH_SENSOR"]
        config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors
        config_env.freeze()
        env_configs.append(config_env)

        config_baseline = cfg_baseline()
        baseline_configs.append(config_baseline)

        logger.info("config_env: {}".format(config_env))

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(env_configs, baseline_configs, range(args.num_processes))
        ),
    )
    return envs
def test_rl_vectorized_envs(gpu2gpu):
    import habitat_sim

    if gpu2gpu and not habitat_sim.cuda_enabled:
        pytest.skip("GPU-GPU requires CUDA")

    configs, datasets = _load_test_data()
    for config in configs:
        config.defrost()
        config.SIMULATOR.HABITAT_SIM_V0.GPU_GPU = gpu2gpu
        config.freeze()

    num_envs = len(configs)
    env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
    with habitat.VectorEnv(
        make_env_fn=make_rl_env, env_fn_args=env_fn_args
    ) as envs:
        envs.reset()

        for i in range(2 * configs[0].ENVIRONMENT.MAX_EPISODE_STEPS):
            outputs = envs.step(
                sample_non_stop_action(envs.action_spaces[0], num_envs)
            )
            observations, rewards, dones, infos = [
                list(x) for x in zip(*outputs)
            ]
            assert len(observations) == num_envs
            assert len(rewards) == num_envs
            assert len(dones) == num_envs
            assert len(infos) == num_envs

            tiled_img = envs.render(mode="rgb_array")
            new_height = int(np.ceil(np.sqrt(num_envs)))
            new_width = int(np.ceil(float(num_envs) / new_height))
            h, w, c = observations[0]["rgb"].shape
            assert tiled_img.shape == (
                h * new_height,
                w * new_width,
                c,
            ), "vector env render is broken"

            if (i + 1) % configs[0].ENVIRONMENT.MAX_EPISODE_STEPS == 0:
                assert all(
                    dones
                ), "dones should be true after max_episode steps"
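# The two gpu2gpu tests assume a sample_non_stop_action helper. A hedged
# sketch of what such a helper could look like for a plain Discrete action
# space where STOP is action 0 -- an assumption; habitat's real helper
# samples from the task's action space and may return action dicts instead:
import numpy as np

def sample_non_stop_action(action_space, num_samples, stop_action=0):
    """Sample random discrete actions, never choosing STOP."""
    candidates = [a for a in range(action_space.n) if a != stop_action]
    return np.random.choice(candidates, num_samples).tolist()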
def make_task_envs(env_types, nav_configs, nav_datasets, shell_args):
    data_keys = list(nav_datasets.keys())
    nav_datasets = [
        {key: nav_datasets[key][ii] for key in data_keys}
        for ii in range(len(nav_datasets[data_keys[0]]))
    ]

    env_fn_args: Tuple[Tuple] = tuple(
        zip(
            env_types,
            nav_configs,
            nav_datasets,
            range(shell_args.seed, shell_args.seed + len(nav_configs)),
        )
    )
    if shell_args.use_multithreading:
        envs = habitat.ThreadedVectorEnv(make_env_fn, env_fn_args)
    else:
        envs = habitat.VectorEnv(
            make_env_fn,
            env_fn_args,
            multiprocessing_start_method="forkserver",
        )
    envs = HabitatVecEnvWrapper(envs)
    return envs
def _vec_env_test_fn(configs, datasets, multiprocessing_start_method, gpu2gpu):
    num_envs = len(configs)
    for cfg in configs:
        cfg.defrost()
        cfg.SIMULATOR.HABITAT_SIM_V0.GPU_GPU = gpu2gpu
        cfg.freeze()

    env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
    envs = habitat.VectorEnv(
        env_fn_args=env_fn_args,
        multiprocessing_start_method=multiprocessing_start_method,
    )
    envs.reset()

    for _ in range(2 * configs[0].ENVIRONMENT.MAX_EPISODE_STEPS):
        observations = envs.step(
            sample_non_stop_action(envs.action_spaces[0], num_envs)
        )
        assert len(observations) == num_envs
def test_vec_env_call_func():
    configs, datasets = _load_test_data()
    num_envs = len(configs)
    env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
    true_env_ids = list(range(num_envs))
    envs = habitat.VectorEnv(
        make_env_fn=_make_dummy_env_func,
        env_fn_args=env_fn_args,
        multiprocessing_start_method="forkserver",
    )
    envs.reset()

    env_ids = envs.call(["get_env_ind"] * num_envs)
    assert env_ids == true_env_ids

    env_id = envs.call_at(1, "get_env_ind")
    assert env_id == true_env_ids[1]

    envs.call_at(2, "set_env_ind", {"new_env_ind": 20})
    true_env_ids[2] = 20
    env_ids = envs.call(["get_env_ind"] * num_envs)
    assert env_ids == true_env_ids

    envs.call_at(2, "set_env_ind", {"new_env_ind": 2})
    true_env_ids[2] = 2
    env_ids = envs.call(["get_env_ind"] * num_envs)
    assert env_ids == true_env_ids

    envs.pause_at(0)
    true_env_ids.pop(0)
    env_ids = envs.call(["get_env_ind"] * num_envs)
    assert env_ids == true_env_ids

    envs.pause_at(0)
    true_env_ids.pop(0)
    env_ids = envs.call(["get_env_ind"] * num_envs)
    assert env_ids == true_env_ids

    envs.resume_all()
    env_ids = envs.call(["get_env_ind"] * num_envs)
    assert env_ids == list(range(num_envs))

    envs.close()
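# The call()/call_at() test above relies on a _make_dummy_env_func whose envs
# expose get_env_ind / set_env_ind. A hedged sketch of such a fixture (the
# real test helper may differ in detail):
class _DummyEnv(habitat.Env):
    """habitat.Env plus the two methods the call()/call_at() test invokes."""

    def __init__(self, config, dataset, env_ind=0):
        super().__init__(config=config, dataset=dataset)
        self._env_ind = env_ind

    def get_env_ind(self):
        return self._env_ind

    def set_env_ind(self, new_env_ind):
        self._env_ind = new_env_ind

def _make_dummy_env_func(config, dataset, rank):
    return _DummyEnv(config, dataset, rank)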
def test_rl_vectorized_envs():
    configs, datasets = _load_test_data()
    num_envs = len(configs)
    env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
    envs = habitat.VectorEnv(make_env_fn=make_rl_env, env_fn_args=env_fn_args)
    envs.reset()

    non_stop_actions = [
        k
        for k, v in SIM_ACTION_TO_NAME.items()
        if v != SimulatorActions.STOP.value
    ]
    for i in range(2 * configs[0].ENVIRONMENT.MAX_EPISODE_STEPS):
        outputs = envs.step(np.random.choice(non_stop_actions, num_envs))
        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
        assert len(observations) == num_envs
        assert len(rewards) == num_envs
        assert len(dones) == num_envs
        assert len(infos) == num_envs

        if (i + 1) % configs[0].ENVIRONMENT.MAX_EPISODE_STEPS == 0:
            assert all(dones), "dones should be true after max_episode steps"

    envs.close()
def construct_envs_habitat(
    config,
    env_class,
    workers_ignore_signals: bool = False,
):
    r"""Create VectorEnv object with specified config and env class type.

    To allow better performance, the dataset is split into smaller ones,
    one for each individual env, grouped by scenes.

    :param config: configs that contain num_processes as well as information
        necessary to create individual environments.
    :param env_class: class type of the envs to be created.
    :param workers_ignore_signals: Passed to :ref:`habitat.VectorEnv`'s
        constructor
    :return: VectorEnv object created according to specification.
    """
    import habitat
    from habitat import make_dataset
    from habitat_baselines.utils.env_utils import make_env_fn

    num_processes = config.NUM_PROCESSES
    configs = []
    env_classes = [env_class for _ in range(num_processes)]
    dataset = make_dataset(config.TASK_CONFIG.DATASET.TYPE)
    scenes = config.TASK_CONFIG.DATASET.CONTENT_SCENES
    if "*" in config.TASK_CONFIG.DATASET.CONTENT_SCENES:
        scenes = dataset.get_scenes_to_load(config.TASK_CONFIG.DATASET)

    if num_processes > 1:
        if len(scenes) == 0:
            raise RuntimeError(
                "No scenes to load, multiple process logic relies on being "
                "able to split scenes uniquely between processes"
            )
        if len(scenes) < num_processes:
            raise RuntimeError(
                "reduce the number of processes as there "
                "aren't enough scenes"
            )
        random.shuffle(scenes)

    scene_splits = [[] for _ in range(num_processes)]
    for idx, scene in enumerate(scenes):
        scene_splits[idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    for i in range(num_processes):
        proc_config = config.clone()
        proc_config.defrost()

        task_config = proc_config.TASK_CONFIG
        task_config.SEED = task_config.SEED + i
        if len(scenes) > 0:
            task_config.DATASET.CONTENT_SCENES = scene_splits[i]

        task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = (
            config.SIMULATOR_GPU_ID
        )
        sensors = []
        if config.DEPTH:
            sensors += ["DEPTH_SENSOR"]
        if config.COLOR:
            sensors += ["RGB_SENSOR"]
        task_config.SIMULATOR.AGENT_0.SENSORS = sensors
        task_config.SIMULATOR.RGB_SENSOR.HEIGHT = config.RESOLUTION[1]
        task_config.SIMULATOR.RGB_SENSOR.WIDTH = config.RESOLUTION[0]
        task_config.SIMULATOR.DEPTH_SENSOR.HEIGHT = config.RESOLUTION[1]
        task_config.SIMULATOR.DEPTH_SENSOR.WIDTH = config.RESOLUTION[0]
        task_config.SIMULATOR.DEPTH_SENSOR.NORMALIZE_DEPTH = False
        task_config.SIMULATOR.DEPTH_SENSOR.MAX_DEPTH = 20.0

        proc_config.freeze()
        configs.append(proc_config)

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(zip(configs, env_classes)),
        workers_ignore_signals=workers_ignore_signals,
    )
    return envs
def __init__(
    self,
    split,
    gpu_id,
    envs_processed,
    envs_to_process,
    opts,
    vectorize=False,
    seed=0,
) -> None:
    # NOTE: vectorization is forced off here; the vectorize argument is ignored.
    self.vectorize = False

    resolution = opts.W
    if opts.no_sem_images and opts.no_txt_semantic and opts.no_binary_semantic:
        sensors = ["RGB_SENSOR", "DEPTH_SENSOR"]
    else:
        sensors = ["RGB_SENSOR", "DEPTH_SENSOR", "SEMANTIC_SENSOR"]

    if split == "train":
        data_path = opts.train_data_path
    elif split == "val":
        data_path = opts.val_data_path
    elif split == "test":
        data_path = opts.test_data_path
    else:
        raise Exception("Invalid split")

    unique_dataset_name = opts.dataset

    self.num_parallel_envs = 1
    self.images_before_reset = opts.images_before_reset

    config = make_config(
        opts.config,
        gpu_id,
        split,
        data_path,
        sensors,
        resolution,
        opts.scenes_dir,
    )
    data_dir = os.path.join(
        "./util/scripts/mp3d_data_gen_deps/scene_episodes",
        unique_dataset_name + "_" + split,
    )
    self.dataset_name = config.DATASET.TYPE
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    data_path = os.path.join(data_dir, "dataset_one_ep_per_scene.json.gz")

    # Creates a dataset where each episode is a random spawn point in each scene.
    if not os.path.exists(data_path):
        print("Creating dataset...", flush=True)
        dataset = make_dataset(config.DATASET.TYPE, config=config.DATASET)
        # Get one episode per scene in dataset
        scene_episodes = {}
        for episode in tqdm.tqdm(dataset.episodes):
            if episode.scene_id not in scene_episodes:
                scene_episodes[episode.scene_id] = episode

        scene_episodes = list(scene_episodes.values())
        dataset.episodes = scene_episodes
        if not os.path.exists(data_path):
            # Multiproc do check again before write.
            json = dataset.to_json().encode("utf-8")
            with gzip.GzipFile(data_path, "w") as fout:
                fout.write(json)
        print("Finished dataset...", flush=True)

    # Load in data and update the location to the proper location (else
    # get a weird, uninformative, error -- Affine2Dtransform())
    dataset = mp3d_dataset.PointNavDatasetV1()
    with gzip.open(data_path, "rt") as f:
        dataset.from_json(f.read())

    envs = []
    for i in range(0, len(dataset.episodes)):
        scene_id = dataset.episodes[i].scene_id.split("/")[-2]
        # Check if scene already processed
        if scene_id not in envs_processed:
            # Check if user wants to process this scene (if no scene is
            # specified then ignore this condition)
            if len(envs_to_process) == 0 or scene_id in envs_to_process:
                dataset.episodes[i].scene_id = dataset.episodes[
                    i
                ].scene_id.replace(
                    "/checkpoint/erikwijmans/data/mp3d/",
                    opts.scenes_dir + "/mp3d/",
                )
                envs.append(dataset.episodes[i])
    dataset.episodes = envs

    config.TASK.SENSORS = ["POINTGOAL_SENSOR"]
    config.freeze()

    self.rng = np.random.RandomState(seed)

    # Now look at vector environments
    if self.vectorize:
        configs, datasets = _load_datasets(
            (
                opts.config,
                gpu_id,
                split,
                data_path,
                sensors,
                resolution,
                opts.scenes_dir,
            ),
            dataset,
            data_path,
            opts.scenes_dir + "/mp3d/",
            num_workers=self.num_parallel_envs,
        )
        num_envs = len(configs)
        env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
        envs = habitat.VectorEnv(
            env_fn_args=env_fn_args,
            multiprocessing_start_method="forkserver",
        )
        self.env = envs
        self.num_train_envs = int(0.9 * self.num_parallel_envs)
        self.num_val_envs = self.num_parallel_envs - self.num_train_envs
    else:
        self.env = habitat.Env(config=config, dataset=dataset)
        # TODO: End randomization here
        self.env_sim = self.env.sim
        self.rng.shuffle(self.env.episodes)

    self.num_samples = 0

    # Set up intrinsic parameters
    self.hfov = config.SIMULATOR.DEPTH_SENSOR.HFOV * np.pi / 180.0
    self.W = resolution
    self.K = np.array(
        [
            [1.0 / np.tan(self.hfov / 2.0), 0.0, 0.0, 0.0],
            [0.0, 1.0 / np.tan(self.hfov / 2.0), 0.0, 0.0],
            [0.0, 0.0, 1.0, 0.0],
            [0.0, 0.0, 0.0, 1.0],
        ],
        dtype=np.float32,
    )
    self.invK = np.linalg.inv(self.K)

    self.config = config
    self.opts = opts

    if self.opts.normalize_image:
        # Using same normalization as BigGan
        self.transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
            ]
        )
    else:
        self.transform = transforms.ToTensor()
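# The 4x4 K built above is a pinhole-style projection matrix in normalized
# device coordinates, with focal length f = 1 / tan(hfov / 2). A small
# worked check of that relation (a 90-degree HFOV gives f = 1):
import numpy as np

hfov = 90.0 * np.pi / 180.0
f = 1.0 / np.tan(hfov / 2.0)
assert np.isclose(f, 1.0)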
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-path", type=str, required=True)
    parser.add_argument("--sim-gpu-id", type=int, required=True)
    parser.add_argument("--pth-gpu-id", type=int, required=True)
    parser.add_argument("--num-processes", type=int, required=True)
    parser.add_argument("--hidden-size", type=int, default=512)
    parser.add_argument("--count-test-episodes", type=int, default=100)
    parser.add_argument(
        "--sensors",
        type=str,
        default="RGB_SENSOR,DEPTH_SENSOR",
        help="comma separated string containing different "
        "sensors to use, currently 'RGB_SENSOR' and "
        "'DEPTH_SENSOR' are supported",
    )
    parser.add_argument(
        "--task-config",
        type=str,
        default="configs/tasks/pointnav.yaml",
        help="path to config yaml containing information about task",
    )
    args = parser.parse_args()

    device = torch.device("cuda:{}".format(args.pth_gpu_id))

    env_configs = []
    baseline_configs = []

    for _ in range(args.num_processes):
        config_env = get_config(config_paths=args.task_config)
        config_env.defrost()
        config_env.DATASET.SPLIT = "val"

        agent_sensors = args.sensors.strip().split(",")
        for sensor in agent_sensors:
            assert sensor in ["RGB_SENSOR", "DEPTH_SENSOR"]
        config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors
        config_env.freeze()
        env_configs.append(config_env)

        config_baseline = cfg_baseline()
        baseline_configs.append(config_baseline)

    assert len(baseline_configs) > 0, "empty list of datasets"

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(env_configs, baseline_configs, range(args.num_processes))
        ),
    )

    ckpt = torch.load(args.model_path, map_location=device)

    actor_critic = Policy(
        observation_space=envs.observation_spaces[0],
        action_space=envs.action_spaces[0],
        hidden_size=512,
        goal_sensor_uuid=env_configs[0].TASK.GOAL_SENSOR_UUID,
    )
    actor_critic.to(device)

    ppo = PPO(
        actor_critic=actor_critic,
        clip_param=0.1,
        ppo_epoch=4,
        num_mini_batch=32,
        value_loss_coef=0.5,
        entropy_coef=0.01,
        lr=2.5e-4,
        eps=1e-5,
        max_grad_norm=0.5,
    )

    ppo.load_state_dict(ckpt["state_dict"])

    actor_critic = ppo.actor_critic

    observations = envs.reset()
    batch = batch_obs(observations)
    for sensor in batch:
        batch[sensor] = batch[sensor].to(device)

    episode_rewards = torch.zeros(envs.num_envs, 1, device=device)
    episode_spls = torch.zeros(envs.num_envs, 1, device=device)
    episode_success = torch.zeros(envs.num_envs, 1, device=device)
    episode_counts = torch.zeros(envs.num_envs, 1, device=device)
    current_episode_reward = torch.zeros(envs.num_envs, 1, device=device)

    test_recurrent_hidden_states = torch.zeros(
        args.num_processes, args.hidden_size, device=device
    )
    not_done_masks = torch.zeros(args.num_processes, 1, device=device)

    while episode_counts.sum() < args.count_test_episodes:
        with torch.no_grad():
            _, actions, _, test_recurrent_hidden_states = actor_critic.act(
                batch,
                test_recurrent_hidden_states,
                not_done_masks,
                deterministic=False,
            )

        outputs = envs.step([a[0].item() for a in actions])

        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
        batch = batch_obs(observations)
        for sensor in batch:
            batch[sensor] = batch[sensor].to(device)

        not_done_masks = torch.tensor(
            [[0.0] if done else [1.0] for done in dones],
            dtype=torch.float,
            device=device,
        )

        for i in range(not_done_masks.shape[0]):
            if not_done_masks[i].item() == 0:
                episode_spls[i] += infos[i]["roomnavmetric"]
                if infos[i]["roomnavmetric"] > 0:
                    episode_success[i] += 1

        rewards = torch.tensor(
            rewards, dtype=torch.float, device=device
        ).unsqueeze(1)
        current_episode_reward += rewards
        episode_rewards += (1 - not_done_masks) * current_episode_reward
        episode_counts += 1 - not_done_masks
        current_episode_reward *= not_done_masks

    episode_reward_mean = (episode_rewards / episode_counts).mean().item()
    episode_spl_mean = (episode_spls / episode_counts).mean().item()
    episode_success_mean = (episode_success / episode_counts).mean().item()

    print("Average episode reward: {:.6f}".format(episode_reward_mean))
    print("Average episode success: {:.6f}".format(episode_success_mean))
    print("Average episode spl: {:.6f}".format(episode_spl_mean))
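# The mask arithmetic in the loop above records each episode's return
# exactly once: when an env reports done its mask is 0, so
# (1 - mask) * current adds the running return to the totals, and
# current *= mask resets it. A tiny worked check with a single env:
import torch

current = torch.tensor([[3.0]])  # running return of the episode
mask = torch.tensor([[0.0]])     # episode just finished
totals = torch.zeros(1, 1)
totals += (1 - mask) * current   # 3.0 is recorded once
current = current * mask         # reset to 0.0 for the next episode
assert totals.item() == 3.0 and current.item() == 0.0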
def construct_envs(
    config: Config,
    env_class: Union[Type[Env], Type[RLEnv]],
    workers_ignore_signals: bool = False,
) -> VectorEnv:
    r"""Create VectorEnv object with specified config and env class type.

    To allow better performance, the dataset is split into smaller ones,
    one for each individual env, grouped by scenes.

    :param config: configs that contain num_environments as well as
        information necessary to create individual environments.
    :param env_class: class type of the envs to be created.
    :param workers_ignore_signals: Passed to :ref:`habitat.VectorEnv`'s
        constructor
    :return: VectorEnv object created according to specification.
    """
    num_environments = config.NUM_ENVIRONMENTS
    configs = []
    env_classes = [env_class for _ in range(num_environments)]
    dataset = make_dataset(config.TASK_CONFIG.DATASET.TYPE)
    scenes = config.TASK_CONFIG.DATASET.CONTENT_SCENES
    if "*" in config.TASK_CONFIG.DATASET.CONTENT_SCENES:
        scenes = dataset.get_scenes_to_load(config.TASK_CONFIG.DATASET)

    if num_environments > 1:
        if len(scenes) == 0:
            raise RuntimeError(
                "No scenes to load, multiple process logic relies on being "
                "able to split scenes uniquely between processes"
            )

        if len(scenes) < num_environments:
            raise RuntimeError(
                "reduce the number of environments as there "
                "aren't enough scenes.\n"
                "num_environments: {}\tnum_scenes: {}".format(
                    num_environments, len(scenes)
                )
            )

        random.shuffle(scenes)

    scene_splits: List[List[str]] = [[] for _ in range(num_environments)]
    for idx, scene in enumerate(scenes):
        scene_splits[idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    for i in range(num_environments):
        proc_config = config.clone()
        proc_config.defrost()

        task_config = proc_config.TASK_CONFIG
        task_config.SEED = task_config.SEED + i
        if len(scenes) > 0:
            task_config.DATASET.CONTENT_SCENES = scene_splits[i]

        task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = (
            config.SIMULATOR_GPU_ID
        )
        task_config.SIMULATOR.AGENT_0.SENSORS = config.SENSORS

        proc_config.freeze()
        configs.append(proc_config)

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(zip(configs, env_classes)),
        workers_ignore_signals=workers_ignore_signals,
    )
    return envs
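# A hedged usage sketch for the construct_envs variant above. The config
# path and NavRLEnv are assumptions about a typical habitat_baselines setup,
# and the action sampling is only illustrative:
config = get_config("habitat_baselines/config/pointnav/ppo_pointnav.yaml")
envs = construct_envs(config, env_class=NavRLEnv)
observations = envs.reset()
actions = [envs.action_spaces[0].sample() for _ in range(envs.num_envs)]
outputs = envs.step(actions)  # one (obs, reward, done, info) tuple per env
envs.close()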
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-path", type=str, required=True)
    parser.add_argument("--sim-gpu-id", type=int, required=True)
    parser.add_argument("--pth-gpu-id", type=int, required=True)
    parser.add_argument("--num-processes", type=int, required=True)
    parser.add_argument("--hidden-size", type=int, default=512)
    parser.add_argument("--count-test-episodes", type=int, default=100)
    parser.add_argument(
        "--sensors",
        type=str,
        default="DEPTH_SENSOR",
        help="comma separated string containing different "
        "sensors to use, currently 'RGB_SENSOR' and "
        "'DEPTH_SENSOR' are supported",
    )
    parser.add_argument(
        "--task-config",
        type=str,
        default="configs/tasks/pointnav.yaml",
        help="path to config yaml containing information about task",
    )

    cmd_line_inputs = [
        "--model-path",
        "/home/bruce/NSERC_2019/habitat-api/data/checkpoints/depth.pth",
        "--sim-gpu-id",
        "0",
        "--pth-gpu-id",
        "0",
        "--num-processes",
        "1",
        "--count-test-episodes",
        "100",
        "--task-config",
        "configs/tasks/pointnav.yaml",
    ]
    args = parser.parse_args(cmd_line_inputs)

    device = torch.device("cuda:{}".format(args.pth_gpu_id))

    env_configs = []
    baseline_configs = []

    for _ in range(args.num_processes):
        config_env = get_config(config_paths=args.task_config)
        config_env.defrost()
        config_env.DATASET.SPLIT = "val"

        agent_sensors = args.sensors.strip().split(",")
        for sensor in agent_sensors:
            assert sensor in ["RGB_SENSOR", "DEPTH_SENSOR"]
        config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors
        config_env.freeze()
        env_configs.append(config_env)

        config_baseline = cfg_baseline()
        baseline_configs.append(config_baseline)

    assert len(baseline_configs) > 0, "empty list of datasets"

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(env_configs, baseline_configs, range(args.num_processes))
        ),
    )

    ckpt = torch.load(args.model_path, map_location=device)

    actor_critic = Policy(
        observation_space=envs.observation_spaces[0],
        action_space=envs.action_spaces[0],
        hidden_size=512,
        goal_sensor_uuid="pointgoal",
    )
    actor_critic.to(device)

    ppo = PPO(
        actor_critic=actor_critic,
        clip_param=0.1,
        ppo_epoch=4,
        num_mini_batch=32,
        value_loss_coef=0.5,
        entropy_coef=0.01,
        lr=2.5e-4,
        eps=1e-5,
        max_grad_norm=0.5,
    )

    ppo.load_state_dict(ckpt["state_dict"])

    actor_critic = ppo.actor_critic

    observations = envs.reset()
    batch = batch_obs(observations)
    for sensor in batch:
        batch[sensor] = batch[sensor].to(device)

    test_recurrent_hidden_states = torch.zeros(
        args.num_processes, args.hidden_size, device=device
    )
    not_done_masks = torch.zeros(args.num_processes, 1, device=device)

    def transform_callback(data):
        nonlocal actor_critic
        nonlocal batch
        nonlocal not_done_masks
        nonlocal test_recurrent_hidden_states
        global flag
        global t_prev_update
        global observation

        if flag == 2:
            observation["depth"] = np.reshape(data.data[0:-2], (256, 256, 1))
            observation["pointgoal"] = data.data[-2:]
            flag = 1
            return

        pointgoal_received = data.data[-2:]
        translate_amount = 0.25  # meters
        rotate_amount = 0.174533  # radians

        isrotated = (
            rotate_amount * 0.95
            <= abs(pointgoal_received[1] - observation["pointgoal"][1])
            <= rotate_amount * 1.05
        )
        istimeup = (time.time() - t_prev_update) >= 4

        if isrotated or istimeup:
            vel_msg = Twist()
            vel_msg.linear.x = 0
            vel_msg.linear.y = 0
            vel_msg.linear.z = 0
            vel_msg.angular.x = 0
            vel_msg.angular.y = 0
            vel_msg.angular.z = 0
            pub_vel.publish(vel_msg)
            time.sleep(0.2)
            print("entered update step")

            observation["depth"] = np.reshape(data.data[0:-2], (256, 256, 1))
            observation["pointgoal"] = data.data[-2:]

            batch = batch_obs([observation])
            for sensor in batch:
                batch[sensor] = batch[sensor].to(device)
            if flag == 1:
                not_done_masks = torch.tensor(
                    [0.0], dtype=torch.float, device=device
                )
                flag = 0
            else:
                not_done_masks = torch.tensor(
                    [1.0], dtype=torch.float, device=device
                )

            _, actions, _, test_recurrent_hidden_states = actor_critic.act(
                batch,
                test_recurrent_hidden_states,
                not_done_masks,
                deterministic=True,
            )

            action_id = actions.item()
            print(
                "observation received to produce action_id is "
                + str(observation["pointgoal"])
            )
            print("action_id from net is " + str(actions.item()))

            t_prev_update = time.time()
            vel_msg = Twist()
            vel_msg.linear.x = 0
            vel_msg.linear.y = 0
            vel_msg.linear.z = 0
            vel_msg.angular.x = 0
            vel_msg.angular.y = 0
            vel_msg.angular.z = 0
            if action_id == 0:
                vel_msg.linear.x = 0.25 / 4
                pub_vel.publish(vel_msg)
            elif action_id == 1:
                vel_msg.angular.z = 10 / 180 * 3.1415926
                pub_vel.publish(vel_msg)
            elif action_id == 2:
                vel_msg.angular.z = -10 / 180 * 3.1415926
                pub_vel.publish(vel_msg)
            else:
                pub_vel.publish(vel_msg)
                sub.unregister()
                print("NN finished navigation task")

    sub = rospy.Subscriber(
        "depth_and_pointgoal",
        numpy_msg(Floats),
        transform_callback,
        queue_size=1,
    )
    rospy.spin()
def eval_checkpoint(checkpoint_path, args, writer, cur_ckpt_idx=0):
    env_configs = []
    baseline_configs = []
    device = torch.device("cuda", args.pth_gpu_id)

    for _ in range(args.num_processes):
        config_env = get_config(config_paths=args.task_config)
        config_env.defrost()
        config_env.DATASET.SPLIT = "val"

        agent_sensors = args.sensors.strip().split(",")
        for sensor in agent_sensors:
            assert sensor in ["RGB_SENSOR", "DEPTH_SENSOR"]
        config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors

        if args.video_option:
            config_env.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
            config_env.TASK.MEASUREMENTS.append("COLLISIONS")

        config_env.freeze()
        env_configs.append(config_env)

        config_baseline = cfg_baseline()
        baseline_configs.append(config_baseline)

    assert len(baseline_configs) > 0, "empty list of datasets"

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(env_configs, baseline_configs, range(args.num_processes))
        ),
    )

    ckpt = torch.load(checkpoint_path, map_location=device)

    actor_critic = Policy(
        observation_space=envs.observation_spaces[0],
        action_space=envs.action_spaces[0],
        hidden_size=512,
        goal_sensor_uuid=env_configs[0].TASK.GOAL_SENSOR_UUID,
    )
    actor_critic.to(device)

    ppo = PPO(
        actor_critic=actor_critic,
        clip_param=0.1,
        ppo_epoch=4,
        num_mini_batch=32,
        value_loss_coef=0.5,
        entropy_coef=0.01,
        lr=2.5e-4,
        eps=1e-5,
        max_grad_norm=0.5,
    )

    ppo.load_state_dict(ckpt["state_dict"])

    actor_critic = ppo.actor_critic

    observations = envs.reset()
    batch = batch_obs(observations)
    for sensor in batch:
        batch[sensor] = batch[sensor].to(device)

    current_episode_reward = torch.zeros(envs.num_envs, 1, device=device)
    test_recurrent_hidden_states = torch.zeros(
        args.num_processes, args.hidden_size, device=device
    )
    not_done_masks = torch.zeros(args.num_processes, 1, device=device)
    rgb_frames = [[] for _ in range(args.num_processes)]  # used with video_option

    stats_episodes = dict()  # dict of dicts that stores stats per episode

    # loop until enough unique episodes have been recorded
    while len(stats_episodes) < args.count_test_episodes and envs.num_envs > 0:
        current_episodes = envs.current_episodes()

        with torch.no_grad():
            _, actions, _, test_recurrent_hidden_states = actor_critic.act(
                batch,
                test_recurrent_hidden_states,
                not_done_masks,
                deterministic=True,
            )

        print("action_id is " + str(actions.item()))
        print("point goal is " + str(observations[0]["pointgoal"]))

        outputs = envs.step([a[0].item() for a in actions])

        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]

        # for visualizing where the robot is going
        # cv2.imshow("RGB", transform_rgb_bgr(observations[0]["rgb"]))
        cv2.imshow("Depth", observations[0]["depth"])
        cv2.waitKey(100)
        time.sleep(0.2)

        batch = batch_obs(observations)
        for sensor in batch:
            batch[sensor] = batch[sensor].to(device)

        not_done_masks = torch.tensor(
            [[0.0] if done else [1.0] for done in dones],
            dtype=torch.float,
            device=device,
        )

        rewards = torch.tensor(
            rewards, dtype=torch.float, device=device
        ).unsqueeze(1)
        current_episode_reward += rewards

        next_episodes = envs.current_episodes()
        envs_to_pause = []
        n_envs = envs.num_envs
        for i in range(n_envs):
            if (
                next_episodes[i].scene_id,
                next_episodes[i].episode_id,
            ) in stats_episodes:
                envs_to_pause.append(i)

            # episode ended
            if not_done_masks[i].item() == 0:
                episode_stats = dict()
                episode_stats["spl"] = infos[i]["spl"]
                episode_stats["success"] = int(infos[i]["spl"] > 0)
                episode_stats["reward"] = current_episode_reward[i].item()
                current_episode_reward[i] = 0
                # use scene_id + episode_id as unique id for storing stats
                stats_episodes[
                    (
                        current_episodes[i].scene_id,
                        current_episodes[i].episode_id,
                    )
                ] = episode_stats
                if args.video_option:
                    generate_video(
                        args,
                        rgb_frames[i],
                        current_episodes[i].episode_id,
                        cur_ckpt_idx,
                        infos[i]["spl"],
                        writer,
                    )
                    rgb_frames[i] = []

            # episode continues
            elif args.video_option:
                frame = observations_to_image(observations[i], infos[i])
                rgb_frames[i].append(frame)

        # pausing envs with no new episode
        if len(envs_to_pause) > 0:
            state_index = list(range(envs.num_envs))
            for idx in reversed(envs_to_pause):
                state_index.pop(idx)
                envs.pause_at(idx)

            # indexing along the batch dimensions
            test_recurrent_hidden_states = test_recurrent_hidden_states[
                state_index
            ]
            not_done_masks = not_done_masks[state_index]
            current_episode_reward = current_episode_reward[state_index]

            for k, v in batch.items():
                batch[k] = v[state_index]

            if args.video_option:
                rgb_frames = [rgb_frames[i] for i in state_index]

    aggregated_stats = dict()
    for stat_key in next(iter(stats_episodes.values())).keys():
        aggregated_stats[stat_key] = sum(
            v[stat_key] for v in stats_episodes.values()
        )
    num_episodes = len(stats_episodes)

    episode_reward_mean = aggregated_stats["reward"] / num_episodes
    episode_spl_mean = aggregated_stats["spl"] / num_episodes
    episode_success_mean = aggregated_stats["success"] / num_episodes

    logger.info("Average episode reward: {:.6f}".format(episode_reward_mean))
    logger.info("Average episode success: {:.6f}".format(episode_success_mean))
    logger.info("Average episode SPL: {:.6f}".format(episode_spl_mean))

    writer.add_scalars(
        "eval_reward", {"average reward": episode_reward_mean}, cur_ckpt_idx
    )
    writer.add_scalars(
        "eval_SPL", {"average SPL": episode_spl_mean}, cur_ckpt_idx
    )
    writer.add_scalars(
        "eval_success", {"average success": episode_success_mean}, cur_ckpt_idx
    )
def construct_envs(
    config: Config, env_class: Type[Union[Env, RLEnv]]
) -> VectorEnv:
    r"""Create VectorEnv object with specified config and env class type.

    To allow better performance, the dataset is split into smaller ones,
    one for each individual env, grouped by scenes.

    Args:
        config: configs that contain num_processes as well as information
            necessary to create individual environments.
        env_class: class type of the envs to be created.

    Returns:
        VectorEnv object created according to specification.
    """
    num_processes = config.NUM_PROCESSES
    configs = []
    env_classes = [env_class for _ in range(num_processes)]
    dataset = make_dataset(config.TASK_CONFIG.DATASET.TYPE)
    scenes = config.TASK_CONFIG.DATASET.CONTENT_SCENES
    if "*" in config.TASK_CONFIG.DATASET.CONTENT_SCENES:
        scenes = dataset.get_scenes_to_load(config.TASK_CONFIG.DATASET)

    print("************************************* scenes lens:", len(scenes))
    print("************************************* num_processes:", num_processes)
    print("************************************* env_class:", env_class)

    if num_processes > 1:
        if len(scenes) == 0:
            raise RuntimeError(
                "No scenes to load, multiple process logic relies on being "
                "able to split scenes uniquely between processes"
            )
        if len(scenes) < num_processes:
            raise RuntimeError(
                "reduce the number of processes as there "
                "aren't enough scenes"
            )
        random.shuffle(scenes)

    scene_splits = [[] for _ in range(num_processes)]
    for idx, scene in enumerate(scenes):
        scene_splits[idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    for i in range(num_processes):
        proc_config = config.clone()
        proc_config.defrost()

        task_config = proc_config.TASK_CONFIG
        task_config.SEED = task_config.SEED + i
        if len(scenes) > 0:
            task_config.DATASET.CONTENT_SCENES = scene_splits[i]

        task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = (
            config.SIMULATOR_GPU_ID
        )
        task_config.SIMULATOR.AGENT_0.SENSORS = config.SENSORS

        proc_config.freeze()
        configs.append(proc_config)

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(zip(configs, env_classes)),
    )
    print("************************************* envs type:", type(envs))
    print("************************************* count_episodes:", envs.count_episodes())
    # print("************************************* current_episodes:", envs.current_episodes())
    return envs
agent_sensors = "RGB_SENSOR,DEPTH_SENSOR".strip().split(",") for sensor in agent_sensors: assert sensor in ["RGB_SENSOR", "DEPTH_SENSOR"] config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors config_env.freeze() env_configs.append(config_env) config_baseline = cfg_baseline() baseline_configs.append(config_baseline) assert len(baseline_configs) > 0, "empty list of datasets" envs = habitat.VectorEnv( make_env_fn=make_env_fn, env_fn_args=tuple( tuple( zip(env_configs, baseline_configs, range(1)) ) ), ) ckpt = torch.load("/home/bruce/NSERC_2019/habitat-api/data/checkpoints/ckpt.2.pth", map_location=device) actor_critic = Policy( observation_space=envs.observation_spaces[0], action_space=envs.action_spaces[0], hidden_size=512, ) actor_critic.to(device) ppo = PPO( actor_critic=actor_critic,
def eval_checkpoint(checkpoint_path, args, writer, cur_ckpt_idx=0):
    env_configs = []
    baseline_configs = []
    device = torch.device("cuda", args.pth_gpu_id)

    for _ in range(args.num_processes):
        config_env = get_config(config_paths=args.task_config)
        config_env.defrost()
        config_env.DATASET.SPLIT = "val"

        agent_sensors = args.sensors.strip().split(",")
        for sensor in agent_sensors:
            assert sensor in ["RGB_SENSOR", "DEPTH_SENSOR"]
        config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors

        if args.video_option:
            config_env.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
            config_env.TASK.MEASUREMENTS.append("COLLISIONS")

        config_env.freeze()
        env_configs.append(config_env)

        config_baseline = cfg_baseline()
        baseline_configs.append(config_baseline)

    assert len(baseline_configs) > 0, "empty list of datasets"

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(env_configs, baseline_configs, range(args.num_processes))
        ),
    )

    ckpt = torch.load(checkpoint_path, map_location=device)

    actor_critic = Policy(
        observation_space=envs.observation_spaces[0],
        action_space=envs.action_spaces[0],
        hidden_size=512,
        goal_sensor_uuid=env_configs[0].TASK.GOAL_SENSOR_UUID,
    )
    actor_critic.to(device)

    ppo = PPO(
        actor_critic=actor_critic,
        clip_param=0.1,
        ppo_epoch=4,
        num_mini_batch=32,
        value_loss_coef=0.5,
        entropy_coef=0.01,
        lr=2.5e-4,
        eps=1e-5,
        max_grad_norm=0.5,
    )

    ppo.load_state_dict(ckpt["state_dict"])

    actor_critic = ppo.actor_critic

    observations = envs.reset()
    batch = batch_obs(observations)
    for sensor in batch:
        batch[sensor] = batch[sensor].to(device)

    episode_rewards = torch.zeros(envs.num_envs, 1, device=device)
    episode_spls = torch.zeros(envs.num_envs, 1, device=device)
    episode_success = torch.zeros(envs.num_envs, 1, device=device)
    episode_counts = torch.zeros(envs.num_envs, 1, device=device)
    current_episode_reward = torch.zeros(envs.num_envs, 1, device=device)

    test_recurrent_hidden_states = torch.zeros(
        args.num_processes, args.hidden_size, device=device
    )
    not_done_masks = torch.zeros(args.num_processes, 1, device=device)
    stats_episodes = set()

    rgb_frames = None
    if args.video_option:
        # one independent frame buffer per env ([[]] * n would alias one list)
        rgb_frames = [[] for _ in range(args.num_processes)]
        os.makedirs(args.video_dir, exist_ok=True)

    while episode_counts.sum() < args.count_test_episodes:
        current_episodes = envs.current_episodes()

        with torch.no_grad():
            _, actions, _, test_recurrent_hidden_states = actor_critic.act(
                batch,
                test_recurrent_hidden_states,
                not_done_masks,
                deterministic=False,
            )

        outputs = envs.step([a[0].item() for a in actions])

        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
        batch = batch_obs(observations)
        for sensor in batch:
            batch[sensor] = batch[sensor].to(device)

        not_done_masks = torch.tensor(
            [[0.0] if done else [1.0] for done in dones],
            dtype=torch.float,
            device=device,
        )

        for i in range(not_done_masks.shape[0]):
            if not_done_masks[i].item() == 0:
                episode_spls[i] += infos[i]["spl"]
                if infos[i]["spl"] > 0:
                    episode_success[i] += 1

        rewards = torch.tensor(
            rewards, dtype=torch.float, device=device
        ).unsqueeze(1)
        current_episode_reward += rewards
        episode_rewards += (1 - not_done_masks) * current_episode_reward
        episode_counts += 1 - not_done_masks
        current_episode_reward *= not_done_masks

        next_episodes = envs.current_episodes()
        envs_to_pause = []
        n_envs = envs.num_envs
        for i in range(n_envs):
            if next_episodes[i].episode_id in stats_episodes:
                envs_to_pause.append(i)

            # episode ended
            if not_done_masks[i].item() == 0:
                stats_episodes.add(current_episodes[i].episode_id)
                if args.video_option:
                    generate_video(
                        args,
                        rgb_frames[i],
                        current_episodes[i].episode_id,
                        cur_ckpt_idx,
                        infos[i]["spl"],
                        writer,
                    )
                    rgb_frames[i] = []

            # episode continues
            elif args.video_option:
                frame = observations_to_image(observations[i], infos[i])
                rgb_frames[i].append(frame)

        # stop tracking ended episodes if they exist
        if len(envs_to_pause) > 0:
            state_index = list(range(envs.num_envs))
            for idx in reversed(envs_to_pause):
                state_index.pop(idx)
                envs.pause_at(idx)

            # indexing along the batch dimension (hidden states here are
            # (num_envs, hidden_size), so index the first dimension)
            test_recurrent_hidden_states = test_recurrent_hidden_states[
                state_index
            ]
            not_done_masks = not_done_masks[state_index]
            current_episode_reward = current_episode_reward[state_index]

            for k, v in batch.items():
                batch[k] = v[state_index]

            if args.video_option:
                rgb_frames = [rgb_frames[i] for i in state_index]

    episode_reward_mean = (episode_rewards / episode_counts).mean().item()
    episode_spl_mean = (episode_spls / episode_counts).mean().item()
    episode_success_mean = (episode_success / episode_counts).mean().item()

    logger.info("Average episode reward: {:.6f}".format(episode_reward_mean))
    logger.info("Average episode success: {:.6f}".format(episode_success_mean))
    logger.info("Average episode SPL: {:.6f}".format(episode_spl_mean))

    writer.add_scalars(
        "eval_reward", {"average reward": episode_reward_mean}, cur_ckpt_idx
    )
    writer.add_scalars(
        "eval_SPL", {"average SPL": episode_spl_mean}, cur_ckpt_idx
    )
    writer.add_scalars(
        "eval_success", {"average success": episode_success_mean}, cur_ckpt_idx
    )
def construct_envs(
    config: Config,
    env_class: Type[Union[Env, RLEnv]],
    auto_reset_done: bool = True,
) -> VectorEnv:
    r"""Create VectorEnv object with specified config and env class type.

    To allow better performance, the dataset is split into smaller ones,
    one for each individual env, grouped by scenes.

    Args:
        config: configs that contain num_processes as well as information
            necessary to create individual environments.
        env_class: class type of the envs to be created.
        auto_reset_done: whether or not to automatically reset the env on done.

    Returns:
        VectorEnv object created according to specification.
    """
    num_processes = config.NUM_PROCESSES
    configs = []
    env_classes = [env_class for _ in range(num_processes)]
    dataset = make_dataset(config.TASK_CONFIG.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(config.TASK_CONFIG.DATASET)

    if num_processes > 1:
        if len(scenes) == 0:
            raise RuntimeError(
                "No scenes to load, multiple process logic relies on being "
                "able to split scenes uniquely between processes"
            )
        if len(scenes) < num_processes:
            raise RuntimeError(
                "reduce the number of processes as there "
                "aren't enough scenes"
            )
        random.shuffle(scenes)

    scene_splits = [[] for _ in range(num_processes)]
    for idx, scene in enumerate(scenes):
        scene_splits[idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    for i in range(num_processes):
        new_config = config.clone()
        task_config = new_config.TASK_CONFIG.clone()
        task_config.defrost()
        if len(scenes) > 0:
            task_config.DATASET.CONTENT_SCENES = scene_splits[i]

        # round-robin the available simulator GPUs across processes
        task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = (
            config.SIMULATOR_GPU_ID[i % len(config.SIMULATOR_GPU_ID)]
        )
        task_config.SIMULATOR.AGENT_0.SENSORS = config.SENSORS
        task_config.freeze()

        new_config.defrost()
        new_config.TASK_CONFIG = task_config
        new_config.freeze()
        configs.append(new_config)

    for proc_config in configs:
        logger.info(
            "[construct_envs] Using GPU ID "
            f"{proc_config.TASK_CONFIG.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID}"
        )

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(zip(configs, env_classes)),
        auto_reset_done=auto_reset_done,
    )
    return envs
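# The variant above takes SIMULATOR_GPU_ID as a list and assigns simulator
# GPUs to worker processes round-robin. A tiny illustration of that
# assignment (the GPU ids are made up):
gpu_ids = [0, 1]
assignments = [gpu_ids[i % len(gpu_ids)] for i in range(5)]
assert assignments == [0, 1, 0, 1, 0]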
def construct_envs(config: Config, env_class: Type) -> VectorEnv:
    r"""Create VectorEnv object with specified config and env class type.

    To allow better performance, the dataset is split into smaller ones,
    one for each individual env, grouped by scenes.

    Args:
        config: configs that contain num_processes as well as information
            necessary to create individual environments.
        env_class: class type of the envs to be created.

    Returns:
        VectorEnv object created according to specification.
    """
    trainer_config = config.TRAINER.RL.PPO
    rl_env_config = config.TRAINER.RL
    task_config = config.TASK_CONFIG  # excluding trainer-specific configs

    env_configs, rl_env_configs = [], []
    env_classes = [env_class for _ in range(trainer_config.num_processes)]
    dataset = make_dataset(task_config.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(task_config.DATASET)

    if len(scenes) > 0:
        random.shuffle(scenes)
        assert len(scenes) >= trainer_config.num_processes, (
            "reduce the number of processes as there "
            "aren't enough scenes"
        )

    scene_splits = [[] for _ in range(trainer_config.num_processes)]
    for idx, scene in enumerate(scenes):
        scene_splits[idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    for i in range(trainer_config.num_processes):
        env_config = task_config.clone()
        env_config.defrost()
        if len(scenes) > 0:
            env_config.DATASET.CONTENT_SCENES = scene_splits[i]

        env_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = (
            trainer_config.sim_gpu_id
        )

        agent_sensors = trainer_config.sensors.strip().split(",")
        env_config.SIMULATOR.AGENT_0.SENSORS = agent_sensors
        env_config.freeze()
        env_configs.append(env_config)
        rl_env_configs.append(rl_env_config)

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(
                env_configs,
                rl_env_configs,
                env_classes,
                range(trainer_config.num_processes),
            )
        ),
    )
    return envs