def get_eval_dataset(shell_args, data_subset="val"):
    """Load the point-navigation episodes used for evaluation.

    Args:
        shell_args: Argument namespace. ``dataset`` selects the episode file
            template ("mp3d" or "gibson") and ``max_episode_length`` is
            forwarded to ``get_dataset_config``.
        data_subset: Split name forwarded to ``get_dataset_config``.

    Returns:
        The object returned by ``make_dataset`` for the resolved config.

    Raises:
        NotImplementedError: If ``shell_args.dataset`` has no known path.
        AssertionError: If the loaded dataset contains no episodes.
    """
    episode_files = {
        "mp3d": "data/datasets/pointnav/mp3d/v1/{split}/{split}.json.gz",
        "gibson": "data/datasets/pointnav/gibson/v1/{split}/{split}.json.gz",
    }
    data_path = episode_files.get(shell_args.dataset)
    if data_path is None:
        raise NotImplementedError("No rule for this dataset.")

    eval_config = get_dataset_config(
        data_path,
        data_subset,
        shell_args.max_episode_length,
        0,
        [],
        [],
    )
    dataset = make_dataset(eval_config.DATASET.TYPE, config=eval_config.DATASET)
    assert len(dataset.episodes) > 0, "empty datasets"
    return dataset
def setup(self, create_decoder): self.setup_device() render_gpus = [ int(gpu_id.strip()) for gpu_id in self.shell_args.render_gpu_ids.split(",") ] self.configs = [] self.env_types = [] for proc in range(self.shell_args.num_processes): extra_task_sensors = set() extra_agent_sensors = set() if self.shell_args.record_video or self.shell_args.update_encoder_features: extra_agent_sensors.add("DEPTH_SENSOR") if "SEMANTIC_SENSOR" in extra_agent_sensors: extra_task_sensors.append("CLASS_SEGMENTATION_SENSOR") if self.shell_args.dataset == "suncg": data_path = "data/datasets/pointnav/suncg/v1/{split}/{split}.json.gz" elif self.shell_args.dataset == "mp3d": data_path = "data/datasets/pointnav/mp3d/v1/{split}/{split}.json.gz" elif self.shell_args.dataset == "gibson": data_path = "data/datasets/pointnav/gibson/v1/{split}/{split}.json.gz" else: raise NotImplementedError("No rule for this dataset.") config = get_dataset_config( data_path, self.shell_args.data_subset, self.shell_args.max_episode_length, render_gpus[proc % len(render_gpus)], list(extra_task_sensors), list(extra_agent_sensors), ) config.TASK.NUM_EPISODES_BEFORE_JUMP = self.shell_args.num_processes if self.shell_args.blind and not self.shell_args.record_video: config.SIMULATOR.RGB_SENSOR.HEIGHT = 2 config.SIMULATOR.RGB_SENSOR.WIDTH = 2 if self.shell_args.task == "pointnav": config.TASK.SUCCESS_REWARD = 2 config.TASK.SUCCESS_DISTANCE = 0.2 config.TASK.COLLISION_REWARD = 0 config.TASK.ENABLE_STOP_ACTION = False if self.shell_args.task == "pointnav": self.env_types.append(PointnavRLEnv) elif self.shell_args.task == "exploration": config.TASK.GRID_SIZE = 1 assert config.TASK.GRID_SIZE >= config.SIMULATOR.FORWARD_STEP_SIZE config.TASK.NEW_GRID_CELL_REWARD = 0.1 config.TASK.COLLISION_REWARD = 0 # -0.1 config.TASK.RETURN_VISITED_GRID = self.shell_args.record_video config.ENVIRONMENT.MAX_EPISODE_STEPS = 250 config.TASK.TOP_DOWN_MAP.DRAW_SOURCE_AND_TARGET = False self.env_types.append(ExplorationRLEnv) if 
self.shell_args.dataset == "suncg": config.TASK.NUM_EPISODES_BEFORE_JUMP = 5 else: config.TASK.NUM_EPISODES_BEFORE_JUMP = 5 elif self.shell_args.task == "flee": config.TASK.COLLISION_REWARD = 0 # -0.1 config.ENVIRONMENT.MAX_EPISODE_STEPS = 250 config.TASK.TOP_DOWN_MAP.DRAW_SOURCE_AND_TARGET = False self.env_types.append(RunAwayRLEnv) if self.shell_args.dataset == "suncg": config.TASK.NUM_EPISODES_BEFORE_JUMP = 5 else: config.TASK.NUM_EPISODES_BEFORE_JUMP = 5 else: raise NotImplementedError("Unknown task type") if self.shell_args.record_video: config.TASK.NUM_EPISODES_BEFORE_JUMP = -1 config.TASK.STEP_SIZE = config.SIMULATOR.FORWARD_STEP_SIZE config.TASK.TOP_DOWN_MAP.MAX_EPISODE_STEPS = config.ENVIRONMENT.MAX_EPISODE_STEPS config.TASK.TOP_DOWN_MAP.MAP_RESOLUTION = 1250 config.TASK.OBSERVE_BEST_NEXT_ACTION = self.shell_args.algo == "supervised" self.configs.append(config) if self.shell_args.debug: print("Config\n", self.configs[0]) self.shell_args.cuda = not self.shell_args.no_cuda and torch.cuda.is_available( ) if self.shell_args.blind: decoder_output_info = [] else: decoder_output_info = [("reconstruction", 3), ("depth", 1), ("surface_normals", 3)] if self.shell_args.encoder_network_type == "ShallowVisualEncoder": encoder_type = networks.ShallowVisualEncoder elif self.shell_args.encoder_network_type == "ResNetEncoder": encoder_type = networks.ResNetEncoder else: raise NotImplementedError("Unknown network type.") self.gym_action_space = gym.spaces.discrete.Discrete(len(ACTION_SPACE)) target_vector_size = None if self.shell_args.task == "pointnav": target_vector_size = 2 elif self.shell_args.task == "exploration" or self.shell_args.task == "flee": target_vector_size = 0 self.agent = VisualPolicy( self.gym_action_space, base=networks.RLBaseWithVisualEncoder, base_kwargs=dict( encoder_type=encoder_type, decoder_output_info=decoder_output_info, recurrent=True, end_to_end=self.shell_args.end_to_end, hidden_size=256, target_vector_size=target_vector_size, 
action_size=len(ACTION_SPACE), gpu_ids=self.torch_devices, create_decoder=create_decoder, blind=self.shell_args.blind, ), ) if self.shell_args.debug: print("actor critic", self.agent) self.agent.to(self.device) self.time_str = misc_util.get_time_str() visual_layers = self.agent.base.visual_encoder.module if self.shell_args.freeze_encoder_features: # Not necessary, but probably lets pytorch be more space efficient. for param in visual_layers.encoder.parameters(): param.requires_grad = False if self.shell_args.freeze_visual_decoder_features: if hasattr(visual_layers, "bridge"): for param in visual_layers.bridge.parameters(): param.requires_grad = False if hasattr(visual_layers, "decoder"): for param in visual_layers.decoder.parameters(): param.requires_grad = False if hasattr(visual_layers, "out"): for param in visual_layers.out.parameters(): param.requires_grad = False if hasattr(visual_layers, "class_pred_layer"): if visual_layers.class_pred_layer is not None: for param in visual_layers.class_pred_layer.parameters(): param.requires_grad = False if self.shell_args.freeze_motion_decoder_features and self.shell_args.freeze_policy_decoder_features: for param in self.agent.base.visual_projection.parameters(): param.requires_grad = False if self.shell_args.freeze_motion_decoder_features: for param in self.agent.base.egomotion_layer.parameters(): param.requires_grad = False for param in self.agent.base.motion_model_layer.parameters(): param.requires_grad = False if self.shell_args.freeze_policy_decoder_features: for param in self.agent.base.gru.parameters(): param.requires_grad = False for param in self.agent.base.rl_layers.parameters(): param.requires_grad = False for param in self.agent.base.critic_linear.parameters(): param.requires_grad = False for param in self.agent.dist.parameters(): param.requires_grad = False if self.shell_args.algo == "ppo": self.optimizer = optimizers.VisualPPO( self.agent, self.shell_args.clip_param, self.shell_args.ppo_epoch, 
self.shell_args.num_mini_batch, self.shell_args.value_loss_coef, self.shell_args.entropy_coef, lr=self.shell_args.lr, eps=self.shell_args.eps, max_grad_norm=self.shell_args.max_grad_norm, ) elif self.shell_args.algo == "supervised": self.optimizer = optimizers.BehavioralCloningOptimizer( self.agent, self.shell_args.clip_param, self.shell_args.ppo_epoch, self.shell_args.num_mini_batch, self.shell_args.value_loss_coef, self.shell_args.entropy_coef, lr=self.shell_args.lr, eps=self.shell_args.eps, ) else: raise NotImplementedError("No such algorithm") height = self.configs[0].SIMULATOR.RGB_SENSOR.HEIGHT width = self.configs[0].SIMULATOR.RGB_SENSOR.WIDTH self.observation_space = { "pointgoal": ((2, ), np.dtype(np.float32)), "prev_action_one_hot": ((len(ACTION_SPACE), ), np.dtype(np.float32)), } self.compute_surface_normals = self.shell_args.record_video or self.shell_args.update_encoder_features if self.shell_args.algo == "supervised": self.observation_space["best_next_action"] = (( len(ACTION_SPACE), ), np.dtype(np.float32)) if self.shell_args.update_encoder_features: self.observation_space["depth"] = ((1, height, width), np.dtype(np.float32)) if self.compute_surface_normals: self.observation_space["surface_normals"] = ((3, height, width), np.dtype( np.float32)) if not self.shell_args.end_to_end: self.observation_space["visual_encoder_features"] = ( (self.agent.base.num_output_channels, 256 // 2**5, 256 // 2**5), np.dtype(np.float32), ) # Send dummy batch through to allocate memory before vecenv print("Feeding dummy batch") dummy_start = time.time() self.agent.act( { "images": torch.rand(( self.shell_args.num_processes, 3, self.configs[0].SIMULATOR.RGB_SENSOR.HEIGHT, self.configs[0].SIMULATOR.RGB_SENSOR.WIDTH, )).to(self.device), "target_vector": torch.rand(self.shell_args.num_processes, target_vector_size).to(self.device), "prev_action_one_hot": torch.rand(self.shell_args.num_processes, self.gym_action_space.n).to(self.device), }, 
torch.rand(self.shell_args.num_processes, self.agent.recurrent_hidden_state_size).to(self.device), torch.rand(self.shell_args.num_processes, 1).to(self.device), ) print("Done feeding dummy batch %.3f" % (time.time() - dummy_start)) self.start_iter = 0 self.checkpoint_dir = os.path.join(self.shell_args.log_prefix, self.shell_args.checkpoint_dirname, self.time_str)
top_down_map, map_agent_pos, heading - np.pi / 2, agent_radius_px=top_down_map.shape[0] / 40, ) return top_down_map if dataset == "mp3d": data_path = "data/datasets/pointnav/mp3d/v1/{split}/{split}.json.gz" elif dataset == "gibson": data_path = "data/datasets/pointnav/gibson/v1/{split}/{split}.json.gz" else: raise NotImplementedError("No rule for this dataset.") config = get_dataset_config(data_path, data_subset, max_episode_length, 0, [], []) config.defrost() for sensor in config.SIMULATOR.AGENT_0.SENSORS: config.SIMULATOR[sensor].HEIGHT = RESOLUTION config.SIMULATOR[sensor].WIDTH = RESOLUTION config.TASK.COLLISION_REWARD = 0 # -0.1 config.ENVIRONMENT.MAX_EPISODE_STEPS = 250 config.TASK.TOP_DOWN_MAP.DRAW_SOURCE_AND_TARGET = False config.TASK.NUM_EPISODES_BEFORE_JUMP = -1 config.TASK.GRID_SIZE = 1 config.TASK.NEW_GRID_CELL_REWARD = 1 config.TASK.RETURN_VISITED_GRID = False config.freeze() dataset = make_dataset(config.DATASET.TYPE, config=config.DATASET)