def __init__(self):
    super().__init__()

    self.ENV_ARGS["renderDepthImage"] = True

    self.SENSORS = [
        DepthSensorRoboThor(
            height=self.SCREEN_SIZE,
            width=self.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="depth_lowres",
        ),
        GoalObjectTypeThorSensor(object_types=self.TARGET_TYPES),
    ]

    self.PREPROCESSORS = [
        Builder(
            ResnetPreProcessorHabitat,
            {
                "input_height": self.SCREEN_SIZE,
                "input_width": self.SCREEN_SIZE,
                "output_width": 7,
                "output_height": 7,
                "output_dims": 512,
                "pool": False,
                "torchvision_resnet_model": models.resnet18,
                "input_uuids": ["depth_lowres"],
                "output_uuid": "depth_resnet",
                "parallel": False,  # TODO: kept False for debugging
            },
        ),
    ]

    self.OBSERVATIONS = [
        "depth_resnet",
        "goal_object_type_ind",
    ]
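# Aside: the preprocessor above asks resnet18 for a 512x7x7 feature grid with
# "pool": False. The standalone sketch below shows where those numbers come from;
# it is only a shape demonstration, not the ResnetPreProcessorHabitat
# implementation (which also handles the 1-to-3 channel expansion of depth
# frames before the ResNet; here we simply feed a random 3-channel tensor).
import torch
import torch.nn as nn
from torchvision import models

resnet = models.resnet18()
# Keep everything up to and including layer4; drop avgpool and fc ("pool": False).
trunk = nn.Sequential(*list(resnet.children())[:-2])

with torch.no_grad():
    frame = torch.rand(1, 3, 224, 224)  # SCREEN_SIZE x SCREEN_SIZE input
    features = trunk(frame)
print(features.shape)  # torch.Size([1, 512, 7, 7])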
def __init__(self):
    super().__init__()

    self.SENSORS = [
        RGBSensorThor(
            height=self.SCREEN_SIZE,
            width=self.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GoalObjectTypeThorSensor(object_types=self.TARGET_TYPES),
    ]

    self.PREPROCESSORS = [
        Builder(
            ResnetPreProcessorHabitat,
            {
                "input_height": self.SCREEN_SIZE,
                "input_width": self.SCREEN_SIZE,
                "output_width": 7,
                "output_height": 7,
                "output_dims": 512,
                "pool": False,
                "torchvision_resnet_model": models.resnet18,
                "input_uuids": ["rgb_lowres"],
                "output_uuid": "rgb_resnet",
                "parallel": False,
            },
        ),
    ]

    self.OBSERVATIONS = [
        "rgb_resnet",
        "goal_object_type_ind",
    ]
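# Aside: in both configs above, Builder wraps a class and its kwargs so that the
# (heavyweight) preprocessor is constructed later, inside each worker process,
# rather than when the config module is imported. The stand-in below is an
# illustrative sketch of that deferred-construction pattern, not AllenAct's
# actual Builder; LazyBuilder is a hypothetical name.
from typing import Any, Dict, Optional, Type

class LazyBuilder:
    def __init__(self, class_type: Type, kwargs: Optional[Dict[str, Any]] = None):
        self.class_type = class_type
        self.kwargs = kwargs or {}

    def __call__(self, **extra: Any):
        # kwargs supplied at build time override the stored ones.
        return self.class_type(**{**self.kwargs, **extra})

# Usage: nothing expensive (e.g. a ResNet) is created until the worker calls it.
builder = LazyBuilder(dict, {"input_uuids": ["rgb_lowres"]})
instance = builder(output_uuid="rgb_resnet")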
class ObjectNavRoboThorRGBDPPOExperimentConfig(
    ObjectNavRoboThorBaseConfig,
    ObjectNavMixInPPOConfig,
    ObjectNavMixInResNetGRUConfig,
):
    """An Object Navigation experiment configuration in RoboThor with RGB-D input."""

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        DepthSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_normalization=True,
            uuid="depth_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
    ]

    @classmethod
    def tag(cls):
        return "Objectnav-RoboTHOR-RGBD-ResNetGRU-DDPPO"
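# Aside: use_resnet_normalization=True indicates RGB frames are standardized
# before the ResNet. Assuming the usual convention for ResNet inputs (the
# ImageNet channel statistics), the operation looks like this sketch; the depth
# sensor's use_normalization=True applies its own mean/std, not these values.
import numpy as np

IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)

def normalize_rgb(frame: np.ndarray) -> np.ndarray:
    """frame: HxWx3 float array in [0, 1] -> per-channel standardized."""
    return (frame - IMAGENET_MEAN) / IMAGENET_STD

frame = np.random.rand(224, 224, 3).astype(np.float32)
print(normalize_rgb(frame).shape)  # (224, 224, 3)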
class ObjectNavRoboThorRGBDAggerExperimentConfig(
    ObjectNavRoboThorBaseConfig,
    ObjectNavMixInDAggerConfig,
    ObjectNavMixInResNetGRUConfig,
):
    """An Object Navigation experiment configuration in RoboThor with RGB input,
    trained with DAgger."""

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
        ExpertActionSensor(nactions=len(ObjectNavTask.class_action_names())),
    ]

    @classmethod
    def tag(cls):
        return "Objectnav-RoboTHOR-RGB-ResNetGRU-DAgger"
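# Aside: the ExpertActionSensor exposes the expert's action at each step, and a
# DAgger-style stage trains the policy to imitate it while rolling out the
# student's (or a mixed) policy. The sketch below shows only the supervision
# signal, cross-entropy against the expert action; it is illustrative, not
# AllenAct's Imitation loss, which among other things masks steps where no
# expert action is available.
import torch
import torch.nn.functional as F

num_actions = 6  # e.g. len(ObjectNavTask.class_action_names()) in this setup
logits = torch.randn(8, num_actions, requires_grad=True)  # policy logits for 8 steps
expert_actions = torch.randint(0, num_actions, (8,))      # labels from the expert sensor

loss = F.cross_entropy(logits, expert_actions)
loss.backward()  # in a real update this backpropagates through the policy network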
from math import ceil
from typing import Any, Dict, List, Optional

import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

# The AllenAct-specific classes used below (ExperimentConfig, Builder,
# TrainingPipeline, PipelineStage, LinearDecay, PPO/PPOConfig, TaskSampler,
# the THOR sensors, ObjectNaviThorGridTask, ObjectNavTaskSampler, and
# ObjectNavBaselineActorCritic) come from the surrounding codebase; their exact
# module paths depend on the framework version, so those imports are omitted here.


class ObjectNavThorPPOExperimentConfig(ExperimentConfig):
    """A simple object navigation experiment in THOR, trained with PPO."""

    # A simple setting: train/valid/test all use the same single scene,
    # and we are looking for a single object type.
    OBJECT_TYPES = ["Tomato"]
    TRAIN_SCENES = ["FloorPlan1_physics"]
    VALID_SCENES = ["FloorPlan1_physics"]
    TEST_SCENES = ["FloorPlan1_physics"]

    # Sensors and basic environment details.
    SCREEN_SIZE = 224
    SENSORS = [
        RGBSensorThor(
            height=SCREEN_SIZE, width=SCREEN_SIZE, use_resnet_normalization=True,
        ),
        GoalObjectTypeThorSensor(object_types=OBJECT_TYPES),
    ]

    ENV_ARGS = {
        "player_screen_height": SCREEN_SIZE,
        "player_screen_width": SCREEN_SIZE,
        "quality": "Very Low",
    }

    MAX_STEPS = 128
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    VALID_SAMPLES_IN_SCENE = 10
    TEST_SAMPLES_IN_SCENE = 100

    @classmethod
    def tag(cls):
        return "ObjectNavThorPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        ppo_steps = int(1e6)
        lr = 2.5e-4
        num_mini_batch = 2 if not torch.cuda.is_available() else 6
        update_repeats = 4
        num_steps = 128
        metric_accumulate_interval = cls.MAX_STEPS * 10  # Log every 10 max-length tasks.
        save_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 1.0
        max_grad_norm = 0.5
        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps),
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )

    @classmethod
    def machine_params(cls, mode="train", **kwargs):
        num_gpus = torch.cuda.device_count()
        has_gpu = num_gpus != 0

        if mode == "train":
            nprocesses = 20 if has_gpu else 4
            gpu_ids = [0] if has_gpu else []
        elif mode == "valid":
            nprocesses = 1
            # Run validation on a second GPU when one exists; 1 % num_gpus
            # falls back to GPU 0 on single-GPU machines.
            gpu_ids = [1 % num_gpus] if has_gpu else []
        elif mode == "test":
            nprocesses = 1
            gpu_ids = [0] if has_gpu else []
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        return {"nprocesses": nprocesses, "gpu_ids": gpu_ids}

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        return ObjectNavBaselineActorCritic(
            action_space=gym.spaces.Discrete(
                len(ObjectNaviThorGridTask.class_action_names())
            ),
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            rgb_uuid=cls.SENSORS[0].uuid,
            depth_uuid=None,
            goal_sensor_uuid="goal_object_type_ind",
            hidden_size=512,
            object_type_embedding_dim=8,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        return ObjectNavTaskSampler(**kwargs)

    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
            np.int32
        )

    def _get_sampler_args_for_scene_split(
        self,
        scenes: List[str],
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by using a number of workers divisible by the number of scenes."
                )
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        else:
            if len(scenes) % total_processes != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by using a number of workers that divides the number of scenes."
                )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "object_types": self.OBJECT_TYPES,
            "env_args": self.ENV_ARGS,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(
                len(ObjectNaviThorGridTask.class_action_names())
            ),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
        }

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        res = self._get_sampler_args_for_scene_split(
            self.TRAIN_SCENES,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_period"] = "manual"
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        res["env_args"]["x_display"] = (
            ("0.%d" % devices[process_ind % len(devices)])
            if devices is not None and len(devices) > 0
            else None
        )
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        res = self._get_sampler_args_for_scene_split(
            self.VALID_SCENES,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_period"] = self.VALID_SAMPLES_IN_SCENE
        res["max_tasks"] = self.VALID_SAMPLES_IN_SCENE * len(res["scenes"])
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        res["env_args"]["x_display"] = (
            ("0.%d" % devices[process_ind % len(devices)])
            if devices is not None and len(devices) > 0
            else None
        )
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        res = self._get_sampler_args_for_scene_split(
            self.TEST_SCENES,
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_period"] = self.TEST_SAMPLES_IN_SCENE
        res["max_tasks"] = self.TEST_SAMPLES_IN_SCENE * len(res["scenes"])
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        res["env_args"]["x_display"] = (
            ("0.%d" % devices[process_ind % len(devices)])
            if devices is not None and len(devices) > 0
            else None
        )
        return res
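# Aside: the following standalone snippet illustrates the scene-split logic
# above. _partition_inds produces near-equal contiguous index ranges, so each
# sampler process receives its own slice of the scene list (scene names below
# are hypothetical).
import numpy as np

def partition_inds(n: int, num_parts: int) -> np.ndarray:
    return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(np.int32)

scenes = ["FloorPlan%d_physics" % i for i in range(1, 6)]  # 5 scenes
inds = partition_inds(len(scenes), 3)                      # array([0, 2, 3, 5])
for proc in range(3):
    print(proc, scenes[inds[proc] : inds[proc + 1]])
# 0 ['FloorPlan1_physics', 'FloorPlan2_physics']
# 1 ['FloorPlan3_physics']
# 2 ['FloorPlan4_physics', 'FloorPlan5_physics']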
class ObjectNavBaseConfig(ExperimentConfig, abc.ABC):
    """An Object Navigation base configuration."""

    # TARGET_TYPES = sorted(
    #     [
    #         "AlarmClock",
    #         "Apple",
    #         "BaseballBat",
    #         "BasketBall",
    #         "Bowl",
    #         "GarbageCan",
    #         "HousePlant",
    #         "Laptop",
    #         "Mug",
    #         # "Remote",  # now called RemoteControl, so all episodes for this object would be random
    #         "SprayBottle",
    #         "Television",
    #         "Vase",
    #     ]
    # )

    # TARGET_TYPES = sorted(
    #     ["AlarmClock", "Apple", "BasketBall", "Mug", "Television"]
    # )

    TARGET_TYPES = sorted(["Television", "Mug"])

    CAMERA_WIDTH = 400
    CAMERA_HEIGHT = 300
    SCREEN_SIZE = 224

    VISION_UUID = "rgb"
    TARGET_UUID = "goal_object_type_ind"

    MAX_STEPS = 500

    SENSORS = [
        RGBSensorThor(
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid=VISION_UUID,
        ),
        GoalObjectTypeThorSensor(object_types=TARGET_TYPES, uuid=TARGET_UUID),
    ]

    ENV_ARGS = dict(
        width=CAMERA_WIDTH,
        height=CAMERA_HEIGHT,
        continuousMode=False,
        applyActionNoise=False,
        # agentType="stochastic",
        rotateStepDegrees=90.0,
        visibilityDistance=0.5,
        gridSize=0.25,
        snapToGrid=True,
        agentMode="bot",
        # include_private_scenes=True,
    )

    @classmethod
    def make_sampler_fn(cls, **kwargs):
        return ObjectNavTaskSampler(**kwargs)

    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
            np.int32
        )

    def _get_sampler_args_for_scene_split(
        self,
        scenes: List[str],
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        if total_processes > len(scenes):
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by using a number of workers divisible by the number of scenes."
                )
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        else:
            if len(scenes) % total_processes != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by using a number of workers that divides the number of scenes."
                )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "object_types": self.TARGET_TYPES,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(
                len(ObjectNavTask.class_action_names())
            ),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": {
                "step_penalty": -0.01,
                "goal_success_reward": 10.0,
                "failed_stop_reward": 0.0,
                "shaping_weight": 1.0,  # applied to the decrease in distance to target
            },
        }
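# Aside: a hedged sketch of how a rewards_config like the one above is typically
# combined per step. The exact accounting lives in ObjectNavTask; this only
# illustrates the roles of the four coefficients (failed_stop_reward would be
# added when the agent ends the episode without the target visible, and is 0.0
# in this config).
def step_reward(
    reached_goal: bool, prev_dist: float, curr_dist: float, config: dict
) -> float:
    reward = config["step_penalty"]
    # Shaping: reward the *decrease* in distance to the target.
    reward += config["shaping_weight"] * (prev_dist - curr_dist)
    if reached_goal:
        reward += config["goal_success_reward"]
    return reward

cfg = {
    "step_penalty": -0.01,
    "goal_success_reward": 10.0,
    "failed_stop_reward": 0.0,
    "shaping_weight": 1.0,
}
print(step_reward(False, 3.0, 2.75, cfg))  # -0.01 + 0.25 = 0.24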