class TwoPhaseRGBPPOWalkthroughILUnshuffleExperimentConfig( TwoPhaseRGBBaseExperimentConfig ): SENSORS = [ *TwoPhaseRGBBaseExperimentConfig.SENSORS, ExpertActionSensor(len(RearrangeBaseExperimentConfig.actions())), ] USE_RESNET_CNN = False IL_PIPELINE_TYPE: str = "40proc-longtf" @classmethod def tag(cls) -> str: return f"TwoPhaseRGBPPOWalkthroughILUnshuffle_{cls.IL_PIPELINE_TYPE}" @classmethod def num_train_processes(cls) -> int: return cls._use_label_to_get_training_params()["num_train_processes"] @classmethod def _training_pipeline_info(cls) -> Dict[str, Any]: """Define how the model trains.""" training_steps = cls.TRAINING_STEPS il_params = cls._use_label_to_get_training_params() bc_tf1_steps = il_params["bc_tf1_steps"] dagger_steps = il_params["dagger_steps"] return dict( named_losses=dict( walkthrough_ppo_loss=MaskedPPO( mask_uuid="in_walkthrough_phase", ppo_params=dict( clip_decay=LinearDecay(training_steps), **PPOConfig ), ), imitation_loss=Imitation(), ), pipeline_stages=[ PipelineStage( loss_names=["walkthrough_ppo_loss", "imitation_loss"], max_stage_steps=training_steps, teacher_forcing=StepwiseLinearDecay( cumm_steps_and_values=[ (bc_tf1_steps, 1.0), (bc_tf1_steps + dagger_steps, 0.0), ] ), ) ], **il_params, ) @classmethod def _use_label_to_get_training_params(cls): return il_training_params( label=cls.IL_PIPELINE_TYPE.lower(), training_steps=cls.TRAINING_STEPS )
class PointNavHabitatRGBDeterministiSimpleConvGRUImitationExperimentConfig( DebugPointNavHabitatBaseConfig, PointNavMixInSimpleConvGRUConfig): """An Point Navigation experiment configuration in Habitat with Depth input.""" SENSORS = [ RGBSensorHabitat( height=DebugPointNavHabitatBaseConfig.SCREEN_SIZE, width=DebugPointNavHabitatBaseConfig.SCREEN_SIZE, use_resnet_normalization=True, ), TargetCoordinatesSensorHabitat(coordinate_dims=2), ExpertActionSensor(nactions=len(PointNavTask.class_action_names())), ] @classmethod def tag(cls): return "Debug-Pointnav-Habitat-RGB-SimpleConv-BC" @classmethod def training_pipeline(cls, **kwargs): imitate_steps = int(75000000) lr = 3e-4 num_mini_batch = 1 update_repeats = 3 num_steps = 30 save_interval = 5000000 log_interval = 10000 if torch.cuda.is_available() else 1 gamma = 0.99 use_gae = True gae_lambda = 0.95 max_grad_norm = 0.5 return TrainingPipeline( save_interval=save_interval, metric_accumulate_interval=log_interval, optimizer_builder=Builder(optim.Adam, dict(lr=lr)), num_mini_batch=num_mini_batch, update_repeats=update_repeats, max_grad_norm=max_grad_norm, num_steps=num_steps, named_losses={"imitation_loss": Imitation()}, gamma=gamma, use_gae=use_gae, gae_lambda=gae_lambda, advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD, pipeline_stages=[ PipelineStage( loss_names=["imitation_loss"], max_stage_steps=imitate_steps, # teacher_forcing=LinearDecay(steps=int(1e5), startp=1.0, endp=0.0,), ), ], lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=imitate_steps)}), )
def get_sensors(cls) -> Sequence[Sensor]: assert cls.USE_INSTR is not None return ([ EgocentricMiniGridSensor(agent_view_size=cls.AGENT_VIEW_SIZE, view_channels=3), ] + ([MiniGridMissionSensor(instr_len=cls.INSTR_LEN)] # type:ignore if cls.USE_INSTR else []) + ([ ExpertActionSensor( # type: ignore nactions=len(BabyAITask.class_action_names())) ] if cls.USE_EXPERT else []))
def __init__(self): super().__init__() self.SENSORS = [ RGBSensorThor( height=self.SCREEN_SIZE, width=self.SCREEN_SIZE, use_resnet_normalization=True, uuid="rgb", ), DepthSensorIThor( height=self.SCREEN_SIZE, width=self.SCREEN_SIZE, use_normalization=True, uuid="depth", ), GoalObjectTypeThorSensor(self.OBSTACLES_TYPES), GPSCompassSensorIThor(), LocalKeyPoints3DSensorThor(objectTypes=self.OBSTACLES_TYPES, uuid="3Dkeypoints_local"), GlobalKeyPoints3DSensorThor(objectTypes=self.OBSTACLES_TYPES, uuid="3Dkeypoints_global"), GlobalObjPoseSensorThor(objectTypes=self.OBSTACLES_TYPES, uuid="object_pose_global"), GlobalAgentPoseSensorThor(uuid="agent_pose_global"), GlobalObjUpdateMaskSensorThor(objectTypes=self.OBSTACLES_TYPES, uuid="object_update_mask"), GlobalObjActionMaskSensorThor(objectTypes=self.OBSTACLES_TYPES, uuid="object_action_mask"), ExpertActionSensor(nactions=len( ObjectPlacementTask.class_action_names()), uuid="expert_action", expert_args={"end_action_only": True}), ] self.PREPROCESSORS = [] self.OBSERVATIONS = [ "rgb", "depth", "goal_object_type_ind", "target_coordinates_ind", "3Dkeypoints_local", "3Dkeypoints_global", "object_pose_global", "agent_pose_global", "object_update_mask", "object_action_mask", "expert_action", ]
class OnePhaseRGBILBaseExperimentConfig(OnePhaseRGBBaseExperimentConfig): SENSORS = [ *OnePhaseRGBBaseExperimentConfig.SENSORS, ExpertActionSensor(len(RearrangeBaseExperimentConfig.actions())), ] IL_PIPELINE_TYPE: Optional[str] = None @classmethod def _training_pipeline_info(cls, **kwargs) -> Dict[str, Any]: """Define how the model trains.""" training_steps = cls.TRAINING_STEPS params = cls._use_label_to_get_training_params() bc_tf1_steps = params["bc_tf1_steps"] dagger_steps = params["dagger_steps"] return dict( named_losses=dict(imitation_loss=Imitation()), pipeline_stages=[ PipelineStage( loss_names=["imitation_loss"], max_stage_steps=training_steps, teacher_forcing=StepwiseLinearDecay( cumm_steps_and_values=[ (bc_tf1_steps, 1.0), (bc_tf1_steps + dagger_steps, 0.0), ] ), ) ], **params ) @classmethod def num_train_processes(cls) -> int: return cls._use_label_to_get_training_params()["num_train_processes"] @classmethod def _use_label_to_get_training_params(cls): return il_training_params( label=cls.IL_PIPELINE_TYPE.lower(), training_steps=cls.TRAINING_STEPS )
def __init__(self): super().__init__() self.SENSORS = [ RGBSensorThor( height=self.SCREEN_SIZE, width=self.SCREEN_SIZE, use_resnet_normalization=True, uuid="rgb", ), DepthSensorIThor( height=self.SCREEN_SIZE, width=self.SCREEN_SIZE, use_normalization=True, uuid="depth", ), GoalObjectTypeThorSensor(self.OBSTACLES_TYPES), GPSCompassSensorIThor(), ClassSegmentationSensorThor(objectTypes=self.OBSTACLES_TYPES, height=self.SCREEN_SIZE, width=self.SCREEN_SIZE, uuid="seg"), ExpertActionSensor(nactions=len( ObjectPlacementTask.class_action_names()), uuid="expert_action", expert_args={"end_action_only": True}), ] self.PREPROCESSORS = [] self.OBSERVATIONS = [ "rgb", "depth", "goal_object_type_ind", "target_coordinates_ind", "seg", "expert_action", ]
class ObjectNaviThorRGBDAggerExperimentConfig( ObjectNavRoboThorBaseConfig, ObjectNavMixInDAggerConfig, ObjectNavMixInResNetGRUConfig, ): """An Object Navigation experiment configuration in RoboThor with RGB input.""" SENSORS = [ RGBSensorThor( height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE, width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE, use_resnet_normalization=True, uuid="rgb_lowres", ), GoalObjectTypeThorSensor( object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES, ), ExpertActionSensor(nactions=len(ObjectNavTask.class_action_names()), ), ] @classmethod def tag(cls): return "Objectnav-RoboTHOR-RGB-ResNetGRU-DAgger"
class MiniGridCondTestExperimentConfig(ExperimentConfig): @classmethod def tag(cls) -> str: return "MiniGridCondTest" SENSORS = [ EgocentricMiniGridSensor(agent_view_size=5, view_channels=3), ExpertActionSensor(action_space=gym.spaces.Dict( higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2))), ] @classmethod def create_model(cls, **kwargs) -> nn.Module: return ConditionedMiniGridSimpleConvRNN( action_space=gym.spaces.Dict(higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2)), observation_space=SensorSuite(cls.SENSORS).observation_spaces, num_objects=cls.SENSORS[0].num_objects, num_colors=cls.SENSORS[0].num_colors, num_states=cls.SENSORS[0].num_states, ) @classmethod def make_sampler_fn(cls, **kwargs) -> TaskSampler: return MiniGridTaskSampler(**kwargs) def train_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args(process_ind=process_ind, mode="train") def valid_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args(process_ind=process_ind, mode="valid") def test_task_sampler_args( self, process_ind: int, total_processes: int, devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, ) -> Dict[str, Any]: return self._get_sampler_args(process_ind=process_ind, mode="test") def _get_sampler_args(self, process_ind: int, mode: str) -> Dict[str, Any]: """Generate initialization arguments for train, valid, and test TaskSamplers. # Parameters process_ind : index of the current task sampler mode: one of `train`, `valid`, or `test` """ if mode == "train": max_tasks = None # infinite training tasks task_seeds_list = None # no predefined random seeds for training deterministic_sampling = False # randomly sample tasks in training else: max_tasks = 20 + 20 * ( mode == "test" ) # 20 tasks for valid, 40 for test (per sampler) # one seed for each task to sample: # - ensures different seeds for each sampler, and # - ensures a deterministic set of sampled tasks. task_seeds_list = list( range(process_ind * max_tasks, (process_ind + 1) * max_tasks)) deterministic_sampling = ( True # deterministically sample task in validation/testing ) return dict( max_tasks=max_tasks, # see above env_class=self. make_env, # builder for third-party environment (defined below) sensors=self. SENSORS, # sensors used to return observations to the agent env_info=dict( ), # parameters for environment builder (none for now) task_seeds_list=task_seeds_list, # see above deterministic_sampling=deterministic_sampling, # see above task_class=ConditionedMiniGridTask, ) @staticmethod def make_env(*args, **kwargs): return EmptyRandomEnv5x5() @classmethod def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]: return { "nprocesses": 4 if mode == "train" else 1, "devices": [], } @classmethod def training_pipeline(cls, **kwargs) -> TrainingPipeline: ppo_steps = int(512) return TrainingPipeline( named_losses=dict( imitation_loss=Imitation( cls.SENSORS[1]), # 0 is Minigrid, 1 is ExpertActionSensor ppo_loss=PPO(**PPOConfig, entropy_method_name="conditional_entropy"), ), # type:ignore pipeline_stages=[ PipelineStage( teacher_forcing=LinearDecay( startp=1.0, endp=0.0, steps=ppo_steps // 2, ), loss_names=["imitation_loss", "ppo_loss"], max_stage_steps=ppo_steps, ) ], optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4)), num_mini_batch=4, update_repeats=3, max_grad_norm=0.5, num_steps=16, gamma=0.99, use_gae=True, gae_lambda=0.95, advance_scene_rollout_period=None, save_interval=10000, metric_accumulate_interval=1, lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)} # type:ignore ), )
class ObjectNavThorDaggerThenPPOExperimentConfig( ObjectNavThorPPOExperimentConfig): """A simple object navigation experiment in THOR. Training with DAgger and then PPO. """ SENSORS = ObjectNavThorPPOExperimentConfig.SENSORS + [ ExpertActionSensor(action_space=len( ObjectNaviThorGridTask.class_action_names()), ), ] @classmethod def tag(cls): return "ObjectNavThorDaggerThenPPO" @classmethod def training_pipeline(cls, **kwargs): dagger_steos = int(1e4) ppo_steps = int(1e6) lr = 2.5e-4 num_mini_batch = 2 if not torch.cuda.is_available() else 6 update_repeats = 4 num_steps = 128 metric_accumulate_interval = cls.MAX_STEPS * 10 # Log every 10 max length tasks save_interval = 10000 gamma = 0.99 use_gae = True gae_lambda = 1.0 max_grad_norm = 0.5 return TrainingPipeline( save_interval=save_interval, metric_accumulate_interval=metric_accumulate_interval, optimizer_builder=Builder(optim.Adam, dict(lr=lr)), num_mini_batch=num_mini_batch, update_repeats=update_repeats, max_grad_norm=max_grad_norm, num_steps=num_steps, named_losses={ "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig), "imitation_loss": Imitation(), # We add an imitation loss. }, gamma=gamma, use_gae=use_gae, gae_lambda=gae_lambda, advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD, pipeline_stages=[ PipelineStage( loss_names=["imitation_loss"], teacher_forcing=LinearDecay( startp=1.0, endp=0.0, steps=dagger_steos, ), max_stage_steps=dagger_steos, ), PipelineStage( loss_names=["ppo_loss"], max_stage_steps=ppo_steps, ), ], lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}), )
class TwoPhaseRGBResNetFrozenMapPPOWalkthroughILUnshuffleExperimentConfig( TwoPhaseRGBResNetPPOWalkthroughILUnshuffleExperimentConfig): CNN_PREPROCESSOR_TYPE_AND_PRETRAINING = ( None # Not necessary as we're handling things in the model ) IL_PIPELINE_TYPE: str = "40proc-longtf" ORDERED_OBJECT_TYPES = list(sorted(PICKUPABLE_OBJECTS + OPENABLE_OBJECTS)) MAP_RANGE_SENSOR = ReachableBoundsTHORSensor(margin=1.0) MAP_INFO = dict( map_range_sensor=MAP_RANGE_SENSOR, vision_range_in_cm=40 * 5, map_size_in_cm=1050 if isinstance(MAP_RANGE_SENSOR, ReachableBoundsTHORSensor) else 2200, resolution_in_cm=5, ) SENSORS = [ ExpertActionSensor(len(RearrangeBaseExperimentConfig.actions())), RGBRearrangeSensor( height=RearrangeBaseExperimentConfig.SCREEN_SIZE, width=RearrangeBaseExperimentConfig.SCREEN_SIZE, use_resnet_normalization=True, uuid=RearrangeBaseExperimentConfig.EGOCENTRIC_RGB_UUID, ), ClosestUnshuffledRGBRearrangeSensor( height=RearrangeBaseExperimentConfig.SCREEN_SIZE, width=RearrangeBaseExperimentConfig.SCREEN_SIZE, use_resnet_normalization=True, uuid=RearrangeBaseExperimentConfig.UNSHUFFLED_RGB_UUID, ), InWalkthroughPhaseSensor(), RelativePositionChangeTHORSensor(), MAP_RANGE_SENSOR, ] @classmethod def tag(cls) -> str: return f"TwoPhaseRGBResNetFrozenMapPPOWalkthroughILUnshuffle_{cls.IL_PIPELINE_TYPE}" @classmethod def create_model(cls, **kwargs) -> nn.Module: def get_sensor_uuid(stype: Type[Sensor]) -> Optional[str]: s = next( (s for s in cls.SENSORS if isinstance(s, stype)), None, ) return None if s is None else s.uuid walkthrougher_should_ignore_action_mask = [ any(k in a for k in ["drop", "open", "pickup"]) for a in cls.actions() ] map_kwargs = dict( frame_height=224, frame_width=224, vision_range_in_cm=cls.MAP_INFO["vision_range_in_cm"], resolution_in_cm=cls.MAP_INFO["resolution_in_cm"], map_size_in_cm=cls.MAP_INFO["map_size_in_cm"], ) observation_space = ( SensorSuite(cls.SENSORS).observation_spaces if kwargs.get("sensor_preprocessor_graph") is None else kwargs["sensor_preprocessor_graph"].observation_spaces) semantic_map_channels = len(cls.ORDERED_OBJECT_TYPES) height_map_channels = 3 map_kwargs[ "n_map_channels"] = height_map_channels + semantic_map_channels frozen_map = ActiveNeuralSLAM(**map_kwargs, use_resnet_layernorm=True) pretrained_map_ckpt_path = os.path.join( ABS_PATH_OF_REARRANGE_TOP_LEVEL_DIR, "pretrained_model_ckpts", "pretrained_active_neural_slam_via_walkthrough_75m.pt", ) multiprocessing_safe_download_file_from_url( url= "https://prior-model-weights.s3.us-east-2.amazonaws.com/embodied-ai/rearrangement/walkthrough/pretrained_active_neural_slam_via_walkthrough_75m.pt", save_path=pretrained_map_ckpt_path, ) frozen_map.load_state_dict( torch.load( pretrained_map_ckpt_path, map_location="cpu", )) return TwoPhaseRearrangeActorCriticFrozenMap( map=frozen_map, semantic_map_channels=semantic_map_channels, height_map_channels=height_map_channels, action_space=gym.spaces.Discrete(len(cls.actions())), observation_space=observation_space, rgb_uuid=cls.EGOCENTRIC_RGB_UUID, in_walkthrough_phase_uuid=get_sensor_uuid( InWalkthroughPhaseSensor), is_walkthrough_phase_embedding_dim=cls. IS_WALKTHROUGH_PHASE_EMBEDING_DIM, rnn_type=cls.RNN_TYPE, walkthrougher_should_ignore_action_mask= walkthrougher_should_ignore_action_mask, done_action_index=cls.actions().index("done"), )
def sensors(cls) -> Sequence[Sensor]: return [ *super(OnePhaseRGBILBaseExperimentConfig, cls).sensors(), ExpertActionSensor(len(RearrangeBaseExperimentConfig.actions())), ]