class TwoPhaseRGBPPOWalkthroughILUnshuffleExperimentConfig(
    TwoPhaseRGBBaseExperimentConfig
):
    SENSORS = [
        *TwoPhaseRGBBaseExperimentConfig.SENSORS,
        ExpertActionSensor(len(RearrangeBaseExperimentConfig.actions())),
    ]

    USE_RESNET_CNN = False
    IL_PIPELINE_TYPE: str = "40proc-longtf"

    @classmethod
    def tag(cls) -> str:
        return f"TwoPhaseRGBPPOWalkthroughILUnshuffle_{cls.IL_PIPELINE_TYPE}"

    @classmethod
    def num_train_processes(cls) -> int:
        return cls._use_label_to_get_training_params()["num_train_processes"]

    @classmethod
    def _training_pipeline_info(cls) -> Dict[str, Any]:
        """Define how the model trains."""

        training_steps = cls.TRAINING_STEPS
        il_params = cls._use_label_to_get_training_params()
        bc_tf1_steps = il_params["bc_tf1_steps"]
        dagger_steps = il_params["dagger_steps"]

        return dict(
            named_losses=dict(
                walkthrough_ppo_loss=MaskedPPO(
                    mask_uuid="in_walkthrough_phase",
                    ppo_params=dict(
                        clip_decay=LinearDecay(training_steps), **PPOConfig
                    ),
                ),
                imitation_loss=Imitation(),
            ),
            pipeline_stages=[
                PipelineStage(
                    loss_names=["walkthrough_ppo_loss", "imitation_loss"],
                    max_stage_steps=training_steps,
                    teacher_forcing=StepwiseLinearDecay(
                        cumm_steps_and_values=[
                            (bc_tf1_steps, 1.0),
                            (bc_tf1_steps + dagger_steps, 0.0),
                        ]
                    ),
                )
            ],
            **il_params,
        )

    @classmethod
    def _use_label_to_get_training_params(cls):
        return il_training_params(
            label=cls.IL_PIPELINE_TYPE.lower(), training_steps=cls.TRAINING_STEPS
        )
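
Below is a minimal sketch, not the allenact implementation, of what the StepwiseLinearDecay teacher-forcing schedule above computes: the probability of forcing the expert's action stays at 1.0 for the first bc_tf1_steps (pure behavior cloning), then anneals linearly to 0.0 over the following dagger_steps (the DAgger phase). The helper name is illustrative.

def teacher_forcing_prob(step: int, bc_tf1_steps: int, dagger_steps: int) -> float:
    if step <= bc_tf1_steps:
        return 1.0  # pure behavior cloning: always follow the expert
    if step >= bc_tf1_steps + dagger_steps:
        return 0.0  # schedule exhausted: act fully on-policy
    # linear interpolation between the two cumulative breakpoints
    return 1.0 - (step - bc_tf1_steps) / dagger_steps

assert teacher_forcing_prob(0, 1000, 4000) == 1.0
assert teacher_forcing_prob(3000, 1000, 4000) == 0.5
assert teacher_forcing_prob(5000, 1000, 4000) == 0.0

Example 2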
class PointNavHabitatRGBDeterministicSimpleConvGRUImitationExperimentConfig(
        DebugPointNavHabitatBaseConfig, PointNavMixInSimpleConvGRUConfig):
    """A Point Navigation experiment configuration in Habitat with RGB
    input."""

    SENSORS = [
        RGBSensorHabitat(
            height=DebugPointNavHabitatBaseConfig.SCREEN_SIZE,
            width=DebugPointNavHabitatBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
        ),
        TargetCoordinatesSensorHabitat(coordinate_dims=2),
        ExpertActionSensor(nactions=len(PointNavTask.class_action_names())),
    ]

    @classmethod
    def tag(cls):
        return "Debug-Pointnav-Habitat-RGB-SimpleConv-BC"

    @classmethod
    def training_pipeline(cls, **kwargs):
        imitate_steps = int(75e6)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 3
        num_steps = 30
        save_interval = 5000000
        log_interval = 10000 if torch.cuda.is_available() else 1
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5
        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={"imitation_loss": Imitation()},
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(
                    loss_names=["imitation_loss"],
                    max_stage_steps=imitate_steps,
                    # teacher_forcing=LinearDecay(steps=int(1e5), startp=1.0, endp=0.0,),
                ),
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=imitate_steps)}),
        )
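A plain-PyTorch sketch of the LambdaLR + LinearDecay pairing used above: the learning rate is multiplied by a factor decaying linearly from 1.0 to 0.0 over the stage's steps. The lambda below is a stand-in for allenact's LinearDecay, and the Linear module is a stand-in model.

import torch
from torch import optim
from torch.optim.lr_scheduler import LambdaLR

model = torch.nn.Linear(4, 2)  # stand-in model
total_steps = int(75e6)
opt = optim.Adam(model.parameters(), lr=3e-4)
sched = LambdaLR(opt, lr_lambda=lambda step: max(0.0, 1.0 - step / total_steps))

opt.step()
sched.step()  # lr is now 3e-4 * (1 - 1 / 75e6)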
Example 3
    @classmethod
    def get_sensors(cls) -> Sequence[Sensor]:
        assert cls.USE_INSTR is not None

        return (
            [EgocentricMiniGridSensor(agent_view_size=cls.AGENT_VIEW_SIZE, view_channels=3)]
            + ([MiniGridMissionSensor(instr_len=cls.INSTR_LEN)] if cls.USE_INSTR else [])  # type:ignore
            + (
                [ExpertActionSensor(nactions=len(BabyAITask.class_action_names()))]  # type: ignore
                if cls.USE_EXPERT
                else []
            )
        )
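The concatenation above conditionally assembles the sensor list; the same idiom with placeholder strings (these names are illustrative, not allenact sensors):

USE_INSTR, USE_EXPERT = True, False
sensors = (
    ["egocentric_view"]
    + (["mission_instruction"] if USE_INSTR else [])
    + (["expert_action"] if USE_EXPERT else [])
)
assert sensors == ["egocentric_view", "mission_instruction"]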
Example 4
    def __init__(self):
        super().__init__()

        self.SENSORS = [
            RGBSensorThor(
                height=self.SCREEN_SIZE,
                width=self.SCREEN_SIZE,
                use_resnet_normalization=True,
                uuid="rgb",
            ),
            DepthSensorIThor(
                height=self.SCREEN_SIZE,
                width=self.SCREEN_SIZE,
                use_normalization=True,
                uuid="depth",
            ),
            GoalObjectTypeThorSensor(self.OBSTACLES_TYPES),
            GPSCompassSensorIThor(),
            LocalKeyPoints3DSensorThor(objectTypes=self.OBSTACLES_TYPES,
                                       uuid="3Dkeypoints_local"),
            GlobalKeyPoints3DSensorThor(objectTypes=self.OBSTACLES_TYPES,
                                        uuid="3Dkeypoints_global"),
            GlobalObjPoseSensorThor(objectTypes=self.OBSTACLES_TYPES,
                                    uuid="object_pose_global"),
            GlobalAgentPoseSensorThor(uuid="agent_pose_global"),
            GlobalObjUpdateMaskSensorThor(objectTypes=self.OBSTACLES_TYPES,
                                          uuid="object_update_mask"),
            GlobalObjActionMaskSensorThor(objectTypes=self.OBSTACLES_TYPES,
                                          uuid="object_action_mask"),
            ExpertActionSensor(
                nactions=len(ObjectPlacementTask.class_action_names()),
                uuid="expert_action",
                expert_args={"end_action_only": True},
            ),
        ]

        self.PREPROCESSORS = []

        self.OBSERVATIONS = [
            "rgb",
            "depth",
            "goal_object_type_ind",
            "target_coordinates_ind",
            "3Dkeypoints_local",
            "3Dkeypoints_global",
            "object_pose_global",
            "agent_pose_global",
            "object_update_mask",
            "object_action_mask",
            "expert_action",
        ]
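
Every string in OBSERVATIONS must match the uuid of one configured sensor (here, GoalObjectTypeThorSensor and GPSCompassSensorIThor supply "goal_object_type_ind" and "target_coordinates_ind"). A small sketch of a consistency check, assuming each sensor exposes a .uuid attribute as allenact sensors do:

def check_observations(sensors, observations):
    uuids = {s.uuid for s in sensors}
    missing = [o for o in observations if o not in uuids]
    assert not missing, f"OBSERVATIONS with no matching sensor uuid: {missing}"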
Example 5
class OnePhaseRGBILBaseExperimentConfig(OnePhaseRGBBaseExperimentConfig):
    SENSORS = [
        *OnePhaseRGBBaseExperimentConfig.SENSORS,
        ExpertActionSensor(len(RearrangeBaseExperimentConfig.actions())),
    ]

    IL_PIPELINE_TYPE: Optional[str] = None

    @classmethod
    def _training_pipeline_info(cls, **kwargs) -> Dict[str, Any]:
        """Define how the model trains."""

        training_steps = cls.TRAINING_STEPS
        params = cls._use_label_to_get_training_params()
        bc_tf1_steps = params["bc_tf1_steps"]
        dagger_steps = params["dagger_steps"]

        return dict(
            named_losses=dict(imitation_loss=Imitation()),
            pipeline_stages=[
                PipelineStage(
                    loss_names=["imitation_loss"],
                    max_stage_steps=training_steps,
                    teacher_forcing=StepwiseLinearDecay(
                        cumm_steps_and_values=[
                            (bc_tf1_steps, 1.0),
                            (bc_tf1_steps + dagger_steps, 0.0),
                        ]
                    ),
                )
            ],
            **params
        )

    @classmethod
    def num_train_processes(cls) -> int:
        return cls._use_label_to_get_training_params()["num_train_processes"]

    @classmethod
    def _use_label_to_get_training_params(cls):
        assert cls.IL_PIPELINE_TYPE is not None, "IL_PIPELINE_TYPE must be set by a subclass."
        return il_training_params(
            label=cls.IL_PIPELINE_TYPE.lower(), training_steps=cls.TRAINING_STEPS
        )
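Example 6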
    def __init__(self):
        super().__init__()

        self.SENSORS = [
            RGBSensorThor(
                height=self.SCREEN_SIZE,
                width=self.SCREEN_SIZE,
                use_resnet_normalization=True,
                uuid="rgb",
            ),
            DepthSensorIThor(
                height=self.SCREEN_SIZE,
                width=self.SCREEN_SIZE,
                use_normalization=True,
                uuid="depth",
            ),
            GoalObjectTypeThorSensor(self.OBSTACLES_TYPES),
            GPSCompassSensorIThor(),
            ClassSegmentationSensorThor(objectTypes=self.OBSTACLES_TYPES,
                                        height=self.SCREEN_SIZE,
                                        width=self.SCREEN_SIZE,
                                        uuid="seg"),
            ExpertActionSensor(
                nactions=len(ObjectPlacementTask.class_action_names()),
                uuid="expert_action",
                expert_args={"end_action_only": True},
            ),
        ]

        self.PREPROCESSORS = []

        self.OBSERVATIONS = [
            "rgb",
            "depth",
            "goal_object_type_ind",
            "target_coordinates_ind",
            "seg",
            "expert_action",
        ]
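Example 7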
class ObjectNaviThorRGBDAggerExperimentConfig(
        ObjectNavRoboThorBaseConfig,
        ObjectNavMixInDAggerConfig,
        ObjectNavMixInResNetGRUConfig,
):
    """An Object Navigation experiment configuration in RoboThor with RGB
    input."""

    SENSORS = [
        RGBSensorThor(
            height=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            width=ObjectNavRoboThorBaseConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GoalObjectTypeThorSensor(
            object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
        ),
        ExpertActionSensor(nactions=len(ObjectNavTask.class_action_names())),
    ]

    @classmethod
    def tag(cls):
        return "Objectnav-RoboTHOR-RGB-ResNetGRU-DAgger"
class MiniGridCondTestExperimentConfig(ExperimentConfig):
    @classmethod
    def tag(cls) -> str:
        return "MiniGridCondTest"

    SENSORS = [
        EgocentricMiniGridSensor(agent_view_size=5, view_channels=3),
        ExpertActionSensor(action_space=gym.spaces.Dict(
            higher=gym.spaces.Discrete(2), lower=gym.spaces.Discrete(2))),
    ]

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        return ConditionedMiniGridSimpleConvRNN(
            action_space=gym.spaces.Dict(higher=gym.spaces.Discrete(2),
                                         lower=gym.spaces.Discrete(2)),
            observation_space=SensorSuite(cls.SENSORS).observation_spaces,
            num_objects=cls.SENSORS[0].num_objects,
            num_colors=cls.SENSORS[0].num_colors,
            num_states=cls.SENSORS[0].num_states,
        )

    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        return MiniGridTaskSampler(**kwargs)

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(process_ind=process_ind, mode="train")

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(process_ind=process_ind, mode="valid")

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        return self._get_sampler_args(process_ind=process_ind, mode="test")

    def _get_sampler_args(self, process_ind: int, mode: str) -> Dict[str, Any]:
        """Generate initialization arguments for train, valid, and test
        TaskSamplers.

        # Parameters
        process_ind : index of the current task sampler
        mode:  one of `train`, `valid`, or `test`
        """
        if mode == "train":
            max_tasks = None  # infinite training tasks
            task_seeds_list = None  # no predefined random seeds for training
            deterministic_sampling = False  # randomly sample tasks in training
        else:
            max_tasks = 20 + 20 * (
                mode == "test"
            )  # 20 tasks for valid, 40 for test (per sampler)

            # one seed for each task to sample:
            # - ensures different seeds for each sampler, and
            # - ensures a deterministic set of sampled tasks.
            task_seeds_list = list(
                range(process_ind * max_tasks, (process_ind + 1) * max_tasks))

            deterministic_sampling = (
                True  # deterministically sample task in validation/testing
            )

        return dict(
            max_tasks=max_tasks,  # see above
            env_class=self.make_env,  # builder for the third-party environment (defined below)
            sensors=self.SENSORS,  # sensors used to return observations to the agent
            env_info=dict(),  # parameters for the environment builder (none for now)
            task_seeds_list=task_seeds_list,  # see above
            deterministic_sampling=deterministic_sampling,  # see above
            task_class=ConditionedMiniGridTask,
        )

    @staticmethod
    def make_env(*args, **kwargs):
        return EmptyRandomEnv5x5()

    @classmethod
    def machine_params(cls, mode="train", **kwargs) -> Dict[str, Any]:
        return {
            "nprocesses": 4 if mode == "train" else 1,
            "devices": [],
        }

    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        ppo_steps = int(512)
        return TrainingPipeline(
            named_losses=dict(
                imitation_loss=Imitation(
                    cls.SENSORS[1]  # SENSORS[0] is the MiniGrid sensor, SENSORS[1] is the ExpertActionSensor
                ),
                ppo_loss=PPO(
                    **PPOConfig, entropy_method_name="conditional_entropy"
                ),
            ),  # type:ignore
            pipeline_stages=[
                PipelineStage(
                    teacher_forcing=LinearDecay(
                        startp=1.0,
                        endp=0.0,
                        steps=ppo_steps // 2,
                    ),
                    loss_names=["imitation_loss", "ppo_loss"],
                    max_stage_steps=ppo_steps,
                )
            ],
            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam),
                                      dict(lr=1e-4)),
            num_mini_batch=4,
            update_repeats=3,
            max_grad_norm=0.5,
            num_steps=16,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=None,
            save_interval=10000,
            metric_accumulate_interval=1,
            lr_scheduler_builder=Builder(
                LambdaLR,
                {"lr_lambda": LinearDecay(steps=ppo_steps)}  # type:ignore
            ),
        )
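
A quick sketch of the seed partitioning performed in _get_sampler_args above: each sampler process receives a disjoint, contiguous block of task seeds, making validation/test tasks deterministic and non-overlapping across samplers.

max_tasks = 20
blocks = [list(range(p * max_tasks, (p + 1) * max_tasks)) for p in range(3)]
assert blocks[0][-1] + 1 == blocks[1][0]  # contiguous across process indices
assert not set(blocks[0]) & set(blocks[1])  # no seed shared between samplers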
Example 9
class ObjectNavThorDaggerThenPPOExperimentConfig(
        ObjectNavThorPPOExperimentConfig):
    """A simple object navigation experiment in THOR.

    Training with DAgger and then PPO.
    """

    SENSORS = ObjectNavThorPPOExperimentConfig.SENSORS + [
        ExpertActionSensor(
            action_space=len(ObjectNaviThorGridTask.class_action_names())
        ),
    ]

    @classmethod
    def tag(cls):
        return "ObjectNavThorDaggerThenPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        dagger_steps = int(1e4)
        ppo_steps = int(1e6)
        lr = 2.5e-4
        num_mini_batch = 2 if not torch.cuda.is_available() else 6
        update_repeats = 4
        num_steps = 128
        metric_accumulate_interval = cls.MAX_STEPS * 10  # Log every 10 max length tasks
        save_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 1.0
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps),
                                **PPOConfig),
                "imitation_loss": Imitation(),  # We add an imitation loss.
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(
                    loss_names=["imitation_loss"],
                    teacher_forcing=LinearDecay(
                        startp=1.0,
                        endp=0.0,
                        steps=dagger_steps,
                    ),
                    max_stage_steps=dagger_steps,
                ),
                PipelineStage(
                    loss_names=["ppo_loss"],
                    max_stage_steps=ppo_steps,
                ),
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}),
        )
class TwoPhaseRGBResNetFrozenMapPPOWalkthroughILUnshuffleExperimentConfig(
        TwoPhaseRGBResNetPPOWalkthroughILUnshuffleExperimentConfig):
    CNN_PREPROCESSOR_TYPE_AND_PRETRAINING = (
        None  # Not necessary as we're handling things in the model
    )
    IL_PIPELINE_TYPE: str = "40proc-longtf"

    ORDERED_OBJECT_TYPES = sorted(PICKUPABLE_OBJECTS + OPENABLE_OBJECTS)

    MAP_RANGE_SENSOR = ReachableBoundsTHORSensor(margin=1.0)

    MAP_INFO = dict(
        map_range_sensor=MAP_RANGE_SENSOR,
        vision_range_in_cm=40 * 5,
        map_size_in_cm=(
            1050
            if isinstance(MAP_RANGE_SENSOR, ReachableBoundsTHORSensor)
            else 2200
        ),
        resolution_in_cm=5,
    )

    SENSORS = [
        ExpertActionSensor(len(RearrangeBaseExperimentConfig.actions())),
        RGBRearrangeSensor(
            height=RearrangeBaseExperimentConfig.SCREEN_SIZE,
            width=RearrangeBaseExperimentConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid=RearrangeBaseExperimentConfig.EGOCENTRIC_RGB_UUID,
        ),
        ClosestUnshuffledRGBRearrangeSensor(
            height=RearrangeBaseExperimentConfig.SCREEN_SIZE,
            width=RearrangeBaseExperimentConfig.SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid=RearrangeBaseExperimentConfig.UNSHUFFLED_RGB_UUID,
        ),
        InWalkthroughPhaseSensor(),
        RelativePositionChangeTHORSensor(),
        MAP_RANGE_SENSOR,
    ]

    @classmethod
    def tag(cls) -> str:
        return f"TwoPhaseRGBResNetFrozenMapPPOWalkthroughILUnshuffle_{cls.IL_PIPELINE_TYPE}"

    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        def get_sensor_uuid(stype: Type[Sensor]) -> Optional[str]:
            s = next(
                (s for s in cls.SENSORS if isinstance(s, stype)),
                None,
            )
            return None if s is None else s.uuid

        walkthrougher_should_ignore_action_mask = [
            any(k in a for k in ["drop", "open", "pickup"])
            for a in cls.actions()
        ]

        map_kwargs = dict(
            frame_height=224,
            frame_width=224,
            vision_range_in_cm=cls.MAP_INFO["vision_range_in_cm"],
            resolution_in_cm=cls.MAP_INFO["resolution_in_cm"],
            map_size_in_cm=cls.MAP_INFO["map_size_in_cm"],
        )

        observation_space = (
            SensorSuite(cls.SENSORS).observation_spaces
            if kwargs.get("sensor_preprocessor_graph") is None
            else kwargs["sensor_preprocessor_graph"].observation_spaces
        )

        semantic_map_channels = len(cls.ORDERED_OBJECT_TYPES)
        height_map_channels = 3
        map_kwargs["n_map_channels"] = height_map_channels + semantic_map_channels
        frozen_map = ActiveNeuralSLAM(**map_kwargs, use_resnet_layernorm=True)

        pretrained_map_ckpt_path = os.path.join(
            ABS_PATH_OF_REARRANGE_TOP_LEVEL_DIR,
            "pretrained_model_ckpts",
            "pretrained_active_neural_slam_via_walkthrough_75m.pt",
        )
        multiprocessing_safe_download_file_from_url(
            url="https://prior-model-weights.s3.us-east-2.amazonaws.com/embodied-ai/rearrangement/walkthrough/pretrained_active_neural_slam_via_walkthrough_75m.pt",
            save_path=pretrained_map_ckpt_path,
        )
        frozen_map.load_state_dict(
            torch.load(pretrained_map_ckpt_path, map_location="cpu")
        )

        return TwoPhaseRearrangeActorCriticFrozenMap(
            map=frozen_map,
            semantic_map_channels=semantic_map_channels,
            height_map_channels=height_map_channels,
            action_space=gym.spaces.Discrete(len(cls.actions())),
            observation_space=observation_space,
            rgb_uuid=cls.EGOCENTRIC_RGB_UUID,
            in_walkthrough_phase_uuid=get_sensor_uuid(InWalkthroughPhaseSensor),
            is_walkthrough_phase_embedding_dim=cls.IS_WALKTHROUGH_PHASE_EMBEDING_DIM,
            rnn_type=cls.RNN_TYPE,
            walkthrougher_should_ignore_action_mask=walkthrougher_should_ignore_action_mask,
            done_action_index=cls.actions().index("done"),
        )
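The config above downloads pretrained Active Neural SLAM weights and loads them into the map module, which the "FrozenMap" actor-critic then keeps fixed. A generic PyTorch sketch of that load-and-freeze idiom (the Linear module and in-memory buffer are stand-ins for the real checkpoint file):

import io
import torch

map_module = torch.nn.Linear(8, 8)  # stand-in for ActiveNeuralSLAM
buf = io.BytesIO()
torch.save(map_module.state_dict(), buf)  # pretend this is the downloaded checkpoint
buf.seek(0)
map_module.load_state_dict(torch.load(buf, map_location="cpu"))
for p in map_module.parameters():
    p.requires_grad_(False)  # freeze: the map receives no gradient updates
map_module.eval()

Example 11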
    @classmethod
    def sensors(cls) -> Sequence[Sensor]:
        return [
            *super(OnePhaseRGBILBaseExperimentConfig, cls).sensors(),
            ExpertActionSensor(len(RearrangeBaseExperimentConfig.actions())),
        ]