Code Example #1
 def training_pipeline(cls, **kwargs):
     ppo_steps = int(250000000)
     lr = 3e-4
     num_mini_batch = 1
     update_repeats = 3
     num_steps = 30
     save_interval = 5000000
     log_interval = 1000
     gamma = 0.99
     use_gae = True
     gae_lambda = 0.95
     max_grad_norm = 0.5
     return TrainingPipeline(
         save_interval=save_interval,
         metric_accumulate_interval=log_interval,
         optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
         num_mini_batch=num_mini_batch,
         update_repeats=update_repeats,
         max_grad_norm=max_grad_norm,
         num_steps=num_steps,
         named_losses={"ppo_loss": PPO(**PPOConfig)},
         gamma=gamma,
         use_gae=use_gae,
         gae_lambda=gae_lambda,
         advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
         pipeline_stages=[
             PipelineStage(loss_names=["ppo_loss"],
                           max_stage_steps=ppo_steps)
         ],
         lr_scheduler_builder=Builder(
             LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}),
     )
Code Example #2
 def training_pipeline(cls, **kwargs) -> TrainingPipeline:
     ppo_steps = int(150000)
     return TrainingPipeline(
         named_losses=dict(
             imitation_loss=Imitation(
                 cls.SENSORS[1]
             ),  # 0 is Minigrid, 1 is ExpertActionSensor
             ppo_loss=PPO(**PPOConfig, entropy_method_name="conditional_entropy"),
         ),  # type:ignore
         pipeline_stages=[
             PipelineStage(
                 teacher_forcing=LinearDecay(
                     startp=1.0, endp=0.0, steps=ppo_steps // 2,
                 ),
                 loss_names=["imitation_loss", "ppo_loss"],
                 max_stage_steps=ppo_steps,
             )
         ],
         optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4)),
         num_mini_batch=4,
         update_repeats=3,
         max_grad_norm=0.5,
         num_steps=16,
         gamma=0.99,
         use_gae=True,
         gae_lambda=0.95,
         advance_scene_rollout_period=None,
         save_interval=10000,
         metric_accumulate_interval=1,
         lr_scheduler_builder=Builder(
             LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}  # type:ignore
         ),
     )
Code Example #3
 def training_pipeline(cls, **kwargs) -> TrainingPipeline:
     ppo_steps = int(150000)
     return TrainingPipeline(
         named_losses=dict(ppo_loss=PPO(**PPOConfig)),  # type:ignore
         pipeline_stages=[
             PipelineStage(loss_names=["ppo_loss"],
                           max_stage_steps=ppo_steps)
         ],
         optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam),
                                   dict(lr=1e-4)),
         num_mini_batch=4,
         update_repeats=3,
         max_grad_norm=0.5,
         num_steps=16,
         gamma=0.99,
         use_gae=True,
         gae_lambda=0.95,
         advance_scene_rollout_period=None,
         save_interval=10000,
         metric_accumulate_interval=1,
         lr_scheduler_builder=Builder(
             LambdaLR,
             {"lr_lambda": LinearDecay(steps=ppo_steps)}  # type:ignore
         ),
     )
Code Example #4
File: gym_tutorial.py Project: elliotthwang/allenact
 def training_pipeline(cls, **kwargs) -> TrainingPipeline:
     ppo_steps = int(1.2e6)
     return TrainingPipeline(
         named_losses=dict(ppo_loss=PPO(
             clip_param=0.2,
             value_loss_coef=0.5,
             entropy_coef=0.0,
         ), ),  # type:ignore
         pipeline_stages=[
             PipelineStage(loss_names=["ppo_loss"],
                           max_stage_steps=ppo_steps),
         ],
         optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam),
                                   dict(lr=1e-3)),
         num_mini_batch=1,
         update_repeats=80,
         max_grad_norm=100,
         num_steps=2000,
         gamma=0.99,
         use_gae=False,
         gae_lambda=0.95,
         advance_scene_rollout_period=None,
         save_interval=200000,
         metric_accumulate_interval=50000,
         lr_scheduler_builder=Builder(
             LambdaLR,
             {"lr_lambda": LinearDecay(steps=ppo_steps)},  # type:ignore
         ),
     )
Code Example #5
 def training_pipeline(cls, **kwargs):
     imitate_steps = int(75000000)
     lr = 3e-4
     num_mini_batch = 1
     update_repeats = 3
     num_steps = 30
     save_interval = 5000000
     log_interval = 10000 if torch.cuda.is_available() else 1
     gamma = 0.99
     use_gae = True
     gae_lambda = 0.95
     max_grad_norm = 0.5
     return TrainingPipeline(
         save_interval=save_interval,
         metric_accumulate_interval=log_interval,
         optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
         num_mini_batch=num_mini_batch,
         update_repeats=update_repeats,
         max_grad_norm=max_grad_norm,
         num_steps=num_steps,
         named_losses={"imitation_loss": Imitation()},
         gamma=gamma,
         use_gae=use_gae,
         gae_lambda=gae_lambda,
         advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
         pipeline_stages=[
             PipelineStage(
                 loss_names=["imitation_loss"],
                 max_stage_steps=imitate_steps,
                 # teacher_forcing=LinearDecay(steps=int(1e5), startp=1.0, endp=0.0,),
             ),
         ],
         lr_scheduler_builder=Builder(
             LambdaLR, {"lr_lambda": LinearDecay(steps=imitate_steps)}),
     )
Code Example #6
    def training_pipeline(cls, **kwargs):
        ppo_steps = int(1e6)
        lr = 2.5e-4
        num_mini_batch = 2 if not torch.cuda.is_available() else 6
        update_repeats = 4
        num_steps = 128
        metric_accumulate_interval = cls.MAX_STEPS * 10  # Log every 10 max length tasks
        save_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 1.0
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps,),
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )
Code Example #7
    def training_pipeline(cls, **kwargs):
        ppo_steps = int(75000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 4
        num_steps = 128
        save_interval = 5000000
        log_interval = 10000 if torch.cuda.is_available() else 1
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5

        action_strs = PointNavTask.class_action_names()
        non_end_action_inds_set = {
            i
            for i, a in enumerate(action_strs) if a != robothor_constants.END
        }
        end_action_ind_set = {action_strs.index(robothor_constants.END)}

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": PPO(**PPOConfig),
                "grouped_action_imitation": GroupedActionImitation(
                    nactions=len(PointNavTask.class_action_names()),
                    action_groups=[non_end_action_inds_set, end_action_ind_set],
                ),
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(
                    loss_names=["ppo_loss", "grouped_action_imitation"],
                    max_stage_steps=ppo_steps,
                )
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}),
        )
Code Example #8
 def training_pipeline(cls, **kwargs):
     ppo_steps = int(10000000)
     lr = 3e-4
     num_mini_batch = 1
     update_repeats = 3
     num_steps = 30
     save_interval = 1000000
     log_interval = 100
     gamma = 0.99
     use_gae = True
     gae_lambda = 0.95
     max_grad_norm = 0.5
     return TrainingPipeline(
         save_interval=save_interval,
         metric_accumulate_interval=log_interval,
         optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
         num_mini_batch=num_mini_batch,
         update_repeats=update_repeats,
         max_grad_norm=max_grad_norm,
         num_steps=num_steps,
         named_losses={
             "ppo_loss": PPO(**PPOConfig),
             "nie_loss": NIE_Reg(
                 agent_pose_uuid="agent_pose_global",
                 pose_uuid="object_pose_global",
                 local_keypoints_uuid="3Dkeypoints_local",
                 global_keypoints_uuid="3Dkeypoints_global",
                 obj_update_mask_uuid="object_update_mask",
                 obj_action_mask_uuid="object_action_mask",
             ),
             "yn_im_loss": YesNoImitation(
                 yes_action_index=ObjectPlacementTask.class_action_names().index(END)
             ),
         },
         gamma=gamma,
         use_gae=use_gae,
         gae_lambda=gae_lambda,
         advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
         pipeline_stages=[
             PipelineStage(
                 loss_names=["ppo_loss", "nie_loss", "yn_im_loss"],
                 max_stage_steps=ppo_steps)
         ],
         lr_scheduler_builder=Builder(
             LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}),
     )
Code Example #9
 def rl_loss_default(cls, alg: str, steps: Optional[int] = None):
     if alg == "ppo":
         assert steps is not None
         return {
             "loss": Builder(
                 PPO,
                 kwargs={"clip_decay": LinearDecay(steps)},
                 default=PPOConfig,
             ),
             "num_mini_batch": cls.PPO_NUM_MINI_BATCH,
             "update_repeats": 4,
         }
     elif alg == "a2c":
         return {
             "loss": A2C(**A2CConfig),
             "num_mini_batch": 1,
             "update_repeats": 1,
         }
     elif alg == "imitation":
         return {
             "loss": Imitation(),
             "num_mini_batch": cls.PPO_NUM_MINI_BATCH,
             "update_repeats": 4,
         }
     else:
         raise NotImplementedError
Code Example #10
 def get_lr_scheduler_builder(cls, use_lr_decay: bool):
     return (None if not use_lr_decay else Builder(
         LambdaLR,
         {
             "lr_lambda":
             LinearDecay(
                 steps=cls.TRAINING_STEPS // 3, startp=1.0, endp=1.0 / 3)
         },
     ))
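The `Builder(LambdaLR, {"lr_lambda": LinearDecay(...)})` pattern used in Code Example #10 (and in most of the pipelines in this listing) simply wraps PyTorch's `LambdaLR` around a callable that maps the current step to a multiplicative factor on the base learning rate. The snippet below is a minimal, self-contained sketch of that behaviour using plain PyTorch only; `linear_decay` is a hypothetical stand-in for allenact's `LinearDecay`, and the parameter and optimizer are dummies chosen for illustration.

import torch
from torch import optim
from torch.optim.lr_scheduler import LambdaLR


def linear_decay(steps: int, startp: float = 1.0, endp: float = 0.0):
    """Return an lr_lambda mapping step -> factor, interpolating startp..endp over `steps`."""

    def fn(step: int) -> float:
        frac = min(max(step / steps, 0.0), 1.0)
        return startp + (endp - startp) * frac

    return fn


params = [torch.nn.Parameter(torch.zeros(1))]  # dummy parameter
opt = optim.Adam(params, lr=3e-4)              # base lr, as in the examples above
# Same shape as Code Example #10: decay the lr to one third of its initial value.
sched = LambdaLR(opt, lr_lambda=linear_decay(steps=100, startp=1.0, endp=1.0 / 3))

for _ in range(100):
    opt.step()
    sched.step()

print(opt.param_groups[0]["lr"])  # ~1e-4, i.e. 3e-4 scaled by endp = 1/3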
Code Example #11
 def training_pipeline(cls, **kwargs) -> TrainingPipeline:
     lr = 1e-4
     ppo_steps = int(8e7)  # convergence may be after 1e8
     clip_param = 0.1
     value_loss_coef = 0.5
     entropy_coef = 0.0
     num_mini_batch = 4  # optimal 64
     update_repeats = 10
     max_grad_norm = 0.5
     num_steps = 2048
     gamma = 0.99
     use_gae = True
     gae_lambda = 0.95
     advance_scene_rollout_period = None
     save_interval = 200000
     metric_accumulate_interval = 50000
     return TrainingPipeline(
         named_losses=dict(ppo_loss=PPO(
             clip_param=clip_param,
             value_loss_coef=value_loss_coef,
             entropy_coef=entropy_coef,
         ), ),  # type:ignore
         pipeline_stages=[
             PipelineStage(loss_names=["ppo_loss"],
                           max_stage_steps=ppo_steps),
         ],
         optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam),
                                   dict(lr=lr)),
         num_mini_batch=num_mini_batch,
         update_repeats=update_repeats,
         max_grad_norm=max_grad_norm,
         num_steps=num_steps,
         gamma=gamma,
         use_gae=use_gae,
         gae_lambda=gae_lambda,
         advance_scene_rollout_period=advance_scene_rollout_period,
         save_interval=save_interval,
         metric_accumulate_interval=metric_accumulate_interval,
         lr_scheduler_builder=Builder(
             LambdaLR,
             {"lr_lambda": LinearDecay(steps=ppo_steps, startp=1, endp=1)
              },  # constant learning rate
         ),
     )
Code Example #12
    def _training_pipeline(
        cls,
        named_losses: Dict[str, Union[Loss, Builder]],
        pipeline_stages: List[PipelineStage],
        num_mini_batch: int,
        update_repeats: int,
        total_train_steps: int,
        lr: Optional[float] = None,
    ):
        lr = cls.DEFAULT_LR if lr is None else lr

        num_steps = cls.ROLLOUT_STEPS
        metric_accumulate_interval = cls.METRIC_ACCUMULATE_INTERVAL()  # Log every 10 max length tasks
        save_interval = int(cls.TOTAL_RL_TRAIN_STEPS / cls.NUM_CKPTS_TO_SAVE)
        gamma = 0.99

        use_gae = "reinforce_loss" not in named_losses
        gae_lambda = 0.99
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam),
                                      dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses=named_losses,
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=None,
            should_log=cls.SHOULD_LOG,
            pipeline_stages=pipeline_stages,
            lr_scheduler_builder=Builder(
                LambdaLR,
                {"lr_lambda": LinearDecay(steps=cls.TOTAL_RL_TRAIN_STEPS)
                 }  # type: ignore
            ),
        )
Code Example #13
    def training_pipeline(self, **kwargs):
        # PPO
        ppo_steps = int(75000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 4
        num_steps = 128
        save_interval = 5000000
        log_interval = 10000 if torch.cuda.is_available() else 1
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5
        PPOConfig["normalize_advantage"] = self.NORMALIZE_ADVANTAGE

        named_losses = {"ppo_loss": (PPO(**PPOConfig), 1.0)}
        named_losses = self._update_with_auxiliary_losses(named_losses)

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={key: val[0]
                          for key, val in named_losses.items()},
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(
                    loss_names=list(named_losses.keys()),
                    max_stage_steps=ppo_steps,
                    loss_weights=[val[1] for val in named_losses.values()],
                )
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}),
        )
Code Example #14
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        info = cls._training_pipeline_info()

        return TrainingPipeline(
            gamma=info.get("gamma", 0.99),
            use_gae=info.get("use_gae", True),
            gae_lambda=info.get("gae_lambda", 0.95),
            num_steps=info["num_steps"],
            num_mini_batch=info["num_mini_batch"],
            update_repeats=info["update_repeats"],
            max_grad_norm=info.get("max_grad_norm", 0.5),
            save_interval=cls.SAVE_INTERVAL,
            named_losses=info["named_losses"],
            metric_accumulate_interval=cls.num_train_processes() *
            max(*cls.MAX_STEPS.values()) if torch.cuda.is_available() else 1,
            optimizer_builder=Builder(optim.Adam, dict(lr=info["lr"])),
            advance_scene_rollout_period=None,
            pipeline_stages=info["pipeline_stages"],
            lr_scheduler_builder=cls.get_lr_scheduler_builder(
                use_lr_decay=info["use_lr_decay"]),
        )
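Code Example #14 delegates all of the experiment-specific pieces to a `_training_pipeline_info` helper that is not shown here. As a rough guide, the hypothetical sketch below shows the kind of dict that helper would need to return, inferred only from the keys the example above reads (`named_losses`, `pipeline_stages`, `num_steps`, `num_mini_batch`, `update_repeats`, `lr`, `use_lr_decay`, plus the optional `gamma`, `use_gae`, `gae_lambda`, and `max_grad_norm`). The import paths assume a recent allenact release, and the concrete values are illustrative only.

from typing import Any, Dict

from allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig
from allenact.utils.experiment_utils import PipelineStage


def example_training_pipeline_info() -> Dict[str, Any]:
    # Hypothetical stand-in for cls._training_pipeline_info(); every key below
    # is one that Code Example #14 reads from the returned dict.
    ppo_steps = int(1e6)
    return dict(
        named_losses={"ppo_loss": PPO(**PPOConfig)},
        pipeline_stages=[
            PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps)
        ],
        num_steps=128,
        num_mini_batch=1,
        update_repeats=4,
        lr=3e-4,
        use_lr_decay=True,  # consumed by get_lr_scheduler_builder (Code Example #10)
    )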
Code Example #15
class PointNavRoboThorRGBPPOExperimentConfig(ExperimentConfig):
    """A Point Navigation experiment configuration in RoboThor."""

    # %%
    """
    We then define the task parameters. For PointNav, these include the maximum number of steps our agent
    can take before being reset (this prevents the agent from wandering on forever), and a configuration
    for the reward function that we will be using. 
    """

    # %%
    # Task Parameters
    MAX_STEPS = 500
    REWARD_CONFIG = {
        "step_penalty": -0.01,
        "goal_success_reward": 10.0,
        "failed_stop_reward": 0.0,
        "shaping_weight": 1.0,
    }

    # %%
    """
    In this case, we set the maximum number of steps to 500.
    We give the agent a reward of -0.01 for each action that it takes (this is to encourage it to reach the goal
    in as few actions as possible), and a reward of 10.0 if the agent manages to successfully reach its destination.
    If the agent selects the `stop` action without reaching the target, we do not punish it (although a small penalty
    here is sometimes useful for preventing the agent from stopping prematurely). Finally, our agent gets rewarded if it
    moves closer to the target and gets punished if it moves further away. `shaping_weight` controls how strong this
    signal should be and is here set to 1.0. These parameters work well for training an agent on PointNav, but feel free
    to play around with them. (A short sketch of how these terms might combine into a per-step reward follows this code
    example.)
    
    Next, we set the parameters of the simulator itself. Here we select a resolution at which the engine will render
    every frame (640 by 480) and a resolution at which the image will be fed into the neural network (here it is set
    to a 224 by 224 box).
    """

    # %%
    # Simulator Parameters
    CAMERA_WIDTH = 640
    CAMERA_HEIGHT = 480
    SCREEN_SIZE = 224

    # %%
    """
    Next, we set the hardware parameters for the training engine. `NUM_PROCESSES` sets the total number of parallel
    processes that will be used to train the model. In general, more processes result in faster training,
    but since each process is a unique instance of the environment in which we are training, they can take up a
    lot of memory. Depending on the size of the model, the environment, and the hardware we are using, we may
    need to adjust this number, but for a setup with 8 GTX Titans, 60 processes work fine. 60 also happens to
    be the number of training scenes in RoboTHOR, which allows each process to load only a single scene into
    memory, saving time and space.
    
    `TRAINING_GPUS` takes the ids of the GPUs on which
    the model should be trained. Similarly, `VALIDATION_GPUS` and `TESTING_GPUS` hold the ids of the GPUs on which
    the validation and testing will occur. During training, a validation process is constantly running and evaluating
    the current model to show the progress on the validation set, so reserving a GPU for validation can be a good idea.
    If our hardware setup does not include a GPU, these fields can be set to empty lists, as the codebase will default
    to running everything on the CPU with only 1 process.
    """

    # %%
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    NUM_PROCESSES = 20
    TRAINING_GPUS: Sequence[int] = [0]
    VALIDATION_GPUS: Sequence[int] = [0]
    TESTING_GPUS: Sequence[int] = [0]

    # %%
    """
    Since we are using a dataset to train our model we need to define the path to where we have stored it. If we
    download the dataset instructed above we can define the path as follows
    """

    # %%
    TRAIN_DATASET_DIR = os.path.join(os.getcwd(),
                                     "datasets/robothor-pointnav/debug")
    VAL_DATASET_DIR = os.path.join(os.getcwd(),
                                   "datasets/robothor-pointnav/debug")

    # %%
    """
    Next, we define the sensors. `RGBSensorThor` is the environment's implementation of an RGB sensor. It takes the
    raw image output by the simulator and resizes it to the input dimensions for our neural network that we
    specified above. It also performs normalization if we want. `GPSCompassSensorRoboThor` is a sensor that tracks
    the point our agent needs to move to. It tells us the direction and distance to our goal at every time step.
    """

    # %%
    SENSORS = [
        RGBSensorThor(
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    # %%
    """
    For the sake of this example, we are also going to be using a preprocessor with our model. In *AllenAct*
    the preprocessor abstraction is designed with large models with frozen weights in mind. These models often
    hail from the ResNet family and transform the raw pixels that our agent observes in the environment into a
    complex embedding, which then gets stored and used as input to our trainable model instead of the original image.
    Most other preprocessing work is done in the sensor classes (as we just saw with the RGB
    sensor scaling and normalizing our input), but for the sake of efficiency, all neural network preprocessing should
    use this abstraction.
    """

    # %%
    PREPROCESSORS = [
        Builder(
            ResNetPreprocessor,
            {
                "input_height": SCREEN_SIZE,
                "input_width": SCREEN_SIZE,
                "output_width": 7,
                "output_height": 7,
                "output_dims": 512,
                "pool": False,
                "torchvision_resnet_model": models.resnet18,
                "input_uuids": ["rgb_lowres"],
                "output_uuid": "rgb_resnet",
            },
        ),
    ]

    # %%
    """
    Next, we must define all of the observation inputs that our model will use. These are just
    the hardcoded ids of the sensors we are using in the experiment.
    """

    # %%
    OBSERVATIONS = [
        "rgb_resnet",
        "target_coordinates_ind",
    ]

    # %%
    """
    Finally, we must define the settings of our simulator. We set the camera dimensions to the values
    we defined earlier. We set rotateStepDegrees to 30 degrees, which means that every time the agent takes a
    turn action, it will rotate by 30 degrees. We set gridSize to 0.25, which means that every time the
    agent moves forward, it will do so by 0.25 meters.
    """

    # %%
    ENV_ARGS = dict(
        width=CAMERA_WIDTH,
        height=CAMERA_HEIGHT,
        rotateStepDegrees=30.0,
        visibilityDistance=1.0,
        gridSize=0.25,
    )

    # %%
    """
    Now we move on to the methods that we must define to finish implementing an experiment config. Firstly we
    have a simple method that just returns the name of the experiment.
    """

    # %%
    @classmethod
    def tag(cls):
        return "PointNavRobothorRGBPPO"

    # %%
    """
    Next, we define the training pipeline. In this function, we specify exactly which algorithm or algorithms
    we will use to train our model. In this simple example, we are using the PPO loss with a learning rate of 3e-4.
    We specify 250 million steps of training and a rollout length of 30 with the `ppo_steps` and `num_steps` parameters
    respectively. All the other standard PPO parameters are also present in this function. `metric_accumulate_interval`
    sets the frequency at which data is accumulated from all the processes and logged, while `save_interval` sets how
    often we save the model weights and run validation on them.
    """

    # %%
    @classmethod
    def training_pipeline(cls, **kwargs):
        ppo_steps = int(250000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 3
        num_steps = 30
        save_interval = 5000000
        log_interval = 1000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5
        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={"ppo_loss": PPO(**PPOConfig)},
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"],
                              max_stage_steps=ppo_steps)
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}),
        )

    # %%
    """
    The `machine_params` method returns the hardware parameters of each
    process, based on the list of devices we defined above.
    """

    # %%
    def machine_params(self, mode="train", **kwargs):
        sampler_devices: List[int] = []
        if mode == "train":
            workers_per_device = 1
            gpu_ids = ([] if not torch.cuda.is_available() else
                       list(self.TRAINING_GPUS) * workers_per_device)
            nprocesses = (8 if not torch.cuda.is_available() else
                          evenly_distribute_count_into_bins(
                              self.NUM_PROCESSES, len(gpu_ids)))
            sampler_devices = list(self.TRAINING_GPUS)
        elif mode == "valid":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available(
            ) else self.VALIDATION_GPUS
        elif mode == "test":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available(
            ) else self.TESTING_GPUS
        else:
            raise NotImplementedError(
                "mode must be 'train', 'valid', or 'test'.")

        sensor_preprocessor_graph = (SensorPreprocessorGraph(
            source_observation_spaces=SensorSuite(
                self.SENSORS).observation_spaces,
            preprocessors=self.PREPROCESSORS,
        ) if mode == "train" or (
            (isinstance(nprocesses, int) and nprocesses > 0) or
            (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)) else
                                     None)

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sampler_devices=sampler_devices
            if mode == "train" else gpu_ids,  # ignored with > 1 gpu_ids
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    # %%
    """
    Now we define the actual model that we will be using. **AllenAct** offers first-class support for PyTorch,
    so any PyTorch model that implements the provided `ActorCriticModel` class will work here. Here we borrow a model
    from the `pointnav_baselines` project (which unsurprisingly contains several PointNav baselines). It is a small
    convolutional network that expects the output of a ResNet as its RGB input, followed by a single-layer GRU. The model
    accepts the number of different actions our agent can perform in the environment through the `action_space`
    parameter, which we get from the task definition. We also define the shape of the inputs we are going to be passing
    to the model with `observation_space`. We specify the names of our sensors with `goal_sensor_uuid` and
    `rgb_resnet_preprocessor_uuid`. Finally, we define the size of our RNN with `hidden_size` and the size of the
    embedding of our goal sensor data (the direction and distance to the target) with `goal_dims`.
    """

    # %%
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        return ResnetTensorPointNavActorCritic(
            action_space=gym.spaces.Discrete(
                len(PointNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].
            observation_spaces,
            goal_sensor_uuid="target_coordinates_ind",
            rgb_resnet_preprocessor_uuid="rgb_resnet",
            hidden_size=512,
            goal_dims=32,
        )

    # %%
    """
    We also need to define the task sampler that we will be using. This is a piece of code that generates instances
    of tasks for our agent to perform (essentially starting locations and targets for PointNav). Since we are getting
    our tasks from a dataset, the task sampler is a very simple piece of code that just reads the specified file and
    sets the agent to the next starting location whenever the agent exceeds the maximum number of steps or selects the
    `stop` action.
    """

    # %%
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        return PointNavDatasetTaskSampler(**kwargs)

    # %%
    """
    You might notice that we did not specify the task sampler's arguments, but are rather passing them in. The
    reason for this is that each process will have its own task sampler, and we need to specify exactly which scenes
    each process should work with. If we have several GPUs and many scenes, this process of distributing the work can
    be rather complicated, so we define a few helper functions to do just this.
    """

    # %%
    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        return np.round(np.linspace(0, n, num_parts + 1,
                                    endpoint=True)).astype(np.int32)

    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        path = os.path.join(scenes_dir, "*.json.gz")
        scenes = [
            scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)
        ]
        if len(scenes) == 0:
            raise RuntimeError((
                "Could find no scene dataset information in directory {}."
                " Are you sure you've downloaded them? "
                " If not, see https://allenact.org/installation/download-datasets/ information"
                " on how this can be done.").format(scenes_dir))
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisible by the number of scenes"
                )
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[:total_processes *
                            (len(scenes) // total_processes)]
        else:
            if len(scenes) % total_processes != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisor of the number of scenes"
                )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind]:inds[process_ind + 1]],
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }

    # %%
    """
    The very last things we need to define are the sampler arguments themselves. We define them separately for a train,
    validation, and test sampler, but in this case, they are almost the same. The arguments need to include the location
    of the dataset and distance cache as well as the environment arguments for our simulator, both of which we defined above
    and are just referencing here. The only consequential differences between these task samplers are the path to the dataset
    we are using (train or validation) and whether we want to loop over the dataset or not (we want this for training since
    we want to train for several epochs, but we do not need this for validation and testing). Since the test scenes of
    RoboTHOR are private we are also testing on our validation set.
    """

    # %%
    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.TRAIN_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.TRAIN_DATASET_DIR
        res["loop_dataset"] = True
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        res["env_args"]["x_display"] = (("0.%d" %
                                         devices[process_ind % len(devices)])
                                        if devices is not None
                                        and len(devices) > 0 else None)
        res["allow_flipping"] = True
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        res["env_args"]["x_display"] = (("0.%d" %
                                         devices[process_ind % len(devices)])
                                        if devices is not None
                                        and len(devices) > 0 else None)
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        return res
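The `REWARD_CONFIG` defined in Code Example #15 is only consumed inside the task class, so the example never shows how its four numbers combine. The sketch below is a minimal, hypothetical reconstruction of such a shaped step reward (it is not the actual `PointNavTask` implementation): every step pays `step_penalty`, moving toward the goal is rewarded in proportion to `shaping_weight`, and the episode-ending `stop` action adds either `goal_success_reward` or `failed_stop_reward`.

from typing import Dict


def shaped_step_reward(
    config: Dict[str, float],
    prev_distance: float,
    cur_distance: float,
    took_stop_action: bool,
    reached_goal: bool,
) -> float:
    # Hypothetical sketch of a PointNav-style shaped reward; not the allenact code.
    reward = config["step_penalty"]
    reward += config["shaping_weight"] * (prev_distance - cur_distance)
    if took_stop_action:
        reward += config["goal_success_reward"] if reached_goal else config["failed_stop_reward"]
    return reward


REWARD_CONFIG = {
    "step_penalty": -0.01,
    "goal_success_reward": 10.0,
    "failed_stop_reward": 0.0,
    "shaping_weight": 1.0,
}
# Moving 0.25 m closer to the goal without stopping: -0.01 + 1.0 * 0.25 = 0.24
print(shaped_step_reward(REWARD_CONFIG, prev_distance=2.0, cur_distance=1.75,
                         took_stop_action=False, reached_goal=False))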
Code Example #16
class ObjectNavRoboThorRGBPPOExperimentConfig(ExperimentConfig):
    """A Point Navigation experiment configuration in RoboThor."""

    # Task Parameters
    MAX_STEPS = 500
    REWARD_CONFIG = {
        "step_penalty": -0.01,
        "goal_success_reward": 10.0,
        "failed_stop_reward": 0.0,
        "shaping_weight": 1.0,
    }

    # Simulator Parameters
    CAMERA_WIDTH = 640
    CAMERA_HEIGHT = 480
    SCREEN_SIZE = 224

    # Training Engine Parameters
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    NUM_PROCESSES = 60
    TRAINING_GPUS = list(range(torch.cuda.device_count()))
    VALIDATION_GPUS = [torch.cuda.device_count() - 1]
    TESTING_GPUS = [torch.cuda.device_count() - 1]

    # Dataset Parameters
    TRAIN_DATASET_DIR = os.path.join(os.getcwd(),
                                     "datasets/ithor-objectnav/train")
    VAL_DATASET_DIR = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val")

    SENSORS = [
        RGBSensorThor(
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    PREPROCESSORS = [
        Builder(
            ResNetPreprocessor,
            {
                "input_height": SCREEN_SIZE,
                "input_width": SCREEN_SIZE,
                "output_width": 7,
                "output_height": 7,
                "output_dims": 512,
                "pool": False,
                "torchvision_resnet_model": models.resnet18,
                "input_uuids": ["rgb_lowres"],
                "output_uuid": "rgb_resnet",
            },
        ),
    ]

    OBSERVATIONS = [
        "rgb_resnet",
        "target_coordinates_ind",
    ]

    ENV_ARGS = dict(
        width=CAMERA_WIDTH,
        height=CAMERA_HEIGHT,
        rotateStepDegrees=30.0,
        visibilityDistance=1.0,
        gridSize=0.25,
    )

    @classmethod
    def tag(cls):
        return "PointNavithorRGBPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        ppo_steps = int(250000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 3
        num_steps = 30
        save_interval = 5000000
        log_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5
        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={"ppo_loss": PPO(**PPOConfig)},
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"],
                              max_stage_steps=ppo_steps)
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}),
        )

    def machine_params(self, mode="train", **kwargs):
        sampler_devices: Sequence[int] = []
        if mode == "train":
            workers_per_device = 1
            gpu_ids = ([] if not torch.cuda.is_available() else
                       self.TRAINING_GPUS * workers_per_device)
            nprocesses = (1 if not torch.cuda.is_available() else
                          evenly_distribute_count_into_bins(
                              self.NUM_PROCESSES, len(gpu_ids)))
            sampler_devices = self.TRAINING_GPUS
        elif mode == "valid":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available(
            ) else self.VALIDATION_GPUS
        elif mode == "test":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available(
            ) else self.TESTING_GPUS
        else:
            raise NotImplementedError(
                "mode must be 'train', 'valid', or 'test'.")

        sensor_preprocessor_graph = (SensorPreprocessorGraph(
            source_observation_spaces=SensorSuite(
                self.SENSORS).observation_spaces,
            preprocessors=self.PREPROCESSORS,
        ) if mode == "train" or (
            (isinstance(nprocesses, int) and nprocesses > 0) or
            (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)) else
                                     None)

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sampler_devices=sampler_devices
            if mode == "train" else gpu_ids,  # ignored with > 1 gpu_ids
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    # Define Model
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        return ResnetTensorPointNavActorCritic(
            action_space=gym.spaces.Discrete(
                len(PointNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].
            observation_spaces,
            goal_sensor_uuid="target_coordinates_ind",
            rgb_resnet_preprocessor_uuid="rgb_resnet",
            hidden_size=512,
            goal_dims=32,
        )

    # Define Task Sampler
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        return PointNavDatasetTaskSampler(**kwargs)

    # Utility Functions for distributing scenes between GPUs
    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        return np.round(np.linspace(0, n, num_parts + 1,
                                    endpoint=True)).astype(np.int32)

    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        path = os.path.join(scenes_dir, "*.json.gz")
        scenes = [
            scene.split("/")[-1].split(".")[0] for scene in glob.glob(path)
        ]
        if len(scenes) == 0:
            raise RuntimeError((
                "Could find no scene dataset information in directory {}."
                " Are you sure you've downloaded them? "
                " If not, see https://allenact.org/installation/download-datasets/ information"
                " on how this can be done.").format(scenes_dir))
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisible by the number of scenes"
                )
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[:total_processes *
                            (len(scenes) // total_processes)]
        else:
            if len(scenes) % total_processes != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisor of the number of scenes"
                )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind]:inds[process_ind + 1]],
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.TRAIN_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.TRAIN_DATASET_DIR
        res["loop_dataset"] = True
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        res["env_args"]["x_display"] = (("0.%d" %
                                         devices[process_ind % len(devices)])
                                        if devices is not None
                                        and len(devices) > 0 else None)
        res["allow_flipping"] = True
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        res["env_args"]["x_display"] = (("0.%d" %
                                         devices[process_ind % len(devices)])
                                        if devices is not None
                                        and len(devices) > 0 else None)
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        return res
Code Example #17
class PointNavHabitatRGBPPOTutorialExperimentConfig(ExperimentConfig):
    """A Point Navigation experiment configuration in Habitat."""

    # Task Parameters
    MAX_STEPS = 500
    REWARD_CONFIG = {
        "step_penalty": -0.01,
        "goal_success_reward": 10.0,
        "failed_stop_reward": 0.0,
        "shaping_weight": 1.0,
    }
    DISTANCE_TO_GOAL = 0.2

    # Simulator Parameters
    CAMERA_WIDTH = 640
    CAMERA_HEIGHT = 480
    SCREEN_SIZE = 224

    # Training Engine Parameters
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    NUM_PROCESSES = max(5 * torch.cuda.device_count() - 1, 4)
    TRAINING_GPUS = list(range(torch.cuda.device_count()))
    VALIDATION_GPUS = [torch.cuda.device_count() - 1]
    TESTING_GPUS = [torch.cuda.device_count() - 1]

    task_data_dir_template = os.path.join(HABITAT_DATASETS_DIR,
                                          "pointnav/gibson/v1/{}/{}.json.gz")
    TRAIN_SCENES = task_data_dir_template.format(*(["train"] * 2))
    VALID_SCENES = task_data_dir_template.format(*(["val"] * 2))
    TEST_SCENES = task_data_dir_template.format(*(["test"] * 2))

    CONFIG = get_habitat_config(
        os.path.join(HABITAT_CONFIGS_DIR, "tasks/pointnav_gibson.yaml"))
    CONFIG.defrost()
    CONFIG.NUM_PROCESSES = NUM_PROCESSES
    CONFIG.SIMULATOR_GPU_IDS = TRAINING_GPUS
    CONFIG.DATASET.SCENES_DIR = "habitat/habitat-api/data/scene_datasets/"
    CONFIG.DATASET.POINTNAVV1.CONTENT_SCENES = ["*"]
    CONFIG.DATASET.DATA_PATH = TRAIN_SCENES
    CONFIG.SIMULATOR.AGENT_0.SENSORS = ["RGB_SENSOR"]
    CONFIG.SIMULATOR.RGB_SENSOR.WIDTH = CAMERA_WIDTH
    CONFIG.SIMULATOR.RGB_SENSOR.HEIGHT = CAMERA_HEIGHT
    CONFIG.SIMULATOR.TURN_ANGLE = 30
    CONFIG.SIMULATOR.FORWARD_STEP_SIZE = 0.25
    CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = MAX_STEPS

    CONFIG.TASK.TYPE = "Nav-v0"
    CONFIG.TASK.SUCCESS_DISTANCE = DISTANCE_TO_GOAL
    CONFIG.TASK.SENSORS = ["POINTGOAL_WITH_GPS_COMPASS_SENSOR"]
    CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.GOAL_FORMAT = "POLAR"
    CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.DIMENSIONALITY = 2
    CONFIG.TASK.GOAL_SENSOR_UUID = "pointgoal_with_gps_compass"
    CONFIG.TASK.MEASUREMENTS = ["DISTANCE_TO_GOAL", "SUCCESS", "SPL"]
    CONFIG.TASK.SPL.TYPE = "SPL"
    CONFIG.TASK.SPL.SUCCESS_DISTANCE = DISTANCE_TO_GOAL
    CONFIG.TASK.SUCCESS.SUCCESS_DISTANCE = DISTANCE_TO_GOAL

    CONFIG.MODE = "train"

    SENSORS = [
        RGBSensorHabitat(
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
        ),
        TargetCoordinatesSensorHabitat(coordinate_dims=2),
    ]

    PREPROCESSORS = [
        Builder(
            ResNetPreprocessor,
            {
                "input_height": SCREEN_SIZE,
                "input_width": SCREEN_SIZE,
                "output_width": 7,
                "output_height": 7,
                "output_dims": 512,
                "pool": False,
                "torchvision_resnet_model": models.resnet18,
                "input_uuids": ["rgb_lowres"],
                "output_uuid": "rgb_resnet",
            },
        ),
    ]

    OBSERVATIONS = [
        "rgb_resnet",
        "target_coordinates_ind",
    ]

    TRAIN_CONFIGS = construct_env_configs(CONFIG)

    @classmethod
    def tag(cls):
        return "PointNavHabitatRGBPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        ppo_steps = int(250000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 3
        num_steps = 30
        save_interval = 5000000
        log_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5
        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={"ppo_loss": PPO(**PPOConfig)},
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"],
                              max_stage_steps=ppo_steps)
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}),
        )

    def machine_params(self, mode="train", **kwargs):
        if mode == "train":
            workers_per_device = 1
            gpu_ids = ([] if not torch.cuda.is_available() else
                       self.TRAINING_GPUS * workers_per_device)
            nprocesses = (1 if not torch.cuda.is_available() else
                          evenly_distribute_count_into_bins(
                              self.NUM_PROCESSES, len(gpu_ids)))
        elif mode == "valid":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available(
            ) else self.VALIDATION_GPUS
        elif mode == "test":
            nprocesses = 1
            gpu_ids = [] if not torch.cuda.is_available(
            ) else self.TESTING_GPUS
        else:
            raise NotImplementedError(
                "mode must be 'train', 'valid', or 'test'.")

        sensor_preprocessor_graph = (SensorPreprocessorGraph(
            source_observation_spaces=SensorSuite(
                self.SENSORS).observation_spaces,
            preprocessors=self.PREPROCESSORS,
        ) if mode == "train" or (
            (isinstance(nprocesses, int) and nprocesses > 0) or
            (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)) else
                                     None)

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    # Define Model
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        return ResnetTensorPointNavActorCritic(
            action_space=gym.spaces.Discrete(
                len(PointNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].
            observation_spaces,
            goal_sensor_uuid="target_coordinates_ind",
            rgb_resnet_preprocessor_uuid="rgb_resnet",
            hidden_size=512,
            goal_dims=32,
        )

    # Define Task Sampler
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        return PointNavTaskSampler(**kwargs)

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        config = self.TRAIN_CONFIGS[process_ind]
        return {
            "env_config": config,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "distance_to_goal": self.DISTANCE_TO_GOAL,  # type:ignore
        }

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        config = self.CONFIG.clone()
        config.defrost()
        config.DATASET.DATA_PATH = self.VALID_SCENES
        config.MODE = "validate"
        config.freeze()
        return {
            "env_config": config,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            "distance_to_goal": self.DISTANCE_TO_GOAL,  # type:ignore
        }

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        raise NotImplementedError("Testing not implemented for this tutorial.")
Code Example #18
    def training_pipeline(self, **kwargs):
        # These params are identical to the baseline configuration for 60 samplers (1 machine)
        ppo_steps = int(300e6)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 4
        num_steps = 128
        save_interval = 5000000
        log_interval = 10000 if torch.cuda.is_available() else 1
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5

        # We add 30 million steps for small batch learning
        small_batch_steps = int(30e6)
        # And a short transition phase towards large learning rate
        # (see comment in the `lr_scheduler` helper method)
        transition_steps = int(2 / 3 * self.distributed_nodes * 1e6)

        # Find exact number of samplers per GPU
        assert (self.num_train_processes % len(self.train_gpu_ids) == 0
                ), "Expected uniform number of samplers per GPU"
        samplers_per_gpu = self.num_train_processes // len(self.train_gpu_ids)

        # Multiply num_mini_batch by the largest divisor of
        # samplers_per_gpu to keep all batches the same size
        # (a worked numeric example follows this code example):
        num_mini_batch_multiplier = [
            i for i in reversed(
                range(1,
                      min(samplers_per_gpu // 2, self.distributed_nodes) + 1))
            if samplers_per_gpu % i == 0
        ][0]

        # Multiply update_repeats so that the product of this factor and
        # num_mini_batch_multiplier is >= self.distributed_nodes:
        update_repeats_multiplier = int(
            math.ceil(self.distributed_nodes / num_mini_batch_multiplier))

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={"ppo_loss": PPO(**PPOConfig, show_ratios=False)},
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                # We increase the number of batches for the first stage to reach an
                # equivalent number of updates per collected rollout data as in the
                # 1 node/60 samplers setting
                PipelineStage(
                    loss_names=["ppo_loss"],
                    max_stage_steps=small_batch_steps,
                    num_mini_batch=num_mini_batch * num_mini_batch_multiplier,
                    update_repeats=update_repeats * update_repeats_multiplier,
                ),
                # Then we proceed with the base configuration (leading to larger
                # batches due to the increased number of samplers)
                PipelineStage(
                    loss_names=["ppo_loss"],
                    max_stage_steps=ppo_steps - small_batch_steps,
                ),
            ],
            # We use the MultiLinearDecay curve defined by the helper function,
            # setting the learning rate scaling as the square root of the number
            # of nodes. Linear scaling might also work, but we leave that
            # check to the reader.
            lr_scheduler_builder=Builder(
                LambdaLR,
                {
                    "lr_lambda":
                    self.lr_scheduler(
                        small_batch_steps=small_batch_steps,
                        transition_steps=transition_steps,
                        ppo_steps=ppo_steps,
                        lr_scaling=math.sqrt(self.distributed_nodes),
                    )
                },
            ),
        )
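To make the batch-size arithmetic in Code Example #18 concrete, the short, self-contained sketch below reruns the same two multiplier computations with hypothetical numbers (240 samplers spread over 8 training GPUs and 4 nodes); all of the values are illustrative only.

import math

# Hypothetical distributed setup.
distributed_nodes = 4
num_train_processes = 240
num_train_gpus = 8
num_mini_batch, update_repeats = 1, 4  # base configuration from the example

samplers_per_gpu = num_train_processes // num_train_gpus  # 30

# Largest divisor of samplers_per_gpu that is <= min(samplers_per_gpu // 2, nodes).
num_mini_batch_multiplier = [
    i
    for i in reversed(range(1, min(samplers_per_gpu // 2, distributed_nodes) + 1))
    if samplers_per_gpu % i == 0
][0]  # -> 3

# Scale update_repeats so the product of both multipliers covers all nodes.
update_repeats_multiplier = int(
    math.ceil(distributed_nodes / num_mini_batch_multiplier)
)  # -> 2

print(num_mini_batch * num_mini_batch_multiplier)  # 3 mini-batches in the first stage
print(update_repeats * update_repeats_multiplier)  # 8 update repeats in the first stage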