Example #1
    @classmethod
    def rl_loss_default(cls, alg: str, steps: Optional[int] = None):
        """Return the loss configuration for the requested algorithm."""
        if alg == "ppo":
            assert steps is not None
            return {
                "loss": Builder(
                    PPO,
                    kwargs={"clip_decay": LinearDecay(steps)},
                    default=PPOConfig,
                ),
                "num_mini_batch": cls.PPO_NUM_MINI_BATCH,
                "update_repeats": 4,
            }
        elif alg == "a2c":
            return {
                "loss": A2C(**A2CConfig),
                "num_mini_batch": 1,
                "update_repeats": 1,
            }
        elif alg == "imitation":
            return {
                "loss": Imitation(),
                "num_mini_batch": cls.PPO_NUM_MINI_BATCH,
                "update_repeats": 4,
            }
        else:
            raise NotImplementedError(f"Unknown algorithm: {alg}")
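
The examples on this page are classmethods of AllenAct experiment-config classes and are shown without their imports. A sketch of what they assume is given below; the module paths follow recent AllenAct releases and may differ between versions, and names such as MaskedPPO, PPO_NUM_MINI_BATCH, TRAINING_STEPS, or SENSORS come from the individual projects rather than from core AllenAct.

from typing import Any, Dict, Optional, cast

import torch
from torch import optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.utils.experiment_utils import (
    Builder,
    LinearDecay,
    PipelineStage,
    StepwiseLinearDecay,
    TrainingPipeline,
)
from allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig
from allenact.algorithms.onpolicy_sync.losses.a2cacktr import A2C, A2CConfig
from allenact.algorithms.onpolicy_sync.losses.imitation import Imitation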
Example #2
    @classmethod
    def _training_pipeline_info(cls) -> Dict[str, Any]:
        """Define how the model trains."""

        training_steps = cls.TRAINING_STEPS
        il_params = cls._use_label_to_get_training_params()
        bc_tf1_steps = il_params["bc_tf1_steps"]
        dagger_steps = il_params["dagger_steps"]

        return dict(
            named_losses=dict(
                walkthrough_ppo_loss=MaskedPPO(
                    mask_uuid="in_walkthrough_phase",
                    ppo_params=dict(
                        clip_decay=LinearDecay(training_steps), **PPOConfig
                    ),
                ),
                imitation_loss=Imitation(),
            ),
            pipeline_stages=[
                PipelineStage(
                    loss_names=["walkthrough_ppo_loss", "imitation_loss"],
                    max_stage_steps=training_steps,
                    teacher_forcing=StepwiseLinearDecay(
                        cumm_steps_and_values=[
                            (bc_tf1_steps, 1.0),
                            (bc_tf1_steps + dagger_steps, 0.0),
                        ]
                    ),
                )
            ],
            **il_params,
        )
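
The StepwiseLinearDecay above keeps the teacher-forcing probability at 1.0 for the first bc_tf1_steps (pure behavioural cloning) and then anneals it linearly to 0.0 over the following dagger_steps. A plain-Python illustration of that schedule, not the library class itself:

def teacher_forcing_prob(step, bc_tf1_steps, dagger_steps):
    # Mirrors StepwiseLinearDecay(cumm_steps_and_values=[(bc_tf1_steps, 1.0),
    # (bc_tf1_steps + dagger_steps, 0.0)]).
    if step <= bc_tf1_steps:
        return 1.0  # always follow the expert (behavioural cloning)
    if step >= bc_tf1_steps + dagger_steps:
        return 0.0  # expert fully phased out
    return 1.0 - (step - bc_tf1_steps) / dagger_steps  # DAgger-style anneal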
Example #3
    @classmethod
    def training_pipeline(cls, **kwargs):
        imitate_steps = int(75000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 3
        num_steps = 30
        save_interval = 5000000
        log_interval = 10000 if torch.cuda.is_available() else 1
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5
        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={"imitation_loss": Imitation()},
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(
                    loss_names=["imitation_loss"],
                    max_stage_steps=imitate_steps,
                    # teacher_forcing=LinearDecay(steps=int(1e5), startp=1.0, endp=0.0,),
                ),
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=imitate_steps)}
            ),
        )
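
The lr_scheduler_builder above wraps LambdaLR, whose lr_lambda returns a multiplier for the base learning rate; with LinearDecay(steps=imitate_steps) that multiplier falls linearly from 1.0 to 0.0, so the learning rate anneals from 3e-4 towards 0 over the stage. A rough plain-PyTorch equivalent, with a placeholder model:

import torch
from torch import optim
from torch.optim.lr_scheduler import LambdaLR

imitate_steps = int(75000000)
model = torch.nn.Linear(8, 4)  # placeholder model
optimizer = optim.Adam(model.parameters(), lr=3e-4)
# Multiplier 1.0 -> 0.0 over imitate_steps, clamped at 0 afterwards.
scheduler = LambdaLR(
    optimizer, lr_lambda=lambda step: max(0.0, 1.0 - step / imitate_steps)
)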
Example #4
    @classmethod
    def training_pipeline(cls, **kwargs) -> TrainingPipeline:
        ppo_steps = int(150000)
        return TrainingPipeline(
            named_losses=dict(
                imitation_loss=Imitation(
                    cls.SENSORS[1]
                ),  # 0 is Minigrid, 1 is ExpertActionSensor
                ppo_loss=PPO(**PPOConfig, entropy_method_name="conditional_entropy"),
            ),  # type:ignore
            pipeline_stages=[
                PipelineStage(
                    teacher_forcing=LinearDecay(
                        startp=1.0, endp=0.0, steps=ppo_steps // 2,
                    ),
                    loss_names=["imitation_loss", "ppo_loss"],
                    max_stage_steps=ppo_steps,
                )
            ],
            optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4)),
            num_mini_batch=4,
            update_repeats=3,
            max_grad_norm=0.5,
            num_steps=16,
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=None,
            save_interval=10000,
            metric_accumulate_interval=1,
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}  # type:ignore
            ),
        )
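
Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4)) defers construction: the class and its keyword arguments are stored, and the optimizer is only instantiated later, once the training engine can supply the model parameters. A minimal sketch of that deferred-construction pattern, as an illustration rather than AllenAct's actual Builder:

class DeferredBuilder:
    # Hold a class plus default kwargs; build the instance on demand.
    def __init__(self, class_type, kwargs=None):
        self.class_type = class_type
        self.kwargs = dict(kwargs or {})

    def __call__(self, **extra_kwargs):
        return self.class_type(**{**self.kwargs, **extra_kwargs})

# Usage: the engine supplies the parameters when it finally builds the optimizer.
#   adam_builder = DeferredBuilder(optim.Adam, dict(lr=1e-4))
#   optimizer = adam_builder(params=model.parameters())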
Example #5
    @classmethod
    def training_pipeline(cls, **kwargs):
        dagger_steps = int(1e4)
        ppo_steps = int(1e6)
        lr = 2.5e-4
        num_mini_batch = 2 if not torch.cuda.is_available() else 6
        update_repeats = 4
        num_steps = 128
        metric_accumulate_interval = cls.MAX_STEPS * 10  # Log every 10 max length tasks
        save_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 1.0
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps),
                                **PPOConfig),
                "imitation_loss": Imitation(),  # We add an imitation loss.
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(
                    loss_names=["imitation_loss"],
                    teacher_forcing=LinearDecay(
                        startp=1.0,
                        endp=0.0,
                        steps=dagger_steps,
                    ),
                    max_stage_steps=dagger_steps,
                ),
                PipelineStage(
                    loss_names=["ppo_loss"],
                    max_stage_steps=ppo_steps,
                ),
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}),
        )
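
This pipeline runs its two stages back to back: roughly 1e4 steps of imitation with teacher forcing annealed from 1.0 to 0.0 (a DAgger-style warm start), followed by 1e6 steps of pure PPO. A small, purely illustrative sketch of which stage is active at a given step count:

dagger_steps, ppo_steps = int(1e4), int(1e6)

def active_stage(global_step):
    # Stages run in sequence, each for its max_stage_steps of experience.
    if global_step < dagger_steps:
        return "stage 1: imitation_loss (teacher forcing 1.0 -> 0.0)"
    if global_step < dagger_steps + ppo_steps:
        return "stage 2: ppo_loss"
    return "training finished"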
Example #6
    @classmethod
    def _training_pipeline_info(cls, **kwargs) -> Dict[str, Any]:
        """Define how the model trains."""

        training_steps = cls.TRAINING_STEPS
        params = cls._use_label_to_get_training_params()
        bc_tf1_steps = params["bc_tf1_steps"]
        dagger_steps = params["dagger_steps"]

        return dict(
            named_losses=dict(imitation_loss=Imitation()),
            pipeline_stages=[
                PipelineStage(
                    loss_names=["imitation_loss"],
                    max_stage_steps=training_steps,
                    teacher_forcing=StepwiseLinearDecay(
                        cumm_steps_and_values=[
                            (bc_tf1_steps, 1.0),
                            (bc_tf1_steps + dagger_steps, 0.0),
                        ]
                    ),
                )
            ],
            **params
        )