Code Example #1
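All of the snippets on this page (this example and the nine below) are `training_pipeline` methods from allenact experiment configs. They assume imports roughly like the following; the exact module paths are an assumption on my part and have shifted between allenact releases, so adjust them to your version:

    import torch
    from typing import Dict, List, Optional, Union
    from torch import optim
    from torch.optim.lr_scheduler import LambdaLR

    from allenact.utils.experiment_utils import (
        Builder,
        LinearDecay,
        PipelineStage,
        TrainingPipeline,
    )
    from allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig
    from allenact.algorithms.onpolicy_sync.losses.a2cacktr import A2C, A2CConfig
    from allenact.algorithms.onpolicy_sync.losses.imitation import Imitation
    # Example #7 additionally needs the grouped-action loss and the RoboTHOR plugin,
    # and Example #8 references a Loss base type; paths again are my assumption:
    # from allenact.algorithms.onpolicy_sync.losses.grouped_action_imitation import GroupedActionImitation
    # from allenact_plugins.robothor_plugin.robothor_tasks import ObjectNavTask
    # from allenact_plugins.robothor_plugin import robothor_constants
    # from allenact.base_abstractions.misc import Loss

With those in scope, Example #1 configures a single-stage PPO pipeline with a linearly decayed clip parameter and learning rate: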
    @classmethod
    def training_pipeline(cls, **kwargs):
        ppo_steps = int(1e6)
        lr = 2.5e-4
        num_mini_batch = 2 if not torch.cuda.is_available() else 6  # 2 mini-batches on CPU, 6 when a GPU is available
        update_repeats = 4
        num_steps = 128
        metric_accumulate_interval = cls.MAX_STEPS * 10  # Log every 10 max length tasks
        save_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 1.0
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig),
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps),
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )
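For orientation: these methods are defined on `ExperimentConfig` subclasses, which is where class attributes such as `MAX_STEPS` and `ADVANCE_SCENE_ROLLOUT_PERIOD` come from. A minimal sketch of that enclosing context (the class name and constant values here are illustrative, not from the source):

    from allenact.base_abstractions.experiment_config import ExperimentConfig

    class MyPPOExperimentConfig(ExperimentConfig):  # hypothetical name
        MAX_STEPS = 128  # illustrative value
        ADVANCE_SCENE_ROLLOUT_PERIOD = None

        @classmethod
        def training_pipeline(cls, **kwargs):
            ...  # e.g. the body shown in Example #1

A complete config must also implement the other `ExperimentConfig` hooks (`tag`, `create_model`, `make_sampler_fn`, and the task-sampler argument methods); they are omitted here.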
Code Example #2
    def training_pipeline(self, **kwargs):
        training_steps = int(3e8)
        tf_steps = int(5e6)
        anneal_steps = int(5e6)
        il_no_tf_steps = training_steps - tf_steps - anneal_steps  # imitation steps left after annealing ends
        assert il_no_tf_steps > 0

        lr = 3e-4
        num_mini_batch = 2 if torch.cuda.is_available() else 1
        update_repeats = 4
        num_steps = 30
        save_interval = 5000000
        log_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5
        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "imitation_loss": Imitation(),
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                # Stage 1: imitation with teacher forcing held at 1.0 for all tf_steps
                PipelineStage(
                    loss_names=["imitation_loss"],
                    max_stage_steps=tf_steps,
                    teacher_forcing=LinearDecay(
                        startp=1.0,
                        endp=1.0,
                        steps=tf_steps,
                    ),
                ),
                # Stage 2: anneal teacher forcing from 1.0 to 0.0 over anneal_steps,
                # then keep training on imitation alone for il_no_tf_steps
                PipelineStage(
                    loss_names=["imitation_loss"],
                    max_stage_steps=anneal_steps + il_no_tf_steps,
                    teacher_forcing=LinearDecay(
                        startp=1.0,
                        endp=0.0,
                        steps=anneal_steps,
                    ),
                ),
            ],
            lr_scheduler_builder=Builder(
                LambdaLR,
                {"lr_lambda": LinearDecay(steps=training_steps)},
            ),
        )
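The two stages above hold teacher forcing at 1.0 for the first tf_steps, then anneal it to 0.0 over anneal_steps and keep training on imitation alone. Assuming the `LinearDecay` semantics I have seen in allenact (linear interpolation from `startp` to `endp`, clamped once `steps` is reached), the annealing stage behaves roughly like this:

    decay = LinearDecay(startp=1.0, endp=0.0, steps=int(5e6))
    decay(0)           # -> 1.0: always take the expert's action
    decay(int(2.5e6))  # -> 0.5: expert action for roughly half the steps
    decay(int(5e6))    # -> 0.0: always follow the learned policy
    decay(int(6e6))    # -> 0.0: clamped, stays at endp beyond `steps`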
Code Example #3
 @classmethod
 def training_pipeline(cls, **kwargs):
     ppo_steps = int(2.5e8)
     lr = 3e-4
     num_mini_batch = 1
     update_repeats = 3
     num_steps = 30
     save_interval = 5000000
     log_interval = 1000
     gamma = 0.99
     use_gae = True
     gae_lambda = 0.95
     max_grad_norm = 0.5
     return TrainingPipeline(
         save_interval=save_interval,
         metric_accumulate_interval=log_interval,
         optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
         num_mini_batch=num_mini_batch,
         update_repeats=update_repeats,
         max_grad_norm=max_grad_norm,
         num_steps=num_steps,
         named_losses={"ppo_loss": PPO(**PPOConfig)},
         gamma=gamma,
         use_gae=use_gae,
         gae_lambda=gae_lambda,
         advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
         pipeline_stages=[
             PipelineStage(loss_names=["ppo_loss"],
                           max_stage_steps=ppo_steps)
         ],
         lr_scheduler_builder=Builder(
             LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}),
     )
Code Example #4
    @classmethod
    def training_pipeline(cls, **kwargs):
        total_train_steps = cls.TOTAL_IL_TRAIN_STEPS

        ppo_info = cls.rl_loss_default("ppo", steps=-1)  # steps is unused here; only the batching settings are read below
        imitation_info = cls.rl_loss_default("imitation")

        return cls._training_pipeline(
            named_losses={
                "imitation_loss": imitation_info["loss"],
            },
            pipeline_stages=[
                PipelineStage(
                    loss_names=["imitation_loss"],
                    teacher_forcing=LinearDecay(
                        startp=1.0,
                        endp=1.0,
                        steps=total_train_steps,
                    ),
                    max_stage_steps=total_train_steps,
                ),
            ],
            num_mini_batch=min(info["num_mini_batch"]
                               for info in [ppo_info, imitation_info]),
            update_repeats=min(info["update_repeats"]
                               for info in [ppo_info, imitation_info]),
            total_train_steps=total_train_steps,
        )
Code Example #5
File: nav_base.py Project: jvrana/allenact
 @classmethod
 def training_pipeline(cls, **kwargs):
     ppo_steps = int(1e6)
     return TrainingPipeline(
         save_interval=200000,
         metric_accumulate_interval=1,
         optimizer_builder=Builder(optim.Adam, dict(lr=3e-4)),
         num_mini_batch=2,
         update_repeats=3,
         max_grad_norm=0.5,
         num_steps=30,
         named_losses={
             "ppo_loss": Builder(
                 PPO,
                 kwargs={},
                 default=PPOConfig,
             )
         },
         gamma=0.99,
         use_gae=True,
         gae_lambda=0.95,
         advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
         pipeline_stages=[
             PipelineStage(loss_names=["ppo_loss"],
                           max_stage_steps=ppo_steps)
         ],
         lr_scheduler_builder=Builder(
             LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}),
     )
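Unlike the other examples, Example #5 wraps the PPO loss itself in a `Builder` instead of constructing it eagerly. As I understand allenact's `Builder` (worth double-checking against your version), it stores a class with keyword arguments and instantiates it when called, with call-time kwargs overriding the stored `kwargs`, which in turn override `default`:

    # hedged sketch of the deferred-construction behavior, not allenact source
    loss_builder = Builder(PPO, kwargs={}, default=PPOConfig)
    ppo_loss = loss_builder()  # here equivalent to PPO(**PPOConfig)

    opt_builder = Builder(optim.Adam, dict(lr=3e-4))
    # the training engine later supplies the parameters, roughly:
    #   optimizer = opt_builder(params=model.parameters())  # `model` is hypothetical

Deferring construction this way lets the engine decide when, and with which extra arguments (such as the model parameters), the object is actually built.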
Code Example #6
File: base.py Project: tzadouri/allenact
 @classmethod
 def rl_loss_default(cls, alg: str, steps: Optional[int] = None):
     if alg == "ppo":
         assert steps is not None
         return {
             "loss": Builder(
                 PPO,
                 kwargs={"clip_decay": LinearDecay(steps)},
                 default=PPOConfig,
             ),
             "num_mini_batch": cls.PPO_NUM_MINI_BATCH,
             "update_repeats": 4,
         }
     elif alg == "a2c":
         return {
             "loss": A2C(**A2CConfig),
             "num_mini_batch": 1,
             "update_repeats": 1,
         }
     elif alg == "imitation":
         return {
             "loss": Imitation(),
             "num_mini_batch": cls.PPO_NUM_MINI_BATCH,
             "update_repeats": 4,
         }
     else:
         raise NotImplementedError
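A hypothetical call site for `rl_loss_default`, following the pattern of Examples #4 and #9: the returned dict bundles a loss (or loss `Builder`) with the batching settings that suit that algorithm:

    ppo_info = cls.rl_loss_default("ppo", steps=int(1e6))
    il_info = cls.rl_loss_default("imitation")
    named_losses = {"ppo_loss": ppo_info["loss"], "imitation_loss": il_info["loss"]}
    # when mixing algorithms, take the most conservative batching settings
    num_mini_batch = min(ppo_info["num_mini_batch"], il_info["num_mini_batch"])
    update_repeats = min(ppo_info["update_repeats"], il_info["update_repeats"])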
Code Example #7
    def training_pipeline(self, **kwargs):
        ppo_steps = int(3e8)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 4
        num_steps = 128
        save_interval = 5000000
        log_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5

        action_strs = ObjectNavTask.class_action_names()
        non_end_action_inds_set = {
            i for i, a in enumerate(action_strs) if a != robothor_constants.END
        }
        end_action_ind_set = {action_strs.index(robothor_constants.END)}

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": PPO(**PPOConfig),
                "grouped_action_imitation": GroupedActionImitation(
                    nactions=len(action_strs),
                    action_groups=[non_end_action_inds_set, end_action_ind_set],
                ),
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(
                    loss_names=["ppo_loss", "grouped_action_imitation"],
                    max_stage_steps=ppo_steps,
                )
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}),
        )
Code Example #8
File: base.py Project: tzadouri/allenact
    @classmethod
    def _training_pipeline(  # type:ignore
        cls,
        named_losses: Dict[str, Union[Loss, Builder]],
        pipeline_stages: List[PipelineStage],
        num_mini_batch: int,
        update_repeats: int,
        total_train_steps: int,
        lr: Optional[float] = None,
    ):
        lr = cls.DEFAULT_LR if lr is None else lr  # honor an explicit lr; fall back to the class default

        num_steps = cls.ROLLOUT_STEPS
        metric_accumulate_interval = (
            cls.METRIC_ACCUMULATE_INTERVAL()
        )  # Log every 10 max length tasks
        save_interval = 2 ** 31
        gamma = 0.99

        use_gae = "reinforce_loss" not in named_losses  # plain REINFORCE uses empirical returns rather than GAE
        gae_lambda = 0.99
        max_grad_norm = 0.5

        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=metric_accumulate_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses=named_losses,
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=None,
            should_log=cls.SHOULD_LOG,
            pipeline_stages=pipeline_stages,
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=total_train_steps)}  # type: ignore
            )
            if cls.USE_LR_DECAY
            else None,
        )
Code Example #9
 @classmethod
 def training_pipeline(cls, **kwargs):
     total_train_steps = cls.TOTAL_IL_TRAIN_STEPS
     loss_info = cls.rl_loss_default("imitation")
     return cls._training_pipeline(
         named_losses={"imitation_loss": loss_info["loss"]},
         pipeline_stages=[
             PipelineStage(
                 loss_names=["imitation_loss"],
                 teacher_forcing=LinearDecay(
                     startp=1.0,
                     endp=0.0,
                     steps=total_train_steps // 2,  # anneal over only the first half of training
                 ),
                 max_stage_steps=total_train_steps,
             )
         ],
         num_mini_batch=loss_info["num_mini_batch"],
         update_repeats=loss_info["update_repeats"],
         total_train_steps=total_train_steps,
     )
Code Example #10
 @classmethod
 def training_pipeline(cls, **kwargs) -> TrainingPipeline:
     ppo_steps = int(150000)
     return TrainingPipeline(
         named_losses=dict(ppo_loss=PPO(**PPOConfig)),  # type:ignore
         pipeline_stages=[
             PipelineStage(loss_names=["ppo_loss"],
                           max_stage_steps=ppo_steps)
         ],
         optimizer_builder=Builder(optim.Adam, dict(lr=1e-4)),
         num_mini_batch=4,
         update_repeats=3,
         max_grad_norm=0.5,
         num_steps=16,
         gamma=0.99,
         use_gae=True,
         gae_lambda=0.95,
         advance_scene_rollout_period=None,
         save_interval=10000,
         metric_accumulate_interval=1,
         lr_scheduler_builder=Builder(
             LambdaLR,
             {"lr_lambda": LinearDecay(steps=ppo_steps)}  # type:ignore
         ),
     )