def training_pipeline(cls, **kwargs):
    """Assemble a single-stage PPO ``TrainingPipeline``.

    The only stage optimizes ``"ppo_loss"`` for ``int(1e6)`` steps. The same
    step budget is handed to the ``LinearDecay`` used for PPO's ``clip_decay``
    and to the ``LinearDecay`` used as the ``LambdaLR`` ``lr_lambda``.

    ``**kwargs`` is accepted but not used.
    """
    total_steps = int(1e6)

    # Six mini-batches when CUDA is available, two otherwise.
    mini_batches = 6 if torch.cuda.is_available() else 2

    # Log every 10 max length tasks
    accumulate_every = cls.MAX_STEPS * 10

    adam_builder = Builder(optim.Adam, {"lr": 2.5e-4})
    losses = {
        "ppo_loss": PPO(clip_decay=LinearDecay(total_steps), **PPOConfig),
    }
    stages = [
        PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps),
    ]
    scheduler_builder = Builder(
        LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}
    )

    return TrainingPipeline(
        save_interval=10000,
        metric_accumulate_interval=accumulate_every,
        optimizer_builder=adam_builder,
        num_mini_batch=mini_batches,
        update_repeats=4,
        max_grad_norm=0.5,
        num_steps=128,
        named_losses=losses,
        gamma=0.99,
        use_gae=True,
        gae_lambda=1.0,
        advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
        pipeline_stages=stages,
        lr_scheduler_builder=scheduler_builder,
    )
def training_pipeline(self, **kwargs):
    """Assemble a two-stage imitation-learning ``TrainingPipeline``.

    Both stages optimize ``"imitation_loss"``:

    1. ``int(5e6)`` steps with a teacher-forcing ``LinearDecay`` whose start
       and end values are both 1.0.
    2. The remaining steps, with a teacher-forcing ``LinearDecay`` going from
       1.0 to 0.0 over ``int(5e6)`` steps.

    The learning-rate lambda is a ``LinearDecay`` over the full budget of
    300,000,000 steps. ``**kwargs`` is accepted but not used.
    """
    total_steps = int(300000000)
    forced_steps = int(5e6)
    decay_steps = int(5e6)

    # Steps left once the forced and annealing phases are accounted for.
    free_steps = total_steps - forced_steps - decay_steps
    assert free_steps > 0

    # Two mini-batches when CUDA is available, one otherwise.
    mini_batches = 2 if torch.cuda.is_available() else 1

    adam_builder = Builder(optim.Adam, {"lr": 3e-4})
    losses = {
        "imitation_loss": Imitation(),
    }

    forced_stage = PipelineStage(
        loss_names=["imitation_loss"],
        max_stage_steps=forced_steps,
        teacher_forcing=LinearDecay(startp=1.0, endp=1.0, steps=forced_steps),
    )
    annealed_stage = PipelineStage(
        loss_names=["imitation_loss"],
        max_stage_steps=decay_steps + free_steps,
        teacher_forcing=LinearDecay(startp=1.0, endp=0.0, steps=decay_steps),
    )

    scheduler_builder = Builder(
        LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)},
    )

    return TrainingPipeline(
        save_interval=5000000,
        metric_accumulate_interval=10000,
        optimizer_builder=adam_builder,
        num_mini_batch=mini_batches,
        update_repeats=4,
        max_grad_norm=0.5,
        num_steps=30,
        named_losses=losses,
        gamma=0.99,
        use_gae=True,
        gae_lambda=0.95,
        advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        pipeline_stages=[forced_stage, annealed_stage],
        lr_scheduler_builder=scheduler_builder,
    )
def training_pipeline(cls, **kwargs):
    """Assemble a single-stage PPO ``TrainingPipeline`` of 250,000,000 steps.

    The stage optimizes ``"ppo_loss"`` (``PPO(**PPOConfig)``); the step budget
    is also given to the ``LinearDecay`` used as the ``LambdaLR``
    ``lr_lambda``. ``**kwargs`` is accepted but not used.
    """
    total_steps = int(250000000)

    adam_builder = Builder(optim.Adam, {"lr": 3e-4})
    losses = {"ppo_loss": PPO(**PPOConfig)}
    stages = [
        PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps),
    ]
    scheduler_builder = Builder(
        LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}
    )

    return TrainingPipeline(
        save_interval=5000000,
        metric_accumulate_interval=1000,
        optimizer_builder=adam_builder,
        num_mini_batch=1,
        update_repeats=3,
        max_grad_norm=0.5,
        num_steps=30,
        named_losses=losses,
        gamma=0.99,
        use_gae=True,
        gae_lambda=0.95,
        advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
        pipeline_stages=stages,
        lr_scheduler_builder=scheduler_builder,
    )
def training_pipeline(cls, **kwargs):
    """Build an imitation pipeline via ``cls._training_pipeline``.

    A single stage optimizes ``"imitation_loss"`` for
    ``cls.TOTAL_IL_TRAIN_STEPS`` steps, with a teacher-forcing ``LinearDecay``
    whose start and end values are both 1.0.

    The PPO defaults are fetched only to bound ``num_mini_batch`` and
    ``update_repeats``: each is the smaller of the PPO and imitation
    defaults. The PPO loss itself is not registered. ``**kwargs`` is accepted
    but not used.
    """
    n_steps = cls.TOTAL_IL_TRAIN_STEPS
    ppo_defaults = cls.rl_loss_default("ppo", steps=-1)
    il_defaults = cls.rl_loss_default("imitation")

    losses = {
        "imitation_loss": il_defaults["loss"],
    }
    forced_stage = PipelineStage(
        loss_names=["imitation_loss"],
        teacher_forcing=LinearDecay(startp=1.0, endp=1.0, steps=n_steps),
        max_stage_steps=n_steps,
    )

    mini_batches = min(
        ppo_defaults["num_mini_batch"], il_defaults["num_mini_batch"]
    )
    repeats = min(
        ppo_defaults["update_repeats"], il_defaults["update_repeats"]
    )

    return cls._training_pipeline(
        named_losses=losses,
        pipeline_stages=[forced_stage],
        num_mini_batch=mini_batches,
        update_repeats=repeats,
        total_train_steps=n_steps,
    )
def training_pipeline(cls, **kwargs):
    """Assemble a single-stage PPO ``TrainingPipeline`` of ``int(1e6)`` steps.

    The PPO loss is registered as a ``Builder`` (``PPO`` with empty
    ``kwargs`` and ``PPOConfig`` as ``default``) rather than as an
    instantiated loss. ``**kwargs`` is accepted but not used.
    """
    total_steps = int(1e6)

    adam_builder = Builder(optim.Adam, {"lr": 3e-4})
    losses = {
        "ppo_loss": Builder(PPO, kwargs={}, default=PPOConfig),
    }
    stages = [
        PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps),
    ]
    scheduler_builder = Builder(
        LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}
    )

    return TrainingPipeline(
        save_interval=200000,
        metric_accumulate_interval=1,
        optimizer_builder=adam_builder,
        num_mini_batch=2,
        update_repeats=3,
        max_grad_norm=0.5,
        num_steps=30,
        named_losses=losses,
        gamma=0.99,
        use_gae=True,
        gae_lambda=0.95,
        advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
        pipeline_stages=stages,
        lr_scheduler_builder=scheduler_builder,
    )
def rl_loss_default(cls, alg: str, steps: Optional[int] = None):
    """Return the default loss and update settings for algorithm ``alg``.

    Args:
        alg: One of ``"ppo"``, ``"a2c"`` or ``"imitation"``.
        steps: Step count passed to the ``LinearDecay`` used as PPO's
            ``clip_decay``. Required when ``alg == "ppo"``; not read for the
            other algorithms.

    Returns:
        A dict with the keys ``"loss"``, ``"num_mini_batch"`` and
        ``"update_repeats"``. For ``"ppo"`` the loss is a ``Builder``; for
        ``"a2c"`` and ``"imitation"`` it is an instantiated loss object.

    Raises:
        AssertionError: If ``alg == "ppo"`` and ``steps`` is ``None``.
        NotImplementedError: If ``alg`` is not one of the supported names.
    """
    if alg == "ppo":
        assert steps is not None, "`steps` must be provided when alg == 'ppo'."
        return {
            "loss": Builder(
                PPO, kwargs={"clip_decay": LinearDecay(steps)}, default=PPOConfig,
            ),
            "num_mini_batch": cls.PPO_NUM_MINI_BATCH,
            "update_repeats": 4,
        }
    elif alg == "a2c":
        return {
            "loss": A2C(**A2CConfig),
            "num_mini_batch": 1,
            "update_repeats": 1,
        }
    elif alg == "imitation":
        return {
            "loss": Imitation(),
            "num_mini_batch": cls.PPO_NUM_MINI_BATCH,
            "update_repeats": 4,
        }
    else:
        # Name the offending value so a misconfigured experiment is easy to
        # diagnose; the exception type is unchanged.
        raise NotImplementedError(
            "Unsupported algorithm {!r}; expected one of "
            "'ppo', 'a2c' or 'imitation'.".format(alg)
        )
def training_pipeline(self, **kwargs):
    """Assemble a single-stage ``TrainingPipeline`` combining two losses.

    The stage runs for 300,000,000 steps and optimizes both ``"ppo_loss"``
    and ``"grouped_action_imitation"``. The grouped loss receives two action
    groups built from ``ObjectNavTask.class_action_names()``: the indices of
    every action other than ``robothor_constants.END``, and the index of
    ``robothor_constants.END``. ``**kwargs`` is accepted but not used.
    """
    total_steps = int(300000000)

    action_names = ObjectNavTask.class_action_names()

    # Indices of all actions whose name differs from END.
    non_end_indices = set()
    for idx, name in enumerate(action_names):
        if name != robothor_constants.END:
            non_end_indices.add(idx)

    # Index of the (first) END action, as a one-element set.
    end_indices = {action_names.index(robothor_constants.END)}

    adam_builder = Builder(optim.Adam, {"lr": 3e-4})
    losses = {
        "ppo_loss": PPO(**PPOConfig),
        "grouped_action_imitation": GroupedActionImitation(
            nactions=len(ObjectNavTask.class_action_names()),
            action_groups=[non_end_indices, end_indices],
        ),
    }
    stages = [
        PipelineStage(
            loss_names=["ppo_loss", "grouped_action_imitation"],
            max_stage_steps=total_steps,
        )
    ]
    scheduler_builder = Builder(
        LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}
    )

    return TrainingPipeline(
        save_interval=5000000,
        metric_accumulate_interval=10000,
        optimizer_builder=adam_builder,
        num_mini_batch=1,
        update_repeats=4,
        max_grad_norm=0.5,
        num_steps=128,
        named_losses=losses,
        gamma=0.99,
        use_gae=True,
        gae_lambda=0.95,
        advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        pipeline_stages=stages,
        lr_scheduler_builder=scheduler_builder,
    )
def _training_pipeline(  # type:ignore
    cls,
    named_losses: Dict[str, Union[Loss, Builder]],
    pipeline_stages: List[PipelineStage],
    num_mini_batch: int,
    update_repeats: int,
    total_train_steps: int,
    lr: Optional[float] = None,
):
    """Build a ``TrainingPipeline`` from shared class-level defaults.

    Args:
        named_losses: Mapping from loss name to loss (or loss ``Builder``).
        pipeline_stages: Stages passed through unchanged.
        num_mini_batch: Passed through unchanged.
        update_repeats: Passed through unchanged.
        total_train_steps: Step count for the ``LinearDecay`` used as the
            ``LambdaLR`` ``lr_lambda`` when ``cls.USE_LR_DECAY`` is truthy.
        lr: Learning rate for the Adam optimizer builder. When ``None``
            (the default), ``cls.DEFAULT_LR`` is used.

    Returns:
        The configured ``TrainingPipeline``.
    """
    # Honor an explicitly supplied learning rate. Previously the argument was
    # unconditionally overwritten with cls.DEFAULT_LR and so had no effect.
    lr = cls.DEFAULT_LR if lr is None else lr

    num_steps = cls.ROLLOUT_STEPS
    # Interval is delegated to the class hook (original note: "Log every 10
    # max length tasks").
    metric_accumulate_interval = cls.METRIC_ACCUMULATE_INTERVAL()
    # NOTE(review): very large interval; presumably intended to suppress
    # intermediate checkpoints -- confirm.
    save_interval = 2 ** 31
    gamma = 0.99
    # GAE is turned off whenever a loss is registered as "reinforce_loss".
    use_gae = "reinforce_loss" not in named_losses
    gae_lambda = 0.99
    max_grad_norm = 0.5

    return TrainingPipeline(
        save_interval=save_interval,
        metric_accumulate_interval=metric_accumulate_interval,
        optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
        num_mini_batch=num_mini_batch,
        update_repeats=update_repeats,
        max_grad_norm=max_grad_norm,
        num_steps=num_steps,
        named_losses=named_losses,
        gamma=gamma,
        use_gae=use_gae,
        gae_lambda=gae_lambda,
        advance_scene_rollout_period=None,
        should_log=cls.SHOULD_LOG,
        pipeline_stages=pipeline_stages,
        lr_scheduler_builder=Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=total_train_steps)}  # type: ignore
        )
        if cls.USE_LR_DECAY
        else None,
    )
def training_pipeline(cls, **kwargs):
    """Build an imitation pipeline via ``cls._training_pipeline``.

    A single stage optimizes ``"imitation_loss"`` for
    ``cls.TOTAL_IL_TRAIN_STEPS`` steps. Its teacher-forcing ``LinearDecay``
    goes from 1.0 to 0.0 over half of that step count (integer division).
    Mini-batch and update-repeat counts come from the imitation defaults.
    ``**kwargs`` is accepted but not used.
    """
    n_steps = cls.TOTAL_IL_TRAIN_STEPS
    il_defaults = cls.rl_loss_default("imitation")

    losses = {"imitation_loss": il_defaults["loss"]}
    annealed_stage = PipelineStage(
        loss_names=["imitation_loss"],
        teacher_forcing=LinearDecay(startp=1.0, endp=0.0, steps=n_steps // 2),
        max_stage_steps=n_steps,
    )

    return cls._training_pipeline(
        named_losses=losses,
        pipeline_stages=[annealed_stage],
        num_mini_batch=il_defaults["num_mini_batch"],
        update_repeats=il_defaults["update_repeats"],
        total_train_steps=n_steps,
    )
def training_pipeline(cls, **kwargs) -> TrainingPipeline:
    """Assemble a single-stage PPO ``TrainingPipeline`` of 150,000 steps.

    The stage optimizes ``"ppo_loss"`` (``PPO(**PPOConfig)``); the step budget
    is also given to the ``LinearDecay`` used as the ``LambdaLR``
    ``lr_lambda``. ``advance_scene_rollout_period`` is ``None``.
    ``**kwargs`` is accepted but not used.
    """
    total_steps = int(150000)

    losses = {"ppo_loss": PPO(**PPOConfig)}  # type:ignore
    stages = [
        PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps),
    ]
    adam_builder = Builder(optim.Adam, {"lr": 1e-4})
    scheduler_builder = Builder(
        LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}  # type:ignore
    )

    return TrainingPipeline(
        named_losses=losses,
        pipeline_stages=stages,
        optimizer_builder=adam_builder,
        num_mini_batch=4,
        update_repeats=3,
        max_grad_norm=0.5,
        num_steps=16,
        gamma=0.99,
        use_gae=True,
        gae_lambda=0.95,
        advance_scene_rollout_period=None,
        save_interval=10000,
        metric_accumulate_interval=1,
        lr_scheduler_builder=scheduler_builder,
    )