def training_pipeline(cls, **kwargs) -> TrainingPipeline:
    ppo_steps = int(150000)
    return TrainingPipeline(
        named_losses=dict(ppo_loss=PPO(**PPOConfig)),  # type:ignore
        pipeline_stages=[
            PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps)
        ],
        optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4)),
        num_mini_batch=4,
        update_repeats=3,
        max_grad_norm=0.5,
        num_steps=16,
        gamma=0.99,
        use_gae=True,
        gae_lambda=0.95,
        advance_scene_rollout_period=None,
        save_interval=10000,
        metric_accumulate_interval=1,
        lr_scheduler_builder=Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}  # type:ignore
        ),
    )

def training_pipeline(cls, **kwargs) -> TrainingPipeline:
    ppo_steps = int(1.2e6)
    return TrainingPipeline(
        named_losses=dict(
            ppo_loss=PPO(
                clip_param=0.2,
                value_loss_coef=0.5,
                entropy_coef=0.0,
            ),
        ),  # type:ignore
        pipeline_stages=[
            PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps),
        ],
        optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-3)),
        num_mini_batch=1,
        update_repeats=80,
        max_grad_norm=100,
        num_steps=2000,
        gamma=0.99,
        use_gae=False,
        gae_lambda=0.95,
        advance_scene_rollout_period=None,
        save_interval=200000,
        metric_accumulate_interval=50000,
        lr_scheduler_builder=Builder(
            LambdaLR,
            {"lr_lambda": LinearDecay(steps=ppo_steps)},  # type:ignore
        ),
    )

def training_pipeline(cls, **kwargs):
    ppo_steps = int(10000000)
    lr = 3e-4
    num_mini_batch = 1
    update_repeats = 3
    num_steps = 30
    save_interval = 1000000
    log_interval = 100
    gamma = 0.99
    use_gae = True
    gae_lambda = 0.95
    max_grad_norm = 0.5
    return TrainingPipeline(
        save_interval=save_interval,
        metric_accumulate_interval=log_interval,
        optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
        num_mini_batch=num_mini_batch,
        update_repeats=update_repeats,
        max_grad_norm=max_grad_norm,
        num_steps=num_steps,
        named_losses={
            "ppo_loss": PPO(**PPOConfig),
            "yn_im_loss": YesNoImitation(
                yes_action_index=ObjectPlacementTask.class_action_names().index(END)
            ),
        },
        gamma=gamma,
        use_gae=use_gae,
        gae_lambda=gae_lambda,
        advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
        pipeline_stages=[
            PipelineStage(
                loss_names=["ppo_loss", "yn_im_loss"], max_stage_steps=ppo_steps
            )
        ],
        lr_scheduler_builder=Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
        ),
    )

def training_pipeline(cls, **kwargs) -> TrainingPipeline:
    ppo_steps = int(150000)
    return TrainingPipeline(
        named_losses=dict(
            imitation_loss=Imitation(
                cls.SENSORS[1]
            ),  # 0 is Minigrid, 1 is ExpertActionSensor
            ppo_loss=PPO(**PPOConfig, entropy_method_name="conditional_entropy"),
        ),  # type:ignore
        pipeline_stages=[
            PipelineStage(
                teacher_forcing=LinearDecay(
                    startp=1.0,
                    endp=0.0,
                    steps=ppo_steps // 2,
                ),
                loss_names=["imitation_loss", "ppo_loss"],
                max_stage_steps=ppo_steps,
            )
        ],
        optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=1e-4)),
        num_mini_batch=4,
        update_repeats=3,
        max_grad_norm=0.5,
        num_steps=16,
        gamma=0.99,
        use_gae=True,
        gae_lambda=0.95,
        advance_scene_rollout_period=None,
        save_interval=10000,
        metric_accumulate_interval=1,
        lr_scheduler_builder=Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}  # type:ignore
        ),
    )

def training_pipeline(cls, **kwargs): ppo_steps = int(10000000) lr = 3e-4 num_mini_batch = 1 update_repeats = 3 num_steps = 30 save_interval = 1000000 log_interval = 100 gamma = 0.99 use_gae = True gae_lambda = 0.95 max_grad_norm = 0.5 return TrainingPipeline( save_interval=save_interval, metric_accumulate_interval=log_interval, optimizer_builder=Builder(optim.Adam, dict(lr=lr)), num_mini_batch=num_mini_batch, update_repeats=update_repeats, max_grad_norm=max_grad_norm, num_steps=num_steps, named_losses={ "ppo_loss": PPO(**PPOConfig), "nie_loss": NIE_Reg( agent_pose_uuid="agent_pose_global", pose_uuid="object_pose_global", local_keypoints_uuid="3Dkeypoints_local", global_keypoints_uuid="3Dkeypoints_global", obj_update_mask_uuid="object_update_mask", obj_action_mask_uuid="object_action_mask", ), }, gamma=gamma, use_gae=use_gae, gae_lambda=gae_lambda, advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD, pipeline_stages=[ PipelineStage(loss_names=["ppo_loss", "nie_loss"], max_stage_steps=ppo_steps) ], lr_scheduler_builder=Builder( LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}), )
def training_pipeline(cls, **kwargs) -> TrainingPipeline:
    lr = 1e-4
    ppo_steps = int(8e7)  # convergence may be after 1e8
    clip_param = 0.1
    value_loss_coef = 0.5
    entropy_coef = 0.0
    num_mini_batch = 4  # optimal 64
    update_repeats = 10
    max_grad_norm = 0.5
    num_steps = 2048
    gamma = 0.99
    use_gae = True
    gae_lambda = 0.95
    advance_scene_rollout_period = None
    save_interval = 200000
    metric_accumulate_interval = 50000
    return TrainingPipeline(
        named_losses=dict(
            ppo_loss=PPO(
                clip_param=clip_param,
                value_loss_coef=value_loss_coef,
                entropy_coef=entropy_coef,
            ),
        ),  # type:ignore
        pipeline_stages=[
            PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps),
        ],
        optimizer_builder=Builder(cast(optim.Optimizer, optim.Adam), dict(lr=lr)),
        num_mini_batch=num_mini_batch,
        update_repeats=update_repeats,
        max_grad_norm=max_grad_norm,
        num_steps=num_steps,
        gamma=gamma,
        use_gae=use_gae,
        gae_lambda=gae_lambda,
        advance_scene_rollout_period=advance_scene_rollout_period,
        save_interval=save_interval,
        metric_accumulate_interval=metric_accumulate_interval,
        lr_scheduler_builder=Builder(
            LambdaLR,
            {
                "lr_lambda": LinearDecay(steps=ppo_steps, startp=1, endp=1)
            },  # constant learning rate
        ),
    )
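
# The training_pipeline variants above all assume a common set of imports. A
# minimal sketch of those imports, assuming AllenAct's usual module layout;
# the task-specific names (YesNoImitation, NIE_Reg, ObjectPlacementTask, END)
# come from the respective project code rather than AllenAct itself and are
# therefore not shown here.
from typing import cast

from torch import optim
from torch.optim.lr_scheduler import LambdaLR

from allenact.algorithms.onpolicy_sync.losses.imitation import Imitation
from allenact.algorithms.onpolicy_sync.losses.ppo import PPO, PPOConfig
from allenact.utils.experiment_utils import (
    Builder,
    LinearDecay,
    PipelineStage,
    TrainingPipeline,
)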