def set_env(config: Config, expand: ImageSetting) -> None:
    """Fill ``config`` in place with PPO settings and Rogue environments.

    Args:
        config: Configuration object that is mutated; nothing is returned.
        expand: Image setting forwarded unchanged to both the parallel
            training environment and the evaluation environment.
    """
    # PPO parameters
    config.nworkers = 32
    config.nsteps = 125
    config.value_loss_weight = 0.5
    config.entropy_weight = 0.01
    config.gae_tau = 0.95
    config.use_gae = True
    config.ppo_minibatch_size = 400
    config.ppo_clip = 0.1
    config.lr_decay = False

    def _make_parallel_env(_env_gen, _num_w):
        # Both arguments are ignored; the number of environments is read
        # from config.nworkers at call time, not at definition time.
        return ParallelRogueEnvExt(
            StairRewardParallel(
                [CONFIG] * config.nworkers,
                max_steps=500,
                stair_reward=50.0,
                image_setting=expand,
            )
        )

    config.set_parallel_env(_make_parallel_env)

    # Evaluation environment: uses the same `max_steps=500` keyword as the
    # parallel training environment above.
    # NOTE(review): assumes RogueEnv accepts `max_steps` (as
    # StairRewardParallel does) -- confirm against its signature.
    config.eval_env = RogueEnvExt(
        StairRewardEnv(
            RogueEnv(
                config_dict=CONFIG,
                max_steps=500,
                stair_reward=50.0,
                image_setting=expand,
            ),
            100.0,
        )
    )

    config.max_steps = int(2e7) * 2
    config.eval_freq = None
    config.save_freq = int(2e6)
def main(
        envname: str = "Hopper",
        tau: float = 12 * 20,
        update_freq: int = 10,
) -> Config:
    """Build a ``Config`` for a PyBullet task with a K-FAC preconditioner.

    Args:
        envname: Name passed to ``PyBullet`` to create the environment.
        tau: Forwarded as ``tau`` to ``kfac.KfacPreConditioner``.
        update_freq: Forwarded as ``update_freq`` to
            ``kfac.KfacPreConditioner``.

    Returns:
        The populated ``Config`` instance.
    """

    def _make_env():
        return PyBullet(envname)

    def _make_preconditioner(net):
        # `net` here is the network handed in by the caller; it shadows the
        # module-level `net` only inside this helper.
        scaler = kfac.SquaredFisherScaler(eta_max=0.1, delta=0.001)
        return kfac.KfacPreConditioner(
            net,
            tau=tau,
            update_freq=update_freq,
            norm_scaler=scaler,
        )

    cfg = Config()

    # Rollout sizes.
    cfg.max_steps = 400_000
    cfg.nworkers = 12
    cfg.nsteps = 20

    # Environment, network, optimizer and preconditioner factories.
    cfg.set_env(_make_env)
    policy_net = net.actor_critic.fc_shared(policy=SeparateStdGaussianDist)
    cfg.set_net_fn("actor-critic", policy_net)
    cfg.set_parallel_env(pybullet_parallel())
    cfg.set_optimizer(kfac.default_sgd(eta_max=0.1))
    cfg.set_preconditioner(_make_preconditioner)

    # Advantage estimation, evaluation and loss weights.
    cfg.gae_lambda = 0.95
    cfg.use_gae = True
    cfg.eval_deterministic = False
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.eval_freq = None
    return cfg
def config() -> Config:
    """Return a ``Config`` using ``MultiProcEnv`` and a K-FAC preconditioner.

    The preconditioner is built from the module-level ``KFAC_KWARGS``, which
    is looked up when the factory is called rather than when this function
    runs.
    """

    def _make_preconditioner(net):
        return kfac.KfacPreConditioner(net, **KFAC_KWARGS)

    cfg = Config()

    # Rollout sizes.
    cfg.max_steps = 400_000
    cfg.nworkers = 12
    cfg.nsteps = 20

    # Parallel environment, optimizer and preconditioner.
    cfg.set_parallel_env(MultiProcEnv)
    cfg.set_optimizer(kfac.default_sgd(eta_max=0.1))
    cfg.set_preconditioner(_make_preconditioner)

    # gae_lambda is set even though use_gae is disabled.
    cfg.gae_lambda = 0.95
    cfg.use_gae = False
    cfg.lr_min = 0.0
    cfg.value_loss_weight = 0.1
    cfg.entropy_weight = 0.01
    cfg.eval_freq = None
    return cfg
def config() -> Config:
    """Return a ``Config`` for PyBullet ``'Hopper'`` optimized with Adam."""

    def _make_env():
        return PyBullet('Hopper')

    def _make_optimizer(params):
        return Adam(params, lr=0.001)

    cfg = Config()

    # Environment and network factories.
    cfg.set_env(_make_env)
    shared_net = net.actor_critic.fc_shared(policy=SeparateStdGaussinanHead)
    cfg.set_net_fn('actor-critic', shared_net)
    cfg.set_parallel_env(pybullet_parallel())

    # Rollout sizes.
    cfg.max_steps = 1_000_000
    cfg.nworkers = 12
    cfg.nsteps = 5

    # Optimizer and gradient clipping.
    cfg.set_optimizer(_make_optimizer)
    cfg.grad_clip = 0.5

    # Loss weights; gae_lambda is set even though use_gae is disabled.
    cfg.gae_lambda = 0.95
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.use_gae = False
    cfg.eval_deterministic = False
    cfg.eval_freq = None
    return cfg
def config() -> Config:
    """Return a ``Config`` for PyBullet ``'Hopper'`` with K-FAC.

    The preconditioner is built from the module-level ``KFAC_KWARGS``, which
    is looked up when the factory is called rather than when this function
    runs.
    """

    def _make_env():
        return PyBullet('Hopper')

    def _make_preconditioner(net):
        # `net` is the network passed by the caller; it shadows the
        # module-level `net` only inside this helper.
        return kfac.KfacPreConditioner(net, **KFAC_KWARGS)

    cfg = Config()

    # Rollout sizes.
    cfg.max_steps = 400_000
    cfg.nworkers = 12
    cfg.nsteps = 20

    # Environment, network, optimizer and preconditioner factories.
    cfg.set_env(_make_env)
    shared_net = net.actor_critic.fc_shared(policy=SeparateStdGaussinanHead)
    cfg.set_net_fn('actor-critic', shared_net)
    cfg.set_parallel_env(pybullet_parallel())
    cfg.set_optimizer(kfac.default_sgd(eta_max=0.1))
    cfg.set_preconditioner(_make_preconditioner)

    # Advantage estimation, evaluation and loss weights.
    cfg.gae_lambda = 0.95
    cfg.use_gae = True
    cfg.eval_deterministic = False
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.eval_freq = None
    return cfg
def config() -> Config:
    """Return a ``Config`` with PPO settings for PyBullet ``'Hopper'``."""
    # Same literals as the rollout sizes assigned below; named here so the
    # minibatch size reads as (workers * steps) // divisor.
    n_workers = 4
    n_steps = 512
    batch_divisor = 8

    def _make_env():
        return PyBullet('Hopper')

    def _make_optimizer(params):
        return Adam(params, lr=3.0e-4, eps=1.0e-4)

    cfg = Config()

    # Environment, network and optimizer factories.
    cfg.set_env(_make_env)
    shared_net = net.actor_critic.fc_shared(policy=SeparateStdGaussinanHead)
    cfg.set_net_fn('actor-critic', shared_net)
    cfg.set_parallel_env(pybullet_parallel())
    cfg.set_optimizer(_make_optimizer)

    cfg.max_steps = 2_000_000
    cfg.grad_clip = 0.5

    # ppo params
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.gae_lambda = 0.95
    cfg.nworkers = n_workers
    cfg.nsteps = n_steps
    cfg.ppo_minibatch_size = (n_workers * n_steps) // batch_divisor
    cfg.ppo_clip = 0.2
    cfg.use_gae = True
    cfg.use_reward_monitor = True
    cfg.eval_freq = None
    return cfg