def main(
    envname: str = "Hopper",
    tau: float = 12 * 20,
    update_freq: int = 10,
) -> Config:
    """Assemble a ``Config`` that trains an actor-critic net with a kfac preconditioner.

    Args:
        envname: Name handed to ``PyBullet`` when an environment is created.
        tau: Forwarded as ``tau`` to ``kfac.KfacPreConditioner``.
        update_freq: Forwarded as ``update_freq`` to ``kfac.KfacPreConditioner``.

    Returns:
        The populated ``Config`` instance.
    """

    def make_env():
        # Called lazily by the config, so `envname` is captured by closure.
        return PyBullet(envname)

    def make_preconditioner(net):
        # The parameter keeps the name `net` used by the original callback;
        # it shadows the `net` module only inside this helper.
        scaler = kfac.SquaredFisherScaler(eta_max=0.1, delta=0.001)
        return kfac.KfacPreConditioner(
            net,
            tau=tau,
            update_freq=update_freq,
            norm_scaler=scaler,
        )

    cfg = Config()

    # Rollout sizing.
    cfg.max_steps = 400_000
    cfg.nworkers = 12
    cfg.nsteps = 20

    # Environment and network factories.
    cfg.set_env(make_env)
    shared_net_fn = net.actor_critic.fc_shared(policy=SeparateStdGaussianDist)
    cfg.set_net_fn("actor-critic", shared_net_fn)
    cfg.set_parallel_env(pybullet_parallel())

    # Optimizer and its preconditioner.
    cfg.set_optimizer(kfac.default_sgd(eta_max=0.1))
    cfg.set_preconditioner(make_preconditioner)

    # Advantage estimation, loss weights and evaluation switches.
    cfg.gae_lambda = 0.95
    cfg.use_gae = True
    cfg.eval_deterministic = False
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.eval_freq = None
    return cfg
def test_rms_save() -> None:
    """Check that ``_rms.mean`` of ``NormalizeObsParallel`` survives save and load.

    The first agent has its ``_rms.mean`` overwritten with ``10.0`` and is saved
    as ``ppo-agent.pth``. A second agent built from the same config loads that
    file from the logger's ``logdir`` and must report a mean of about 10.

    Each agent is closed in a ``finally`` clause so that it is released even
    when saving, loading or the assertion fails.
    """
    c = config()
    c.set_env(lambda: PyBullet())
    c.set_parallel_env(pybullet_parallel())

    ppo = PPOAgent(c)
    try:
        ppo.penv.as_cls("NormalizeObsParallel")._rms.mean = 10.0
        ppo.save("ppo-agent.pth")
    finally:
        # Always release the first agent, even if save() raised.
        ppo.close()

    ppo = PPOAgent(c)
    try:
        path = ppo.config.logger.logdir.joinpath("ppo-agent.pth")
        ppo.load(path)
        mean = ppo.penv.as_cls("NormalizeObsParallel")._rms.mean.mean()
        assert 9.999 <= mean <= 10.001
    finally:
        # A failing assertion must not leave the second agent open.
        ppo.close()
def config() -> Config:
    """Return a ``Config`` for an actor-critic net on PyBullet 'Hopper' using Adam.

    Returns:
        A ``Config`` with 12 workers, 5-step rollouts, GAE disabled and
        periodic evaluation switched off (``eval_freq = None``).
    """

    def make_env():
        return PyBullet('Hopper')

    def make_optimizer(params):
        return Adam(params, lr=0.001)

    cfg = Config()

    # Environment and network factories.
    cfg.set_env(make_env)
    shared_net_fn = net.actor_critic.fc_shared(policy=SeparateStdGaussinanHead)
    cfg.set_net_fn('actor-critic', shared_net_fn)
    cfg.set_parallel_env(pybullet_parallel())

    # Rollout sizing.
    cfg.max_steps = 1_000_000
    cfg.nworkers = 12
    cfg.nsteps = 5

    # Optimization.
    cfg.set_optimizer(make_optimizer)
    cfg.grad_clip = 0.5

    # Loss weights, advantage estimation and evaluation switches.
    cfg.gae_lambda = 0.95
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.use_gae = False
    cfg.eval_deterministic = False
    cfg.eval_freq = None
    return cfg
def main(envname: str = "Hopper") -> rainy.Config:
    """Return a ``rainy.Config`` for an actor-critic net trained with Adam.

    Args:
        envname: Name handed to ``PyBullet`` when an environment is created.

    Returns:
        A ``rainy.Config`` with 12 workers, 5-step rollouts, GAE disabled and
        periodic evaluation switched off (``eval_freq = None``).
    """

    def make_env():
        # Called lazily by the config, so `envname` is captured by closure.
        return PyBullet(envname)

    def make_optimizer(params):
        return Adam(params, lr=0.001)

    cfg = rainy.Config()

    # Environment and network factories.
    cfg.set_env(make_env)
    shared_net_fn = rainy.net.actor_critic.fc_shared(policy=SeparateStdGaussianDist)
    cfg.set_net_fn("actor-critic", shared_net_fn)
    cfg.set_parallel_env(pybullet_parallel())

    # Rollout sizing.
    cfg.max_steps = 1_000_000
    cfg.nworkers = 12
    cfg.nsteps = 5

    # Optimization.
    cfg.set_optimizer(make_optimizer)
    cfg.grad_clip = 0.5

    # Loss weights, advantage estimation and evaluation switches.
    cfg.gae_lambda = 0.95
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.use_gae = False
    cfg.eval_deterministic = False
    cfg.eval_freq = None
    return cfg
def config() -> Config:
    """Return a ``Config`` for PyBullet 'Hopper' with a kfac preconditioner.

    The preconditioner is built from the module-level ``KFAC_KWARGS``.

    Returns:
        A ``Config`` with 12 workers, 20-step rollouts, GAE enabled and
        periodic evaluation switched off (``eval_freq = None``).
    """

    def make_env():
        return PyBullet('Hopper')

    def make_preconditioner(net):
        # The parameter keeps the name `net` used by the original callback;
        # it shadows the `net` module only inside this helper.
        return kfac.KfacPreConditioner(net, **KFAC_KWARGS)

    cfg = Config()

    # Rollout sizing.
    cfg.max_steps = 400_000
    cfg.nworkers = 12
    cfg.nsteps = 20

    # Environment and network factories.
    cfg.set_env(make_env)
    shared_net_fn = net.actor_critic.fc_shared(policy=SeparateStdGaussinanHead)
    cfg.set_net_fn('actor-critic', shared_net_fn)
    cfg.set_parallel_env(pybullet_parallel())

    # Optimizer and its preconditioner.
    cfg.set_optimizer(kfac.default_sgd(eta_max=0.1))
    cfg.set_preconditioner(make_preconditioner)

    # Advantage estimation, loss weights and evaluation switches.
    cfg.gae_lambda = 0.95
    cfg.use_gae = True
    cfg.eval_deterministic = False
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.eval_freq = None
    return cfg
def config() -> Config:
    """Return a ``Config`` with PPO-style settings for PyBullet 'Hopper'.

    Returns:
        A ``Config`` with 4 workers, 512-step rollouts, a minibatch of one
        eighth of a full rollout batch, and periodic evaluation switched off
        (``eval_freq = None``).
    """

    def make_env():
        return PyBullet('Hopper')

    def make_optimizer(params):
        return Adam(params, lr=3e-4, eps=1e-4)

    # Named once so the minibatch size below is visibly derived from them.
    nworkers = 4
    nsteps = 512

    cfg = Config()

    # Environment, network and optimizer factories.
    cfg.set_env(make_env)
    shared_net_fn = net.actor_critic.fc_shared(policy=SeparateStdGaussinanHead)
    cfg.set_net_fn('actor-critic', shared_net_fn)
    cfg.set_parallel_env(pybullet_parallel())
    cfg.set_optimizer(make_optimizer)

    cfg.max_steps = 2_000_000
    cfg.grad_clip = 0.5

    # ppo params
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.gae_lambda = 0.95
    cfg.nworkers = nworkers
    cfg.nsteps = nsteps
    cfg.ppo_minibatch_size = (nworkers * nsteps) // 8
    cfg.ppo_clip = 0.2
    cfg.use_gae = True
    cfg.use_reward_monitor = True
    cfg.eval_freq = None
    return cfg
def main(envname: str = "HalfCheetah") -> rainy.Config:
    """Return a ``rainy.Config`` with PPO-style settings for a PyBullet environment.

    Args:
        envname: Name handed to ``PyBullet`` when an environment is created.

    Returns:
        A ``rainy.Config`` with 16 workers, 128-step rollouts, a minibatch of
        one sixteenth of a full rollout batch, and periodic evaluation switched
        off (``eval_freq = None``).
    """

    def make_env():
        # Called lazily by the config, so `envname` is captured by closure.
        return PyBullet(envname)

    def make_optimizer(params):
        return Adam(params, lr=3e-4, eps=1e-4)

    # Named once so the minibatch size below is visibly derived from them.
    nworkers = 16
    nsteps = 128

    cfg = rainy.Config()

    # Environment, network and optimizer factories.
    cfg.set_env(make_env)
    shared_net_fn = rainy.net.actor_critic.fc_shared(policy=SeparateStdGaussianDist)
    cfg.set_net_fn("actor-critic", shared_net_fn)
    cfg.set_parallel_env(pybullet_parallel())
    cfg.set_optimizer(make_optimizer)

    cfg.max_steps = 2_000_000
    cfg.grad_clip = 0.5

    # ppo params
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.gae_lambda = 0.95
    cfg.nworkers = nworkers
    cfg.nsteps = nsteps
    cfg.ppo_minibatch_size = (nworkers * nsteps) // 16
    cfg.ppo_clip = 0.2
    cfg.use_gae = True
    cfg.eval_freq = None
    return cfg
def main(
    envname: str = "Hopper",
    num_options: int = 2,
    opt_delib_cost: float = 0.0,
    opt_beta_adv_merginal: float = 0.01,
    opt_avg_baseline: bool = False,
    proximal_update_for_mu: bool = False,
) -> rainy.Config:
    """Return a ``rainy.Config`` for an option-critic net with PPO-style settings.

    Args:
        envname: Name handed to ``PyBullet`` when an environment is created.
        num_options: Forwarded as ``num_options`` to
            ``rainy.net.option_critic.fc_shared``.
        opt_delib_cost: Stored on the config as ``opt_delib_cost``.
        opt_beta_adv_merginal: Stored on the config as ``opt_beta_adv_merginal``.
        opt_avg_baseline: Accepted but not read anywhere in this function.
        proximal_update_for_mu: Stored on the config as
            ``proximal_update_for_mu``.

    Returns:
        The populated ``rainy.Config``; evaluation runs every
        ``max_steps // 10`` steps.
    """
    # NOTE(review): `opt_avg_baseline` is never forwarded to the config —
    # confirm whether it should be.

    def make_env():
        # Called lazily by the config, so `envname` is captured by closure.
        return PyBullet(envname)

    def make_optimizer(params):
        return Adam(params, lr=3e-4, eps=1e-4)

    # Named once so the minibatch size below is visibly derived from them.
    nworkers = 4
    nsteps = 512

    cfg = rainy.Config()
    cfg.set_env(make_env)
    parallel_env = pybullet_parallel(normalize_obs=True, normalize_reward=True)
    cfg.set_parallel_env(parallel_env)
    cfg.set_optimizer(make_optimizer)
    cfg.max_steps = 1_000_000
    cfg.grad_clip = 0.5

    # Option settings
    cfg.opt_delib_cost = opt_delib_cost
    cfg.opt_beta_adv_merginal = opt_beta_adv_merginal
    option_net_fn = rainy.net.option_critic.fc_shared(
        num_options=num_options,
        policy=PerOptionStdGaussianDist,
        has_mu=True,
    )
    cfg.set_net_fn("option-critic", option_net_fn)

    # PPO params
    cfg.nworkers = nworkers
    cfg.nsteps = nsteps
    cfg.ppo_minibatch_size = (nworkers * nsteps) // 8
    cfg.ppo_clip = 0.2
    cfg.use_gae = True
    # Read back from the config so this follows whatever max_steps holds.
    cfg.eval_freq = cfg.max_steps // 10
    cfg.entropy_weight = 0.01
    cfg.value_loss_weight = 1.0
    cfg.eval_deterministic = True
    cfg.eval_times = 4
    cfg.proximal_update_for_mu = proximal_update_for_mu
    return cfg