def main(
    envname: str = "Breakout",
    num_options: int = 4,
    opt_delib_cost: float = 0.025,
    opt_beta_adv_merginal: float = 0.01,
    use_gae: bool = False,
) -> Config:
    """Build the Config for an "option-critic" network on an Atari game.

    Args:
        envname: Game name handed to ``Atari`` for both the training and the
            evaluation environment.
        num_options: Number of options forwarded to
            ``net.option_critic.conv_shared``.
        opt_delib_cost: Stored on the config as ``opt_delib_cost``.
        opt_beta_adv_merginal: Stored on the config as
            ``opt_beta_adv_merginal`` (the spelling is part of the public
            interface and is kept as-is).
        use_gae: Stored on the config as ``use_gae``.

    Returns:
        The populated ``Config``.
    """
    c = Config()
    c.set_env(lambda: Atari(envname, frame_stack=False))
    c.set_optimizer(lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5))
    # Forward the caller's value; previously this was hard-coded to 4, so the
    # ``num_options`` argument had no effect.
    c.set_net_fn(
        "option-critic",
        net.option_critic.conv_shared(num_options=num_options),
    )
    c.nworkers = 16
    c.nsteps = 5
    c.set_parallel_env(atari_parallel())
    c.grad_clip = 0.5
    c.value_loss_weight = 1.0
    c.use_gae = use_gae
    c.max_steps = int(2e7)
    # The evaluation environment uses Atari's default frame_stack setting.
    c.eval_env = Atari(envname)
    c.eval_deterministic = False
    c.episode_log_freq = 100
    c.opt_delib_cost = opt_delib_cost
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    # eval_freq is one tenth of max_steps.
    c.eval_freq = c.max_steps // 10
    c.save_freq = None
    return c
def main(
    envname: str = "Hopper",
    tau: float = 12 * 20,
    update_freq: int = 10,
) -> Config:
    """Build the Config for an "actor-critic" network trained with K-FAC.

    Args:
        envname: Name handed to ``PyBullet`` when an environment is created.
        tau: Forwarded to ``kfac.KfacPreConditioner`` as ``tau``.
        update_freq: Forwarded to ``kfac.KfacPreConditioner`` as
            ``update_freq``.

    Returns:
        The populated ``Config``.
    """

    def make_env():
        return PyBullet(envname)

    def make_preconditioner(net):
        # The parameter intentionally keeps the name ``net`` (shadowing the
        # module inside this helper) so the callable's signature is unchanged.
        return kfac.KfacPreConditioner(
            net,
            tau=tau,
            update_freq=update_freq,
            norm_scaler=kfac.SquaredFisherScaler(eta_max=0.1, delta=0.001),
        )

    cfg = Config()
    cfg.max_steps = int(4e5)
    cfg.nworkers = 12
    cfg.nsteps = 20
    cfg.set_env(make_env)
    network_factory = net.actor_critic.fc_shared(policy=SeparateStdGaussianDist)
    cfg.set_net_fn("actor-critic", network_factory)
    cfg.set_parallel_env(pybullet_parallel())
    cfg.set_optimizer(kfac.default_sgd(eta_max=0.1))
    cfg.set_preconditioner(make_preconditioner)
    cfg.gae_lambda = 0.95
    cfg.use_gae = True
    cfg.eval_deterministic = False
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.eval_freq = None
    return cfg
def config() -> Config:
    """Build the Config for the stair-reward Rogue environments with PPO settings.

    Training uses ``StairRewardParallel`` wrapped in ``ParallelRogueEnvExt``;
    evaluation uses a single ``StairRewardEnv`` wrapped in ``RogueEnvExt``.
    Both receive the same step limit, stair reward and image setting.

    Returns:
        The populated ``Config``.
    """
    episode_limit = 500
    stair_bonus = 50.0

    cfg = Config()
    cfg.nworkers = 8

    def make_parallel_env(_env_gen, _num_w):
        # Both arguments are ignored. ``cfg.nworkers`` is read when the
        # environment is actually built, not when this config is created.
        rogue_configs = [CONFIG] * cfg.nworkers
        workers = StairRewardParallel(
            rogue_configs,
            max_steps=episode_limit,
            stair_reward=stair_bonus,
            image_setting=EXPAND,
        )
        return ParallelRogueEnvExt(workers)

    cfg.set_parallel_env(make_parallel_env)

    single_env = StairRewardEnv(
        config_dict=CONFIG,
        max_steps=episode_limit,
        stair_reward=stair_bonus,
        image_setting=EXPAND,
    )
    cfg.eval_env = RogueEnvExt(single_env)

    cfg.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    cfg.set_net_fn("actor-critic", a2c_conv)
    cfg.max_steps = int(2e7)
    cfg.grad_clip = 0.5
    cfg.episode_log_freq = 100
    cfg.eval_freq = None
    cfg.eval_deterministic = False

    # PPO parameters
    cfg.nsteps = 100
    cfg.value_loss_weight = 0.5
    # NOTE(review): sibling configs set `gae_lambda`; confirm `gae_tau` is the
    # attribute name this Config version actually reads.
    cfg.gae_tau = 0.95
    cfg.use_gae = True
    cfg.ppo_minibatch_size = 200
    cfg.ppo_clip = 0.1
    cfg.lr_decay = True
    return cfg
def config() -> Config:
    """Build a base Config: environments via ``set_env``, Adam, "actor-critic" net.

    Returns:
        The populated ``Config``.
    """

    def make_optimizer(params):
        return Adam(params, lr=2.5e-4, eps=1.0e-4)

    cfg = Config()
    # Environment setup is delegated to the module-level ``set_env`` helper.
    set_env(cfg, EXPAND)
    cfg.set_optimizer(make_optimizer)
    cfg.set_net_fn("actor-critic", a2c_conv())
    cfg.grad_clip = 0.5
    cfg.episode_log_freq = 100
    cfg.eval_deterministic = False
    return cfg
def main(envname: str = "Hopper", nworkers: int = 1) -> Config:
    """Build a Config with separate "actor"/"critic" optimizers and a replay buffer.

    Args:
        envname: Name handed to ``PyBullet`` when an environment is created.
        nworkers: Stored as ``nworkers``; the replay batch size is
            ``100 * nworkers``.

    Returns:
        The populated ``Config``.
    """
    cfg = Config()
    cfg.set_env(lambda: PyBullet(envname))
    cfg.max_steps = int(1e6)

    # Both networks get the same Adam settings, each registered with its own
    # freshly created factory.
    for optimizer_key in ("actor", "critic"):
        cfg.set_optimizer(lambda params: Adam(params, lr=1e-3), key=optimizer_key)

    cfg.replay_size = int(1e6)
    cfg.train_start = int(1e4)

    # Default explorer adds Gaussian noise; the "eval" explorer is greedy.
    cfg.set_explorer(lambda: explore.GaussianNoise())
    cfg.set_explorer(lambda: explore.Greedy(), key="eval")

    cfg.eval_deterministic = True
    cfg.grad_clip = None
    # eval_freq is one tenth of max_steps.
    cfg.eval_freq = cfg.max_steps // 10
    cfg.nworkers = nworkers
    cfg.replay_batch_size = 100 * nworkers
    return cfg
def config() -> Config:
    """Build the Config for an "actor-critic" network on PyBullet 'Hopper' with Adam.

    Returns:
        The populated ``Config``.
    """

    def make_env():
        return PyBullet("Hopper")

    def make_optimizer(params):
        return Adam(params, lr=0.001)

    cfg = Config()
    cfg.set_env(make_env)
    network_factory = net.actor_critic.fc_shared(policy=SeparateStdGaussinanHead)
    cfg.set_net_fn("actor-critic", network_factory)
    cfg.set_parallel_env(pybullet_parallel())
    cfg.max_steps = int(1e6)
    cfg.nworkers = 12
    cfg.nsteps = 5
    cfg.set_optimizer(make_optimizer)
    cfg.grad_clip = 0.5
    # gae_lambda is still assigned although use_gae is switched off below.
    cfg.gae_lambda = 0.95
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.use_gae = False
    cfg.eval_deterministic = False
    cfg.eval_freq = None
    return cfg
def config() -> Config:
    """Build the Config for an "actor-critic" network on PyBullet 'Hopper' with K-FAC.

    The preconditioner is a ``kfac.KfacPreConditioner`` configured from the
    module-level ``KFAC_KWARGS``.

    Returns:
        The populated ``Config``.
    """

    def make_env():
        return PyBullet("Hopper")

    def make_preconditioner(net):
        # Parameter name kept as ``net`` so the callable's signature is unchanged.
        return kfac.KfacPreConditioner(net, **KFAC_KWARGS)

    cfg = Config()
    cfg.max_steps = int(4e5)
    cfg.nworkers = 12
    cfg.nsteps = 20
    cfg.set_env(make_env)
    network_factory = net.actor_critic.fc_shared(policy=SeparateStdGaussinanHead)
    cfg.set_net_fn("actor-critic", network_factory)
    cfg.set_parallel_env(pybullet_parallel())
    cfg.set_optimizer(kfac.default_sgd(eta_max=0.1))
    cfg.set_preconditioner(make_preconditioner)
    cfg.gae_lambda = 0.95
    cfg.use_gae = True
    cfg.eval_deterministic = False
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.eval_freq = None
    return cfg
def config() -> Config:
    """Build the Config for an "actor-critic" conv network on Atari 'Breakout' with K-FAC.

    The preconditioner is a ``kfac.KfacPreConditioner`` configured from the
    module-level ``KFAC_KWARGS``.

    Returns:
        The populated ``Config``.
    """

    def make_env():
        return Atari("Breakout", frame_stack=False)

    def make_preconditioner(net):
        # Parameter name kept as ``net`` so the callable's signature is unchanged.
        return kfac.KfacPreConditioner(net, **KFAC_KWARGS)

    cfg = Config()
    cfg.set_env(make_env)
    cfg.set_optimizer(kfac.default_sgd(eta_max=0.2))
    cfg.set_preconditioner(make_preconditioner)
    cfg.set_net_fn("actor-critic", net.actor_critic.ac_conv())
    cfg.nworkers = 32
    cfg.nsteps = 20
    cfg.set_parallel_env(atari_parallel())
    cfg.value_loss_weight = 0.5
    cfg.use_gae = True
    cfg.lr_min = 0.0
    cfg.max_steps = int(2e7)
    # The evaluation environment uses Atari's default frame_stack setting.
    cfg.eval_env = Atari("Breakout")
    cfg.eval_freq = None
    cfg.episode_log_freq = 100
    cfg.use_reward_monitor = True
    cfg.eval_deterministic = False
    return cfg
def config() -> Config:
    """Build the Config for an "actor-critic" conv network with a GRU block on Atari 'Pong'.

    Prints a short banner to stdout before returning.

    Returns:
        The populated ``Config``.
    """
    game = "Pong"

    def make_env():
        return Atari(game, frame_stack=False)

    def make_optimizer(params):
        return RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5)

    cfg = Config()
    cfg.set_env(make_env)
    cfg.set_optimizer(make_optimizer)
    # Alternative without the GRU block, kept for reference:
    #   net.actor_critic.ac_conv()
    cfg.set_net_fn("actor-critic", net.actor_critic.ac_conv(rnn=net.GruBlock))
    cfg.nworkers = 16
    cfg.nsteps = 5
    cfg.set_parallel_env(atari_parallel())
    cfg.grad_clip = 0.5
    cfg.value_loss_weight = 0.5
    cfg.use_gae = False
    cfg.max_steps = int(2e7)
    # The evaluation environment uses Atari's default frame_stack setting.
    cfg.eval_env = Atari(game)
    cfg.use_reward_monitor = True
    cfg.eval_deterministic = False
    cfg.episode_log_freq = 100
    cfg.eval_freq = None
    cfg.save_freq = None
    print("GRU on Pong!")
    return cfg