def main(
    envname: str = "Breakout",
    use_rnn: bool = False,
) -> rainy.Config:
    """Return a PPO config for an Atari game, optionally with a GRU head.

    Args:
        envname: Atari ROM name (without version suffix).
        use_rnn: If True, use the conv-shared network with a GRU block.
    """
    c = rainy.Config()
    c.set_env(lambda: Atari(envname, frame_stack=False))
    if use_rnn:
        # BUG FIX: the original referenced bare `net.GruBlock`, but every other
        # reference in this function goes through the `rainy.net` namespace
        # (and L8-style callers use `rainy.net.GruBlock`), so bare `net` would
        # raise NameError unless separately imported.
        c.set_net_fn(
            "actor-critic",
            rainy.net.actor_critic.conv_shared(rnn=rainy.net.GruBlock),
        )
    else:
        c.set_net_fn("actor-critic", rainy.net.actor_critic.conv_shared())
    c.set_parallel_env(atari_parallel())
    c.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.max_steps = int(2e7)
    c.grad_clip = 0.5
    # PPO params
    c.nworkers = 8
    c.nsteps = 128
    c.value_loss_weight = 1.0
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = 32 * 8
    c.ppo_clip = 0.1
    c.ppo_epochs = 3
    c.use_gae = True
    c.lr_min = None  # set 0.0 if you decrease ppo_clip
    # eval settings
    c.eval_env = Atari(envname)
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = None
    return c
def main(
    envname: str = "Breakout",
    num_options: int = 4,
    opt_delib_cost: float = 0.025,
    opt_beta_adv_merginal: float = 0.01,
    use_gae: bool = False,
) -> Config:
    """Return an option-critic (A2OC-style) config for an Atari game.

    Note: `opt_beta_adv_merginal` keeps the (misspelled) name of the
    underlying config attribute for interface compatibility.
    """
    c = Config()
    c.set_env(lambda: Atari(envname, frame_stack=False))
    c.set_optimizer(lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5))
    # BUG FIX: the network was built with a hard-coded num_options=4,
    # silently ignoring the `num_options` argument.
    c.set_net_fn(
        "option-critic",
        net.option_critic.conv_shared(num_options=num_options),
    )
    c.nworkers = 16
    c.nsteps = 5
    c.set_parallel_env(atari_parallel())
    c.grad_clip = 0.5
    c.value_loss_weight = 1.0
    c.use_gae = use_gae
    c.max_steps = int(2e7)
    # eval settings
    c.eval_env = Atari(envname)
    c.eval_deterministic = False
    c.episode_log_freq = 100
    # option-specific hyperparameters
    c.opt_delib_cost = opt_delib_cost
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    c.eval_freq = c.max_steps // 10
    c.save_freq = None
    return c
def config(envname: str = "MontezumaRevenge") -> rainy.Config:
    """Build a PPO configuration tuned for hard-exploration Atari games."""
    c = rainy.Config()
    c.set_env(lambda: Atari(envname, cfg=atari_config(), frame_stack=False))
    c.set_parallel_env(atari_parallel())
    c.set_net_fn("actor-critic", rainy.net.actor_critic.ac_conv())
    c.set_optimizer(lambda params: Adam(params, lr=1.0e-4, eps=1.0e-8))
    # Rollout sizes, kept in locals so the minibatch size is derived once.
    workers, horizon = 128, 128
    settings = {
        "max_steps": int(1e8) * 6,
        "grad_clip": 1.0,
        # PPO hyperparameters
        "discount_factor": 0.999,
        "entropy_weight": 0.001,
        "ppo_epochs": 4,
        "ppo_clip": 0.1,
        "use_gae": True,
        "nworkers": workers,
        "nsteps": horizon,
        "value_loss_weight": 0.5,
        "gae_lambda": 0.95,
        "ppo_minibatch_size": (workers * horizon) // 4,
        "use_reward_monitor": True,
        # Evaluation settings
        "episode_log_freq": 100,
        "eval_freq": None,
        "save_freq": None,
    }
    for attr, value in settings.items():
        setattr(c, attr, value)
    c.eval_env = Atari(envname, cfg=atari_config())
    return c
def main(
    envname: str = "Breakout",
    tau: float = 32 * 20 // 2,
    update_freq: int = 10,
) -> rainy.Config:
    """Build an A2C + K-FAC (ACKTR-style) configuration for an Atari game."""
    eta_max = 0.2  # shared between the SGD schedule and the Fisher scaler
    c = rainy.Config()
    c.set_env(lambda: Atari(envname, frame_stack=False))
    c.set_optimizer(kfac.default_sgd(eta_max=eta_max))

    def precond(model):
        scaler = kfac.SquaredFisherScaler(eta_max=eta_max, delta=0.001)
        return kfac.KfacPreConditioner(
            model,
            tau=tau,
            update_freq=update_freq,
            norm_scaler=scaler,
        )

    c.set_preconditioner(precond)
    c.set_net_fn("actor-critic", rainy.net.actor_critic.conv_shared())
    c.set_parallel_env(atari_parallel())
    c.nworkers = 32
    c.nsteps = 20
    c.value_loss_weight = 1.0
    c.use_gae = True
    c.lr_min = 0.0
    c.max_steps = int(2e7)
    # Evaluation settings
    c.eval_env = Atari(envname)
    c.eval_freq = None
    c.episode_log_freq = 100
    c.eval_deterministic = False
    return c
def config(envname: str = 'Breakout') -> Config:
    """Return a PPO config for an Atari game.

    Generalized: the environment name was hard-coded to 'Breakout'; it is now
    a parameter whose default preserves the original behavior.
    """
    c = Config()
    c.set_env(lambda: Atari(envname, frame_stack=False))
    c.set_net_fn('actor-critic', net.actor_critic.ac_conv())
    c.set_parallel_env(atari_parallel())
    c.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.max_steps = int(2e7)
    c.grad_clip = 0.5
    # ppo params
    c.nworkers = 8
    c.nsteps = 128
    c.value_loss_weight = 0.5
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = 32 * 8
    c.ppo_clip = 0.1
    c.ppo_epochs = 3
    c.use_gae = True
    c.use_reward_monitor = True
    c.lr_min = None  # set 0.0 if you decrease ppo_clip
    # eval settings
    c.eval_env = Atari(envname)
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = None
    return c
def config() -> rnd.RNDConfig:
    """Build a small-scale RND configuration on Venture (debug-sized rollout)."""
    c = rnd.RNDConfig()
    c.set_env(
        lambda: Atari("Venture", cfg=rnd.atari_config(), frame_stack=False)
    )
    c.set_parallel_env(atari_parallel())
    # Tiny rollout: 4 workers x 4 steps.
    c.nworkers = 4
    c.nsteps = 4
    return c
def config() -> rainy.Config:
    """Build a PPO + GRU config on flickering Breakout (partially observable)."""
    # Start from the shared PPO Atari defaults, then override for flicker.
    c = ppo_atari.config()
    c.set_env(lambda: Atari('Breakout', flicker_frame=True, frame_stack=False))
    c.set_parallel_env(atari_parallel(frame_stack=False))
    recurrent_net = rainy.net.actor_critic.ac_conv(rnn=rainy.net.GruBlock)
    c.set_net_fn('actor-critic', recurrent_net)
    # NOTE(review): the eval env does not set flicker_frame=True, unlike the
    # training env — confirm this asymmetry is intentional.
    c.eval_env = Atari('Breakout', frame_stack=False)
    return c
def main(envname: str = "Breakout") -> rainy.Config:
    """Build a PPO + GRU config on a flickering (partially observable) Atari game."""
    # Inherit the shared PPO Atari defaults and override for flicker frames.
    c = ppo_atari.main(envname)
    c.set_env(lambda: Atari(envname, flicker_frame=True, frame_stack=False))
    c.set_parallel_env(atari_parallel(frame_stack=False))
    recurrent_net = rainy.net.actor_critic.conv_shared(rnn=rainy.net.GruBlock)
    c.set_net_fn("actor-critic", recurrent_net)
    # Evaluation also uses flicker frames, but with frame stacking enabled.
    c.eval_env = Atari(envname, flicker_frame=True, frame_stack=True)
    return c
def config(envname: str = "MontezumaRevenge") -> rnd.RNDConfig:
    """Build a full-scale RND configuration for hard-exploration Atari games."""
    # Rollout geometry, named so derived quantities are computed from one place.
    workers, horizon = 64, 128
    c = rnd.RNDConfig()
    c.set_env(lambda: Atari(envname, cfg=rnd.atari_config(), frame_stack=False))
    c.set_parallel_env(atari_parallel())
    c.max_steps = int(1e8) * 6
    c.grad_clip = 1.0
    # ppo params
    c.nworkers = workers
    c.nsteps = horizon
    c.value_loss_weight = 0.5
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = (workers * horizon) // 4
    c.auxloss_use_ratio = min(1.0, 32.0 / workers)
    c.use_reward_monitor = True
    # eval settings
    c.eval_env = Atari(envname, cfg=rnd.atari_config())
    c.episode_log_freq = 1000
    c.eval_freq = None
    c.save_freq = int(1e8)
    return c
def main(envname: str = "Breakout") -> rainy.Config:
    """Build an A2C configuration for an Atari game."""
    c = rainy.Config()
    c.set_env(lambda: Atari(envname, frame_stack=False))
    c.set_parallel_env(atari_parallel())
    optimizer = lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5)
    c.set_optimizer(optimizer)
    c.set_net_fn("actor-critic", rainy.net.actor_critic.conv_shared())
    # Rollout: 16 workers x 5 steps, no GAE.
    c.nworkers = 16
    c.nsteps = 5
    c.use_gae = False
    c.grad_clip = 0.5
    c.value_loss_weight = 1.0
    c.max_steps = int(2e7)
    # Evaluation settings
    c.eval_env = Atari(envname)
    c.eval_deterministic = False
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = None
    return c
def config() -> Config:
    """Build an A2C + K-FAC (ACKTR-style) configuration on Breakout."""
    c = Config()
    c.set_env(lambda: Atari('Breakout', frame_stack=False))
    c.set_optimizer(kfac.default_sgd(eta_max=0.2))

    def precond(net):
        # Preconditioner options come from the module-level KFAC_KWARGS.
        return kfac.KfacPreConditioner(net, **KFAC_KWARGS)

    c.set_preconditioner(precond)
    c.set_net_fn('actor-critic', net.actor_critic.ac_conv())
    c.set_parallel_env(atari_parallel())
    # Rollout: 32 workers x 20 steps with GAE.
    c.nworkers = 32
    c.nsteps = 20
    c.use_gae = True
    c.value_loss_weight = 0.5
    c.lr_min = 0.0
    c.max_steps = int(2e7)
    # Evaluation settings
    c.eval_env = Atari('Breakout')
    c.eval_freq = None
    c.episode_log_freq = 100
    c.use_reward_monitor = True
    c.eval_deterministic = False
    return c
def config() -> Config:
    """Build an A2C + GRU configuration on Pong."""
    env_use = "Pong"
    c = Config()
    c.set_env(lambda: Atari(env_use, frame_stack=False))
    optimizer = lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5)
    c.set_optimizer(optimizer)
    # Recurrent actor-critic: convolutional trunk followed by a GRU block.
    c.set_net_fn('actor-critic', net.actor_critic.ac_conv(rnn=net.GruBlock))
    c.set_parallel_env(atari_parallel())
    # Rollout: 16 workers x 5 steps, no GAE.
    c.nworkers = 16
    c.nsteps = 5
    c.use_gae = False
    c.grad_clip = 0.5
    c.value_loss_weight = 0.5
    c.max_steps = int(2e7)
    c.use_reward_monitor = True
    # Evaluation settings
    c.eval_env = Atari(env_use)
    c.eval_deterministic = False
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = None
    print("GRU on Pong!")
    return c
def main(
    envname: str = "Breakout",
    num_options: int = 4,
    opt_delib_cost: float = 0.0,
    opt_beta_adv_merginal: float = 0.01,
    opt_avg_baseline: bool = False,
    proximal_update_for_mu: bool = False,
) -> rainy.Config:
    """Return a PPO option-critic (PPOC) config for an Atari game.

    Note: `opt_beta_adv_merginal` keeps the (misspelled) name of the
    underlying config attribute for interface compatibility.
    """
    c = rainy.Config()
    c.set_env(lambda: Atari(envname, frame_stack=False))
    c.set_parallel_env(atari_parallel())
    c.set_optimizer(lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5))
    c.max_steps = int(2e7)
    c.grad_clip = 0.5
    # Option settings
    c.opt_delib_cost = opt_delib_cost
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    # BUG FIX: these two arguments were accepted but never written to the
    # config, so callers could not actually change them. Applied here like the
    # other option hyperparameters (attribute names assumed to match the
    # parameter names, as they do for opt_delib_cost — confirm against Config).
    c.opt_avg_baseline = opt_avg_baseline
    c.proximal_update_for_mu = proximal_update_for_mu
    c.set_net_fn(
        "option-critic",
        rainy.net.option_critic.conv_shared(num_options=num_options, has_mu=True),
    )
    # PPO params
    c.nworkers = 8
    c.nsteps = 128
    c.value_loss_weight = 1.0
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = 32 * 8
    c.ppo_clip = 0.1
    c.ppo_epochs = 3
    c.use_gae = True
    # Eval settings
    c.eval_env = Atari(envname)
    c.eval_deterministic = False
    c.episode_log_freq = 100
    c.eval_freq = c.max_steps // 10
    c.save_freq = None
    return c