def main(
    envname: str = "Breakout",
    num_options: int = 4,
    opt_delib_cost: float = 0.025,
    opt_beta_adv_merginal: float = 0.01,
    use_gae: bool = False,
) -> Config:
    c = Config()
    c.set_env(lambda: Atari(envname, frame_stack=False))
    c.set_optimizer(lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5))
    c.set_net_fn("option-critic", net.option_critic.conv_shared(num_options=num_options))
    c.nworkers = 16
    c.nsteps = 5
    c.set_parallel_env(atari_parallel())
    c.grad_clip = 0.5
    c.value_loss_weight = 1.0
    c.use_gae = use_gae
    c.max_steps = int(2e7)
    c.eval_env = Atari(envname)
    c.eval_deterministic = False
    c.episode_log_freq = 100
    c.opt_delib_cost = opt_delib_cost
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    c.eval_freq = c.max_steps // 10
    c.save_freq = None
    return c

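# Usage sketch (hypothetical entry point, not part of the file above):
# build the config with non-default hyperparameters and check the eval
# schedule it derives. Only names defined above are used.
if __name__ == "__main__":
    c = main(envname="Pong", num_options=8)
    assert c.eval_freq == 2_000_000  # int(2e7) // 10: ten evals over training
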
def set_env(config: Config, expand: ImageSetting) -> None:
    # ppo parameters
    config.nworkers = 32
    config.nsteps = 125
    config.value_loss_weight = 0.5
    config.entropy_weight = 0.01
    config.gae_tau = 0.95
    config.use_gae = True
    config.ppo_minibatch_size = 400
    config.ppo_clip = 0.1
    config.lr_decay = False
    config.set_parallel_env(lambda _env_gen, _num_w: ParallelRogueEnvExt(
        StairRewardParallel(
            [CONFIG] * config.nworkers,
            max_steps=500,
            stair_reward=50.0,
            image_setting=expand,
        )))
    config.eval_env = RogueEnvExt(StairRewardEnv(
        RogueEnv(
            config_dict=CONFIG,
            max_steps=500,
            stair_reward=50.0,
            image_setting=expand,
        ),
        100.0,
    ))
    config.max_steps = int(2e7) * 2
    config.eval_freq = None
    config.save_freq = int(2e6)

def config() -> Config:
    c = Config()
    c.nworkers = 8
    c.set_parallel_env(lambda _env_gen, _num_w: ParallelRogueEnvExt(StairRewardParallel(
        [CONFIG] * c.nworkers,
        max_steps=500,
        stair_reward=50.0,
        image_setting=EXPAND,
    )))
    c.eval_env = RogueEnvExt(StairRewardEnv(
        config_dict=CONFIG,
        max_steps=500,
        stair_reward=50.0,
        image_setting=EXPAND,
    ))
    c.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.set_net_fn('actor-critic', a2c_conv)
    c.max_steps = int(2e7)
    c.grad_clip = 0.5
    c.episode_log_freq = 100
    c.eval_freq = None
    c.eval_deterministic = False
    # ppo parameters
    c.nsteps = 100
    c.value_loss_weight = 0.5
    c.gae_tau = 0.95
    c.use_gae = True
    c.ppo_minibatch_size = 200
    c.ppo_clip = 0.1
    c.lr_decay = True
    return c

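# Sanity-check sketch (illustrative, not part of the original file): a PPO
# update here gathers nworkers * nsteps transitions and slices them into
# minibatches of ppo_minibatch_size, so the sizes should divide evenly.
if __name__ == "__main__":
    c = config()
    rollout_size = c.nworkers * c.nsteps             # 8 * 100 = 800 transitions
    assert rollout_size % c.ppo_minibatch_size == 0  # 800 / 200 = 4 minibatches
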
def config() -> Config:
    c = Config()
    c.set_env(lambda: Atari('Breakout', frame_stack=False))
    # c.set_net_fn('actor-critic', net.actor_critic.ac_conv(rnn=net.GruBlock))
    c.set_net_fn('actor-critic', net.actor_critic.ac_conv())
    c.set_parallel_env(atari_parallel())
    c.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.max_steps = int(2e7)
    c.grad_clip = 0.5
    # ppo params
    c.nworkers = 8
    c.nsteps = 128
    c.value_loss_weight = 0.5
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = 32 * 8
    c.ppo_clip = 0.1
    c.ppo_epochs = 3
    c.use_gae = True
    c.use_reward_monitor = True
    c.lr_min = None  # set 0.0 if you decrease ppo_clip
    # eval settings
    c.eval_env = Atari('Breakout')
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = None
    return c

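# The commented-out line above points at a recurrent variant. A minimal
# sketch of switching to it (net.GruBlock appears elsewhere in these
# examples; whether the PPO hyperparameters need retuning for an RNN is
# not specified here):
def config_rnn() -> Config:
    c = config()
    c.set_net_fn('actor-critic', net.actor_critic.ac_conv(rnn=net.GruBlock))
    return c
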
def main(envname: str = "Breakout") -> Config:
    c = Config()
    c.set_env(lambda: Atari(envname))
    c.set_optimizer(lambda params: RMSprop(
        params, lr=0.00025, alpha=0.95, eps=0.01, centered=True))
    c.set_explorer(lambda: EpsGreedy(1.0, LinearCooler(1.0, 0.1, int(1e6))))
    c.set_net_fn("dqn", net.value.dqn_conv())
    c.replay_size = int(1e6)
    c.replay_batch_size = 32
    c.train_start = 50000
    c.sync_freq = 10000
    c.max_steps = int(2e7)
    c.eval_env = Atari(envname, episodic_life=False)
    c.eval_freq = None
    c.seed = 1
    return c

def config() -> Config:
    c = Config()
    c.set_env(lambda: Atari('Breakout'))
    c.set_optimizer(lambda params: RMSprop(
        params, lr=0.00025, alpha=0.95, eps=0.01, centered=True))
    c.set_explorer(lambda: EpsGreedy(1.0, LinearCooler(1.0, 0.1, int(1e6))))
    c.set_net_fn('value', net.value.dqn_conv())
    c.replay_size = int(1e6)
    c.batch_size = 32
    c.train_start = 50000
    c.sync_freq = 10000
    c.max_steps = int(2e7)
    c.eval_env = Atari('Breakout')
    c.eval_freq = None
    c.use_reward_monitor = True
    return c

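# Rough sketch of the exploration schedule the explorer above encodes
# (assumption: LinearCooler anneals linearly from its first argument to
# its second over the given number of steps, then stays flat). This is
# an illustration, not the library's implementation.
def epsilon_at(step: int, start: float = 1.0, end: float = 0.1,
               span: int = int(1e6)) -> float:
    frac = min(step / span, 1.0)
    return start + (end - start) * frac

# epsilon_at(0) == 1.0; epsilon_at(500_000) == 0.55; epsilon_at(2_000_000) == 0.1
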
def config() -> Config:
    c = Config()
    c.set_env(lambda: Atari('Breakout', frame_stack=False))
    c.set_optimizer(kfac.default_sgd(eta_max=0.2))
    c.set_preconditioner(
        lambda net: kfac.KfacPreConditioner(net, **KFAC_KWARGS))
    c.set_net_fn('actor-critic', net.actor_critic.ac_conv())
    c.nworkers = 32
    c.nsteps = 20
    c.set_parallel_env(atari_parallel())
    c.value_loss_weight = 0.5
    c.use_gae = True
    c.lr_min = 0.0
    c.max_steps = int(2e7)
    c.eval_env = Atari('Breakout')
    c.eval_freq = None
    c.episode_log_freq = 100
    c.use_reward_monitor = True
    c.eval_deterministic = False
    return c

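# Scale check (illustrative, not part of the original file): with these
# settings, each K-FAC-preconditioned update consumes one rollout of
# nworkers * nsteps transitions.
if __name__ == "__main__":
    c = config()
    per_update = c.nworkers * c.nsteps       # 32 * 20 = 640 transitions
    print(c.max_steps // per_update)         # ~31250 updates over 2e7 steps
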
def config() -> Config:
    c = Config()
    env_use = "Pong"
    c.set_env(lambda: Atari(env_use, frame_stack=False))
    c.set_optimizer(
        lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5))
    c.set_net_fn('actor-critic', net.actor_critic.ac_conv(rnn=net.GruBlock))
    # c.set_net_fn('actor-critic', net.actor_critic.ac_conv())
    c.nworkers = 16
    c.nsteps = 5
    c.set_parallel_env(atari_parallel())
    c.grad_clip = 0.5
    c.value_loss_weight = 0.5
    c.use_gae = False
    c.max_steps = int(2e7)
    c.eval_env = Atari(env_use)
    c.use_reward_monitor = True
    c.eval_deterministic = False
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = None
    print("GRU on Pong!")
    return c

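# What nsteps=5 with use_gae=False implies, in textbook form (a sketch of
# the standard n-step return, not the library's internal code): the critic
# target for s_t is sum_{k<5} gamma^k * r_{t+k} + gamma^5 * V(s_{t+5}).
def nstep_return(rewards, bootstrap_value, gamma=0.99):
    ret = bootstrap_value
    for r in reversed(rewards):
        ret = r + gamma * ret
    return ret

# nstep_return([1, 0, 0, 0, 1], 0.5, gamma=0.99)  # 5-step target for s_t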