def main(
    envname: str = "Breakout",
    num_options: int = 4,
    opt_delib_cost: float = 0.025,
    opt_beta_adv_merginal: float = 0.01,
    use_gae: bool = False,
) -> Config:
    """Build an option-critic (A2OC) training config for an Atari game.

    Args:
        envname: Atari ROM name (frame stacking handled by the parallel env).
        num_options: number of options in the option-critic head.
        opt_delib_cost: deliberation cost added on option termination.
        opt_beta_adv_merginal: margin added to the advantage used by the
            termination (beta) loss. NOTE: "merginal" spelling is kept —
            it must match the Config attribute used by the library.
        use_gae: whether to use Generalized Advantage Estimation.

    Returns:
        A fully populated ``Config``.
    """
    c = Config()
    c.set_env(lambda: Atari(envname, frame_stack=False))
    c.set_optimizer(lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5))
    # BUG FIX: the net was previously built with a hard-coded num_options=4,
    # silently ignoring the ``num_options`` argument.
    c.set_net_fn("option-critic", net.option_critic.conv_shared(num_options=num_options))
    c.nworkers = 16
    c.nsteps = 5
    c.set_parallel_env(atari_parallel())
    c.grad_clip = 0.5
    c.value_loss_weight = 1.0
    c.use_gae = use_gae
    c.max_steps = int(2e7)
    c.eval_env = Atari(envname)
    c.eval_deterministic = False
    c.episode_log_freq = 100
    c.opt_delib_cost = opt_delib_cost
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    c.eval_freq = c.max_steps // 10
    c.save_freq = None
    return c
def config() -> Config:
    """PPO configuration for the Rogue environment with stair-reward shaping."""
    c = Config()
    c.nworkers = 8

    # Parallel training env: one Rogue instance per worker, rewarded for
    # reaching stairs. The generator/worker-count arguments are unused here.
    def _make_parallel(_env_gen, _num_w):
        return ParallelRogueEnvExt(StairRewardParallel(
            [CONFIG] * c.nworkers,
            max_steps=500,
            stair_reward=50.0,
            image_setting=EXPAND,
        ))

    c.set_parallel_env(_make_parallel)
    # Single evaluation env mirroring the training setup.
    c.eval_env = RogueEnvExt(StairRewardEnv(
        config_dict=CONFIG,
        max_steps=500,
        stair_reward=50.0,
        image_setting=EXPAND,
    ))
    c.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.set_net_fn('actor-critic', a2c_conv)
    c.max_steps = int(2e7)
    c.grad_clip = 0.5
    c.episode_log_freq = 100
    c.eval_freq = None
    c.eval_deterministic = False
    # ppo parameters
    c.nsteps = 100
    c.value_loss_weight = 0.5
    c.gae_tau = 0.95
    c.use_gae = True
    c.ppo_minibatch_size = 200
    c.ppo_clip = 0.1
    c.lr_decay = True
    return c
def config() -> Config:
    """PPO configuration for Atari Breakout with a convolutional actor-critic."""
    c = Config()
    c.set_env(lambda: Atari('Breakout', frame_stack=False))
    # Feed-forward conv net; a recurrent variant would be
    # net.actor_critic.ac_conv(rnn=net.GruBlock).
    c.set_net_fn('actor-critic', net.actor_critic.ac_conv())
    c.set_parallel_env(atari_parallel())
    c.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.max_steps = int(2e7)
    c.grad_clip = 0.5
    # ppo params
    c.nworkers = 8
    c.nsteps = 128
    c.value_loss_weight = 0.5
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = 32 * 8
    c.ppo_clip = 0.1
    c.ppo_epochs = 3
    c.use_gae = True
    c.use_reward_monitor = True
    c.lr_min = None  # set 0.0 if you decrease ppo_clip
    # eval settings
    c.eval_env = Atari('Breakout')
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = None
    return c
def config() -> Config:
    """Minimal A2C configuration; the environment is wired up by set_env."""
    c = Config()
    set_env(c, EXPAND)
    c.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    # NOTE(review): a2c_conv is called here, while other configs pass the
    # function itself — confirm which form this set_net_fn expects.
    c.set_net_fn('actor-critic', a2c_conv())
    c.grad_clip = 0.5
    c.episode_log_freq = 100
    c.eval_deterministic = False
    return c
def config() -> Config:
    """ACKTR-style configuration for Breakout: SGD with a K-FAC preconditioner."""
    c = Config()
    c.set_env(lambda: Atari('Breakout', frame_stack=False))
    c.set_optimizer(kfac.default_sgd(eta_max=0.2))
    # K-FAC second-order preconditioning on top of the SGD step.
    c.set_preconditioner(lambda net: kfac.KfacPreConditioner(net, **KFAC_KWARGS))
    c.set_net_fn('actor-critic', net.actor_critic.ac_conv())
    c.set_parallel_env(atari_parallel())
    # Rollout geometry.
    c.nworkers = 32
    c.nsteps = 20
    # Loss / schedule.
    c.value_loss_weight = 0.5
    c.use_gae = True
    c.lr_min = 0.0
    c.max_steps = int(2e7)
    # Evaluation & logging.
    c.eval_env = Atari('Breakout')
    c.eval_freq = None
    c.episode_log_freq = 100
    c.use_reward_monitor = True
    c.eval_deterministic = False
    return c
def main(
    envname: str = "CartPole-v0",
    max_steps: int = 1000000,
    rpf: bool = False,
    replay_prob: float = 0.5,
    prior_scale: float = 1.0,
) -> Config:
    """Build a bootstrapped-DQN config for a classic-control environment.

    Args:
        envname: gym classic-control environment id.
        max_steps: total training steps.
        rpf: if True, use randomized prior functions (RPF) with 10 heads.
        replay_prob: per-head probability of storing a transition.
        prior_scale: scale of the fixed prior network (RPF only).
    """
    c = Config()
    c.set_optimizer(lambda params: optim.Adam(params))
    # Greedy exploration for both training and evaluation.
    for key in (None, "eval"):
        if key is None:
            c.set_explorer(lambda: explore.Greedy())
        else:
            c.set_explorer(lambda: explore.Greedy(), key=key)
    c.set_env(lambda: ClassicControl(envname))
    c.max_steps = max_steps
    c.episode_log_freq = 100
    c.replay_prob = replay_prob
    if rpf:
        # RPF variant only; otherwise the library's default bootdqn net is kept.
        c.set_net_fn("bootdqn", bootstrap.rpf_fc_separated(10, prior_scale=prior_scale))
    c.set_replay_buffer(
        lambda capacity: UniformReplayBuffer(BootDQNReplayFeed, capacity=capacity)
    )
    return c
def config() -> Config:
    """A2C configuration for Pong with a GRU-based recurrent actor-critic."""
    c = Config()
    env_use = "Pong"
    c.set_env(lambda: Atari(env_use, frame_stack=False))
    c.set_optimizer(lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5))
    # Recurrent conv net (GRU); the feed-forward variant would be ac_conv().
    c.set_net_fn('actor-critic', net.actor_critic.ac_conv(rnn=net.GruBlock))
    c.set_parallel_env(atari_parallel())
    # Rollout geometry.
    c.nworkers = 16
    c.nsteps = 5
    # Loss settings.
    c.grad_clip = 0.5
    c.value_loss_weight = 0.5
    c.use_gae = False
    c.max_steps = int(2e7)
    # Evaluation & logging.
    c.eval_env = Atari(env_use)
    c.use_reward_monitor = True
    c.eval_deterministic = False
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = None
    print("GRU on Pong!")
    return c