def config() -> Config:
    c = Config()
    set_env(c, EXPAND)
    c.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.set_net_fn('actor-critic', a2c_conv())
    c.grad_clip = 0.5
    c.episode_log_freq = 100
    c.eval_deterministic = False
    return c

def config() -> Config:
    c = Config()
    c.max_steps = int(1e6)
    c.nworkers = 8
    c.set_parallel_env(lambda env_gen, num_w: MultiProcEnv(env_gen, num_w))
    c.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.ppo_clip = 0.5
    if not LOG_DIR.exists():
        LOG_DIR.mkdir()
    c.logger.set_dir(LOG_DIR)
    return c

def main( envname: str = "CartPole-v0", max_steps: int = 1000000, rpf: bool = False, replay_prob: float = 0.5, prior_scale: float = 1.0, ) -> Config: c = Config() c.set_optimizer(lambda params: optim.Adam(params)) c.set_explorer(lambda: explore.Greedy()) c.set_explorer(lambda: explore.Greedy(), key="eval") c.set_env(lambda: ClassicControl(envname)) c.max_steps = max_steps c.episode_log_freq = 100 c.replay_prob = replay_prob if rpf: c.set_net_fn("bootdqn", bootstrap.rpf_fc_separated(10, prior_scale=prior_scale)) c.set_replay_buffer(lambda capacity: UniformReplayBuffer( BootDQNReplayFeed, capacity=capacity)) return c
def config() -> Config:
    c = Config()
    c.set_env(lambda: PyBullet('Hopper'))
    c.set_net_fn('actor-critic',
                 net.actor_critic.fc_shared(policy=SeparateStdGaussianHead))
    c.set_parallel_env(pybullet_parallel())
    c.set_optimizer(lambda params: Adam(params, lr=3.0e-4, eps=1.0e-4))
    c.max_steps = int(2e6)
    c.grad_clip = 0.5
    # PPO parameters
    c.value_loss_weight = 0.5
    c.entropy_weight = 0.0
    c.gae_lambda = 0.95
    c.nworkers = 4
    c.nsteps = 512
    c.ppo_minibatch_size = (4 * 512) // 8
    c.ppo_clip = 0.2
    c.use_gae = True
    c.use_reward_monitor = True
    c.eval_freq = None
    return c

def config() -> Config:
    c = Config()
    c.max_steps = int(4e5)
    c.nworkers = 12
    c.nsteps = 20
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(kfac.default_sgd(eta_max=0.1))
    c.set_preconditioner(lambda net: kfac.KfacPreConditioner(net, **KFAC_KWARGS))
    c.gae_lambda = 0.95
    c.use_gae = False
    c.lr_min = 0.0
    c.value_loss_weight = 0.1
    c.entropy_weight = 0.01
    c.eval_freq = None
    return c

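# KFAC_KWARGS is defined outside this excerpt. A plausible definition (an
# assumption, mirroring the inline arguments spelled out in the Hopper ACKTR
# entry point later in this file) would be:
KFAC_KWARGS = dict(
    tau=12 * 20,
    update_freq=10,
    norm_scaler=kfac.SquaredFisherScaler(eta_max=0.1, delta=0.001),
)
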
def main(envname: str = "Hopper", nworkers: int = 1) -> Config: c = Config() c.set_env(lambda: PyBullet(envname)) c.max_steps = int(1e6) c.set_optimizer(lambda params: Adam(params, lr=1e-3), key="actor") c.set_optimizer(lambda params: Adam(params, lr=1e-3), key="critic") c.replay_size = int(1e6) c.train_start = int(1e4) c.set_explorer(lambda: explore.GaussianNoise()) c.set_explorer(lambda: explore.Greedy(), key="eval") c.eval_deterministic = True c.grad_clip = None c.eval_freq = c.max_steps // 10 c.nworkers = nworkers c.replay_batch_size = 100 * nworkers return c
def config() -> Config:
    c = Config()
    c.set_env(lambda: Atari('Breakout', frame_stack=False))
    c.set_optimizer(kfac.default_sgd(eta_max=0.2))
    c.set_preconditioner(lambda net: kfac.KfacPreConditioner(net, **KFAC_KWARGS))
    c.set_net_fn('actor-critic', net.actor_critic.ac_conv())
    c.nworkers = 32
    c.nsteps = 20
    c.set_parallel_env(atari_parallel())
    c.value_loss_weight = 0.5
    c.use_gae = True
    c.lr_min = 0.0
    c.max_steps = int(2e7)
    c.eval_env = Atari('Breakout')
    c.eval_freq = None
    c.episode_log_freq = 100
    c.use_reward_monitor = True
    c.eval_deterministic = False
    return c

def config() -> Config:
    c = Config()
    c.max_steps = 100000
    return c

def config() -> Config:
    c = Config()
    c.set_env(lambda: Atari('Breakout'))
    c.set_optimizer(lambda params: RMSprop(
        params, lr=0.00025, alpha=0.95, eps=0.01, centered=True))
    c.set_explorer(lambda: EpsGreedy(1.0, LinearCooler(1.0, 0.1, int(1e6))))
    c.set_net_fn('value', net.value.dqn_conv())
    c.replay_size = int(1e6)
    c.batch_size = 32
    c.train_start = 50000
    c.sync_freq = 10000
    c.max_steps = int(2e7)
    c.eval_env = Atari('Breakout')
    c.eval_freq = None
    c.use_reward_monitor = True
    return c

def config() -> Config:
    c = Config()
    c.nworkers = 8
    c.set_parallel_env(lambda _env_gen, _num_w: ParallelRogueEnvExt(StairRewardParallel(
        [CONFIG] * c.nworkers,
        max_steps=500,
        stair_reward=50.0,
        image_setting=EXPAND,
    )))
    c.eval_env = RogueEnvExt(StairRewardEnv(
        config_dict=CONFIG,
        max_steps=500,
        stair_reward=50.0,
        image_setting=EXPAND,
    ))
    c.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.set_net_fn('actor-critic', a2c_conv)
    c.max_steps = int(2e7)
    c.grad_clip = 0.5
    c.episode_log_freq = 100
    c.eval_freq = None
    c.eval_deterministic = False
    # PPO parameters
    c.nsteps = 100
    c.value_loss_weight = 0.5
    c.gae_tau = 0.95
    c.use_gae = True
    c.ppo_minibatch_size = 200
    c.ppo_clip = 0.1
    c.lr_decay = True
    return c

def main( envname: str = "Hopper", tau: float = 12 * 20, update_freq: int = 10, ) -> Config: c = Config() c.max_steps = int(4e5) c.nworkers = 12 c.nsteps = 20 c.set_env(lambda: PyBullet(envname)) c.set_net_fn("actor-critic", net.actor_critic.fc_shared(policy=SeparateStdGaussianDist)) c.set_parallel_env(pybullet_parallel()) c.set_optimizer(kfac.default_sgd(eta_max=0.1)) c.set_preconditioner(lambda net: kfac.KfacPreConditioner( net, tau=tau, update_freq=update_freq, norm_scaler=kfac.SquaredFisherScaler(eta_max=0.1, delta=0.001), )) c.gae_lambda = 0.95 c.use_gae = True c.eval_deterministic = False c.value_loss_weight = 0.5 c.entropy_weight = 0.0 c.eval_freq = None return c
def main(envname: str = "CartPole-v0") -> Config: c = Config() c.set_env(lambda: ClassicControl(envname)) c.max_steps = 100000 return c
def config() -> Config:
    c = Config()
    c.max_steps = int(4e5)
    c.nworkers = 12
    c.nsteps = 20
    c.set_env(lambda: PyBullet('Hopper'))
    c.set_net_fn('actor-critic',
                 net.actor_critic.fc_shared(policy=SeparateStdGaussianHead))
    c.set_parallel_env(pybullet_parallel())
    c.set_optimizer(kfac.default_sgd(eta_max=0.1))
    c.set_preconditioner(lambda net: kfac.KfacPreConditioner(net, **KFAC_KWARGS))
    c.gae_lambda = 0.95
    c.use_gae = True
    c.eval_deterministic = False
    c.value_loss_weight = 0.5
    c.entropy_weight = 0.0
    c.eval_freq = None
    return c

def config() -> Config:
    c = Config()
    c.set_env(lambda: PyBullet('Hopper'))
    c.set_net_fn('actor-critic',
                 net.actor_critic.fc_shared(policy=SeparateStdGaussianHead))
    c.set_parallel_env(pybullet_parallel())
    c.max_steps = int(1e6)
    c.nworkers = 12
    c.nsteps = 5
    c.set_optimizer(lambda params: Adam(params, lr=0.001))
    c.grad_clip = 0.5
    c.gae_lambda = 0.95
    c.value_loss_weight = 0.5
    c.entropy_weight = 0.0
    c.use_gae = False
    c.eval_deterministic = False
    c.eval_freq = None
    return c

def patched_config() -> Config:
    config = Config()
    setattr(config, 'vae_loss', BetaVaeLoss(beta=4.0, decoder_type='bernoulli'))
    setattr(config, 'vae_loss_weight', 1.0)
    return config

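# Equivalent direct-assignment form (illustrative, not in the original file): the
# attribute names are plain Python identifiers, so setattr buys nothing over
# ordinary assignment here.
def patched_config_direct() -> Config:
    config = Config()
    config.vae_loss = BetaVaeLoss(beta=4.0, decoder_type='bernoulli')
    config.vae_loss_weight = 1.0
    return config
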
def set_env(config: Config, expand: ImageSetting) -> None:
    # PPO parameters
    config.nworkers = 32
    config.nsteps = 125
    config.value_loss_weight = 0.5
    config.entropy_weight = 0.01
    config.gae_tau = 0.95
    config.use_gae = True
    config.ppo_minibatch_size = 400
    config.ppo_clip = 0.1
    config.lr_decay = False
    config.set_parallel_env(lambda _env_gen, _num_w: ParallelRogueEnvExt(StairRewardParallel(
        [CONFIG] * config.nworkers,
        max_steps=500,
        stair_reward=50.0,
        image_setting=expand,
    )))
    config.eval_env = RogueEnvExt(StairRewardEnv(
        RogueEnv(config_dict=CONFIG, max_steps=500, stair_reward=50.0, image_setting=expand),
        100.0,
    ))
    config.max_steps = int(2e7) * 2
    config.eval_freq = None
    config.save_freq = int(2e6)

def config() -> Config: c = Config() env_use = "Pong" c.set_env(lambda: Atari(env_use, frame_stack=False)) c.set_optimizer( lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5)) c.set_net_fn('actor-critic', net.actor_critic.ac_conv(rnn=net.GruBlock)) #c.set_net_fn('actor-critic', net.actor_critic.ac_conv()) c.nworkers = 16 c.nsteps = 5 c.set_parallel_env(atari_parallel()) c.grad_clip = 0.5 c.value_loss_weight = 0.5 c.use_gae = False c.max_steps = int(2e7) c.eval_env = Atari(env_use) c.use_reward_monitor = True c.eval_deterministic = False c.episode_log_freq = 100 c.eval_freq = None c.save_freq = None print("GRU on Pong!") return c
def main( envname: str = "Breakout", num_options: int = 4, opt_delib_cost: float = 0.025, opt_beta_adv_merginal: float = 0.01, use_gae: bool = False, ) -> Config: c = Config() c.set_env(lambda: Atari(envname, frame_stack=False)) c.set_optimizer(lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5)) c.set_net_fn("option-critic", net.option_critic.conv_shared(num_options=4)) c.nworkers = 16 c.nsteps = 5 c.set_parallel_env(atari_parallel()) c.grad_clip = 0.5 c.value_loss_weight = 1.0 c.use_gae = use_gae c.max_steps = int(2e7) c.eval_env = Atari(envname) c.eval_deterministic = False c.episode_log_freq = 100 c.opt_delib_cost = opt_delib_cost c.opt_beta_adv_merginal = opt_beta_adv_merginal c.eval_freq = c.max_steps // 10 c.save_freq = None return c
def main(envname: str = "Breakout") -> Config: c = Config() c.set_env(lambda: Atari(envname)) c.set_optimizer(lambda params: RMSprop( params, lr=0.00025, alpha=0.95, eps=0.01, centered=True)) c.set_explorer(lambda: EpsGreedy(1.0, LinearCooler(1.0, 0.1, int(1e6)))) c.set_net_fn("dqn", net.value.dqn_conv()) c.replay_size = int(1e6) c.replay_batch_size = 32 c.train_start = 50000 c.sync_freq = 10000 c.max_steps = int(2e7) c.eval_env = Atari(envname, episodic_life=False) c.eval_freq = None c.seed = 1 return c
def config() -> Config:
    c = Config()
    c.set_env(lambda: Atari('Breakout', frame_stack=False))
    # c.set_net_fn('actor-critic', net.actor_critic.ac_conv(rnn=net.GruBlock))
    c.set_net_fn('actor-critic', net.actor_critic.ac_conv())
    c.set_parallel_env(atari_parallel())
    c.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.max_steps = int(2e7)
    c.grad_clip = 0.5
    # PPO parameters
    c.nworkers = 8
    c.nsteps = 128
    c.value_loss_weight = 0.5
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = 32 * 8
    c.ppo_clip = 0.1
    c.ppo_epochs = 3
    c.use_gae = True
    c.use_reward_monitor = True
    c.lr_min = None  # set 0.0 if you decrease ppo_clip
    # Eval settings
    c.eval_env = Atari('Breakout')
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = None
    return c
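
# Usage sketch (an assumption, not part of the original file): config functions
# like these are typically wired to an agent class through the library's CLI
# helper. `run_cli` and `PPOAgent` are assumed names; check the installed
# version for the exact entry point and signature.
if __name__ == "__main__":
    import os

    from rainy import agents
    from rainy.utils.cli import run_cli

    run_cli(config, agents.PPOAgent, script_path=os.path.realpath(__file__))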