Ejemplo n.º 1
0
def main(
    envname: str = "Breakout",
    use_rnn: bool = False,
) -> rainy.Config:
    """Build a PPO training config for an Atari game.

    Args:
        envname: Atari environment name (without version suffix).
        use_rnn: When True, the shared conv network gets a GRU block.

    Returns:
        A fully populated ``rainy.Config``.
    """
    config = rainy.Config()
    config.set_env(lambda: Atari(envname, frame_stack=False))
    # Pick the network factory first, then register it once.
    if use_rnn:
        net_fn = rainy.net.actor_critic.conv_shared(rnn=net.GruBlock)
    else:
        net_fn = rainy.net.actor_critic.conv_shared()
    config.set_net_fn("actor-critic", net_fn)
    config.set_parallel_env(atari_parallel())
    config.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    config.max_steps = int(2e7)
    config.grad_clip = 0.5
    # PPO hyperparameters
    config.nworkers = 8
    config.nsteps = 128
    config.value_loss_weight = 1.0
    config.gae_lambda = 0.95
    config.ppo_minibatch_size = 32 * 8
    config.ppo_clip = 0.1
    config.ppo_epochs = 3
    config.use_gae = True
    config.lr_min = None  # set to 0.0 if you decrease ppo_clip
    # Evaluation / logging settings
    config.eval_env = Atari(envname)
    config.episode_log_freq = 100
    config.eval_freq = None
    config.save_freq = None
    return config
Ejemplo n.º 2
0
def main(
    envname: str = "Breakout",
    num_options: int = 4,
    opt_delib_cost: float = 0.025,
    opt_beta_adv_merginal: float = 0.01,
    use_gae: bool = False,
) -> Config:
    """Build an option-critic training config for an Atari game.

    Args:
        envname: Atari environment name.
        num_options: Number of options in the option-critic head.
        opt_delib_cost: Deliberation cost charged when switching options.
        opt_beta_adv_merginal: Advantage margin used by the termination
            (beta) head.  (Spelling mirrors the Config attribute name.)
        use_gae: Whether to use generalized advantage estimation.

    Returns:
        A fully populated ``Config``.
    """
    c = Config()
    c.set_env(lambda: Atari(envname, frame_stack=False))
    c.set_optimizer(lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5))
    # BUG FIX: previously hard-coded num_options=4, silently ignoring the
    # num_options argument; now the parameter is actually honored.
    c.set_net_fn(
        "option-critic", net.option_critic.conv_shared(num_options=num_options)
    )
    c.nworkers = 16
    c.nsteps = 5
    c.set_parallel_env(atari_parallel())
    c.grad_clip = 0.5
    c.value_loss_weight = 1.0
    c.use_gae = use_gae
    c.max_steps = int(2e7)
    # Evaluation / logging settings
    c.eval_env = Atari(envname)
    c.eval_deterministic = False
    c.episode_log_freq = 100
    c.opt_delib_cost = opt_delib_cost
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    c.eval_freq = c.max_steps // 10
    c.save_freq = None
    return c
Ejemplo n.º 3
0
def config(envname: str = "MontezumaRevenge") -> rainy.Config:
    """Build a large-scale PPO config tuned for hard-exploration Atari games.

    Args:
        envname: Atari environment name (default: MontezumaRevenge).

    Returns:
        A fully populated ``rainy.Config``.
    """
    cfg = rainy.Config()
    cfg.set_env(lambda: Atari(envname, cfg=atari_config(), frame_stack=False))
    cfg.set_parallel_env(atari_parallel())
    cfg.set_net_fn("actor-critic", rainy.net.actor_critic.ac_conv())
    cfg.set_optimizer(lambda params: Adam(params, lr=1.0e-4, eps=1.0e-8))
    cfg.max_steps = int(1e8) * 6
    cfg.grad_clip = 1.0
    # PPO hyperparameters
    cfg.discount_factor = 0.999
    cfg.entropy_weight = 0.001
    cfg.ppo_epochs = 4
    cfg.ppo_clip = 0.1
    cfg.use_gae = True
    cfg.nworkers = 128
    cfg.nsteps = 128
    cfg.value_loss_weight = 0.5
    cfg.gae_lambda = 0.95
    # Four minibatches per rollout batch.
    cfg.ppo_minibatch_size = (cfg.nworkers * cfg.nsteps) // 4
    cfg.use_reward_monitor = True
    # Evaluation / logging settings
    cfg.eval_env = Atari(envname, cfg=atari_config())
    cfg.episode_log_freq = 100
    cfg.eval_freq = None
    cfg.save_freq = None
    return cfg
Ejemplo n.º 4
0
def main(
    envname: str = "Breakout",
    tau: float = 32 * 20 // 2,
    update_freq: int = 10,
) -> rainy.Config:
    """Build an ACKTR-style config (actor-critic + K-FAC preconditioning).

    Args:
        envname: Atari environment name.
        tau: Averaging horizon passed to the K-FAC preconditioner.
            NOTE(review): the default evaluates to the int 320 despite the
            float annotation — presumably half of nworkers * nsteps; confirm.
        update_freq: How often (in steps) the preconditioner updates.

    Returns:
        A fully populated ``rainy.Config``.
    """
    cfg = rainy.Config()
    cfg.set_env(lambda: Atari(envname, frame_stack=False))
    cfg.set_optimizer(kfac.default_sgd(eta_max=0.2))

    def precond(net):
        return kfac.KfacPreConditioner(
            net,
            tau=tau,
            update_freq=update_freq,
            norm_scaler=kfac.SquaredFisherScaler(eta_max=0.2, delta=0.001),
        )

    cfg.set_preconditioner(precond)
    cfg.set_net_fn("actor-critic", rainy.net.actor_critic.conv_shared())
    cfg.nworkers = 32
    cfg.nsteps = 20
    cfg.set_parallel_env(atari_parallel())
    cfg.value_loss_weight = 1.0
    cfg.use_gae = True
    cfg.lr_min = 0.0
    cfg.max_steps = int(2e7)
    # Evaluation / logging settings
    cfg.eval_env = Atari(envname)
    cfg.eval_freq = None
    cfg.episode_log_freq = 100
    cfg.eval_deterministic = False
    return cfg
Ejemplo n.º 5
0
def config() -> Config:
    """Build a PPO config for Breakout with a feed-forward conv network.

    Returns:
        A fully populated ``Config``.
    """
    cfg = Config()
    cfg.set_env(lambda: Atari('Breakout', frame_stack=False))
    cfg.set_net_fn('actor-critic', net.actor_critic.ac_conv())
    cfg.set_parallel_env(atari_parallel())
    cfg.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    cfg.max_steps = int(2e7)
    cfg.grad_clip = 0.5
    # PPO hyperparameters
    cfg.nworkers = 8
    cfg.nsteps = 128
    cfg.value_loss_weight = 0.5
    cfg.gae_lambda = 0.95
    cfg.ppo_minibatch_size = 32 * 8
    cfg.ppo_clip = 0.1
    cfg.ppo_epochs = 3
    cfg.use_gae = True
    cfg.use_reward_monitor = True
    cfg.lr_min = None  # set to 0.0 if you decrease ppo_clip
    # Evaluation / logging settings
    cfg.eval_env = Atari('Breakout')
    cfg.episode_log_freq = 100
    cfg.eval_freq = None
    cfg.save_freq = None
    return cfg
Ejemplo n.º 6
0
def config() -> rnd.RNDConfig:
    """Build a tiny RND config for Venture (small worker/step counts,
    suitable for quick runs or smoke tests).

    Returns:
        A populated ``rnd.RNDConfig``.
    """
    cfg = rnd.RNDConfig()
    cfg.nworkers = 4
    cfg.nsteps = 4
    cfg.set_env(lambda: Atari("Venture", cfg=rnd.atari_config(), frame_stack=False))
    cfg.set_parallel_env(atari_parallel())
    return cfg
Ejemplo n.º 7
0
def config() -> rainy.Config:
    """Build a PPO config for flickering Breakout with a GRU network.

    Starts from the base ``ppo_atari.config()`` and swaps in a flickering,
    non-stacked environment plus a recurrent actor-critic.

    Returns:
        The modified ``rainy.Config``.
    """
    cfg = ppo_atari.config()
    cfg.set_env(lambda: Atari('Breakout', flicker_frame=True, frame_stack=False))
    cfg.set_parallel_env(atari_parallel(frame_stack=False))
    cfg.set_net_fn(
        'actor-critic',
        rainy.net.actor_critic.ac_conv(rnn=rainy.net.GruBlock),
    )
    cfg.eval_env = Atari('Breakout', frame_stack=False)
    return cfg
Ejemplo n.º 8
0
def main(envname: str = "Breakout") -> rainy.Config:
    """Build a PPO config for a flickering Atari game with a GRU network.

    Starts from ``ppo_atari.main(envname)`` and swaps in a flickering,
    non-stacked environment plus a recurrent shared conv network.

    Args:
        envname: Atari environment name.

    Returns:
        The modified ``rainy.Config``.
    """
    cfg = ppo_atari.main(envname)
    cfg.set_env(lambda: Atari(envname, flicker_frame=True, frame_stack=False))
    cfg.set_parallel_env(atari_parallel(frame_stack=False))
    recurrent_net = rainy.net.actor_critic.conv_shared(rnn=rainy.net.GruBlock)
    cfg.set_net_fn("actor-critic", recurrent_net)
    # NOTE(review): eval env keeps frame stacking on, unlike training — confirm intended.
    cfg.eval_env = Atari(envname, flicker_frame=True, frame_stack=True)
    return cfg
Ejemplo n.º 9
0
def config(envname: str = "MontezumaRevenge") -> rnd.RNDConfig:
    """Build a full-scale RND config for hard-exploration Atari games.

    Args:
        envname: Atari environment name (default: MontezumaRevenge).

    Returns:
        A fully populated ``rnd.RNDConfig``.
    """
    cfg = rnd.RNDConfig()
    cfg.set_env(lambda: Atari(envname, cfg=rnd.atari_config(), frame_stack=False))
    cfg.set_parallel_env(atari_parallel())
    cfg.max_steps = int(1e8) * 6
    cfg.grad_clip = 1.0
    # PPO hyperparameters
    cfg.nworkers = 64
    cfg.nsteps = 128
    cfg.value_loss_weight = 0.5
    cfg.gae_lambda = 0.95
    # Four minibatches per rollout batch.
    cfg.ppo_minibatch_size = (cfg.nworkers * cfg.nsteps) // 4
    # Cap the auxiliary-loss sample ratio as worker count grows.
    cfg.auxloss_use_ratio = min(1.0, 32.0 / cfg.nworkers)
    cfg.use_reward_monitor = True
    # Evaluation / logging settings
    cfg.eval_env = Atari(envname, cfg=rnd.atari_config())
    cfg.episode_log_freq = 1000
    cfg.eval_freq = None
    cfg.save_freq = int(1e8)
    return cfg
Ejemplo n.º 10
0
def main(envname: str = "Breakout") -> rainy.Config:
    """Build an A2C config (RMSprop, 16 workers, 5-step rollouts) for Atari.

    Args:
        envname: Atari environment name.

    Returns:
        A fully populated ``rainy.Config``.
    """
    cfg = rainy.Config()
    cfg.set_env(lambda: Atari(envname, frame_stack=False))
    cfg.set_optimizer(lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5))
    cfg.set_net_fn("actor-critic", rainy.net.actor_critic.conv_shared())
    cfg.nworkers = 16
    cfg.nsteps = 5
    cfg.set_parallel_env(atari_parallel())
    cfg.grad_clip = 0.5
    cfg.value_loss_weight = 1.0
    cfg.use_gae = False
    cfg.max_steps = int(2e7)
    # Evaluation / logging settings
    cfg.eval_env = Atari(envname)
    cfg.eval_deterministic = False
    cfg.episode_log_freq = 100
    cfg.eval_freq = None
    cfg.save_freq = None
    return cfg
Ejemplo n.º 11
0
def config() -> Config:
    """Build an ACKTR-style config for Breakout using shared KFAC_KWARGS.

    Returns:
        A fully populated ``Config``.
    """
    cfg = Config()
    cfg.set_env(lambda: Atari('Breakout', frame_stack=False))
    cfg.set_optimizer(kfac.default_sgd(eta_max=0.2))
    cfg.set_preconditioner(lambda net: kfac.KfacPreConditioner(net, **KFAC_KWARGS))
    cfg.set_net_fn('actor-critic', net.actor_critic.ac_conv())
    cfg.nworkers = 32
    cfg.nsteps = 20
    cfg.set_parallel_env(atari_parallel())
    cfg.value_loss_weight = 0.5
    cfg.use_gae = True
    cfg.lr_min = 0.0
    cfg.max_steps = int(2e7)
    # Evaluation / logging settings
    cfg.eval_env = Atari('Breakout')
    cfg.eval_freq = None
    cfg.episode_log_freq = 100
    cfg.use_reward_monitor = True
    cfg.eval_deterministic = False
    return cfg
Ejemplo n.º 12
0
def config() -> Config:
    """Build an A2C config for Pong with a GRU-based actor-critic network.

    Returns:
        A fully populated ``Config``.
    """
    envname = "Pong"
    cfg = Config()
    cfg.set_env(lambda: Atari(envname, frame_stack=False))
    cfg.set_optimizer(lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5))
    cfg.set_net_fn('actor-critic', net.actor_critic.ac_conv(rnn=net.GruBlock))
    cfg.nworkers = 16
    cfg.nsteps = 5
    cfg.set_parallel_env(atari_parallel())
    cfg.grad_clip = 0.5
    cfg.value_loss_weight = 0.5
    cfg.use_gae = False
    cfg.max_steps = int(2e7)
    # Evaluation / logging settings
    cfg.eval_env = Atari(envname)
    cfg.use_reward_monitor = True
    cfg.eval_deterministic = False
    cfg.episode_log_freq = 100
    cfg.eval_freq = None
    cfg.save_freq = None
    print("GRU on Pong!")
    return cfg
Ejemplo n.º 13
0
def main(
    envname: str = "Breakout",
    num_options: int = 4,
    opt_delib_cost: float = 0.0,
    opt_beta_adv_merginal: float = 0.01,
    opt_avg_baseline: bool = False,
    proximal_update_for_mu: bool = False,
) -> rainy.Config:
    """Build a PPO-style option-critic config (with mu head) for Atari.

    Args:
        envname: Atari environment name.
        num_options: Number of options in the option-critic head.
        opt_delib_cost: Deliberation cost charged when switching options.
        opt_beta_adv_merginal: Advantage margin for the termination (beta)
            head.  (Spelling mirrors the Config attribute name.)
        opt_avg_baseline: NOTE(review): currently unused in this body —
            confirm whether a Config field should be set from it.
        proximal_update_for_mu: NOTE(review): currently unused in this body —
            confirm whether a Config field should be set from it.

    Returns:
        A fully populated ``rainy.Config``.
    """
    cfg = rainy.Config()
    cfg.set_env(lambda: Atari(envname, frame_stack=False))
    cfg.set_parallel_env(atari_parallel())
    cfg.set_optimizer(lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5))
    cfg.max_steps = int(2e7)
    cfg.grad_clip = 0.5
    # Option-critic settings
    cfg.opt_delib_cost = opt_delib_cost
    cfg.opt_beta_adv_merginal = opt_beta_adv_merginal
    net_fn = rainy.net.option_critic.conv_shared(
        num_options=num_options, has_mu=True
    )
    cfg.set_net_fn("option-critic", net_fn)
    # PPO hyperparameters
    cfg.nworkers = 8
    cfg.nsteps = 128
    cfg.value_loss_weight = 1.0
    cfg.gae_lambda = 0.95
    cfg.ppo_minibatch_size = 32 * 8
    cfg.ppo_clip = 0.1
    cfg.ppo_epochs = 3
    cfg.use_gae = True
    # Evaluation / logging settings
    cfg.eval_env = Atari(envname)
    cfg.eval_deterministic = False
    cfg.episode_log_freq = 100
    cfg.eval_freq = cfg.max_steps // 10
    cfg.save_freq = None
    return cfg