Example #1
0
def set_env(config: Config, expand: ImageSetting) -> None:
    # ppo parameters
    config.nworkers = 32
    config.nsteps = 125
    config.value_loss_weight = 0.5
    config.entropy_weight = 0.01
    config.gae_tau = 0.95
    config.use_gae = True
    config.ppo_minibatch_size = 400
    config.ppo_clip = 0.1
    config.lr_decay = False
    config.set_parallel_env(lambda _env_gen, _num_w: ParallelRogueEnvExt(
        StairRewardParallel(
            [CONFIG] * config.nworkers,
            max_steps=500,
            stair_reward=50.0,
            image_setting=expand,
        )))
    config.eval_env = RogueEnvExt(
        StairRewardEnv(
            RogueEnv(config_dict=CONFIG,
                     mex_steps=500,
                     stair_reward=50.0,
                     image_setting=expand), 100.0))
    config.max_steps = int(2e7) * 2
    config.eval_freq = None
    config.save_freq = int(2e6)
Example #2
0
def main(
    envname: str = "Hopper",
    tau: float = 12 * 20,
    update_freq: int = 10,
) -> Config:
    c = Config()
    c.max_steps = int(4e5)
    c.nworkers = 12
    c.nsteps = 20
    c.set_env(lambda: PyBullet(envname))
    c.set_net_fn("actor-critic",
                 net.actor_critic.fc_shared(policy=SeparateStdGaussianDist))
    c.set_parallel_env(pybullet_parallel())
    c.set_optimizer(kfac.default_sgd(eta_max=0.1))
    c.set_preconditioner(lambda net: kfac.KfacPreConditioner(
        net,
        tau=tau,
        update_freq=update_freq,
        norm_scaler=kfac.SquaredFisherScaler(eta_max=0.1, delta=0.001),
    ))
    c.gae_lambda = 0.95
    c.use_gae = True
    c.eval_deterministic = False
    c.value_loss_weight = 0.5
    c.entropy_weight = 0.0
    c.eval_freq = None
    return c
Example #3
0
def config() -> Config:
    c = Config()
    c.max_steps = int(4e5)
    c.nworkers = 12
    c.nsteps = 20
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(kfac.default_sgd(eta_max=0.1))
    c.set_preconditioner(
        lambda net: kfac.KfacPreConditioner(net, **KFAC_KWARGS))
    c.gae_lambda = 0.95
    c.use_gae = False
    c.lr_min = 0.0
    c.value_loss_weight = 0.1
    c.entropy_weight = 0.01
    c.eval_freq = None
    return c
Example #4
0
def config() -> Config:
    c = Config()
    c.set_env(lambda: PyBullet('Hopper'))
    c.set_net_fn('actor-critic',
                 net.actor_critic.fc_shared(policy=SeparateStdGaussinanHead))
    c.set_parallel_env(pybullet_parallel())
    c.max_steps = int(1e6)
    c.nworkers = 12
    c.nsteps = 5
    c.set_optimizer(lambda params: Adam(params, lr=0.001))
    c.grad_clip = 0.5
    c.gae_lambda = 0.95
    c.value_loss_weight = 0.5
    c.entropy_weight = 0.0
    c.use_gae = False
    c.eval_deterministic = False
    c.eval_freq = None
    return c
Example #5
0
def config() -> Config:
    c = Config()
    c.max_steps = int(4e5)
    c.nworkers = 12
    c.nsteps = 20
    c.set_env(lambda: PyBullet('Hopper'))
    c.set_net_fn('actor-critic',
                 net.actor_critic.fc_shared(policy=SeparateStdGaussinanHead))
    c.set_parallel_env(pybullet_parallel())
    c.set_optimizer(kfac.default_sgd(eta_max=0.1))
    c.set_preconditioner(
        lambda net: kfac.KfacPreConditioner(net, **KFAC_KWARGS))
    c.gae_lambda = 0.95
    c.use_gae = True
    c.eval_deterministic = False
    c.value_loss_weight = 0.5
    c.entropy_weight = 0.0
    c.eval_freq = None
    return c
def config() -> Config:
    c = Config()
    c.set_env(lambda: PyBullet('Hopper'))
    c.set_net_fn('actor-critic',
                 net.actor_critic.fc_shared(policy=SeparateStdGaussinanHead))
    c.set_parallel_env(pybullet_parallel())
    c.set_optimizer(lambda params: Adam(params, lr=3.0e-4, eps=1.0e-4))
    c.max_steps = int(2e6)
    c.grad_clip = 0.5
    # ppo params
    c.value_loss_weight = 0.5
    c.entropy_weight = 0.0
    c.gae_lambda = 0.95
    c.nworkers = 4
    c.nsteps = 512
    c.ppo_minibatch_size = (4 * 512) // 8
    c.ppo_clip = 0.2
    c.use_gae = True
    c.use_reward_monitor = True
    c.eval_freq = None
    return c