예제 #1
0
def main(
    envname: str = "Hopper",
    tau: float = 12 * 20,
    update_freq: int = 10,
) -> Config:
    """Build a ``Config`` for a K-FAC preconditioned actor-critic PyBullet run.

    Args:
        envname: Name handed to ``PyBullet`` each time an environment is built.
        tau: Forwarded as ``tau`` to ``kfac.KfacPreConditioner``.
        update_freq: Forwarded as ``update_freq`` to ``kfac.KfacPreConditioner``.

    Returns:
        The populated ``Config`` instance.
    """

    def make_env():
        return PyBullet(envname)

    def make_preconditioner(net):
        # Built lazily, once the network exists; ``net`` here is the argument,
        # not the module of the same name used further down.
        return kfac.KfacPreConditioner(
            net,
            tau=tau,
            update_freq=update_freq,
            norm_scaler=kfac.SquaredFisherScaler(eta_max=0.1, delta=0.001),
        )

    cfg = Config()

    # Training length and rollout shape.
    cfg.max_steps = int(4e5)
    cfg.nworkers = 12
    cfg.nsteps = 20

    # Environment and network.
    cfg.set_env(make_env)
    shared_net_fn = net.actor_critic.fc_shared(policy=SeparateStdGaussianDist)
    cfg.set_net_fn("actor-critic", shared_net_fn)
    cfg.set_parallel_env(pybullet_parallel())

    # Optimizer plus K-FAC preconditioner.
    cfg.set_optimizer(kfac.default_sgd(eta_max=0.1))
    cfg.set_preconditioner(make_preconditioner)

    # Advantage estimation, loss weights and evaluation switches.
    cfg.gae_lambda = 0.95
    cfg.use_gae = True
    cfg.eval_deterministic = False
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.eval_freq = None
    return cfg
예제 #2
0
def test_rms_save() -> None:
    """Check that ``_rms.mean`` of ``NormalizeObsParallel`` survives save/load.

    A first agent overwrites the running mean with ``10.0`` and saves itself;
    a second agent built from the same config loads that file and must report
    a mean of (approximately) ``10.0``.

    Each agent is closed in a ``finally`` clause so that a failing ``save``,
    ``load`` or assertion does not leave the agent (and whatever its parallel
    environment holds) open.
    """
    c = config()
    c.set_env(lambda: PyBullet())
    c.set_parallel_env(pybullet_parallel())

    ppo = PPOAgent(c)
    try:
        ppo.penv.as_cls("NormalizeObsParallel")._rms.mean = 10.0
        ppo.save("ppo-agent.pth")
    finally:
        ppo.close()

    ppo = PPOAgent(c)
    try:
        # NOTE(review): save() got a bare file name while load() gets a path
        # under the logger's logdir -- presumably save() resolves names
        # relative to that directory; confirm against the agent implementation.
        path = ppo.config.logger.logdir.joinpath("ppo-agent.pth")
        ppo.load(path)
        mean = ppo.penv.as_cls("NormalizeObsParallel")._rms.mean.mean()
        assert 9.999 <= mean <= 10.001
    finally:
        ppo.close()
예제 #3
0
def config() -> Config:
    """Return a ``Config`` for the PyBullet 'Hopper' task optimized with Adam.

    The config uses a shared fully-connected actor-critic network, 12 workers
    with 5-step rollouts, gradient clipping at 0.5, and leaves GAE switched
    off (``use_gae = False``).
    """

    def make_env():
        return PyBullet('Hopper')

    cfg = Config()

    # Environment and network.
    cfg.set_env(make_env)
    shared_net_fn = net.actor_critic.fc_shared(policy=SeparateStdGaussinanHead)
    cfg.set_net_fn('actor-critic', shared_net_fn)
    cfg.set_parallel_env(pybullet_parallel())

    # Training length and rollout shape.
    cfg.max_steps = int(1e6)
    cfg.nworkers = 12
    cfg.nsteps = 5

    # Optimization.
    cfg.set_optimizer(lambda params: Adam(params, lr=0.001))
    cfg.grad_clip = 0.5

    # Loss weights, advantage estimation and evaluation switches.
    cfg.gae_lambda = 0.95
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.use_gae = False
    cfg.eval_deterministic = False
    cfg.eval_freq = None
    return cfg
예제 #4
0
def main(envname: str = "Hopper") -> rainy.Config:
    """Build a ``rainy.Config`` for an actor-critic PyBullet run using Adam.

    Args:
        envname: Name handed to ``PyBullet`` each time an environment is built.

    Returns:
        The populated ``rainy.Config`` (12 workers, 5-step rollouts, GAE off).
    """

    def make_env():
        return PyBullet(envname)

    def make_optimizer(params):
        return Adam(params, lr=0.001)

    cfg = rainy.Config()

    # Environment and network.
    cfg.set_env(make_env)
    shared_net_fn = rainy.net.actor_critic.fc_shared(policy=SeparateStdGaussianDist)
    cfg.set_net_fn("actor-critic", shared_net_fn)
    cfg.set_parallel_env(pybullet_parallel())

    # Training length and rollout shape.
    cfg.max_steps = int(1e6)
    cfg.nworkers = 12
    cfg.nsteps = 5

    # Optimization.
    cfg.set_optimizer(make_optimizer)
    cfg.grad_clip = 0.5

    # Loss weights, advantage estimation and evaluation switches.
    cfg.gae_lambda = 0.95
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.use_gae = False
    cfg.eval_deterministic = False
    cfg.eval_freq = None
    return cfg
예제 #5
0
def config() -> Config:
    """Return a ``Config`` for PyBullet 'Hopper' with a K-FAC preconditioner.

    The preconditioner is created from the module-level ``KFAC_KWARGS`` when
    the config later calls the registered factory with the network.
    """

    def make_env():
        return PyBullet('Hopper')

    def make_preconditioner(net):
        # ``net`` is the argument here, not the module of the same name;
        # KFAC_KWARGS is looked up at call time, exactly as a lambda would.
        return kfac.KfacPreConditioner(net, **KFAC_KWARGS)

    cfg = Config()

    # Training length and rollout shape.
    cfg.max_steps = int(4e5)
    cfg.nworkers = 12
    cfg.nsteps = 20

    # Environment and network.
    cfg.set_env(make_env)
    shared_net_fn = net.actor_critic.fc_shared(policy=SeparateStdGaussinanHead)
    cfg.set_net_fn('actor-critic', shared_net_fn)
    cfg.set_parallel_env(pybullet_parallel())

    # Optimizer plus K-FAC preconditioner.
    cfg.set_optimizer(kfac.default_sgd(eta_max=0.1))
    cfg.set_preconditioner(make_preconditioner)

    # Advantage estimation, loss weights and evaluation switches.
    cfg.gae_lambda = 0.95
    cfg.use_gae = True
    cfg.eval_deterministic = False
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.eval_freq = None
    return cfg
예제 #6
0
def config() -> Config:
    """Return a PPO-style ``Config`` for the PyBullet 'Hopper' task.

    Rollouts use 4 workers with 512 steps each; the minibatch size is that
    rollout size split into 8 parts.
    """
    # Rollout shape, named once so the minibatch size below is derived from
    # the same numbers.
    n_workers = 4
    n_steps = 512
    n_minibatches = 8

    cfg = Config()

    # Environment and network.
    cfg.set_env(lambda: PyBullet('Hopper'))
    shared_net_fn = net.actor_critic.fc_shared(policy=SeparateStdGaussinanHead)
    cfg.set_net_fn('actor-critic', shared_net_fn)
    cfg.set_parallel_env(pybullet_parallel())

    # Optimization.
    cfg.set_optimizer(lambda params: Adam(params, lr=3.0e-4, eps=1.0e-4))
    cfg.max_steps = int(2e6)
    cfg.grad_clip = 0.5

    # PPO parameters.
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.gae_lambda = 0.95
    cfg.nworkers = n_workers
    cfg.nsteps = n_steps
    cfg.ppo_minibatch_size = (n_workers * n_steps) // n_minibatches
    cfg.ppo_clip = 0.2
    cfg.use_gae = True
    cfg.use_reward_monitor = True
    cfg.eval_freq = None
    return cfg
예제 #7
0
def main(envname: str = "HalfCheetah") -> rainy.Config:
    """Build a PPO-style ``rainy.Config`` for a PyBullet task.

    Args:
        envname: Name handed to ``PyBullet`` each time an environment is built.

    Returns:
        The populated ``rainy.Config``: 16 workers with 128-step rollouts,
        with the minibatch size set to that rollout size split into 16 parts.
    """
    # Rollout shape, named once so the minibatch size below is derived from
    # the same numbers.
    n_workers = 16
    n_steps = 128
    n_minibatches = 16

    def make_env():
        return PyBullet(envname)

    def make_optimizer(params):
        return Adam(params, lr=3.0e-4, eps=1.0e-4)

    cfg = rainy.Config()

    # Environment and network.
    cfg.set_env(make_env)
    shared_net_fn = rainy.net.actor_critic.fc_shared(policy=SeparateStdGaussianDist)
    cfg.set_net_fn("actor-critic", shared_net_fn)
    cfg.set_parallel_env(pybullet_parallel())

    # Optimization.
    cfg.set_optimizer(make_optimizer)
    cfg.max_steps = int(2e6)
    cfg.grad_clip = 0.5

    # PPO parameters.
    cfg.value_loss_weight = 0.5
    cfg.entropy_weight = 0.0
    cfg.gae_lambda = 0.95
    cfg.nworkers = n_workers
    cfg.nsteps = n_steps
    cfg.ppo_minibatch_size = (n_workers * n_steps) // n_minibatches
    cfg.ppo_clip = 0.2
    cfg.use_gae = True
    cfg.eval_freq = None
    return cfg
예제 #8
0
파일: ppoc_hopper.py 프로젝트: kngwyu/Rainy
def main(
    envname: str = "Hopper",
    num_options: int = 2,
    opt_delib_cost: float = 0.0,
    opt_beta_adv_merginal: float = 0.01,
    opt_avg_baseline: bool = False,
    proximal_update_for_mu: bool = False,
) -> rainy.Config:
    """Build a ``rainy.Config`` for an option-critic PPO run on a PyBullet task.

    Args:
        envname: Name handed to ``PyBullet`` each time an environment is built.
        num_options: Number of options given to the option-critic network.
        opt_delib_cost: Stored on the config as ``opt_delib_cost``.
        opt_beta_adv_merginal: Stored on the config as ``opt_beta_adv_merginal``.
        opt_avg_baseline: Stored on the config as ``opt_avg_baseline``.
        proximal_update_for_mu: Stored on the config as ``proximal_update_for_mu``.

    Returns:
        The populated ``rainy.Config``.
    """
    c = rainy.Config()
    c.set_env(lambda: PyBullet(envname))
    c.set_parallel_env(
        pybullet_parallel(normalize_obs=True, normalize_reward=True))
    c.set_optimizer(lambda params: Adam(params, lr=3.0e-4, eps=1.0e-4))
    c.max_steps = int(1e6)
    c.grad_clip = 0.5
    # Option settings
    c.opt_delib_cost = opt_delib_cost
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    # Previously this argument was accepted but silently dropped; forward it
    # like the other option settings.
    # NOTE(review): assumes rainy.Config exposes `opt_avg_baseline` -- confirm.
    c.opt_avg_baseline = opt_avg_baseline
    c.set_net_fn(
        "option-critic",
        rainy.net.option_critic.fc_shared(num_options=num_options,
                                          policy=PerOptionStdGaussianDist,
                                          has_mu=True),
    )
    # PPO params
    c.nworkers = 4
    c.nsteps = 512
    # Rollout size (nworkers * nsteps) split into 8 minibatches.
    c.ppo_minibatch_size = (4 * 512) // 8
    c.ppo_clip = 0.2
    c.use_gae = True
    # Evaluate ten times over the course of training.
    c.eval_freq = c.max_steps // 10
    c.entropy_weight = 0.01
    c.value_loss_weight = 1.0
    c.eval_deterministic = True
    c.eval_times = 4
    c.proximal_update_for_mu = proximal_update_for_mu
    return c