Code Example #1
def main(
    envname: str = "CartPole-v0",
    num_options: int = 2,
    opt_delib_cost: float = 0.0,
    opt_beta_adv_merginal: float = 0.01,
    use_gae: bool = False,
    opt_avg_baseline: bool = False,
) -> rainy.Config:
    c = rainy.Config()
    c.set_env(lambda: ClassicControl(envname))
    c.max_steps = int(4e5)
    c.nworkers = 12
    c.nsteps = 5
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(lambda params: optim.RMSprop(params, lr=0.0007))
    c.grad_clip = 1.0
    c.eval_freq = c.max_steps // 10
    c.network_log_freq = (c.max_steps // c.batch_size) // 10
    c.entropy_weight = 0.001
    c.value_loss_weight = 1.0
    c.opt_delib_cost = opt_delib_cost
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    c.opt_avg_baseline = opt_avg_baseline
    c.use_gae = use_gae
    c.set_net_fn("option-critic",
                 rainy.net.option_critic.fc_shared(num_options=num_options))
    return c
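These snippets omit their import headers. A minimal preamble for this example, inferred from the identifiers it uses, might look as follows; the rainy.envs paths mirror the fully-qualified names in Code Examples #3 and #7, but treat the exact module locations as assumptions.

# Hedged import sketch for the snippet above (module paths are assumptions,
# inferred from fully-qualified names used elsewhere in this collection).
import rainy
from torch import optim
from rainy.envs import ClassicControl, MultiProcEnv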
Code Example #2
def main(
    envname: str = "Breakout",
    tau: float = 32 * 20 // 2,
    update_freq: int = 10,
) -> rainy.Config:
    c = rainy.Config()
    c.set_env(lambda: Atari(envname, frame_stack=False))
    c.set_optimizer(kfac.default_sgd(eta_max=0.2))
    c.set_preconditioner(lambda net: kfac.KfacPreConditioner(
        net,
        tau=tau,
        update_freq=update_freq,
        norm_scaler=kfac.SquaredFisherScaler(eta_max=0.2, delta=0.001),
    ))
    c.set_net_fn("actor-critic", rainy.net.actor_critic.conv_shared())
    c.nworkers = 32
    c.nsteps = 20
    c.set_parallel_env(atari_parallel())
    c.value_loss_weight = 1.0
    c.use_gae = True
    c.lr_min = 0.0
    c.max_steps = int(2e7)
    c.eval_env = Atari(envname)
    c.eval_freq = None
    c.episode_log_freq = 100
    c.eval_deterministic = False
    return c
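Note that the default tau is tied to the rollout size configured below; spelled out as plain arithmetic:

# tau defaults to 32 * 20 // 2, i.e. half of nworkers * nsteps:
# half the transitions gathered per rollout with the settings above.
nworkers, nsteps = 32, 20
assert nworkers * nsteps // 2 == 320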
Code Example #3
File: actc_atari.py (Project: kngwyu/Rainy)
def main(envname: str = "Breakout", num_options: int = 4) -> rainy.Config:
    c = rainy.Config()

    c.set_env(lambda: rainy.envs.Atari(envname, frame_stack=False))
    c.set_parallel_env(rainy.envs.atari_parallel())
    c.eval_env = rainy.envs.Atari(envname)

    c.max_steps = int(2e7)
    c.nworkers = 16
    c.nsteps = 5

    c.grad_clip = 1.0
    c.eval_freq = c.max_steps // 20
    c.network_log_freq = (c.max_steps // c.batch_size) // 10
    c.entropy_weight = 0.001
    c.value_loss_weight = 1.0

    c.set_optimizer(
        lambda params: optim.RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5))
    c.set_optimizer(lambda params: optim.Adam(params, lr=1e-4),
                    key="termination")
    c.set_net_fn("actor-critic", tc.oac_conv_shared(num_options=num_options))
    c.set_net_fn("termination-critic",
                 tc.tc_conv_shared(num_options=num_options))
    c.save_freq = None
    return c
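The two set_optimizer calls attach a default RMSprop plus a keyed Adam that targets only the termination network. Judging from the filename, the config is presumably consumed by a termination-critic agent; a hedged sketch (ACTCAgent is a guess from actc_atari.py, not a confirmed class name):

c = main("Breakout")
ag = rainy.agents.ACTCAgent(c)  # hypothetical class name, inferred from the filename
c.initialize_hooks()  # construct-then-initialize order as in the test examples below
_ = ag.eval_episode()
ag.close()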
Code Example #4
File: ppo_cartpole.py (Project: kngwyu/Rainy)
def main(
    envname: str = "CartPole-v0", use_rnn: bool = False, use_separated: bool = False
) -> rainy.Config:
    c = rainy.Config()
    c.set_env(lambda: ClassicControl(envname))
    c.max_steps = int(1e5)
    c.nworkers = 8
    c.nsteps = 32
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.value_loss_weight = 0.2
    c.entropy_weight = 0.001
    c.grad_clip = 0.1
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = 64
    c.use_gae = True
    c.ppo_clip = 0.2
    c.eval_freq = 5000
    c.eval_times = 1
    c.episode_log_freq = 100
    if use_rnn:
        c.set_net_fn(
            "actor-critic", rainy.net.actor_critic.fc_shared(rnn=rainy.net.GruBlock)
        )
    elif use_separated:
        c.set_net_fn("actor-critic", rainy.net.actor_critic.fc_separated())
    return c
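A config function like this is consumed by constructing an agent from it; the calls below mirror the test examples later in this collection (Code Examples #15 and #16), though the PPOAgent class name is an assumption:

c = main(use_rnn=False)
ag = rainy.agents.PPOAgent(c)     # assumed agent class for a PPO config
res = next(ag.train_episodes(1))  # yields one batch of episode results, as in Code Example #15
ag.close()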
Code Example #5
def config(envname: str = "MontezumaRevenge") -> rainy.Config:
    c = rainy.Config()
    c.set_env(lambda: Atari(envname, cfg=atari_config(), frame_stack=False))
    c.set_parallel_env(atari_parallel())
    c.set_net_fn("actor-critic", rainy.net.actor_critic.ac_conv())
    c.set_optimizer(lambda params: Adam(params, lr=1.0e-4, eps=1.0e-8))
    c.max_steps = int(1e8) * 6
    c.grad_clip = 1.0
    # ppo params
    c.discount_factor = 0.999
    c.entropy_weight = 0.001
    c.ppo_epochs = 4
    c.ppo_clip = 0.1
    c.use_gae = True
    c.nworkers = 128
    c.nsteps = 128
    c.value_loss_weight = 0.5
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = (c.nworkers * c.nsteps) // 4
    c.use_reward_monitor = True
    # eval settings
    c.eval_env = Atari(envname, cfg=atari_config())
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = None
    return c
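The minibatch arithmetic divides each rollout into four PPO minibatches; spelled out:

# nworkers * nsteps transitions are collected per rollout;
# (128 * 128) // 4 == 4096, so each of the ppo_epochs sweeps 4 minibatches.
assert (128 * 128) // 4 == 4096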
Code Example #6
File: ppo_atari.py (Project: kngwyu/Rainy)
def main(
    envname: str = "Breakout",
    use_rnn: bool = False,
) -> rainy.Config:
    c = rainy.Config()
    c.set_env(lambda: Atari(envname, frame_stack=False))
    if use_rnn:
        c.set_net_fn("actor-critic",
                     rainy.net.actor_critic.conv_shared(rnn=net.GruBlock))
    else:
        c.set_net_fn("actor-critic", rainy.net.actor_critic.conv_shared())
    c.set_parallel_env(atari_parallel())
    c.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.max_steps = int(2e7)
    c.grad_clip = 0.5
    # ppo params
    c.nworkers = 8
    c.nsteps = 128
    c.value_loss_weight = 1.0
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = 32 * 8
    c.ppo_clip = 0.1
    c.ppo_epochs = 3
    c.use_gae = True
    c.lr_min = None  # set 0.0 if you decrease ppo_clip
    # eval settings
    c.eval_env = Atari(envname)
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = None
    return c
Code Example #7
def main(
    envname: str = "Hopper",
    nworkers: int = 1,
    mujoco: bool = False,
) -> rainy.Config:
    c = rainy.Config()
    if mujoco:
        c.set_env(lambda: rainy.envs.Mujoco(envname))
    else:
        c.set_env(lambda: rainy.envs.PyBullet(envname))
    c.max_steps = int(1e6)
    c.set_optimizer(lambda params: Adam(params, lr=1e-3), key="actor")
    c.set_optimizer(lambda params: Adam(params, lr=1e-3), key="critic")
    c.replay_size = int(1e6)
    c.train_start = int(1e4)
    c.set_explorer(lambda: explore.GaussianNoise())
    c.set_explorer(lambda: explore.Greedy(), key="eval")
    c.set_explorer(
        lambda: explore.GaussianNoise(explore.DummyCooler(0.2), 0.5),
        key="target")
    c.eval_deterministic = True
    c.eval_freq = c.max_steps // 10
    c.grad_clip = None
    c.nworkers = nworkers
    c.replay_batch_size = 100 * nworkers
    return c
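The three keyed explorers split exploration by role: Gaussian noise for acting, greedy for evaluation, and extra noise on the target policy, which hints at a TD3-style agent. A hedged usage sketch (the agent class is an assumption; only DDPGAgent appears verbatim in this collection, in Code Example #18):

c = main(nworkers=1)
ag = rainy.agents.TD3Agent(c)  # assumption: the "target" explorer key suggests TD3
c.initialize_hooks()
_ = ag.eval_episode()
ag.close()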
Code Example #8
File: ppoc_cartpole.py (Project: kngwyu/Rainy)
def main(
    envname: str = "CartPole-v0",
    num_options: int = 2,
    opt_delib_cost: float = 0.0,
    opt_beta_adv_merginal: float = 0.01,
    opt_avg_baseline: bool = False,
    proximal_update_for_mu: bool = False,
) -> rainy.Config:
    c = rainy.Config()
    c.set_env(lambda: ClassicControl(envname))
    c.max_steps = int(4e5)
    # Option settings
    c.opt_delib_cost = opt_delib_cost
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    c.set_net_fn(
        "option-critic",
        rainy.net.option_critic.fc_shared(num_options=num_options,
                                          has_mu=True),
    )
    # PPO params
    c.nworkers = 12
    c.nsteps = 64
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(lambda params: optim.Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.grad_clip = 1.0
    c.eval_freq = 10000
    c.entropy_weight = 0.01
    c.value_loss_weight = 1.0
    c.use_gae = True
    return c
Code Example #9
def config() -> rainy.Config:
    c = rainy.Config()
    c.set_parallel_env(lambda _env_gen, _num_w: ParallelRogueEnvExt(
        ParallelRogueEnv(
            [rogue_config(2)] * c.nworkers,
            max_steps=500,
            image_setting=EXPAND,
        )))
    c.max_steps = int(2e7) * 2
    c.save_freq = None
    c.eval_freq = None
    c.eval_env = RogueEnvExt(
        RogueEnv(
            config_dict=rogue_config(2),
            max_steps=500,
            stair_reward=50.0,
            image_setting=EXPAND,
        ))
    c.set_optimizer(lambda params: Adam(params, lr=1.0e-4, eps=1.0e-8))
    CNN_PARAM = [(8, 1), (4, 1), (3, 1)]
    c.set_net_fn(
        "actor-critic",
        rainy.net.actor_critic.ac_conv(
            kernel_and_strides=CNN_PARAM,
            output_dim=256,
        ),
    )
    c.nworkers = 32
    c.nsteps = 125
    c.value_loss_weight = 0.5
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = (c.nworkers * c.nsteps) // 4
    return c
Code Example #10
def main(
    envname: str = "CartPole-v0",
    tau: float = 12 * 20,
    update_freq: int = 10,
) -> rainy.Config:
    c = rainy.Config()
    c.set_env(lambda: ClassicControl(envname))
    c.max_steps = int(4e5)
    c.nworkers = 12
    c.nsteps = 20
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(kfac.default_sgd(eta_max=0.1))
    c.set_preconditioner(lambda net: kfac.KfacPreConditioner(
        net,
        tau=tau,
        update_freq=update_freq,
        norm_scaler=kfac.SquaredFisherScaler(eta_max=0.1, delta=0.001),
    ))
    c.gae_lambda = 0.95
    c.use_gae = False
    c.lr_min = 0.0
    c.value_loss_weight = 0.2
    c.entropy_weight = 0.01
    c.eval_freq = None
    return c
Code Example #11
File: dqn_cartpole.py (Project: kngwyu/Rainy)
def main(envname: str = "CartPole-v0",
         max_steps: int = 100000,
         nworkers: int = 1) -> rainy.Config:
    c = rainy.Config()
    c.set_env(lambda: ClassicControl(envname))
    c.set_parallel_env(MultiProcEnv)
    c.max_steps = max_steps
    c.episode_log_freq = 100
    c.nworkers = nworkers
    c.replay_batch_size = 64 * nworkers
    return c
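This minimal DQN config leans on Rainy's defaults for the network, replay buffer, and explorer. Running it mirrors Code Example #12, where DQNAgent appears verbatim:

c = main(max_steps=100000, nworkers=1)
ag = rainy.agents.DQNAgent(c)  # DQNAgent as used in Code Example #12
c.initialize_hooks()
_ = ag.eval_episode()
ag.close()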
Code Example #12
File: test_hooks.py (Project: kngwyu/Rainy)
def test_video_hook_atari() -> None:
    c = rainy.Config()
    c.eval_hooks.append(lib.hooks.VideoWriterHook(video_name="BreakoutVideo"))
    c.set_net_fn("dqn", net.value.dqn_conv())
    c.set_env(lambda: envs.Atari("Breakout"))
    c.eval_env = envs.Atari("Breakout")
    ag = agents.DQNAgent(c)
    c.initialize_hooks()
    _ = ag.eval_episode()
    ag.close()
    videopath = c.logger.logdir.joinpath("BreakoutVideo-0.avi")
    assert videopath.exists()
Code Example #13
File: test_hooks.py (Project: kngwyu/Rainy)
def test_qvalue_hook(make_ag: callable, is_bootdqn: bool) -> None:
    c = rainy.Config()
    hook = QValueHook()
    c.eval_hooks.append(hook)
    if is_bootdqn:
        c.set_replay_buffer(lambda capacity: replay.UniformReplayBuffer(
            replay.BootDQNReplayFeed))
    ag = make_ag(c)
    c.initialize_hooks()
    _ = ag.eval_episode()
    ag.close()
    assert len(hook.q_values) == 0
    assert tuple(hook.q_value_mean.shape) == (c.action_dim, )
Code Example #14
def test_image_hook_atari() -> None:
    c = rainy.Config()
    hook = ImageWriterHook(out_dir="/tmp/rainy-acvp/imagehook-test")
    c.eval_hooks.append(hook)  # append the same hook instance inspected below
    c.set_net_fn("dqn", net.value.dqn_conv())
    c.set_env(lambda: envs.Atari("Breakout"))
    c.eval_env = envs.Atari("Breakout")
    ag = agents.DQNAgent(c)
    c.initialize_hooks()
    _ = ag.eval_episode()
    ag.close()
    images = np.load(hook.out_dir.joinpath("ep1.npz"))
    assert images["states"][0].shape == (210, 160, 3)
    assert len(images["actions"].shape) == 1
Code Example #15
File: test_agent.py (Project: kngwyu/Rainy)
def test_nstep_train(make_ag: callable) -> None:
    c = rainy.Config()
    c.logger.setup_logdir()
    c.nworkers = 6
    c.nsteps = 4
    c.ppo_minibatch_size = 12
    c.set_parallel_env(rainy.envs.DummyParallelEnv)
    c.set_net_fn("actor-critic", net.actor_critic.fc_shared(units=[32, 32]))
    c.set_net_fn("option-critic", net.option_critic.fc_shared(units=[32, 32]))
    c.set_env(partial(DummyEnvDeterministic, flatten=True))
    ag = make_ag(c)
    res = next(ag.train_episodes(1))
    assert len(res) == c.nworkers
    ag.close()
Code Example #16
File: test_agent.py (Project: kngwyu/Rainy)
def test_eval_parallel(n: int, make_ag: callable) -> None:
    c = rainy.Config()
    c.nworkers = 6
    c.nsteps = 5
    c.set_parallel_env(rainy.envs.DummyParallelEnv)
    c.set_net_fn("actor-critic", net.actor_critic.fc_shared(units=[32, 32]))
    c.set_net_fn("option-critic", net.option_critic.fc_shared(units=[32, 32]))
    c.set_env(partial(DummyEnvDeterministic, flatten=True))
    ag = make_ag(c)
    res = ag.eval_parallel(n=n)
    assert len(res) == n
    for r in res:
        assert r.return_ == 20.0
        assert r.length == 3
    ag.close()
Code Example #17
def config() -> rainy.Config:
    c = rainy.Config()
    c.max_steps = int(1e6)
    c.nworkers = 12
    c.nsteps = 5
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(lambda params: Adam(params, lr=0.001))
    c.grad_clip = 0.1
    c.value_loss_weight = 0.1
    c.use_gae = False
    c.eval_deterministic = True
    c.eval_freq = None
    c.entropy_weight = 0.001
    # c.set_net_fn('actor-critic', rainy.net.actor_critic.fc_shared(rnn=rainy.net.GruBlock))
    return c
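Unlike most configs here, this one never calls set_env, so the caller is expected to attach an environment (and optionally a network) before building an agent; a hedged sketch using names from the other examples:

c = config()
c.set_env(lambda: ClassicControl("CartPole-v0"))  # caller-supplied env, as in Code Example #4
# To enable the RNN variant suggested by the commented-out line above:
# c.set_net_fn("actor-critic", rainy.net.actor_critic.fc_shared(rnn=rainy.net.GruBlock))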
Code Example #18
File: test_hooks.py (Project: kngwyu/Rainy)
def test_video_hook_pybullet() -> None:
    c = rainy.Config()
    c.eval_hooks.append(lib.hooks.VideoWriterHook(video_name="HopperVideo"))
    c.set_env(lambda: envs.PyBullet("Hopper"))
    c.set_explorer(lambda: lib.explore.GaussianNoise())
    c.set_explorer(lambda: lib.explore.Greedy(), key="eval")
    c.set_optimizer(lambda params: torch.optim.Adam(params, lr=1e-3),
                    key="actor")
    c.set_optimizer(lambda params: torch.optim.Adam(params, lr=1e-3),
                    key="critic")
    ag = agents.DDPGAgent(c)
    c.initialize_hooks()
    _ = ag.eval_episode()
    ag.close()
    videopath = c.logger.logdir.joinpath("HopperVideo-0.avi")
    assert videopath.exists()
Code Example #19
def config() -> rainy.Config:
    c = rainy.Config()
    c.max_steps = int(1e6)
    c.nworkers = 8
    c.nsteps = 32
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.value_loss_weight = 0.1
    c.grad_clip = 0.1
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = 64
    c.use_gae = True
    c.ppo_clip = 0.2
    c.eval_freq = None
    # c.set_net_fn('actor-critic', rainy.net.actor_critic.fc_shared(rnn=rainy.net.GruBlock))
    return c
Code Example #20
def test_eval_parallel(n: int) -> None:
    c = rainy.Config()
    c.nworkers = 6
    c.nsteps = 5
    c.set_parallel_env(rainy.envs.MultiProcEnv)
    c.set_net_fn('actor-critic',
                 rainy.net.actor_critic.fc_shared(units=[32, 32]))
    c.set_env(partial(DummyEnvDeterministic, flatten=True))
    agent = rainy.agents.A2cAgent(c)
    entropy = np.zeros(c.nworkers)
    res = agent.eval_parallel(n, entropy=entropy)
    assert len(res) == n
    for r in res:
        assert r.reward == 20.0
        assert r.length == 3
    agent.close()
Code Example #21
File: actc_4rooms.py (Project: kngwyu/Rainy)
def main(
    envname: str = "4Rooms",
    obs_type: str = "image",
    num_options: int = 4,
    visualize_beta: bool = False,
) -> rainy.Config:
    c = rainy.Config()
    if visualize_beta:
        c.eval_hooks.append(OptionVisualizeHook(num_options))
    c.set_env(lambda: RLPyGridWorld(envname, obs_type))
    c.max_steps = int(4e5)
    c.nworkers = 12
    c.nsteps = 5
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(lambda params: optim.RMSprop(params, lr=0.0007))
    c.set_optimizer(lambda params: optim.Adam(params, lr=1e-4),
                    key="termination")
    c.grad_clip = 1.0
    c.eval_freq = c.max_steps // 20
    c.network_log_freq = (c.max_steps // c.batch_size) // 10
    c.entropy_weight = 0.001
    c.value_loss_weight = 1.0
    c.tc_exact_pmu = "GridWorld" in envname
    if obs_type == "image":
        CONV_ARGS = dict(
            hidden_channels=(8, 8),
            feature_dim=128,
            cnn_params=[(4, 1), (2, 1)],
        )
        c.set_net_fn(
            "actor-critic",
            tc.oac_conv_shared(num_options=num_options, **CONV_ARGS),
        )
        c.set_net_fn(
            "termination-critic",
            tc.tc_conv_shared(num_options=num_options, **CONV_ARGS),
        )
    else:
        c.set_net_fn(
            "actor-critic",
            tc.oac_fc_shared(num_options=num_options),
        )
        c.set_net_fn(
            "termination-critic",
            tc.tc_fc_shared(num_options=num_options),
        )
    return c
Code Example #22
File: sac_cartpole.py (Project: kngwyu/Rainy)
def main(envname: str = "CartPoleSwingUpContinuous-v0",
         nworkers: int = 1) -> rainy.Config:
    c = rainy.Config()
    c.set_env(lambda: ClassicControl(envname))
    c.max_steps = int(1e5)
    c.set_optimizer(lambda params: Adam(params, lr=3e-4), key="actor")
    c.set_optimizer(lambda params: Adam(params, lr=3e-4), key="critic")
    c.set_optimizer(lambda params: Adam(params, lr=3e-4), key="entropy")
    c.replay_size = int(1e5)
    c.train_start = int(1e4)
    c.eval_deterministic = True
    c.eval_freq = c.max_steps // 10
    c.sync_freq = 1
    c.grad_clip = None
    c.nworkers = nworkers
    c.replay_batch_size = 256 * nworkers
    return c
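Three keyed optimizers are set here; the "entropy" key presumably drives SAC's automatic temperature tuning. Note also that the replay batch scales linearly with the worker count; a quick check of the arithmetic:

# replay_batch_size is 256 transitions per worker per update.
c = main(nworkers=4)
assert c.replay_batch_size == 256 * 4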
Code Example #23
File: test_hooks.py (Project: kngwyu/Rainy)
def test_state_hook_pybullet() -> None:
    c = rainy.Config()
    hook = lib.hooks.StateWriterHook(
        out_dir="/tmp/rainy-acvp/imagehook-pybullet")
    c.eval_hooks.append(hook)
    c.set_env(lambda: envs.PyBullet("Hopper"))
    c.set_explorer(lambda: lib.explore.GaussianNoise())
    c.set_explorer(lambda: lib.explore.Greedy(), key="eval")
    c.set_optimizer(lambda params: torch.optim.Adam(params, lr=1e-3),
                    key="actor")
    c.set_optimizer(lambda params: torch.optim.Adam(params, lr=1e-3),
                    key="critic")
    ag = agents.DDPGAgent(c)
    c.initialize_hooks()
    _ = ag.eval_episode()
    ag.close()
    episodes = np.load(hook.out_dir.joinpath("ep1.npz"))
    assert episodes["states"][0].shape == (15, )  # state space
    assert episodes["actions"][0].shape == (3, )  # action space
Code Example #24
def main(envname: str = "Hopper") -> rainy.Config:
    c = rainy.Config()
    c.set_env(lambda: PyBullet(envname))
    c.set_net_fn(
        "actor-critic", rainy.net.actor_critic.fc_shared(policy=SeparateStdGaussianDist)
    )
    c.set_parallel_env(pybullet_parallel())
    c.max_steps = int(1e6)
    c.nworkers = 12
    c.nsteps = 5
    c.set_optimizer(lambda params: Adam(params, lr=0.001))
    c.grad_clip = 0.5
    c.gae_lambda = 0.95
    c.value_loss_weight = 0.5
    c.entropy_weight = 0.0
    c.use_gae = False
    c.eval_deterministic = False
    c.eval_freq = None
    return c
Code Example #25
File: a2c_atari.py (Project: kngwyu/Rainy)
def main(envname: str = "Breakout") -> rainy.Config:
    c = rainy.Config()
    c.set_env(lambda: Atari(envname, frame_stack=False))
    c.set_optimizer(
        lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5))
    # c.set_net_fn('actor-critic', rainy.net.actor_critic.conv_shared(rnn=net.GruBlock))
    c.set_net_fn("actor-critic", rainy.net.actor_critic.conv_shared())
    c.nworkers = 16
    c.nsteps = 5
    c.set_parallel_env(atari_parallel())
    c.grad_clip = 0.5
    c.value_loss_weight = 1.0
    c.use_gae = False
    c.max_steps = int(2e7)
    c.eval_env = Atari(envname)
    c.eval_deterministic = False
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = None
    return c
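Running this A2C config follows the same pattern as the tests; A2cAgent is spelled exactly as in Code Example #20:

c = main("Breakout")
ag = rainy.agents.A2cAgent(c)     # spelling as in Code Example #20
res = next(ag.train_episodes(1))  # one iteration's episode results
ag.close()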
Code Example #26
File: aoc_4rooms.py (Project: kngwyu/Rainy)
def main(
    envname: str = "4Rooms",
    num_options: int = 4,
    opt_delib_cost: float = 0.0,
    opt_beta_adv_merginal: float = 0.01,
    obs_type: str = "image",
    use_gae: bool = False,
    opt_avg_baseline: bool = False,
    visualize_beta: bool = False,
) -> rainy.Config:
    c = rainy.Config()
    if visualize_beta:
        c.eval_hooks.append(OptionVisualizeHook(num_options))
    c.set_env(lambda: RLPyGridWorld(envname, obs_type))
    c.max_steps = int(4e5)
    c.nworkers = 12
    c.nsteps = 5
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(lambda params: optim.RMSprop(params, lr=0.0007))
    c.grad_clip = 1.0
    c.eval_freq = c.max_steps // 20
    c.network_log_freq = (c.max_steps // c.batch_size) // 10
    c.entropy_weight = 0.001
    c.value_loss_weight = 1.0
    c.opt_delib_cost = opt_delib_cost
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    c.opt_avg_baseline = opt_avg_baseline
    c.use_gae = use_gae
    if obs_type == "image" or obs_type == "binary-image":
        c.set_net_fn(
            "option-critic",
            oc.conv_shared(
                num_options=num_options,
                hidden_channels=(8, 8),
                feature_dim=128,
                cnn_params=[(4, 1), (2, 1)],
            ),
        )
    else:
        c.set_net_fn("option-critic", oc.fc_shared(num_options=num_options))
    return c
Code Example #27
File: ppo_halfcheetah.py (Project: kngwyu/Rainy)
def main(envname: str = "HalfCheetah") -> rainy.Config:
    c = rainy.Config()
    c.set_env(lambda: PyBullet(envname))
    c.set_net_fn(
        "actor-critic", rainy.net.actor_critic.fc_shared(policy=SeparateStdGaussianDist)
    )
    c.set_parallel_env(pybullet_parallel())
    c.set_optimizer(lambda params: Adam(params, lr=3.0e-4, eps=1.0e-4))
    c.max_steps = int(2e6)
    c.grad_clip = 0.5
    # ppo params
    c.value_loss_weight = 0.5
    c.entropy_weight = 0.0
    c.gae_lambda = 0.95
    c.nworkers = 16
    c.nsteps = 128
    c.ppo_minibatch_size = (16 * 128) // 16
    c.ppo_clip = 0.2
    c.use_gae = True
    c.eval_freq = None
    return c
Code Example #28
def main(
    envname: str = "Hopper",
    nworkers: int = 1,
    mujoco: bool = False,
) -> rainy.Config:
    c = rainy.Config()
    if mujoco:
        c.set_env(lambda: rainy.envs.Mujoco(envname))
    else:
        c.set_env(lambda: rainy.envs.PyBullet(envname))
    c.max_steps = int(1e6)
    c.set_optimizer(lambda params: Adam(params, lr=3e-4), key="actor")
    c.set_optimizer(lambda params: Adam(params, lr=3e-4), key="critic")
    c.set_optimizer(lambda params: Adam(params, lr=3e-4), key="entropy")
    c.replay_size = int(1e6)
    c.train_start = int(1e4)
    c.eval_deterministic = True
    c.eval_freq = c.max_steps // 100
    c.sync_freq = 1
    c.grad_clip = None
    c.nworkers = nworkers
    c.replay_batch_size = 256 * nworkers
    return c
Code Example #29
File: ppoc_atari.py (Project: kngwyu/Rainy)
def main(
    envname: str = "Breakout",
    num_options: int = 4,
    opt_delib_cost: float = 0.0,
    opt_beta_adv_merginal: float = 0.01,
    opt_avg_baseline: bool = False,
    proximal_update_for_mu: bool = False,
) -> rainy.Config:
    c = rainy.Config()
    c.set_env(lambda: Atari(envname, frame_stack=False))
    c.set_parallel_env(atari_parallel())
    c.set_optimizer(lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5))
    c.max_steps = int(2e7)
    c.grad_clip = 0.5
    # Option settings
    c.opt_delib_cost = opt_delib_cost
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    c.set_net_fn(
        "option-critic",
        rainy.net.option_critic.conv_shared(num_options=num_options, has_mu=True),
    )
    # PPO params
    c.nworkers = 8
    c.nsteps = 128
    c.value_loss_weight = 1.0
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = 32 * 8
    c.ppo_clip = 0.1
    c.ppo_epochs = 3
    c.use_gae = True
    # Eval settings
    c.eval_env = Atari(envname)
    c.eval_deterministic = False
    c.episode_log_freq = 100
    c.eval_freq = c.max_steps // 10
    c.save_freq = None
    return c
Code Example #30
File: ppoc_hopper.py (Project: kngwyu/Rainy)
def main(
    envname: str = "Hopper",
    num_options: int = 2,
    opt_delib_cost: float = 0.0,
    opt_beta_adv_merginal: float = 0.01,
    opt_avg_baseline: bool = False,
    proximal_update_for_mu: bool = False,
) -> rainy.Config:
    c = rainy.Config()
    c.set_env(lambda: PyBullet(envname))
    c.set_parallel_env(
        pybullet_parallel(normalize_obs=True, normalize_reward=True))
    c.set_optimizer(lambda params: Adam(params, lr=3.0e-4, eps=1.0e-4))
    c.max_steps = int(1e6)
    c.grad_clip = 0.5
    # Option settings
    c.opt_delib_cost = opt_delib_cost
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    c.set_net_fn(
        "option-critic",
        rainy.net.option_critic.fc_shared(num_options=num_options,
                                          policy=PerOptionStdGaussianDist,
                                          has_mu=True),
    )
    # PPO params
    c.nworkers = 4
    c.nsteps = 512
    c.ppo_minibatch_size = (4 * 512) // 8
    c.ppo_clip = 0.2
    c.use_gae = True
    c.eval_freq = c.max_steps // 10
    c.entropy_weight = 0.01
    c.value_loss_weight = 1.0
    c.eval_deterministic = True
    c.eval_times = 4
    c.proximal_update_for_mu = proximal_update_for_mu
    return c
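As with the other option-critic configs, this is presumably consumed by a proximal-option-critic agent; a hedged sketch (PPOCAgent is a guess from ppoc_hopper.py, not a confirmed class name):

c = main("Hopper", num_options=2)
ag = rainy.agents.PPOCAgent(c)  # hypothetical class name, inferred from the filename
c.initialize_hooks()
_ = ag.eval_episode()
ag.close()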