Beispiel #1
0
def main(
    envname: str = "CartPole-v0", use_rnn: bool = False, use_separated: bool = False
) -> rainy.Config:
    """Build a ``rainy.Config`` with PPO-related fields for a classic-control task.

    Args:
        envname: Environment id handed to ``ClassicControl``.
        use_rnn: If true, install ``fc_shared`` with a ``GruBlock`` as the
            "actor-critic" network. Takes precedence over ``use_separated``.
        use_separated: If true (and ``use_rnn`` is false), install
            ``fc_separated`` as the "actor-critic" network.

    Returns:
        The populated configuration object. When both flags are false no
        network function is set here and the config's own default applies.
    """

    def make_env():
        return ClassicControl(envname)

    def make_optimizer(params):
        return Adam(params, lr=2.5e-4, eps=1.0e-4)

    c = rainy.Config()
    c.set_env(make_env)

    # Rollout: 8 workers x 32 steps, up to 100k steps in total.
    c.max_steps = 100_000
    c.nworkers = 8
    c.nsteps = 32
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(make_optimizer)

    # Loss weighting and gradient clipping.
    c.entropy_weight = 0.001
    c.value_loss_weight = 0.2
    c.grad_clip = 0.1

    # Advantage estimation.
    c.use_gae = True
    c.gae_lambda = 0.95

    # PPO-prefixed fields.
    c.ppo_clip = 0.2
    c.ppo_minibatch_size = 64

    # Evaluation and logging frequencies.
    c.eval_times = 1
    c.eval_freq = 5000
    c.episode_log_freq = 100

    if use_rnn:
        recurrent_net = rainy.net.actor_critic.fc_shared(rnn=rainy.net.GruBlock)
        c.set_net_fn("actor-critic", recurrent_net)
        return c
    if use_separated:
        separated_net = rainy.net.actor_critic.fc_separated()
        c.set_net_fn("actor-critic", separated_net)
    return c
Beispiel #2
0
def main(
    envname: str = "CartPole-v0",
    num_options: int = 2,
    opt_delib_cost: float = 0.0,
    opt_beta_adv_merginal: float = 0.01,
    use_gae: bool = False,
    opt_avg_baseline: bool = False,
) -> rainy.Config:
    """Build a ``rainy.Config`` that uses an "option-critic" network with RMSprop.

    Args:
        envname: Environment id handed to ``ClassicControl``.
        num_options: Forwarded to ``option_critic.fc_shared``.
        opt_delib_cost: Stored as ``c.opt_delib_cost``.
        opt_beta_adv_merginal: Stored as ``c.opt_beta_adv_merginal``.
        use_gae: Stored as ``c.use_gae``.
        opt_avg_baseline: Stored as ``c.opt_avg_baseline``.

    Returns:
        The populated configuration object.
    """

    def make_env():
        return ClassicControl(envname)

    def make_optimizer(params):
        return optim.RMSprop(params, lr=0.0007)

    c = rainy.Config()
    c.set_env(make_env)
    c.max_steps = 400_000
    c.nworkers = 12
    c.nsteps = 5
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(make_optimizer)
    c.grad_clip = 1.0

    # Evaluate ten times over the run.
    c.eval_freq = c.max_steps // 10
    # batch_size is read from the config, so nworkers/nsteps must already be set.
    steps_per_batch = c.max_steps // c.batch_size
    c.network_log_freq = steps_per_batch // 10

    # Loss weights.
    c.value_loss_weight = 1.0
    c.entropy_weight = 0.001

    # Settings forwarded verbatim from the arguments.
    c.use_gae = use_gae
    c.opt_avg_baseline = opt_avg_baseline
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    c.opt_delib_cost = opt_delib_cost

    option_net = rainy.net.option_critic.fc_shared(num_options=num_options)
    c.set_net_fn("option-critic", option_net)
    return c
Beispiel #3
0
def main(
    envname: str = "CartPole-v0",
    num_options: int = 2,
    opt_delib_cost: float = 0.0,
    opt_beta_adv_merginal: float = 0.01,
    opt_avg_baseline: bool = False,
    proximal_update_for_mu: bool = False,
) -> rainy.Config:
    """Build a ``rainy.Config`` using an "option-critic" network with ``has_mu=True``.

    Args:
        envname: Environment id handed to ``ClassicControl``.
        num_options: Forwarded to ``option_critic.fc_shared``.
        opt_delib_cost: Stored as ``c.opt_delib_cost``.
        opt_beta_adv_merginal: Stored as ``c.opt_beta_adv_merginal``.
        opt_avg_baseline: Stored as ``c.opt_avg_baseline``.
        proximal_update_for_mu: Stored as ``c.proximal_update_for_mu``.

    Returns:
        The populated configuration object.
    """
    c = rainy.Config()
    c.set_env(lambda: ClassicControl(envname))
    c.max_steps = int(4e5)
    # Option settings
    c.opt_delib_cost = opt_delib_cost
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    # These two arguments used to be accepted but never written to the config,
    # so values passed by the caller were silently ignored.
    c.opt_avg_baseline = opt_avg_baseline
    # NOTE(review): assumes Config reads an attribute of this exact name,
    # following the naming of the other option settings — confirm.
    c.proximal_update_for_mu = proximal_update_for_mu
    c.set_net_fn(
        "option-critic",
        rainy.net.option_critic.fc_shared(num_options=num_options,
                                          has_mu=True),
    )
    # PPO params
    c.nworkers = 12
    c.nsteps = 64
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(lambda params: optim.Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.grad_clip = 1.0
    c.eval_freq = 10000
    c.entropy_weight = 0.01
    c.value_loss_weight = 1.0
    c.use_gae = True
    return c
Beispiel #4
0
def main(
    envname: str = "CartPole-v0",
    tau: float = 12 * 20,
    update_freq: int = 10,
) -> rainy.Config:
    """Build a ``rainy.Config`` that trains with a K-FAC preconditioner.

    Args:
        envname: Environment id handed to ``ClassicControl``.
        tau: Forwarded to ``kfac.KfacPreConditioner``.
        update_freq: Forwarded to ``kfac.KfacPreConditioner``.

    Returns:
        The populated configuration object.
    """

    def make_env():
        return ClassicControl(envname)

    def make_preconditioner(net):
        # The scaler is built on every call, together with the preconditioner.
        scaler = kfac.SquaredFisherScaler(eta_max=0.1, delta=0.001)
        return kfac.KfacPreConditioner(
            net,
            tau=tau,
            update_freq=update_freq,
            norm_scaler=scaler,
        )

    c = rainy.Config()
    c.set_env(make_env)
    c.max_steps = 400_000
    c.nworkers = 12
    c.nsteps = 20
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(kfac.default_sgd(eta_max=0.1))
    c.set_preconditioner(make_preconditioner)

    # gae_lambda is still assigned although use_gae is switched off.
    c.use_gae = False
    c.gae_lambda = 0.95

    c.lr_min = 0.0
    c.entropy_weight = 0.01
    c.value_loss_weight = 0.2

    # NOTE(review): None presumably turns periodic evaluation off — confirm.
    c.eval_freq = None
    return c
Beispiel #5
0
def main(envname: str = "CartPole-v0",
         max_steps: int = 100000,
         nworkers: int = 1) -> rainy.Config:
    """Build a ``rainy.Config`` whose replay batch size scales with the worker count.

    Args:
        envname: Environment id handed to ``ClassicControl``.
        max_steps: Stored as ``c.max_steps``.
        nworkers: Stored as ``c.nworkers``; the replay batch size is
            ``64 * nworkers``.

    Returns:
        The populated configuration object.
    """

    def make_env():
        return ClassicControl(envname)

    per_worker_batch = 64

    c = rainy.Config()
    c.set_env(make_env)
    c.set_parallel_env(MultiProcEnv)
    c.max_steps = max_steps
    c.episode_log_freq = 100
    c.nworkers = nworkers
    c.replay_batch_size = per_worker_batch * nworkers
    return c
Beispiel #6
0
def main(envname: str = "CartPoleSwingUpContinuous-v0",
         nworkers: int = 1) -> rainy.Config:
    """Build a ``rainy.Config`` with separate Adam optimizers for three keys.

    Args:
        envname: Environment id handed to ``ClassicControl``.
        nworkers: Stored as ``c.nworkers``; the replay batch size is
            ``256 * nworkers``.

    Returns:
        The populated configuration object.
    """

    def make_env():
        return ClassicControl(envname)

    c = rainy.Config()
    c.set_env(make_env)
    c.max_steps = 100_000

    # The same Adam settings are registered under each key, in this order.
    for optimizer_key in ("actor", "critic", "entropy"):
        c.set_optimizer(lambda params: Adam(params, lr=3e-4), key=optimizer_key)

    # Replay buffer size and the train_start threshold.
    c.replay_size = 100_000
    c.train_start = 10_000

    c.eval_deterministic = True
    c.eval_freq = c.max_steps // 10
    c.sync_freq = 1
    c.grad_clip = None

    per_worker_batch = 256
    c.nworkers = nworkers
    c.replay_batch_size = per_worker_batch * nworkers
    return c
Beispiel #7
0
def main(
    envname: str = "CartPole-v0",
    max_steps: int = 1000000,
    rpf: bool = False,
    replay_prob: float = 0.5,
    prior_scale: float = 1.0,
) -> Config:
    """Build a ``Config`` with greedy explorers and a ``BootDQNReplayFeed`` buffer.

    Args:
        envname: Environment id handed to ``ClassicControl``.
        max_steps: Stored as ``c.max_steps``.
        rpf: If true, install ``bootstrap.rpf_fc_separated`` as the "bootdqn"
            network; otherwise no network function is set here.
        replay_prob: Stored as ``c.replay_prob``.
        prior_scale: Forwarded to ``rpf_fc_separated``; only used when ``rpf``
            is true.

    Returns:
        The populated configuration object.
    """

    def make_optimizer(params):
        return optim.Adam(params)

    def make_env():
        return ClassicControl(envname)

    def make_replay_buffer(capacity):
        return UniformReplayBuffer(BootDQNReplayFeed, capacity=capacity)

    c = Config()
    c.set_optimizer(make_optimizer)

    # Greedy explorer for the default key and for the "eval" key.
    c.set_explorer(lambda: explore.Greedy())
    c.set_explorer(lambda: explore.Greedy(), key="eval")

    c.set_env(make_env)
    c.max_steps = max_steps
    c.episode_log_freq = 100
    c.replay_prob = replay_prob

    if rpf:
        rpf_net = bootstrap.rpf_fc_separated(10, prior_scale=prior_scale)
        c.set_net_fn("bootdqn", rpf_net)

    c.set_replay_buffer(make_replay_buffer)
    return c
Beispiel #8
0
def main(envname: str = "CartPole-v0", num_options: int = 2) -> rainy.Config:
    """Build a ``rainy.Config`` with "actor-critic" and "termination-critic" networks.

    Args:
        envname: Environment id handed to ``ClassicControl``.
        num_options: Forwarded to both ``tc.oac_fc_shared`` and
            ``tc.tc_fc_shared``.

    Returns:
        The populated configuration object.
    """

    def make_env():
        return ClassicControl(envname)

    c = rainy.Config()
    c.set_env(make_env)
    c.max_steps = 400_000
    c.nworkers = 12
    c.nsteps = 5
    c.set_parallel_env(MultiProcEnv)

    # Default Adam for the main optimizer and for the "termination" key.
    c.set_optimizer(lambda params: optim.Adam(params))
    c.set_optimizer(lambda params: optim.Adam(params), key="termination")

    c.set_explorer(lambda: rainy.lib.explore.EpsGreedy(0.1))
    c.grad_clip = 0.5

    c.eval_freq = c.max_steps // 10
    # batch_size is read from the config, so nworkers/nsteps must already be set.
    steps_per_batch = c.max_steps // c.batch_size
    c.network_log_freq = steps_per_batch // 10

    c.entropy_weight = 0.001
    c.value_loss_weight = 1.0

    actor_critic_net = tc.oac_fc_shared(num_options=num_options)
    c.set_net_fn("actor-critic", actor_critic_net)

    termination_net = tc.tc_fc_shared(num_options=num_options)
    c.set_net_fn("termination-critic", termination_net)
    return c
Beispiel #9
0
def main(envname: str = "CartPole-v0") -> Config:
    """Build a minimal ``Config``: a ``ClassicControl`` env and a 100k-step budget.

    Args:
        envname: Environment id handed to ``ClassicControl``.

    Returns:
        The configuration object; every other field keeps its default.
    """

    def make_env():
        return ClassicControl(envname)

    c = Config()
    c.set_env(make_env)
    c.max_steps = 100_000
    return c