def main(
    envname: str = "CartPole-v0",
    num_options: int = 2,
    opt_delib_cost: float = 0.0,
    opt_beta_adv_merginal: float = 0.01,
    use_gae: bool = False,
    opt_avg_baseline: bool = False,
) -> rainy.Config:
    """Build an option-critic training config for classic-control envs."""
    c = rainy.Config()
    c.set_env(lambda: ClassicControl(envname))
    c.max_steps = int(4e5)
    c.nworkers = 12
    c.nsteps = 5
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(lambda net_params: optim.RMSprop(net_params, lr=0.0007))
    c.grad_clip = 1.0
    # Both frequencies are derived from max_steps, so they stay after it.
    c.eval_freq = c.max_steps // 10
    c.network_log_freq = (c.max_steps // c.batch_size) // 10
    c.entropy_weight = 0.001
    c.value_loss_weight = 1.0
    # Option-critic specific knobs, taken straight from the arguments.
    c.opt_delib_cost = opt_delib_cost
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    c.opt_avg_baseline = opt_avg_baseline
    c.use_gae = use_gae
    c.set_net_fn(
        "option-critic",
        rainy.net.option_critic.fc_shared(num_options=num_options),
    )
    return c
def main(
    envname: str = "Breakout",
    tau: float = 32 * 20 // 2,
    update_freq: int = 10,
) -> rainy.Config:
    """Build an Atari config that trains with a K-FAC preconditioned SGD."""
    c = rainy.Config()
    c.set_env(lambda: Atari(envname, frame_stack=False))
    c.set_optimizer(kfac.default_sgd(eta_max=0.2))

    def make_preconditioner(network):
        # eta_max/delta here must agree with the SGD schedule above.
        return kfac.KfacPreConditioner(
            network,
            tau=tau,
            update_freq=update_freq,
            norm_scaler=kfac.SquaredFisherScaler(eta_max=0.2, delta=0.001),
        )

    c.set_preconditioner(make_preconditioner)
    c.set_net_fn("actor-critic", rainy.net.actor_critic.conv_shared())
    c.nworkers = 32
    c.nsteps = 20
    c.set_parallel_env(atari_parallel())
    c.value_loss_weight = 1.0
    c.use_gae = True
    c.lr_min = 0.0
    c.max_steps = int(2e7)
    # Evaluation settings.
    c.eval_env = Atari(envname)
    c.eval_freq = None
    c.episode_log_freq = 100
    c.eval_deterministic = False
    return c
def main(envname: str = "Breakout", num_options: int = 4) -> rainy.Config:
    """Build a termination-critic option config for Atari."""
    c = rainy.Config()
    c.set_env(lambda: rainy.envs.Atari(envname, frame_stack=False))
    c.set_parallel_env(rainy.envs.atari_parallel())
    c.eval_env = rainy.envs.Atari(envname)
    c.max_steps = int(2e7)
    c.nworkers = 16
    c.nsteps = 5
    c.grad_clip = 1.0
    # Derived from max_steps / batch_size, so kept after those are set.
    c.eval_freq = c.max_steps // 20
    c.network_log_freq = (c.max_steps // c.batch_size) // 10
    c.entropy_weight = 0.001
    c.value_loss_weight = 1.0
    c.set_optimizer(lambda ps: optim.RMSprop(ps, lr=7e-4, alpha=0.99, eps=1e-5))
    # The termination head gets its own Adam optimizer.
    c.set_optimizer(lambda ps: optim.Adam(ps, lr=1e-4), key="termination")
    c.set_net_fn("actor-critic", tc.oac_conv_shared(num_options=num_options))
    c.set_net_fn("termination-critic", tc.tc_conv_shared(num_options=num_options))
    c.save_freq = None
    return c
def main(
    envname: str = "CartPole-v0",
    use_rnn: bool = False,
    use_separated: bool = False,
) -> rainy.Config:
    """Build a PPO config for classic control, optionally GRU or separated nets."""
    c = rainy.Config()
    c.set_env(lambda: ClassicControl(envname))
    c.max_steps = int(1e5)
    c.nworkers = 8
    c.nsteps = 32
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(lambda ps: Adam(ps, lr=2.5e-4, eps=1.0e-4))
    c.value_loss_weight = 0.2
    c.entropy_weight = 0.001
    c.grad_clip = 0.1
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = 64
    c.use_gae = True
    c.ppo_clip = 0.2
    c.eval_freq = 5000
    c.eval_times = 1
    c.episode_log_freq = 100
    # Network choice: RNN takes precedence over the separated variant;
    # when neither flag is set, the config's default net is used.
    if use_rnn:
        c.set_net_fn(
            "actor-critic",
            rainy.net.actor_critic.fc_shared(rnn=rainy.net.GruBlock),
        )
    elif use_separated:
        c.set_net_fn("actor-critic", rainy.net.actor_critic.fc_separated())
    return c
def config(envname: str = "MontezumaRevenge") -> rainy.Config:
    """Build a large-scale PPO config for hard-exploration Atari games."""
    c = rainy.Config()
    c.set_env(lambda: Atari(envname, cfg=atari_config(), frame_stack=False))
    c.set_parallel_env(atari_parallel())
    c.set_net_fn("actor-critic", rainy.net.actor_critic.ac_conv())
    c.set_optimizer(lambda ps: Adam(ps, lr=1.0e-4, eps=1.0e-8))
    c.max_steps = int(1e8) * 6
    c.grad_clip = 1.0
    # PPO hyperparameters.
    c.discount_factor = 0.999
    c.entropy_weight = 0.001
    c.ppo_epochs = 4
    c.ppo_clip = 0.1
    c.use_gae = True
    c.nworkers = 128
    c.nsteps = 128
    c.value_loss_weight = 0.5
    c.gae_lambda = 0.95
    # Four minibatches per rollout of nworkers * nsteps transitions.
    c.ppo_minibatch_size = (c.nworkers * c.nsteps) // 4
    c.use_reward_monitor = True
    # Evaluation settings.
    c.eval_env = Atari(envname, cfg=atari_config())
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = None
    return c
def main(
    envname: str = "Breakout",
    use_rnn: bool = False,
) -> rainy.Config:
    """Build a PPO config for Atari, optionally with a GRU recurrent net."""
    c = rainy.Config()
    c.set_env(lambda: Atari(envname, frame_stack=False))
    if use_rnn:
        net_fn = rainy.net.actor_critic.conv_shared(rnn=net.GruBlock)
    else:
        net_fn = rainy.net.actor_critic.conv_shared()
    c.set_net_fn("actor-critic", net_fn)
    c.set_parallel_env(atari_parallel())
    c.set_optimizer(lambda ps: Adam(ps, lr=2.5e-4, eps=1.0e-4))
    c.max_steps = int(2e7)
    c.grad_clip = 0.5
    # PPO hyperparameters.
    c.nworkers = 8
    c.nsteps = 128
    c.value_loss_weight = 1.0
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = 32 * 8
    c.ppo_clip = 0.1
    c.ppo_epochs = 3
    c.use_gae = True
    c.lr_min = None  # set 0.0 if you decrease ppo_clip
    # Evaluation settings.
    c.eval_env = Atari(envname)
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = None
    return c
def main(
    envname: str = "Hopper",
    nworkers: int = 1,
    mujoco: bool = False,
) -> rainy.Config:
    """Build an off-policy continuous-control config (PyBullet or MuJoCo)."""
    c = rainy.Config()
    env_cls = rainy.envs.Mujoco if mujoco else rainy.envs.PyBullet
    c.set_env(lambda: env_cls(envname))
    c.max_steps = int(1e6)
    c.set_optimizer(lambda ps: Adam(ps, lr=1e-3), key="actor")
    c.set_optimizer(lambda ps: Adam(ps, lr=1e-3), key="critic")
    c.replay_size = int(1e6)
    c.train_start = int(1e4)
    # Gaussian exploration while training, greedy at eval time,
    # and a separately-cooled Gaussian for the target policy.
    c.set_explorer(lambda: explore.GaussianNoise())
    c.set_explorer(lambda: explore.Greedy(), key="eval")
    c.set_explorer(
        lambda: explore.GaussianNoise(explore.DummyCooler(0.2), 0.5), key="target"
    )
    c.eval_deterministic = True
    c.eval_freq = c.max_steps // 10
    c.grad_clip = None
    c.nworkers = nworkers
    c.replay_batch_size = 100 * nworkers
    return c
def main(
    envname: str = "CartPole-v0",
    num_options: int = 2,
    opt_delib_cost: float = 0.0,
    opt_beta_adv_merginal: float = 0.01,
    opt_avg_baseline: bool = False,
    proximal_update_for_mu: bool = False,
) -> rainy.Config:
    """Build a PPOC (proximal option-critic) config for classic control.

    Fix: ``opt_avg_baseline`` and ``proximal_update_for_mu`` were accepted as
    parameters but never written to the config, so passing them had no effect.
    They are now wired through; the defaults preserve the original behavior.
    """
    c = rainy.Config()
    c.set_env(lambda: ClassicControl(envname))
    c.max_steps = int(4e5)
    # Option settings
    c.opt_delib_cost = opt_delib_cost
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    c.opt_avg_baseline = opt_avg_baseline
    c.proximal_update_for_mu = proximal_update_for_mu
    c.set_net_fn(
        "option-critic",
        rainy.net.option_critic.fc_shared(num_options=num_options, has_mu=True),
    )
    # PPO params
    c.nworkers = 12
    c.nsteps = 64
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(lambda params: optim.Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.grad_clip = 1.0
    c.eval_freq = 10000
    c.entropy_weight = 0.01
    c.value_loss_weight = 1.0
    c.use_gae = True
    return c
def config() -> rainy.Config:
    """Build a PPO config for the Rogue-gym environment.

    Fix: the eval env was constructed with ``mex_steps=500`` — a typo for
    ``max_steps=500`` (the parallel training env on the same call path uses
    ``max_steps``), so the intended 500-step episode cap never applied.
    """
    c = rainy.Config()
    # Note: the lambda closes over c, so it reads c.nworkers lazily —
    # the assignment of c.nworkers below happens before the env is built.
    c.set_parallel_env(lambda _env_gen, _num_w: ParallelRogueEnvExt(
        ParallelRogueEnv(
            [rogue_config(2)] * c.nworkers,
            max_steps=500,
            image_setting=EXPAND,
        )))
    c.max_steps = int(2e7) * 2
    c.save_freq = None
    c.eval_freq = None
    c.eval_env = RogueEnvExt(
        RogueEnv(
            config_dict=rogue_config(2),
            max_steps=500,  # was "mex_steps" (typo)
            stair_reward=50.0,
            image_setting=EXPAND,
        ))
    c.set_optimizer(lambda params: Adam(params, lr=1.0e-4, eps=1.0e-8))
    CNN_PARAM = [(8, 1), (4, 1), (3, 1)]
    c.set_net_fn(
        "actor-critic",
        rainy.net.actor_critic.ac_conv(
            kernel_and_strides=CNN_PARAM,
            output_dim=256,
        ),
    )
    c.nworkers = 32
    c.nsteps = 125
    c.value_loss_weight = 0.5
    c.gae_lambda = 0.95
    # Four minibatches per rollout.
    c.ppo_minibatch_size = (c.nworkers * c.nsteps) // 4
    return c
def main(
    envname: str = "CartPole-v0",
    tau: float = 12 * 20,
    update_freq: int = 10,
) -> rainy.Config:
    """Build a K-FAC preconditioned config for classic control."""
    c = rainy.Config()
    c.set_env(lambda: ClassicControl(envname))
    c.max_steps = int(4e5)
    c.nworkers = 12
    c.nsteps = 20
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(kfac.default_sgd(eta_max=0.1))

    def make_preconditioner(network):
        # eta_max/delta must agree with the SGD schedule above.
        return kfac.KfacPreConditioner(
            network,
            tau=tau,
            update_freq=update_freq,
            norm_scaler=kfac.SquaredFisherScaler(eta_max=0.1, delta=0.001),
        )

    c.set_preconditioner(make_preconditioner)
    c.gae_lambda = 0.95
    c.use_gae = False
    c.lr_min = 0.0
    c.value_loss_weight = 0.2
    c.entropy_weight = 0.01
    c.eval_freq = None
    return c
def main(
    envname: str = "CartPole-v0", max_steps: int = 100000, nworkers: int = 1
) -> rainy.Config:
    """Build a minimal replay-based config for classic control."""
    c = rainy.Config()
    c.set_env(lambda: ClassicControl(envname))
    c.set_parallel_env(MultiProcEnv)
    c.max_steps = max_steps
    c.episode_log_freq = 100
    c.nworkers = nworkers
    # Scale the replay batch with the worker count.
    c.replay_batch_size = 64 * nworkers
    return c
def test_video_hook_atari() -> None:
    """VideoWriterHook should produce an .avi file during a DQN eval episode."""
    c = rainy.Config()
    c.eval_hooks.append(lib.hooks.VideoWriterHook(video_name="BreakoutVideo"))
    c.set_net_fn("dqn", net.value.dqn_conv())
    c.set_env(lambda: envs.Atari("Breakout"))
    c.eval_env = envs.Atari("Breakout")
    agent = agents.DQNAgent(c)
    c.initialize_hooks()
    _ = agent.eval_episode()
    agent.close()
    video = c.logger.logdir.joinpath("BreakoutVideo-0.avi")
    assert video.exists()
def test_qvalue_hook(make_ag: callable, is_bootdqn: bool) -> None:
    """QValueHook should expose a per-action mean after an eval episode."""
    c = rainy.Config()
    hook = QValueHook()
    c.eval_hooks.append(hook)
    if is_bootdqn:
        c.set_replay_buffer(
            lambda capacity: replay.UniformReplayBuffer(replay.BootDQNReplayFeed)
        )
    agent = make_ag(c)
    c.initialize_hooks()
    _ = agent.eval_episode()
    agent.close()
    # NOTE(review): asserting the buffer is empty suggests q_values is
    # drained/reset when the episode ends, leaving only q_value_mean —
    # confirm against QValueHook's implementation.
    assert len(hook.q_values) == 0
    assert tuple(hook.q_value_mean.shape) == (c.action_dim,)
def test_image_hook_atari() -> None:
    """ImageWriterHook should dump episode states/actions to an .npz archive.

    Fix: the original appended a *second* ImageWriterHook instance instead of
    the local ``hook``; the assertions then read through the never-invoked
    local instance and only worked because both shared the same out_dir.
    Now the same instance is registered and inspected.
    """
    c = rainy.Config()
    hook = ImageWriterHook(out_dir="/tmp/rainy-acvp/imagehook-test")
    c.eval_hooks.append(hook)
    c.set_net_fn("dqn", net.value.dqn_conv())
    c.set_env(lambda: envs.Atari("Breakout"))
    c.eval_env = envs.Atari("Breakout")
    ag = agents.DQNAgent(c)
    c.initialize_hooks()
    _ = ag.eval_episode()
    ag.close()
    images = np.load(hook.out_dir.joinpath("ep1.npz"))
    assert images["states"][0].shape == (210, 160, 3)  # raw Atari RGB frame
    assert len(images["actions"].shape) == 1
def test_nstep_train(make_ag: callable) -> None:
    """A single n-step training iteration should yield one result per worker."""
    c = rainy.Config()
    c.logger.setup_logdir()
    c.nworkers = 6
    c.nsteps = 4
    c.ppo_minibatch_size = 12
    c.set_parallel_env(rainy.envs.DummyParallelEnv)
    c.set_net_fn("actor-critic", net.actor_critic.fc_shared(units=[32, 32]))
    c.set_net_fn("option-critic", net.option_critic.fc_shared(units=[32, 32]))
    c.set_env(partial(DummyEnvDeterministic, flatten=True))
    agent = make_ag(c)
    first_result = next(agent.train_episodes(1))
    assert len(first_result) == c.nworkers
    agent.close()
def test_eval_parallel(n: int, make_ag: callable) -> None:
    """Parallel evaluation returns n results matching the deterministic env."""
    c = rainy.Config()
    c.nworkers = 6
    c.nsteps = 5
    c.set_parallel_env(rainy.envs.DummyParallelEnv)
    c.set_net_fn("actor-critic", net.actor_critic.fc_shared(units=[32, 32]))
    c.set_net_fn("option-critic", net.option_critic.fc_shared(units=[32, 32]))
    c.set_env(partial(DummyEnvDeterministic, flatten=True))
    agent = make_ag(c)
    results = agent.eval_parallel(n=n)
    assert len(results) == n
    # The deterministic dummy env always yields return 20.0 in 3 steps.
    for result in results:
        assert result.return_ == 20.0
        assert result.length == 3
    agent.close()
def config() -> rainy.Config:
    """Build an A2C-style config with deterministic evaluation."""
    c = rainy.Config()
    c.max_steps = int(1e6)
    c.nworkers = 12
    c.nsteps = 5
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(lambda ps: Adam(ps, lr=0.001))
    c.grad_clip = 0.1
    c.value_loss_weight = 0.1
    c.use_gae = False
    c.eval_deterministic = True
    c.eval_freq = None
    c.entropy_weight = 0.001
    # c.set_net_fn('actor-critic', rainy.net.actor_critic.fc_shared(rnn=rainy.net.GruBlock))
    return c
def test_video_hook_pybullet() -> None:
    """VideoWriterHook should produce an .avi for a DDPG eval episode."""
    c = rainy.Config()
    c.eval_hooks.append(lib.hooks.VideoWriterHook(video_name="HopperVideo"))
    c.set_env(lambda: envs.PyBullet("Hopper"))
    c.set_explorer(lambda: lib.explore.GaussianNoise())
    c.set_explorer(lambda: lib.explore.Greedy(), key="eval")
    c.set_optimizer(lambda ps: torch.optim.Adam(ps, lr=1e-3), key="actor")
    c.set_optimizer(lambda ps: torch.optim.Adam(ps, lr=1e-3), key="critic")
    agent = agents.DDPGAgent(c)
    c.initialize_hooks()
    _ = agent.eval_episode()
    agent.close()
    video = c.logger.logdir.joinpath("HopperVideo-0.avi")
    assert video.exists()
def config() -> rainy.Config:
    """Build a PPO config with default evaluation disabled."""
    c = rainy.Config()
    c.max_steps = int(1e6)
    c.nworkers = 8
    c.nsteps = 32
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(lambda ps: Adam(ps, lr=2.5e-4, eps=1.0e-4))
    c.value_loss_weight = 0.1
    c.grad_clip = 0.1
    # PPO hyperparameters.
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = 64
    c.use_gae = True
    c.ppo_clip = 0.2
    c.eval_freq = None
    # c.set_net_fn('actor-critic', rainy.net.actor_critic.fc_shared(rnn=rainy.net.GruBlock))
    return c
def test_eval_parallel(n: int) -> None:
    """A2C parallel evaluation returns n results from the deterministic env."""
    c = rainy.Config()
    c.nworkers = 6
    c.nsteps = 5
    c.set_parallel_env(rainy.envs.MultiProcEnv)
    c.set_net_fn('actor-critic', rainy.net.actor_critic.fc_shared(units=[32, 32]))
    c.set_env(partial(DummyEnvDeterministic, flatten=True))
    agent = rainy.agents.A2cAgent(c)
    entropy = np.zeros(c.nworkers)
    results = agent.eval_parallel(n, entropy=entropy)
    assert len(results) == n
    # The deterministic dummy env always yields reward 20.0 in 3 steps.
    for result in results:
        assert result.reward == 20.0
        assert result.length == 3
    agent.close()
def main(
    envname: str = "4Rooms",
    obs_type: str = "image",
    num_options: int = 4,
    visualize_beta: bool = False,
) -> rainy.Config:
    """Build a termination-critic option config for RLPy grid worlds."""
    c = rainy.Config()
    if visualize_beta:
        c.eval_hooks.append(OptionVisualizeHook(num_options))
    c.set_env(lambda: RLPyGridWorld(envname, obs_type))
    c.max_steps = int(4e5)
    c.nworkers = 12
    c.nsteps = 5
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(lambda ps: optim.RMSprop(ps, lr=0.0007))
    # The termination head gets its own Adam optimizer.
    c.set_optimizer(lambda ps: optim.Adam(ps, lr=1e-4), key="termination")
    c.grad_clip = 1.0
    c.eval_freq = c.max_steps // 20
    c.network_log_freq = (c.max_steps // c.batch_size) // 10
    c.entropy_weight = 0.001
    c.value_loss_weight = 1.0
    # tc_exact_pmu is only enabled for grid-world environment names.
    c.tc_exact_pmu = "GridWorld" in envname
    if obs_type == "image":
        CONV_ARGS = dict(
            hidden_channels=(8, 8),
            feature_dim=128,
            cnn_params=[(4, 1), (2, 1)],
        )
        c.set_net_fn(
            "actor-critic",
            tc.oac_conv_shared(num_options=num_options, **CONV_ARGS),
        )
        c.set_net_fn(
            "termination-critic",
            tc.tc_conv_shared(num_options=num_options, **CONV_ARGS),
        )
    else:
        c.set_net_fn("actor-critic", tc.oac_fc_shared(num_options=num_options))
        c.set_net_fn(
            "termination-critic", tc.tc_fc_shared(num_options=num_options)
        )
    return c
def main(
    envname: str = "CartPoleSwingUpContinuous-v0", nworkers: int = 1
) -> rainy.Config:
    """Build a replay-based config with actor/critic/entropy optimizers."""
    c = rainy.Config()
    c.set_env(lambda: ClassicControl(envname))
    c.max_steps = int(1e5)
    # The same Adam settings are used for all three parameter groups.
    for key in ("actor", "critic", "entropy"):
        c.set_optimizer(lambda ps: Adam(ps, lr=3e-4), key=key)
    c.replay_size = int(1e5)
    c.train_start = int(1e4)
    c.eval_deterministic = True
    c.eval_freq = c.max_steps // 10
    c.sync_freq = 1
    c.grad_clip = None
    c.nworkers = nworkers
    c.replay_batch_size = 256 * nworkers
    return c
def test_state_hook_pybullet() -> None:
    """StateWriterHook should dump Hopper states/actions as an .npz archive."""
    c = rainy.Config()
    hook = lib.hooks.StateWriterHook(out_dir="/tmp/rainy-acvp/imagehook-pybullet")
    c.eval_hooks.append(hook)
    c.set_env(lambda: envs.PyBullet("Hopper"))
    c.set_explorer(lambda: lib.explore.GaussianNoise())
    c.set_explorer(lambda: lib.explore.Greedy(), key="eval")
    c.set_optimizer(lambda ps: torch.optim.Adam(ps, lr=1e-3), key="actor")
    c.set_optimizer(lambda ps: torch.optim.Adam(ps, lr=1e-3), key="critic")
    agent = agents.DDPGAgent(c)
    c.initialize_hooks()
    _ = agent.eval_episode()
    agent.close()
    episodes = np.load(hook.out_dir.joinpath("ep1.npz"))
    assert episodes["states"][0].shape == (15, )  # state space
    assert episodes["actions"][0].shape == (3, )  # action space
def main(envname: str = "Hopper") -> rainy.Config:
    """Build an A2C config for PyBullet locomotion with a Gaussian policy."""
    c = rainy.Config()
    c.set_env(lambda: PyBullet(envname))
    c.set_net_fn(
        "actor-critic",
        rainy.net.actor_critic.fc_shared(policy=SeparateStdGaussianDist),
    )
    c.set_parallel_env(pybullet_parallel())
    c.max_steps = int(1e6)
    c.nworkers = 12
    c.nsteps = 5
    c.set_optimizer(lambda ps: Adam(ps, lr=0.001))
    c.grad_clip = 0.5
    c.gae_lambda = 0.95
    c.value_loss_weight = 0.5
    c.entropy_weight = 0.0
    c.use_gae = False
    c.eval_deterministic = False
    c.eval_freq = None
    return c
def main(envname: str = "Breakout") -> rainy.Config:
    """Build an A2C config for Atari."""
    c = rainy.Config()
    c.set_env(lambda: Atari(envname, frame_stack=False))
    c.set_optimizer(lambda ps: RMSprop(ps, lr=7e-4, alpha=0.99, eps=1e-5))
    # c.set_net_fn('actor-critic', rainy.net.actor_critic.conv_shared(rnn=net.GruBlock))
    c.set_net_fn("actor-critic", rainy.net.actor_critic.conv_shared())
    c.nworkers = 16
    c.nsteps = 5
    c.set_parallel_env(atari_parallel())
    c.grad_clip = 0.5
    c.value_loss_weight = 1.0
    c.use_gae = False
    c.max_steps = int(2e7)
    # Evaluation settings.
    c.eval_env = Atari(envname)
    c.eval_deterministic = False
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = None
    return c
def main(
    envname: str = "4Rooms",
    num_options: int = 4,
    opt_delib_cost: float = 0.0,
    opt_beta_adv_merginal: float = 0.01,
    obs_type: str = "image",
    use_gae: bool = False,
    opt_avg_baseline: bool = False,
    visualize_beta: bool = False,
) -> rainy.Config:
    """Build an option-critic config for RLPy grid worlds."""
    c = rainy.Config()
    if visualize_beta:
        c.eval_hooks.append(OptionVisualizeHook(num_options))
    c.set_env(lambda: RLPyGridWorld(envname, obs_type))
    c.max_steps = int(4e5)
    c.nworkers = 12
    c.nsteps = 5
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(lambda ps: optim.RMSprop(ps, lr=0.0007))
    c.grad_clip = 1.0
    c.eval_freq = c.max_steps // 20
    c.network_log_freq = (c.max_steps // c.batch_size) // 10
    c.entropy_weight = 0.001
    c.value_loss_weight = 1.0
    # Option-critic specific knobs, taken straight from the arguments.
    c.opt_delib_cost = opt_delib_cost
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    c.opt_avg_baseline = opt_avg_baseline
    c.use_gae = use_gae
    # Image-like observations use the small conv net, anything else the fc net.
    if obs_type in ("image", "binary-image"):
        c.set_net_fn(
            "option-critic",
            oc.conv_shared(
                num_options=num_options,
                hidden_channels=(8, 8),
                feature_dim=128,
                cnn_params=[(4, 1), (2, 1)],
            ),
        )
    else:
        c.set_net_fn("option-critic", oc.fc_shared(num_options=num_options))
    return c
def main(envname: str = "HalfCheetah") -> rainy.Config:
    """Build a PPO config for PyBullet locomotion with a Gaussian policy."""
    c = rainy.Config()
    c.set_env(lambda: PyBullet(envname))
    c.set_net_fn(
        "actor-critic",
        rainy.net.actor_critic.fc_shared(policy=SeparateStdGaussianDist),
    )
    c.set_parallel_env(pybullet_parallel())
    c.set_optimizer(lambda ps: Adam(ps, lr=3.0e-4, eps=1.0e-4))
    c.max_steps = int(2e6)
    c.grad_clip = 0.5
    # PPO hyperparameters.
    c.value_loss_weight = 0.5
    c.entropy_weight = 0.0
    c.gae_lambda = 0.95
    c.nworkers = 16
    c.nsteps = 128
    # 16 minibatches per rollout of 16 workers x 128 steps.
    c.ppo_minibatch_size = (16 * 128) // 16
    c.ppo_clip = 0.2
    c.use_gae = True
    c.eval_freq = None
    return c
def main(
    envname: str = "Hopper",
    nworkers: int = 1,
    mujoco: bool = False,
) -> rainy.Config:
    """Build an off-policy config with actor/critic/entropy optimizers."""
    c = rainy.Config()
    env_cls = rainy.envs.Mujoco if mujoco else rainy.envs.PyBullet
    c.set_env(lambda: env_cls(envname))
    c.max_steps = int(1e6)
    # The same Adam settings are used for all three parameter groups.
    for key in ("actor", "critic", "entropy"):
        c.set_optimizer(lambda ps: Adam(ps, lr=3e-4), key=key)
    c.replay_size = int(1e6)
    c.train_start = int(1e4)
    c.eval_deterministic = True
    c.eval_freq = c.max_steps // 100
    c.sync_freq = 1
    c.grad_clip = None
    c.nworkers = nworkers
    c.replay_batch_size = 256 * nworkers
    return c
def main(
    envname: str = "Breakout",
    num_options: int = 4,
    opt_delib_cost: float = 0.0,
    opt_beta_adv_merginal: float = 0.01,
    opt_avg_baseline: bool = False,
    proximal_update_for_mu: bool = False,
) -> rainy.Config:
    """Build a PPOC (proximal option-critic) config for Atari.

    Fix: ``opt_avg_baseline`` and ``proximal_update_for_mu`` were accepted as
    parameters but never written to the config, so passing them had no effect.
    They are now wired through; the defaults preserve the original behavior.
    """
    c = rainy.Config()
    c.set_env(lambda: Atari(envname, frame_stack=False))
    c.set_parallel_env(atari_parallel())
    c.set_optimizer(lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5))
    c.max_steps = int(2e7)
    c.grad_clip = 0.5
    # Option settings
    c.opt_delib_cost = opt_delib_cost
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    c.opt_avg_baseline = opt_avg_baseline
    c.proximal_update_for_mu = proximal_update_for_mu
    c.set_net_fn(
        "option-critic",
        rainy.net.option_critic.conv_shared(num_options=num_options, has_mu=True),
    )
    # PPO params
    c.nworkers = 8
    c.nsteps = 128
    c.value_loss_weight = 1.0
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = 32 * 8
    c.ppo_clip = 0.1
    c.ppo_epochs = 3
    c.use_gae = True
    # Eval settings
    c.eval_env = Atari(envname)
    c.eval_deterministic = False
    c.episode_log_freq = 100
    c.eval_freq = c.max_steps // 10
    c.save_freq = None
    return c
def main(
    envname: str = "Hopper",
    num_options: int = 2,
    opt_delib_cost: float = 0.0,
    opt_beta_adv_merginal: float = 0.01,
    opt_avg_baseline: bool = False,
    proximal_update_for_mu: bool = False,
) -> rainy.Config:
    """Build a PPOC (proximal option-critic) config for PyBullet locomotion.

    Fix: ``opt_avg_baseline`` was accepted as a parameter but never written to
    the config, so passing it had no effect. It is now wired through; the
    default preserves the original behavior.
    """
    c = rainy.Config()
    c.set_env(lambda: PyBullet(envname))
    c.set_parallel_env(
        pybullet_parallel(normalize_obs=True, normalize_reward=True))
    c.set_optimizer(lambda params: Adam(params, lr=3.0e-4, eps=1.0e-4))
    c.max_steps = int(1e6)
    c.grad_clip = 0.5
    # Option settings
    c.opt_delib_cost = opt_delib_cost
    c.opt_beta_adv_merginal = opt_beta_adv_merginal
    c.opt_avg_baseline = opt_avg_baseline
    c.set_net_fn(
        "option-critic",
        rainy.net.option_critic.fc_shared(
            num_options=num_options,
            policy=PerOptionStdGaussianDist,
            has_mu=True,
        ),
    )
    # PPO params
    c.nworkers = 4
    c.nsteps = 512
    c.ppo_minibatch_size = (4 * 512) // 8
    c.ppo_clip = 0.2
    c.use_gae = True
    c.eval_freq = c.max_steps // 10
    c.entropy_weight = 0.01
    c.value_loss_weight = 1.0
    c.eval_deterministic = True
    c.eval_times = 4
    c.proximal_update_for_mu = proximal_update_for_mu
    return c