# --- Training config: 'conditional_displacement_cal' action script. ---
# Reconstructed from a whitespace-mangled paste: the original single line
# began with '#', which commented out every statement below. Restored to
# one statement per line so the script actually executes.

# Params for action wrapper
action_script = 'conditional_displacement_cal'
# 'alpha' scale of 27 corresponds to nbar of 800 ish
action_scale = {'alpha': 27, 'phi_g': pi, 'phi_e': pi}  # NOTE(review): assumes `pi` imported earlier in file — confirm
to_learn = {'alpha': True, 'phi_g': True, 'phi_e': True}

train_batch_size = 200
eval_batch_size = 2

# Episode length is constant (1 step) regardless of epoch for both drivers.
train_episode_length = lambda x: 1
eval_episode_length = lambda x: 1

# Create drivers for data collection
from gkp.agents import dynamic_episode_driver_sim_env

collect_driver = dynamic_episode_driver_sim_env.DynamicEpisodeDriverSimEnv(
    env_kwargs, reward_kwargs, train_batch_size, action_script, action_scale,
    to_learn, train_episode_length)

eval_driver = dynamic_episode_driver_sim_env.DynamicEpisodeDriverSimEnv(
    env_kwargs, reward_kwargs_eval, eval_batch_size, action_script, action_scale,
    to_learn, eval_episode_length)

PPO.train_eval(
    root_dir=root_dir,
    random_seed=0,
    num_epochs=3000,
    # Params for train
    normalize_observations=True,
    normalize_rewards=False,
    discount_factor=1.0,
    lr=1e-4,
    # NOTE(review): source chunk is truncated here — the original call very
    # likely passes further kwargs (drivers, policy params). Confirm against
    # the full file before relying on this closing paren.
    )
# --- Training config: multitask collection with single-env evaluation. ---
# Reconstructed from a whitespace-mangled paste: the original single line had
# all statements fused together and an unclosed PPO.train_eval( call, so it
# did not parse. Restored to one statement per line.

# Collect experience across a list of environments/rewards according to
# env_schedule; evaluate on a single fixed environment (index 0).
from gkp.agents import multitask_episode_driver_sim_env

collect_driver = multitask_episode_driver_sim_env.MultitaskEpisodeDriverSimEnv(
    env_kwargs_list, rew_kwargs_list, train_batch_size, action_script,
    action_scale, to_learn, episode_length_list, env_schedule=env_schedule)

from gkp.agents import dynamic_episode_driver_sim_env

eval_driver = dynamic_episode_driver_sim_env.DynamicEpisodeDriverSimEnv(
    env_kwargs_0, rew_kwargs_0, eval_batch_size, action_script, action_scale,
    to_learn, episode_length_0)

PPO.train_eval(
    root_dir=root_dir,
    random_seed=0,
    num_epochs=50000,
    # Params for train
    normalize_observations=True,
    normalize_rewards=False,
    discount_factor=1.0,
    lr=1e-3,
    # Step-wise learning-rate decay: 1e-3 for the first 500 epochs, then 1e-4.
    lr_schedule=lambda x: 1e-3 if x < 500 else 1e-4,
    num_policy_updates=20,
    # KL-penalty adaptation disabled (beta starts at 0, no cutoff).
    initial_adaptive_kl_beta=0.0,
    kl_cutoff_factor=0,
    # NOTE(review): source chunk is truncated here — the original call very
    # likely passes further kwargs (drivers, policy params). Confirm against
    # the full file before relying on this closing paren.
    )