def run_task(*_): """Implement the run_task method needed to run experiments with rllab.""" pass_params = (env_name, sumo_params, vehicles, env_params, net_params, initial_config, scenario) env = GymEnv(env_name, record_video=False, register_params=pass_params) horizon = env.horizon env = normalize(env) policy = GaussianGRUPolicy(env_spec=env.spec, hidden_sizes=(64, )) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = PPO( env=env, policy=policy, baseline=baseline, batch_size=horizon * 32 * 2, max_path_length=horizon, # whole_paths=True, n_itr=400, discount=0.999, # step_size=0.01, ) algo.train()
# Imports assumed for this excerpt (standard rllab module layout).
from rllab.algos.ppo import PPO
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.envs.gym_env import GymEnv
from rllab.envs.normalized_env import normalize
from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy


def run_task(*_):
    # Please note that environments with different action spaces may require
    # different policies. For example, with a Discrete action space a
    # CategoricalMLPPolicy works, but a Box action space may need a
    # GaussianMLPPolicy (see the trpo_gym_pendulum.py example).
    env = normalize(
        GymEnv(env_name="LunarLanderContinuous-v2", force_reset=True))

    # policy = CategoricalMLPPolicy(
    #     env_spec=env.spec,
    #     # The neural network policy should have two hidden layers, each with 32 hidden units.
    #     hidden_sizes=(32, 32)
    # )
    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        # The neural network policy should have two hidden layers, each with 64 hidden units.
        hidden_sizes=(64, 64))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    # max_path_length = env.horizon
    algo = PPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=300,
        n_itr=10000,
        discount=0.99,
        # step_size=0.02,
        truncate_local_is_ratio=0.2
        # Uncomment both lines (this and the plot parameter below) to enable plotting
        # plot=True,
    )
    algo.train()
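# The run_task functions above are normally launched through rllab's
# run_experiment_lite helper rather than called directly. A minimal launcher
# sketch, assuming the standard rllab instrumentation API; the seed, worker
# count, and snapshot settings below are illustrative, not taken from the
# original scripts.
from rllab.misc.instrument import run_experiment_lite

run_experiment_lite(
    run_task,
    n_parallel=1,          # number of parallel sampler workers
    snapshot_mode="last",  # only keep the snapshot from the final iteration
    seed=1,
    # plot=True,           # pair with plot=True in the algorithm to enable live plotting
)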
def main(exp_name, ent_wt=1.0):
    # register_custom_envs() and the PPO entropy_weight argument are provided
    # by the surrounding project and its rllab fork; 'Acrobot-v2' is a custom
    # environment id registered there.
    register_custom_envs()
    env_name = 'Acrobot-v2'
    env = GymEnv(env_name)
    policy = GaussianMLPPolicy(env_spec=env, hidden_sizes=(64, 64))
    algo = PPO(env=env,
               policy=policy,
               n_itr=1500,
               batch_size=8000,
               max_path_length=1000,
               discount=0.95,
               store_paths=True,
               entropy_weight=ent_wt,
               baseline=LinearFeatureBaseline(env_spec=env))

    data_path = 'data/acrobat_data_rllab_ppo/%s/' % exp_name
    os.makedirs(data_path, exist_ok=True)
    logger.set_snapshot_dir(data_path)
    algo.train()
    logger.set_snapshot_dir(None)
def main(exp_name, ent_wt=1.0):
    register_custom_envs()
    env_name = 'LunarLanderContinuous-v3'
    env = GymEnv(env_name)
    policy = GaussianMLPPolicy(env_spec=env, hidden_sizes=(64, 64))
    baseline = GaussianMLPBaseline(env_spec=env)
    algo = PPO(env=env,
               policy=policy,
               n_itr=1500,
               batch_size=8000,
               max_path_length=1000,
               discount=0.99,
               store_paths=True,
               entropy_weight=ent_wt,
               baseline=baseline)

    data_path = 'data/%s_data_rllab_%s/%s/' % (env_name.replace('-', '_'),
                                               str(algo.__class__.__name__),
                                               exp_name)
    os.makedirs(data_path, exist_ok=True)
    logger.set_snapshot_dir(data_path)
    algo.train()
    logger.set_snapshot_dir(None)
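# Because the algorithm above is built with store_paths=True, each snapshot
# written to data_path also contains the sampled rollouts. A minimal inspection
# sketch, assuming rllab's default joblib-based snapshot files (itr_<N>.pkl);
# the experiment directory and iteration number below are illustrative
# placeholders, not taken from the original script.
import joblib
import numpy as np

snapshot = joblib.load(
    'data/LunarLanderContinuous_v3_data_rllab_PPO/test/itr_1499.pkl')
paths = snapshot['paths']  # stored rollouts, present because store_paths=True
print('mean undiscounted return:',
      np.mean([path['rewards'].sum() for path in paths]))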
def run_task(*_):
    # `models` and the index `k` (and, if uncommented, `NN_sizes` and `i`) are
    # expected to be defined at module level by the surrounding sweep script.
    env = normalize(GymEnv(models[k]))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    learn_std = True
    init_std = 1

    # hidden_sizes = NN_sizes[i]
    # hidden_sizes = (8,)
    # hidden_sizes = (32, 32)
    hidden_sizes = (100, 50, 25)

    policy = GaussianMLPPolicy(env_spec=env.spec,
                               hidden_sizes=hidden_sizes,
                               learn_std=learn_std,
                               init_std=init_std)

    # =======================
    # Defining the algorithm
    # =======================
    batch_size = 5000
    n_itr = 200
    gamma = .99
    step_size = 0.01
    # max_path_length = 96

    algo = PPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=batch_size,
        # max_path_length=max_path_length,
        n_itr=n_itr,
        discount=gamma,
        step_size=step_size)
    algo.train()
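# The commented-out NN_sizes[i] and the models[k] lookup above suggest this
# run_task is driven by an outer sweep. A hypothetical driver sketch; the model
# list, candidate sizes, and loop structure are assumptions, not part of the
# original.
models = ['Walker2d-v1', 'Hopper-v1']        # hypothetical environment ids
NN_sizes = [(8,), (32, 32), (100, 50, 25)]   # hypothetical architectures to compare

for k in range(len(models)):
    for i in range(len(NN_sizes)):
        # run_task reads models[k] (and NN_sizes[i], if that line is
        # uncommented) from these module-level loop variables.
        run_task()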
#!/usr/bin/python3.5
from pre_maml.envs.quad_falling_down import QuadFallingDown
from rllab.envs.normalized_env import normalize
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.algos.ppo import PPO
from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy

env = normalize(env=QuadFallingDown())

policy = GaussianMLPPolicy(env_spec=env.spec)

baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = PPO(
    env=env,
    policy=policy,
    baseline=baseline,
)
algo.train()