Example #1
def run_task(*_):
    """Implement the run_task method needed to run experiments with rllab."""
    # env_name, sumo_params, vehicles, env_params, net_params, initial_config,
    # and scenario are experiment parameters (environment, SUMO simulation,
    # vehicles, network, etc.) defined at module level, outside this snippet.
    pass_params = (env_name, sumo_params, vehicles, env_params, net_params,
                   initial_config, scenario)

    env = GymEnv(env_name, record_video=False, register_params=pass_params)
    horizon = env.horizon
    env = normalize(env)

    policy = GaussianGRUPolicy(env_spec=env.spec, hidden_sizes=(64, ))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = PPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=horizon * 32 * 2,
        max_path_length=horizon,
        # whole_paths=True,
        n_itr=400,
        discount=0.999,
        # step_size=0.01,
    )
    algo.train()
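In rllab, a run_task stub like this is typically handed to run_experiment_lite, which handles logging, snapshotting, and sampler workers. A minimal launch sketch (the seed, parallelism, and experiment prefix below are illustrative, not taken from the original example):

from rllab.misc.instrument import run_experiment_lite

run_experiment_lite(
    run_task,
    n_parallel=1,           # number of parallel sampler workers (illustrative)
    snapshot_mode="last",   # keep only the latest iteration's snapshot
    seed=0,                 # illustrative seed
    exp_prefix="ppo_gru_flow_example",  # hypothetical experiment name
)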
Example #2
# Imports assumed from rllab's standard layout (not shown in the original snippet):
from rllab.algos.ppo import PPO
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.envs.gym_env import GymEnv
from rllab.envs.normalized_env import normalize
from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy


def run_task(*_):
    # Please note that different environments with different action spaces may
    # require different policies. For example with a Discrete action space, a
    # CategoricalMLPPolicy works, but for a Box action space may need to use
    # a GaussianMLPPolicy (see the trpo_gym_pendulum.py example)
    env = normalize(
        GymEnv(env_name="LunarLanderContinuous-v2", force_reset=True))
    # policy = CategoricalMLPPolicy(
    #     env_spec=env.spec,
    #     # The neural network policy should have two hidden layers, each with 32 hidden units.
    #     hidden_sizes=(32, 32)
    # )
    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        # The neural network policy should have two hidden layers, each with 64 hidden units.
        hidden_sizes=(64, 64))

    baseline = LinearFeatureBaseline(env_spec=env.spec)
    # max_path_length = env.horizon
    algo = PPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=300,
        n_itr=10000,
        discount=0.99,
        # step_size=0.02,
        truncate_local_is_ratio=0.2,
        # Uncomment to enable plotting (plot=True must also be passed to the
        # experiment runner, e.g. run_experiment_lite):
        # plot=True,
    )
    algo.train()
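The comment at the top of run_task notes that the policy class must match the action space. A small helper sketching that choice (make_policy is a hypothetical name; the import paths follow rllab's standard layout):

from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy
from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
from rllab.spaces.discrete import Discrete


def make_policy(env, hidden_sizes=(32, 32)):
    """Pick a policy class that matches the environment's action space."""
    if isinstance(env.spec.action_space, Discrete):
        # Discrete actions (e.g. CartPole-v0): categorical distribution over actions.
        return CategoricalMLPPolicy(env_spec=env.spec, hidden_sizes=hidden_sizes)
    # Continuous (Box) actions, as with LunarLanderContinuous-v2: Gaussian policy.
    return GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=hidden_sizes)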
Example #3
def main(exp_name, ent_wt=1.0):
    register_custom_envs()
    env_name = 'Acrobot-v2'
    env = GymEnv(env_name)
    policy = GaussianMLPPolicy(env_spec=env, hidden_sizes=(64, 64))
    algo = PPO(env=env,
               policy=policy,
               n_itr=1500,
               batch_size=8000,
               max_path_length=1000,
               discount=0.95,
               store_paths=True,
               entropy_weight=ent_wt,
               baseline=LinearFeatureBaseline(env_spec=env))
    data_path = 'data/acrobat_data_rllab_ppo/%s/' % exp_name
    os.makedirs(data_path, exist_ok=True)
    logger.set_snapshot_dir(data_path)
    algo.train()
    logger.set_snapshot_dir(None)
Example #4
def main(exp_name, ent_wt=1.0):
    register_custom_envs()
    env_name = 'LunarLanderContinuous-v3'
    env = GymEnv(env_name)
    policy = GaussianMLPPolicy(env_spec=env, hidden_sizes=(64, 64))
    baseline = GaussianMLPBaseline(env_spec=env)
    algo = PPO(env=env,
               policy=policy,
               n_itr=1500,
               batch_size=8000,
               max_path_length=1000,
               discount=0.99,
               store_paths=True,
               entropy_weight=ent_wt,
               baseline=baseline)
    data_path = 'data/%s_data_rllab_%s/%s/' % (env_name.replace(
        '-', '_'), str(algo.__class__.__name__), exp_name)
    os.makedirs(data_path, exist_ok=True)
    logger.set_snapshot_dir(data_path)
    algo.train()
    logger.set_snapshot_dir(None)
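A possible entry point for the two main functions above (the experiment name and entropy weight are illustrative only):

if __name__ == "__main__":
    # Hypothetical invocation; with the second main above this writes snapshots
    # to data/LunarLanderContinuous_v3_data_rllab_PPO/run0/.
    main(exp_name="run0", ent_wt=0.1)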
Example #5
def run_task(*_):
    env = normalize(GymEnv(models[k]))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    learn_std = True
    init_std = 1

    # hidden_sizes = NN_sizes[i]
    # hidden_sizes=(8,)
    # hidden_sizes=(32, 32)
    hidden_sizes = (100, 50, 25)

    policy = GaussianMLPPolicy(env_spec=env.spec,
                               hidden_sizes=hidden_sizes,
                               learn_std=learn_std,
                               init_std=init_std)

    # =======================
    # Defining the algorithm
    # =======================
    batch_size = 5000
    n_itr = 200
    gamma = .99
    step_size = 0.01
    # max_path_length = 96,

    algo = PPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=batch_size,
        # max_path_length=max_path_length,
        n_itr=n_itr,
        discount=gamma,
        step_size=step_size)
    algo.train()
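The models[k] lookup and the commented-out NN_sizes[i] suggest this run_task was defined inside a sweep over environments and network sizes. A minimal, self-contained launcher in that spirit, using rllab's run_experiment_lite (every name and value below is illustrative, not from the original):

from rllab.misc.instrument import run_experiment_lite

# Illustrative stand-ins for the sweep variables referenced in run_task above.
models = {0: "Pendulum-v0"}
k = 0

run_experiment_lite(
    run_task,
    n_parallel=1,           # sampler workers
    snapshot_mode="last",   # keep only the most recent snapshot
    seed=1,
    exp_prefix="ppo_%s" % models[k].replace("-", "_"),
)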
Example #6
#!/usr/bin/python3.5

from pre_maml.envs.quad_falling_down import QuadFallingDown

from rllab.envs.normalized_env import normalize
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.algos.ppo import PPO
from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy

env = normalize(env=QuadFallingDown())
policy = GaussianMLPPolicy(env_spec=env.spec)
baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = PPO(
    env=env,
    policy=policy,
    baseline=baseline,
)
algo.train()
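Example #6 relies on PPO's built-in defaults for everything except the environment, policy, and baseline. For comparison with the earlier examples, the same call with the usual knobs spelled out (the values are illustrative, mirroring Examples #2 and #5):

algo = PPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=4000,        # samples collected per iteration
    max_path_length=500,    # episode length cap
    n_itr=200,              # training iterations
    discount=0.99,
    step_size=0.01,         # KL step size, as in Example #5
)
algo.train()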