예제 #1
0
def run_task(*_):
    """Train CAPG on a normalized Swimmer environment.

    Builds the main Gaussian MLP policy and the four auxiliary policies
    handed to CAPG (backup, mix, positive-eps and negative-eps), pairs
    them with a zero baseline, and starts training.

    Args:
        *_: Any positional arguments are accepted and ignored.
    """
    # NOTE(review): logs are written under the "Ant" label although the
    # environment constructed below is SwimmerEnv -- confirm this is intended.
    env_name = "Ant"
    hidden_sizes = (32, 32)
    env = TheanoEnv(normalize(SwimmerEnv()))

    # The main policy is created first, then the auxiliary ones in the same
    # order as they are passed to CAPG.
    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=hidden_sizes)
    aux_policies = {
        role: GaussianMLPPolicy(env.spec, hidden_sizes=hidden_sizes)
        for role in (
            "backup_policy",
            "mix_policy",
            "pos_eps_policy",
            "neg_eps_policy",
        )
    }

    baseline = ZeroBaseline(env_spec=env.spec)

    trainer = CAPG(
        env=env,
        policy=policy,
        **aux_policies,
        n_timestep=5e6,
        learning_rate=0.01,
        batch_size=5000,
        minibatch_size=500,
        n_sub_itr=10,
        baseline=baseline,
        max_path_length=500,
        discount=0.99,
        decay_learing_rate=True,  # spelling matches the keyword CAPG is called with
        log_dir='./logs/' + env_name,
    )
    trainer.train()
예제 #2
0
    def test_dynamics_rand(self):
        """Step a Swimmer env whose torso density is randomized.

        Registers one variation on the ``density`` attribute of the geom
        named ``torso`` (Method.COEFFICIENT, Distribution.UNIFORM, range
        0.5 to 1.5), wraps SwimmerEnv with it, then runs 5 resets of 5
        randomly sampled steps each. The test passes if nothing raises.
        """
        variations = Variations()
        (
            variations.randomize()
            .at_xpath(".//geom[@name='torso']")
            .attribute("density")
            .with_method(Method.COEFFICIENT)
            .sampled_from(Distribution.UNIFORM)
            .with_range(0.5, 1.5)
            .add()
        )

        randomized_env = randomize(SwimmerEnv(), variations)

        for _episode in range(5):
            randomized_env.reset()
            for _step in range(5):
                action = randomized_env.action_space.sample()
                randomized_env.step(action)
예제 #3
0
from garage.envs.box2d import CartpoleEnv
from garage.envs.mujoco import SwimmerEnv
from garage.theano.algos.capg import CAPG
from garage.theano.envs import TheanoEnv
from garage.theano.baselines import GaussianMLPBaseline
from garage.theano.policies import GaussianMLPPolicy
from garage.misc.instrument import run_experiment
from garage.misc.ext import set_seed
import numpy as np
# Hyperparameter sweep: one batch size x two learning rates x three
# repetitions. Every repetition builds a fresh environment, a fresh set of
# policies and a fresh baseline before constructing CAPG.
# NOTE(review): `normalize` and `LinearFeatureBaseline` are used below but are
# not among the imports visible in this excerpt -- confirm they are imported.
for batchsize in [5000]:
    for learning_rate in [0.05, 0.01]:
        for i in range(3):
            # Random integer drawn per repetition; its use is not visible in
            # this excerpt (presumably passed on further down -- confirm).
            seed = np.random.randint(1, 10000)
            env_name = "SGD_Swimmer_-t"
            hidden_sizes = (32, 32)
            env = TheanoEnv(normalize(SwimmerEnv()))
            # Main policy plus four auxiliary policies, all Gaussian MLPs with
            # the same hidden layer sizes.
            policy = GaussianMLPPolicy(env_spec=env.spec,
                                       hidden_sizes=hidden_sizes)
            backup_policy = GaussianMLPPolicy(env.spec,
                                              hidden_sizes=hidden_sizes)
            mix_policy = GaussianMLPPolicy(env.spec, hidden_sizes=hidden_sizes)
            pos_eps_policy = GaussianMLPPolicy(env.spec,
                                               hidden_sizes=hidden_sizes)
            neg_eps_policy = GaussianMLPPolicy(env.spec,
                                               hidden_sizes=hidden_sizes)

            baseline = LinearFeatureBaseline(env_spec=env.spec)

            # The CAPG call below is cut off in this excerpt; its remaining
            # arguments are not shown.
            algo = CAPG(
                env=env,
                policy=policy,
예제 #4
0
from garage.baselines import LinearFeatureBaseline
from garage.envs import normalize
from garage.envs.mujoco import SwimmerEnv
from garage.tf.algos import TRPO
from garage.tf.envs import TfEnv
from garage.tf.policies import GaussianMLPPolicy

# Swimmer task with observation and reward normalization switched on,
# wrapped in TfEnv.
env = TfEnv(
    normalize(SwimmerEnv(), normalize_obs=True, normalize_reward=True))

# Gaussian MLP policy with two hidden layers of 32 units.
policy = GaussianMLPPolicy(hidden_sizes=(32, 32), env_spec=env.spec)

baseline = LinearFeatureBaseline(env_spec=env.spec)

# TRPO hyperparameters, kept together so they can be read at a glance.
trpo_settings = {
    "batch_size": 4000,
    "max_path_length": 500,
    "n_itr": 40,
    "discount": 0.99,
    "step_size": 0.01,
    "plot": False,
}

algo = TRPO(env=env, policy=policy, baseline=baseline, **trpo_settings)
algo.train()
예제 #5
0
from garage.envs.mujoco import SwimmerEnv
from garage.envs.mujoco.randomization import Distribution
from garage.envs.mujoco.randomization import Method
from garage.envs.mujoco.randomization import randomize
from garage.envs.mujoco.randomization import Variations

# Register a single variation: the ``density`` attribute of the geom named
# ``torso``, using Method.COEFFICIENT with Distribution.UNIFORM over the
# range 0.5 to 1.5.
variations = Variations()
(
    variations.randomize()
    .at_xpath(".//geom[@name='torso']")
    .attribute("density")
    .with_method(Method.COEFFICIENT)
    .sampled_from(Distribution.UNIFORM)
    .with_range(0.5, 1.5)
    .add()
)

env = randomize(SwimmerEnv(), variations)

# 1000 resets, each followed by 1000 steps with randomly sampled actions.
for _episode in range(1000):
    env.reset()
    for _step in range(1000):
        action = env.action_space.sample()
        env.step(action)
예제 #6
0
from garage.baselines import LinearFeatureBaseline
from garage.envs import normalize
from garage.envs.box2d import CartpoleEnv
from garage.envs.mujoco import SwimmerEnv
from garage.tf.algos import VPG
from garage.tf.envs import TfEnv
from garage.tf.policies import GaussianMLPPolicy
from garage.misc.instrument import run_experiment

# Normalized Swimmer task wrapped in TfEnv.
env = TfEnv(normalize(SwimmerEnv()))

# Gaussian MLP policy with two hidden layers of 32 units.
policy = GaussianMLPPolicy(
    name="policy",
    env_spec=env.spec,
    hidden_sizes=(32, 32),
)

baseline = LinearFeatureBaseline(env_spec=env.spec)

# Learning rate handed to the TF optimizer through VPG's optimizer_args.
optimizer_args = {"tf_optimizer_args": {"learning_rate": 1e-4}}

algo = VPG(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=5000,
    max_path_length=500,
    n_itr=40,
    discount=0.995,
    optimizer_args=optimizer_args,
)

# NOTE(review): `algo.train()` is evaluated before `run_experiment` is entered,
# so `run_experiment` receives its return value rather than a callable.
# Presumably this relies on a stubbing mechanism not visible in this excerpt --
# confirm against the `run_experiment` documentation.
# The remaining arguments of this call are cut off in this excerpt.
run_experiment(algo.train(),
               n_parallel=1,
               snapshot_mode="last",
               seed=1,
               use_gpu=True,
예제 #7
0
 def test_can_create_env(self):
     """Check that a normalized SwimmerEnv can be constructed and is truthy."""
     # Fixes https://github.com/rlworkgroup/garage/pull/420
     wrapped_env = normalize(SwimmerEnv())
     assert wrapped_env