from rllab.misc import instrument


def test_variant_generator():
    # `it` is the test-case handle provided by the surrounding test
    # harness (not shown in this snippet).
    vg = instrument.VariantGenerator()
    vg.add("key1", [1, 2, 3])
    vg.add("key2", [True, False])
    vg.add("key3", lambda key2: [1] if key2 else [1, 2])
    # key3 contributes 1 value when key2 is True and 2 when it is False,
    # so there are 3 * 1 + 3 * 2 = 9 variants in total.
    it.assertEqual(len(vg.variants()), 9)

    # The same variants can be declared with the @instrument.variant
    # decorator; a generator method may name another variant as an
    # argument to depend on its value.
    class VG(instrument.VariantGenerator):
        @instrument.variant
        def key1(self):
            return [1, 2, 3]

        @instrument.variant
        def key2(self):
            yield True
            yield False

        @instrument.variant
        def key3(self, key2):
            if key2:
                yield 1
            else:
                yield 1
                yield 2

    it.assertEqual(len(VG().variants()), 9)
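
For reference, variants() returns one plain dict per combination, which is
what the len(...) assertions above are counting. A minimal sketch of
inspecting them (assuming only rllab's instrument module):

from rllab.misc import instrument

vg = instrument.VariantGenerator()
vg.add("key1", [1, 2])
vg.add("key2", [True, False])
for v in vg.variants():
    # Each variant is a dict such as {"key1": 1, "key2": True}.
    print(v)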
Example #2
from rllab.algos.erwr import ERWR
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.baselines.zero_baseline import ZeroBaseline
from rllab.envs.gym_env import GymEnv
from rllab.envs.normalized_env import normalize
from rllab.misc import instrument
from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy

# Stub the classes in this module so the constructor calls below are
# recorded rather than executed; the recorded calls can be serialized
# and run later by rllab's experiment launcher.
instrument.stub(globals())

env = normalize(GymEnv("Swimmer-v1", record_video=False))

policy = GaussianMLPPolicy(
    env_spec=env.spec,
    # The neural network policy should have two hidden layers, each with 42 hidden units.
    hidden_sizes=(42, 42))

# baseline = LinearFeatureBaseline(env_spec=env.spec)
baseline = ZeroBaseline(env_spec=env.spec)

vg = instrument.VariantGenerator()
vg.add("seed", range(1))

variants = vg.variants()

for variant in variants:
    algo = ERWR(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=50000,
        max_path_length=500,
        n_itr=500,
        discount=0.99,
        plot=True,
    )
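    # The original snippet builds `algo` but never launches it, and the
    # variant's seed goes unused. A typical rllab pattern (an assumption
    # here, not part of the original code) is to hand the stubbed
    # algo.train() call to run_experiment_lite, seeding each run from
    # the variant:
    instrument.run_experiment_lite(
        algo.train(),
        n_parallel=1,
        seed=variant["seed"],
        snapshot_mode="last",
        plot=True,
    )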