Beispiel #1
0
    # Keyword arguments of an experiment configuration; the call that receives
    # them opens outside this excerpt.
    experiment_name='reacher-image-mpc',
    experiment_type='train_vae',
    env=env_params,
    # Model settings. do/du/ds/da/horizon are forwarded from same-named
    # variables defined elsewhere (presumably observation/control/latent-state/
    # latent-action sizes — TODO confirm against where they are assigned).
    model=dict(
        do=do, du=du, ds=ds, da=da, horizon=horizon,
        # Encoder pipeline composed with `>>`: reshape from `do` to
        # [64, 64, 3], three convolutions, flatten, Relu(256), Gaussian(ds).
        # NOTE(review): only the first convolution is followed by nn.Relu();
        # the second and third are chained directly — confirm this is intended.
        state_encoder=(nn.Reshape(do, [64, 64, 3])
                    >> nn.Convolution([7, 7, 64], strides=(1, 1)) >> nn.Relu()
                    >> nn.Convolution([5, 5, 32], strides=(2, 2))
                    >> nn.Convolution([3, 3, 8], strides=(2, 2))
                    >> nn.Flatten() >> nn.Relu(256) >> nn.Gaussian(ds)),
        # Decoder pipeline: Relu(ds, 512) feeds a reshape to [8, 8, 8]
        # (8 * 8 * 8 == 512), followed by three deconvolutions, a flatten and
        # a Bernoulli output. No strides are passed to the deconvolutions, so
        # whatever default `nn.Deconvolution` uses applies (not visible here).
        state_decoder=(nn.Relu(ds, 512) >> nn.Reshape([8, 8, 8])
                    >> nn.Deconvolution([3, 3, 32])
                    >> nn.Deconvolution([5, 5, 64])
                    >> nn.Deconvolution([7, 7, 3])
                    >> nn.Flatten() >> nn.Bernoulli()),
        # Action encoder and decoder use the same IdentityVariance setting.
        action_encoder=nn.IdentityVariance(variance=1e-4),
        action_decoder=nn.IdentityVariance(variance=1e-4),
        # Prior of type 'nnds' backed by a network whose first layer is sized
        # ds + da and whose output is Gaussian(ds).
        prior=dict(
            prior_type='nnds',
            network=(
                nn.Relu(ds + da, 200) >> nn.Relu(200) >> nn.Gaussian(ds)
            )
        ),
        # Cost of type 'nn' backed by a network whose first layer is sized ds
        # and whose output is Gaussian(1).
        cost=dict(
            cost_type='nn',
            network=(
                nn.Relu(ds, 200) >> nn.Relu(200) >> nn.Gaussian(1)
            ),
        ),

    ),
Beispiel #2
0
     # Keyword arguments of a model configuration; the call that receives them
     # opens outside this excerpt. The dimension values are forwarded from
     # same-named variables defined elsewhere.
     do=do,
     du=du,
     ds=ds,
     da=da,
     horizon=horizon,
     # Disabled alternative: convolutional state encoder/decoder that reshapes
     # to [32, 32, 2]. Kept for reference; the identity versions below are
     # the ones in effect.
     # state_encoder=(nn.Reshape(do, [32, 32, 2])
     # >> nn.Convolution([3, 3, 32]) >> nn.Relu()
     # >> nn.Convolution([3, 3, 32]) >> nn.Relu()
     # >> nn.Convolution([3, 3, 32]) >> nn.Relu()
     # >> nn.Flatten() >> nn.Relu(200) >> nn.Gaussian(ds)),
     # state_decoder=(nn.Relu(ds, 1024) >> nn.Reshape([16, 16, 4])
     # >> nn.Convolution([3, 3, 32]) >> nn.Relu()
     # >> nn.Deconvolution([2, 2, 32]) >> nn.Relu()
     # >> nn.Convolution([3, 3, 32]) >> nn.Relu()
     # >> nn.Convolution([2, 2, 2]) >> nn.Flatten() >> nn.Bernoulli()),
     # Active encoders/decoders: all four use nn.IdentityVariance() with no
     # explicit variance argument.
     state_encoder=nn.IdentityVariance(),
     state_decoder=nn.IdentityVariance(),
     action_encoder=nn.IdentityVariance(),
     action_decoder=nn.IdentityVariance(),
     # Disabled alternative prior types; exactly one `prior=` may be active.
     # prior={'prior_type': 'none'},
     # prior={'prior_type': 'normal'},
     # prior={'prior_type': 'lds'},
     # prior={'prior_type': 'blds'},
     # Active prior: type 'nnds' with a network whose first layer is sized
     # ds + da and whose output is Gaussian(ds).
     prior={
         'prior_type': 'nnds',
         'network': nn.Relu(ds + da, 200) >> nn.Relu(200) >> nn.Gaussian(ds)
     },
 ),
 train=dict(
     num_epochs=4000,
     learning_rate=1e-3,