# Experiment identification and environment settings.
experiment_name='reacher-image-mpc',
experiment_type='train_vae',
env=env_params,
# Model section: dimensions, encoder/decoder networks, prior and cost.
# `do`, `du`, `ds`, `da` and `horizon` are forwarded unchanged from the
# enclosing scope (presumably observation/control/state/action sizes --
# TODO confirm against the model constructor).
model=dict(
    do=do,
    du=du,
    ds=ds,
    da=da,
    horizon=horizon,
    # State encoder: reshape to [64, 64, 3], apply three convolutions,
    # flatten, and finish with a Gaussian layer sized by `ds`.
    # NOTE(review): there is no nn.Relu() between the 5x5 and 3x3
    # convolutions -- confirm this is intentional.
    state_encoder=(
        nn.Reshape(do, [64, 64, 3])
        >> nn.Convolution([7, 7, 64], strides=(1, 1))
        >> nn.Relu()
        >> nn.Convolution([5, 5, 32], strides=(2, 2))
        >> nn.Convolution([3, 3, 8], strides=(2, 2))
        >> nn.Flatten()
        >> nn.Relu(256)
        >> nn.Gaussian(ds)
    ),
    # State decoder: the 512-wide layer is reshaped to [8, 8, 8]
    # (8 * 8 * 8 == 512), passed through three deconvolutions,
    # flattened, and ends in a Bernoulli layer.
    state_decoder=(
        nn.Relu(ds, 512)
        >> nn.Reshape([8, 8, 8])
        >> nn.Deconvolution([3, 3, 32])
        >> nn.Deconvolution([5, 5, 64])
        >> nn.Deconvolution([7, 7, 3])
        >> nn.Flatten()
        >> nn.Bernoulli()
    ),
    # Action encoder and decoder share the same configuration:
    # IdentityVariance with variance 1e-4.
    action_encoder=nn.IdentityVariance(variance=1e-4),
    action_decoder=nn.IdentityVariance(variance=1e-4),
    # Prior of type 'nnds': network input width is ds + da, output is a
    # Gaussian layer sized by `ds`.
    prior={
        'prior_type': 'nnds',
        'network': (
            nn.Relu(ds + da, 200)
            >> nn.Relu(200)
            >> nn.Gaussian(ds)
        ),
    },
    # Cost of type 'nn': network input width is ds, output is a
    # Gaussian layer of size 1.
    cost={
        'cost_type': 'nn',
        'network': (
            nn.Relu(ds, 200)
            >> nn.Relu(200)
            >> nn.Gaussian(1)
        ),
    },
),
do=do, du=du, ds=ds, da=da, horizon=horizon, # state_encoder=(nn.Reshape(do, [32, 32, 2]) # >> nn.Convolution([3, 3, 32]) >> nn.Relu() # >> nn.Convolution([3, 3, 32]) >> nn.Relu() # >> nn.Convolution([3, 3, 32]) >> nn.Relu() # >> nn.Flatten() >> nn.Relu(200) >> nn.Gaussian(ds)), # state_decoder=(nn.Relu(ds, 1024) >> nn.Reshape([16, 16, 4]) # >> nn.Convolution([3, 3, 32]) >> nn.Relu() # >> nn.Deconvolution([2, 2, 32]) >> nn.Relu() # >> nn.Convolution([3, 3, 32]) >> nn.Relu() # >> nn.Convolution([2, 2, 2]) >> nn.Flatten() >> nn.Bernoulli()), state_encoder=nn.IdentityVariance(), state_decoder=nn.IdentityVariance(), action_encoder=nn.IdentityVariance(), action_decoder=nn.IdentityVariance(), # prior={'prior_type': 'none'}, # prior={'prior_type': 'normal'}, # prior={'prior_type': 'lds'}, # prior={'prior_type': 'blds'}, prior={ 'prior_type': 'nnds', 'network': nn.Relu(ds + da, 200) >> nn.Relu(200) >> nn.Gaussian(ds) }, ), train=dict( num_epochs=4000, learning_rate=1e-3,