# NOTE(review): whitespace-collapsed, truncated excerpt. The original line
# breaks are missing, so the line below is not valid Python as written
# (e.g. `do = env.get_state_dim() ds = 10` has no statement separator).
# Restore the source formatting before changing any value.
#
# Contents, in order:
#   * do is read from env.get_state_dim(); ds is fixed at 10; du and da are
#     both set from env.get_action_dim(); horizon is 50.
#   * `experiment` is opened as a dict named 'reacher-image-mpc' with
#     experiment_type 'train_vae' and env=env_params.
#   * model: the state encoder starts with nn.Reshape(do, [64, 64, 3]) and
#     ends in nn.Gaussian(ds); the state decoder starts from nn.Relu(ds, 512)
#     and ends in nn.Bernoulli(); both action coders are
#     nn.IdentityVariance(variance=1e-4); the prior is of type 'nnds' with a
#     network whose first layer takes ds + da inputs.
#   * The excerpt is cut off inside `cost=dict(cost_type='nn', ...`; neither
#     that dict, `model`, nor `experiment` is closed in this view.
# `env`, `env_params` and `nn` are not defined in this excerpt.
do = env.get_state_dim() ds = 10 du = da = env.get_action_dim() horizon = 50 experiment = dict( experiment_name='reacher-image-mpc', experiment_type='train_vae', env=env_params, model=dict( do=do, du=du, ds=ds, da=da, horizon=horizon, state_encoder=(nn.Reshape(do, [64, 64, 3]) >> nn.Convolution([7, 7, 64], strides=(1, 1)) >> nn.Relu() >> nn.Convolution([5, 5, 32], strides=(2, 2)) >> nn.Convolution([3, 3, 8], strides=(2, 2)) >> nn.Flatten() >> nn.Relu(256) >> nn.Gaussian(ds)), state_decoder=(nn.Relu(ds, 512) >> nn.Reshape([8, 8, 8]) >> nn.Deconvolution([3, 3, 32]) >> nn.Deconvolution([5, 5, 64]) >> nn.Deconvolution([7, 7, 3]) >> nn.Flatten() >> nn.Bernoulli()), action_encoder=nn.IdentityVariance(variance=1e-4), action_decoder=nn.IdentityVariance(variance=1e-4), prior=dict( prior_type='nnds', network=( nn.Relu(ds + da, 200) >> nn.Relu(200) >> nn.Gaussian(ds) ) ), cost=dict( cost_type='nn',
# NOTE(review): whitespace-collapsed, truncated excerpt. The original line
# breaks are missing, so the line below is not valid Python as written
# (`horizon = 200 experiment = dict(` has no statement separator).
# Restore the source formatting before changing any value.
#
# Contents, in order:
#   * horizon is set to 200.
#   * `experiment` is opened as a dict named 'pendulum-image-blds-smooth'
#     with experiment_type 'train_vae' and env=env_params.
#   * model: the state encoder starts with nn.Reshape(do, [32, 32, 3]) and
#     ends in nn.Gaussian(ds); the state decoder starts from nn.Relu(ds, 512)
#     and ends in nn.Bernoulli(); both action coders are
#     nn.IdentityVariance(variance=1e-4).
#   * prior is a sweep(...) over two 'blds' settings (smooth=True and
#     smooth=False); the second argument ['blds-smooth', 'blds'] presumably
#     labels the two variants -- TODO confirm against sweep's definition.
#   * train: num_epochs=1000, learning_rate=1e-3,
#     model_learning_rate=1e-5 / 4 (i.e. 2.5e-6), beta_start=1e-4; the
#     excerpt is cut off here, so `train` and `experiment` are never closed.
# `do`, `du`, `ds`, `da`, `env_params`, `nn` and `sweep` are not defined in
# this excerpt.
horizon = 200 experiment = dict( experiment_name='pendulum-image-blds-smooth', experiment_type='train_vae', env=env_params, model=dict( do=do, du=du, ds=ds, da=da, horizon=horizon, state_encoder=(nn.Reshape(do, [32, 32, 3]) >> nn.Convolution( [5, 5, 32], strides=(1, 1)) >> nn.Relu() >> nn.Convolution( [3, 3, 8], strides=(2, 2)) >> nn.Flatten() >> nn.Relu(256) >> nn.Gaussian(ds)), state_decoder=(nn.Relu(ds, 512) >> nn.Reshape([8, 8, 8]) >> nn.Deconvolution([3, 3, 32]) >> nn.Deconvolution( [5, 5, 3]) >> nn.Flatten() >> nn.Bernoulli()), action_encoder=nn.IdentityVariance(variance=1e-4), action_decoder=nn.IdentityVariance(variance=1e-4), prior=sweep([ dict(prior_type='blds', smooth=True), dict(prior_type='blds', smooth=False), ], ['blds-smooth', 'blds']), ), train=dict( num_epochs=1000, learning_rate=1e-3, model_learning_rate=1e-5 / 4, beta_start=1e-4,
# NOTE(review): whitespace-collapsed, truncated excerpt. Because the line
# breaks are missing and the text starts with `#`, Python treats the ENTIRE
# line below as one comment: none of the settings on it are live until the
# original formatting is restored.
#
# Contents, in order (as they read once line breaks are put back):
#   * a commented-out convolutional state_decoder ending in nn.Bernoulli();
#   * state/action encoders and decoders all set to nn.IdentityVariance();
#   * commented-out prior alternatives 'none', 'normal', 'lds', 'blds',
#     followed by the selected prior of type 'nnds' whose network takes
#     ds + da inputs and ends in nn.Gaussian(ds);
#   * `),` closing an enclosing call whose opening is not visible here;
#   * train: num_epochs=4000, learning_rate=1e-3, batch_size=16,
#     dump_every=1000, summary_every=50 / 2, beta_start=1.0, beta_rate=0.
#     Under Python 3 `50 / 2` is true division and yields the float 25.0 --
#     TODO confirm the consumer accepts a float step count;
#   * data: num_rollouts=100, init_std=.2.
# The excerpt ends after `data=dict(...)`; the outer call is not closed in
# this view. `nn`, `ds` and `da` are not defined in this excerpt.
# state_decoder=(nn.Relu(ds, 1024) >> nn.Reshape([16, 16, 4]) # >> nn.Convolution([3, 3, 32]) >> nn.Relu() # >> nn.Deconvolution([2, 2, 32]) >> nn.Relu() # >> nn.Convolution([3, 3, 32]) >> nn.Relu() # >> nn.Convolution([2, 2, 2]) >> nn.Flatten() >> nn.Bernoulli()), state_encoder=nn.IdentityVariance(), state_decoder=nn.IdentityVariance(), action_encoder=nn.IdentityVariance(), action_decoder=nn.IdentityVariance(), # prior={'prior_type': 'none'}, # prior={'prior_type': 'normal'}, # prior={'prior_type': 'lds'}, # prior={'prior_type': 'blds'}, prior={ 'prior_type': 'nnds', 'network': nn.Relu(ds + da, 200) >> nn.Relu(200) >> nn.Gaussian(ds) }, ), train=dict( num_epochs=4000, learning_rate=1e-3, batch_size=16, dump_every=1000, summary_every=50 / 2, beta_start=1.0, beta_rate=0, ), data=dict( num_rollouts=100, init_std=.2, ),
# NOTE(review): whitespace-collapsed, truncated excerpt. The original line
# breaks are missing and the call below is never closed in this view, so it
# is not valid Python as written. Restore the source formatting before
# changing any value.
#
# Contents, in order:
#   * `experiment` is opened as a dict named 'vae' with experiment_type
#     'train_vae' and env=env_params.
#   * model: the state encoder starts with nn.Reshape(do, [64, 64, 3]) and
#     ends in nn.Gaussian(ds); the state decoder starts from nn.Relu(ds, 512)
#     and ends in nn.Bernoulli(); both action coders are
#     nn.IdentityVariance(variance=1e-4).
#   * prior is a sweep(...) over a 'blds' prior (smooth=True,
#     time_varying=True) and a 'normal' prior; the second argument
#     ['blds-tv-smooth', 'normal'] presumably labels the two variants --
#     TODO confirm against sweep's definition.
#   * cost is 'quadratic' with learn_stdev swept over [False, True].
#   * train: num_epochs=1000, learning_rate=1e-3; the excerpt is cut off
#     here, so `train` and `experiment` are never closed.
# `do`, `du`, `ds`, `da`, `horizon`, `env_params`, `nn` and `sweep` are not
# defined in this excerpt.
experiment = dict( experiment_name='vae', experiment_type='train_vae', env=env_params, model=dict( do=do, du=du, ds=ds, da=da, horizon=horizon, state_encoder=(nn.Reshape(do, [64, 64, 3]) >> nn.Convolution( [7, 7, 64], strides=(1, 1)) >> nn.Relu() >> nn.Convolution( [5, 5, 32], strides=(2, 2)) >> nn.Convolution([3, 3, 8], strides=(2, 2)) >> nn.Flatten() >> nn.Relu(256) >> nn.Gaussian(ds)), state_decoder=( nn.Relu(ds, 512) >> nn.Reshape([8, 8, 8]) >> nn.Deconvolution( [3, 3, 32]) >> nn.Deconvolution([5, 5, 64]) >> nn.Deconvolution([7, 7, 3]) >> nn.Flatten() >> nn.Bernoulli()), action_encoder=nn.IdentityVariance(variance=1e-4), action_decoder=nn.IdentityVariance(variance=1e-4), prior=sweep([ dict(prior_type='blds', smooth=True, time_varying=True), dict(prior_type='normal'), ], ['blds-tv-smooth', 'normal']), cost=dict(cost_type='quadratic', learn_stdev=sweep([False, True])), ), train=dict( num_epochs=1000, learning_rate=1e-3,
# NOTE(review): whitespace-collapsed, truncated excerpt. Because the line
# breaks are missing and the text starts with `#`, Python treats the ENTIRE
# line below as one comment: none of the statements on it execute until the
# original formatting is restored.
#
# Contents, in order (as they read once line breaks are put back):
#   * the tail of a function whose header is not visible (presumably
#     generate_data, which is called immediately afterwards -- TODO confirm):
#     three commented-out quadrant-dependent updates of X[i, t], then
#     Gaussian noise with scale np.sqrt(noise) is added to X in place and X
#     is returned;
#   * data = generate_data(1000); N is its first dimension;
#   * yt / yt1 are data[:, :-1] and data[:, 1:] (each entry paired with its
#     successor along axis 1), both reshaped to [-1, D];
#   * transition_net and rec_net are built identically as
#     Tanh(D, 500) >> Tanh(500) >> nn.Gaussian(D) and initialized;
#   * Yt and Yt1 are placeholders of shape [None, D]; batch_size is the
#     leading dimension of Yt and num_batches = N / float(batch_size);
#   * Yt_message = Gaussian.pack([...]) of two terms: T.eye(D) * noise tiled
#     batch_size times, and the einsum 'ab,ib->ia' of T.eye(D) * noise
#     with Yt.
# `np`, `T`, `nn`, `Tanh`, `Gaussian`, `D` and `noise` are not defined in
# this excerpt.
# if x[0] < 0 and x[1] >= 0: # X[i, t] = X[i, t-1] + [-0.5, -0.5] # if x[0] < 0 and x[1] < 0: # X[i, t] = X[i, t-1] + [0.5, 0.5] # if x[0] >= 0 and x[1] < 0: # X[i, t] = X[i, t-1] + [0.5, 0.5] X += np.random.normal(size=X.shape, scale=np.sqrt(noise)) return X data = generate_data(1000) N = data.shape[0] yt, yt1 = data[:, :-1], data[:, 1:] yt, yt1 = yt.reshape([-1, D]), yt1.reshape([-1, D]) transition_net = Tanh(D, 500) >> Tanh(500) >> nn.Gaussian(D) transition_net.initialize() rec_net = Tanh(D, 500) >> Tanh(500) >> nn.Gaussian(D) rec_net.initialize() Yt = T.placeholder(T.floatx(), [None, D]) Yt1 = T.placeholder(T.floatx(), [None, D]) batch_size = T.shape(Yt)[0] num_batches = N / T.to_float(batch_size) Yt_message = Gaussian.pack([ T.tile(T.eye(D)[None] * noise, [batch_size, 1, 1]), T.einsum('ab,ib->ia', T.eye(D) * noise, Yt) ])
# NOTE(review): whitespace-collapsed, truncated excerpt. Because the line
# breaks are missing and the text starts with `#`, Python treats the ENTIRE
# line below as one comment: none of the settings on it are live until the
# original formatting is restored.
#
# Contents, in order (as they read once line breaks are put back):
#   * the tail of a commented-out convolutional encoder (ending in
#     nn.Gaussian(ds)) and a commented-out convolutional state_decoder
#     (ending in nn.Bernoulli());
#   * state/action encoders and decoders all set to nn.IdentityVariance();
#   * commented-out prior alternatives 'none', 'normal', 'lds', 'blds',
#     followed by the selected prior of type 'nnds' whose network takes
#     ds + da inputs and ends in nn.Gaussian(ds);
#   * `),` closing an enclosing call whose opening is not visible here;
#   * train: num_epochs=4000, learning_rate=1e-4, batch_size=2,
#     dump_every=50, summary_every=50 / 2. Under Python 3 `50 / 2` is true
#     division and yields the float 25.0 -- TODO confirm the consumer
#     accepts a float step count;
#   * data: num_rollouts=100, init_std=0.1;
#   * out_dir points at 's3://parasol-experiments/vae/reacher-noimage'; a
#     local alternative 'temp2/pm-noimage' is commented out;
#   * the final `)` closes the outermost call, whose opening is not visible.
# `nn`, `ds` and `da` are not defined in this excerpt.
# >> nn.Convolution([3, 3, 32]) >> nn.Relu() # >> nn.Flatten() >> nn.Relu(200) >> nn.Gaussian(ds)), # state_decoder=(nn.Relu(ds, 1024) >> nn.Reshape([16, 16, 4]) # >> nn.Convolution([3, 3, 32]) >> nn.Relu() # >> nn.Deconvolution([2, 2, 32]) >> nn.Relu() # >> nn.Convolution([3, 3, 32]) >> nn.Relu() # >> nn.Convolution([2, 2, 2]) >> nn.Flatten() >> nn.Bernoulli()), state_encoder=nn.IdentityVariance(), state_decoder=nn.IdentityVariance(), action_encoder=nn.IdentityVariance(), action_decoder=nn.IdentityVariance(), # prior={'prior_type': 'none'}, # prior={'prior_type': 'normal'}, # prior={'prior_type': 'lds'}, # prior={'prior_type': 'blds'}, prior={'prior_type': 'nnds', 'network': nn.Relu(ds + da, 200) >> nn.Relu(200) >> nn.Gaussian(ds)}, ), train=dict( num_epochs=4000, learning_rate=1e-4, batch_size=2, dump_every=50, summary_every=50 / 2, ), data=dict( num_rollouts=100, init_std=0.1, ), out_dir='s3://parasol-experiments/vae/reacher-noimage', # out_dir='temp2/pm-noimage', )