예제 #1
0
파일: mpc.py 프로젝트: yuchen8807/parasol
# Sizes shared by the model configuration that follows.
# Observation size comes straight from the environment.
do = env.get_state_dim()
# Hand-picked size passed to the model as `ds`.
ds = 10
# The environment's action dimension is used for both `da` and `du`.
da = env.get_action_dim()
du = da
# Forwarded to the model configuration as `horizon`
# (presumably a number of time steps -- TODO confirm).
horizon = 50

experiment = dict(
    experiment_name='reacher-image-mpc',
    experiment_type='train_vae',
    env=env_params,
    model=dict(
        do=do, du=du, ds=ds, da=da, horizon=horizon,
        state_encoder=(nn.Reshape(do, [64, 64, 3])
                    >> nn.Convolution([7, 7, 64], strides=(1, 1)) >> nn.Relu()
                    >> nn.Convolution([5, 5, 32], strides=(2, 2))
                    >> nn.Convolution([3, 3, 8], strides=(2, 2))
                    >> nn.Flatten() >> nn.Relu(256) >> nn.Gaussian(ds)),
        state_decoder=(nn.Relu(ds, 512) >> nn.Reshape([8, 8, 8])
                    >> nn.Deconvolution([3, 3, 32])
                    >> nn.Deconvolution([5, 5, 64])
                    >> nn.Deconvolution([7, 7, 3])
                    >> nn.Flatten() >> nn.Bernoulli()),
        action_encoder=nn.IdentityVariance(variance=1e-4),
        action_decoder=nn.IdentityVariance(variance=1e-4),
        prior=dict(
            prior_type='nnds',
            network=(
                nn.Relu(ds + da, 200) >> nn.Relu(200) >> nn.Gaussian(ds)
            )
        ),
        cost=dict(
            cost_type='nn',
예제 #2
0
# Forwarded to the model configuration below as `horizon`
# (presumably a number of time steps -- TODO confirm).
horizon = 200

experiment = dict(
    experiment_name='pendulum-image-blds-smooth',
    experiment_type='train_vae',
    env=env_params,
    model=dict(
        do=do,
        du=du,
        ds=ds,
        da=da,
        horizon=horizon,
        state_encoder=(nn.Reshape(do, [32, 32, 3]) >> nn.Convolution(
            [5, 5, 32], strides=(1, 1)) >> nn.Relu() >> nn.Convolution(
                [3, 3, 8], strides=(2, 2)) >> nn.Flatten() >> nn.Relu(256) >>
                       nn.Gaussian(ds)),
        state_decoder=(nn.Relu(ds, 512) >> nn.Reshape([8, 8, 8]) >>
                       nn.Deconvolution([3, 3, 32]) >> nn.Deconvolution(
                           [5, 5, 3]) >> nn.Flatten() >> nn.Bernoulli()),
        action_encoder=nn.IdentityVariance(variance=1e-4),
        action_decoder=nn.IdentityVariance(variance=1e-4),
        prior=sweep([
            dict(prior_type='blds', smooth=True),
            dict(prior_type='blds', smooth=False),
        ], ['blds-smooth', 'blds']),
    ),
    train=dict(
        num_epochs=1000,
        learning_rate=1e-3,
        model_learning_rate=1e-5 / 4,
        beta_start=1e-4,
예제 #3
0
     # state_decoder=(nn.Relu(ds, 1024) >> nn.Reshape([16, 16, 4])
     # >> nn.Convolution([3, 3, 32]) >> nn.Relu()
     # >> nn.Deconvolution([2, 2, 32]) >> nn.Relu()
     # >> nn.Convolution([3, 3, 32]) >> nn.Relu()
     # >> nn.Convolution([2, 2, 2]) >> nn.Flatten() >> nn.Bernoulli()),
     state_encoder=nn.IdentityVariance(),
     state_decoder=nn.IdentityVariance(),
     action_encoder=nn.IdentityVariance(),
     action_decoder=nn.IdentityVariance(),
     # prior={'prior_type': 'none'},
     # prior={'prior_type': 'normal'},
     # prior={'prior_type': 'lds'},
     # prior={'prior_type': 'blds'},
     prior={
         'prior_type': 'nnds',
         'network': nn.Relu(ds + da, 200) >> nn.Relu(200) >> nn.Gaussian(ds)
     },
 ),
 train=dict(
     num_epochs=4000,
     learning_rate=1e-3,
     batch_size=16,
     dump_every=1000,
     summary_every=50 / 2,
     beta_start=1.0,
     beta_rate=0,
 ),
 data=dict(
     num_rollouts=100,
     init_std=.2,
 ),
예제 #4
0
experiment = dict(
    experiment_name='vae',
    experiment_type='train_vae',
    env=env_params,
    model=dict(
        do=do,
        du=du,
        ds=ds,
        da=da,
        horizon=horizon,
        state_encoder=(nn.Reshape(do, [64, 64, 3]) >> nn.Convolution(
            [7, 7, 64], strides=(1, 1)) >> nn.Relu() >> nn.Convolution(
                [5, 5, 32], strides=(2, 2)) >> nn.Convolution([3, 3, 8],
                                                              strides=(2, 2))
                       >> nn.Flatten() >> nn.Relu(256) >> nn.Gaussian(ds)),
        state_decoder=(
            nn.Relu(ds, 512) >> nn.Reshape([8, 8, 8]) >> nn.Deconvolution(
                [3, 3, 32]) >> nn.Deconvolution([5, 5, 64]) >>
            nn.Deconvolution([7, 7, 3]) >> nn.Flatten() >> nn.Bernoulli()),
        action_encoder=nn.IdentityVariance(variance=1e-4),
        action_decoder=nn.IdentityVariance(variance=1e-4),
        prior=sweep([
            dict(prior_type='blds', smooth=True, time_varying=True),
            dict(prior_type='normal'),
        ], ['blds-tv-smooth', 'normal']),
        cost=dict(cost_type='quadratic', learn_stdev=sweep([False, True])),
    ),
    train=dict(
        num_epochs=1000,
        learning_rate=1e-3,
예제 #5
0
파일: nlds.py 프로젝트: sharadmv/nvmp
            # if x[0] < 0 and x[1] >= 0:
            # X[i, t] = X[i, t-1] + [-0.5, -0.5]
            # if x[0] < 0 and x[1] < 0:
            # X[i, t] = X[i, t-1] + [0.5, 0.5]
            # if x[0] >= 0 and x[1] < 0:
            # X[i, t] = X[i, t-1] + [0.5, 0.5]
    X += np.random.normal(size=X.shape, scale=np.sqrt(noise))
    return X


# --- Training pairs ---------------------------------------------------------
# Generate the data set, then line every step up with the one that follows it.
data = generate_data(1000)
N = data.shape[0]
# "Current" steps leave out the last index of axis 1 and "next" steps leave
# out the first; both are then flattened into rows of width D.
yt = data[:, :-1].reshape([-1, D])
yt1 = data[:, 1:].reshape([-1, D])

# --- Networks ---------------------------------------------------------------
# Both networks use the same layout: Tanh(D, 500) >> Tanh(500) >> Gaussian(D).
# Each one is initialized immediately after it is built.
transition_net = (
    Tanh(D, 500)
    >> Tanh(500)
    >> nn.Gaussian(D)
)
transition_net.initialize()

rec_net = (
    Tanh(D, 500)
    >> Tanh(500)
    >> nn.Gaussian(D)
)
rec_net.initialize()

# --- Symbolic inputs --------------------------------------------------------
# The leading dimension is left open (None), so the batch size is read back
# from the placeholder's runtime shape.
Yt = T.placeholder(T.floatx(), [None, D])
Yt1 = T.placeholder(T.floatx(), [None, D])
batch_size = T.shape(Yt)[0]
num_batches = N / T.to_float(batch_size)

# --- Message built from Yt --------------------------------------------------
# First entry: the noise-scaled identity, repeated once per batch row.
_tiled_scaled_eye = T.tile(T.eye(D)[None] * noise, [batch_size, 1, 1])
# Second entry: every row of Yt multiplied by the noise-scaled identity
# (out[i, a] = sum_b M[a, b] * Yt[i, b]).
_scaled_yt = T.einsum('ab,ib->ia', T.eye(D) * noise, Yt)
Yt_message = Gaussian.pack([_tiled_scaled_eye, _scaled_yt])
예제 #6
0
                       # >> nn.Convolution([3, 3, 32]) >> nn.Relu()
                       # >> nn.Flatten() >> nn.Relu(200) >> nn.Gaussian(ds)),
        # state_decoder=(nn.Relu(ds, 1024) >> nn.Reshape([16, 16, 4])
                       # >> nn.Convolution([3, 3, 32]) >> nn.Relu()
                       # >> nn.Deconvolution([2, 2, 32]) >> nn.Relu()
                       # >> nn.Convolution([3, 3, 32]) >> nn.Relu()
                       # >> nn.Convolution([2, 2, 2]) >> nn.Flatten() >> nn.Bernoulli()),
        state_encoder=nn.IdentityVariance(),
        state_decoder=nn.IdentityVariance(),
        action_encoder=nn.IdentityVariance(),
        action_decoder=nn.IdentityVariance(),
        # prior={'prior_type': 'none'},
        # prior={'prior_type': 'normal'},
        # prior={'prior_type': 'lds'},
        # prior={'prior_type': 'blds'},
        prior={'prior_type': 'nnds', 'network': nn.Relu(ds + da, 200) >> nn.Relu(200) >> nn.Gaussian(ds)},
    ),
    train=dict(
        num_epochs=4000,
        learning_rate=1e-4,
        batch_size=2,
        dump_every=50,
        summary_every=50 / 2,
    ),
    data=dict(
        num_rollouts=100,
        init_std=0.1,
    ),
    out_dir='s3://parasol-experiments/vae/reacher-noimage',
    # out_dir='temp2/pm-noimage',
)