Example #1
0

# Build the 'QQube-TO-v1' environment and set its episode limit to 500,
# the same value handed to the solver as nb_steps below.
env = gym.make('QQube-TO-v1')
env._max_episode_steps = 500

# Solver settings gathered in one place and forwarded as keyword arguments.
solver_options = dict(
    nb_steps=500,
    kl_bound=0.01,
    init_ctl_sigma=100.,
    activation={'shift': 450, 'mult': 0.5},
)
alg = MBGPS(env, **solver_options)

# Run 10 GPS iterations; the returned trace is plotted as the objective.
trace = alg.run(nb_iter=10, verbose=True)

# Solver's built-in plot of the resulting distributions.
alg.plot()

# Objective trace, drawn in a figure of its own.
import matplotlib.pyplot as plt

plt.figure()
plt.plot(trace)
plt.show()

# Sample a single episode (stoch=True) and open a new figure for it.
data = alg.sample(nb_episodes=1, stoch=True)

plt.figure()

# First panel of a 5-row, 1-column grid: row 0 of data['x'] as a solid
# blue line.
first_row = data['x'][0, :]
plt.subplot(5, 1, 1)
plt.plot(first_row, '-b')
Example #2
0
# Build the 'Pendulum-TO-v0' environment: episode limit of 100 steps and
# dt set to 0.05 on the unwrapped environment.
env = gym.make('Pendulum-TO-v0')
env._max_episode_steps = 100
env.unwrapped.dt = 0.05

# Seed the environment before env.init() is queried below.
env.seed(1337)

# Solver settings gathered in one place and forwarded as keyword arguments.
solver_options = dict(
    nb_steps=100,
    init_state=env.init(),
    init_action_sigma=5.0,
    kl_bound=1e1,
    slew_rate=False,
    action_penalty=1,
    activation={'mult': 1., 'shift': 80},
)
solver = MBGPS(env, **solver_options)

# Run 25 GPS iterations; the returned trace is plotted as the objective.
trace = solver.run(nb_iter=25, verbose=True)

# Solver's built-in plot of the resulting distributions.
solver.plot()

# Objective trace, drawn in a figure of its own.

plt.figure()
plt.plot(trace)
plt.show()