# QQube environment (Gym id 'QQube-TO-v1'). The episode cap set on the
# environment is the same value that is handed to the solver as nb_steps.
horizon = 500
env = gym.make('QQube-TO-v1')
env._max_episode_steps = horizon

# Solver settings gathered in one mapping and forwarded as keyword arguments.
gps_settings = {
    'nb_steps': horizon,
    'kl_bound': 1e-2,
    'init_ctl_sigma': 100.0,
    'activation': {'shift': 450, 'mult': 0.5},
}
alg = MBGPS(env, **gps_settings)

# Run GPS for 10 iterations; the returned trace is plotted further down.
trace = alg.run(nb_iter=10, verbose=True)

# Solver-provided plot ("dists" in the original note - presumably the
# state/action distributions; confirm against MBGPS.plot).
alg.plot()

# Objective trace in its own figure.
# NOTE: the import is deliberately left at this point of the script so the
# order in which modules get loaded is unchanged.
import matplotlib.pyplot as plt

plt.figure()
plt.plot(trace)
plt.show()

# Sample one episode (stoch=True) and draw row 0 of data['x'] in blue in the
# first panel of a 5-row, 1-column subplot grid.
data = alg.sample(nb_episodes=1, stoch=True)
plt.figure()
plt.subplot(5, 1, 1)
states = data['x']
plt.plot(states[0, :], '-b')
# Pendulum environment (Gym id 'Pendulum-TO-v0'). The episode cap set on the
# environment is the same value that is handed to the solver as nb_steps.
horizon = 100
env = gym.make('Pendulum-TO-v0')
env._max_episode_steps = horizon
env.unwrapped.dt = 0.05

# Seed before env.init() is called, so the call order of the original
# script (seed -> init -> solver construction) is kept.
env.seed(1337)
start_state = env.init()

# Solver settings gathered in one mapping and forwarded as keyword arguments.
gps_settings = {
    'nb_steps': horizon,
    'init_state': start_state,
    'init_action_sigma': 5.0,
    'kl_bound': 10.0,
    'slew_rate': False,
    'action_penalty': 1,
    'activation': {'mult': 1.0, 'shift': 80},
}
solver = MBGPS(env, **gps_settings)

# Run the solver for 25 iterations; the returned trace is plotted below.
trace = solver.run(nb_iter=25, verbose=True)

# Solver-provided plot ("dists" in the original note - presumably the
# state/action distributions; confirm against MBGPS.plot).
solver.plot()

# Objective trace in its own figure (plt is expected to be in scope already).
plt.figure()
plt.plot(trace)
plt.show()