# @Filename: mf_pendulum.py
# @Date: 2019-06-16-18-38
# @Author: Hany Abdulsamad
# @Contact: [email protected]
#
# Model-free GPS demo on a pendulum trajectory-optimization task.
# NOTE(review): this script arrived collapsed onto a single physical line
# (invalid Python); formatting restored, tokens unchanged.

import gym

from trajopt.gps import MFGPS


# pendulum env
env = gym.make('Pendulum-TO-v0')
env._max_episode_steps = 150

# activation=range(100, 150): cost activation over the last 50 of 150 steps
alg = MFGPS(env, nb_steps=150,
            kl_bound=25.,
            init_ctl_sigma=1.0,
            activation=range(100, 150))

# run gps
trace = alg.run(nb_episodes=10)

# plot dists
alg.plot()

# execute and plot
nb_episodes = 25
data = alg.sample(nb_episodes, stoch=False)

import matplotlib.pyplot as plt

plt.figure()
# NOTE(review): chunk appears to end here; plotting of `data` presumably
# continues past this excerpt.
# Model-free GPS demo on an LQR trajectory-optimization task.
# NOTE(review): this script arrived collapsed onto a single physical line
# (invalid Python); formatting restored, tokens unchanged.

import gym

from trajopt.gps import MFGPS


# lqr task
env = gym.make('LQR-TO-v0')
env._max_episode_steps = 100

alg = MFGPS(env, nb_steps=100,
            kl_bound=10.,
            init_ctl_sigma=50.)

# run gps
trace = alg.run(nb_episodes=10, nb_iter=5)

# plot dists
alg.plot()

# execute and plot
nb_episodes = 25
data = alg.sample(nb_episodes, stoch=False)

import matplotlib.pyplot as plt

plt.figure()

# one stacked subplot per state dimension, then per action dimension
for k in range(alg.dm_state):
    plt.subplot(alg.dm_state + alg.dm_act, 1, k + 1)
    plt.plot(data['x'][k, ...])

for k in range(alg.dm_act):
    plt.subplot(alg.dm_state + alg.dm_act, 1, alg.dm_state + k + 1)
    # NOTE(review): source excerpt is truncated inside this loop; the
    # matching plt.plot(data['u'][k, ...]) presumably follows — confirm.
# Model-free GPS demo on a double-pendulum trajectory-optimization task.
# NOTE(review): this chunk arrived collapsed onto a single physical line
# (invalid Python); formatting restored, tokens unchanged. `gym` and `MFGPS`
# are presumably imported earlier in the file — confirm.

# pendulum task
env = gym.make('DoublePendulum-TO-v0')
env._max_episode_steps = 100
env.unwrapped.dt = 0.05
env.seed(1337)

# prior = {'K': 1e-3, 'psi': 1e-8, 'nu': 0.1}
prior = {'K': 1e-6}

solver = MFGPS(env, nb_steps=100,
               init_state=env.init(),
               init_action_sigma=10.,
               kl_bound=1e1,
               slew_rate=False,
               action_penalty=1,
               prior=prior,
               # activation dict: presumably a sigmoid-style cost ramp
               # ('mult' slope, 'shift' onset step) — confirm against MFGPS
               activation={'mult': 1., 'shift': 80})

# run gps
trace = solver.run(nb_learning_episodes=25,
                   nb_evaluation_episodes=25,
                   nb_iter=50, verbose=True)

# execute and plot
data = solver.rollout(25, stoch=True)
# Model-free GPS demo on the LQR-TO-v1 task.
# NOTE(review): this chunk arrived collapsed onto a single physical line
# (invalid Python); formatting restored, tokens unchanged. `np`, `gym`,
# `MFGPS` and `plt` are used without visible imports — presumably imported
# earlier in the file; confirm.

import warnings
warnings.filterwarnings("ignore")

np.random.seed(1337)

# lqr task
env = gym.make('LQR-TO-v1')
env._max_episode_steps = 100000
env.seed(1337)

prior = {'K': 1e-6, 'psi': 1e6, 'nu': 0.1}

alg = MFGPS(env, nb_steps=60,
            init_state=env.init(),
            init_action_sigma=100.,
            kl_bound=1e0,
            prior=prior)

# run gps
trace = alg.run(nb_learning_episodes=25,
                nb_evaluation_episodes=25,
                nb_iter=25, verbose=True)

# plot dists
alg.plot_distributions()

# execute and plot
data = alg.rollout(25, stoch=True)

plt.figure()
# Model-free GPS demo on the pendulum task (activation-dict variant).
# NOTE(review): this script arrived collapsed onto a single physical line
# (invalid Python); formatting restored, tokens unchanged.

import gym

from trajopt.gps import MFGPS


# pendulum env
env = gym.make('Pendulum-TO-v0')
env._max_episode_steps = 100
env.unwrapped._dt = 0.05

# activation dict: presumably a cost ramp starting at step 90 — confirm
alg = MFGPS(env, nb_steps=100,
            kl_bound=1.,
            init_ctl_sigma=2.5,
            activation={'shift': 90, 'mult': 2.})

# run gps
trace = alg.run(nb_episodes=10)

# plot dists
alg.plot()

# execute and plot
nb_episodes = 25
data = alg.sample(nb_episodes, stoch=False)

import matplotlib.pyplot as plt

plt.figure()

for k in range(alg.dm_state):
    # NOTE(review): source excerpt is truncated at this loop header; the
    # per-state plotting body (plt.subplot/plt.plot) presumably follows.
    pass
# Side-by-side comparison of robust GPS (MFRGPS) against plain GPS (MFGPS)
# on a mass-damper task, with matched seeds for a fair comparison.
# NOTE(review): this chunk arrived collapsed onto a single physical line
# (invalid Python); formatting restored, tokens unchanged. `env`, `np`,
# `MFRGPS` and `MFGPS` are presumably defined/imported earlier — confirm.

prior = {'K': 1e-6}

# mass-damper
rgps = MFRGPS(env, nb_steps=100,
              policy_kl_bound=5.,
              param_kl_bound=1e1,
              init_state=env.init(),
              init_action_sigma=100.,
              prior=prior)

# reseed so both solvers see identical randomness
np.random.seed(1337)
env.seed(1337)
rgps.run(nb_iter=15, nb_learning_episodes=10,
         nb_evaluation_episodes=100, verbose=True)

gps = MFGPS(env, nb_steps=100,
            kl_bound=5.,
            init_state=env.init(),
            init_action_sigma=100.,
            dyn_prior=prior)

np.random.seed(1337)
env.seed(1337)
gps.run(nb_iter=15, nb_learning_episodes=10,
        nb_evaluation_episodes=100, verbose=True)

gps.plot()
# NOTE(review): this chunk begins mid-expression — the leading keyword
# arguments (init_action_sigma=25., action_penalty=1e-1, ...) belong to an
# MFRGPS(...) constructor call whose opening lies outside this excerpt, and
# the whole fragment is collapsed onto one physical line. Left byte-identical;
# it cannot be safely restructured without the missing opening. The visible
# tail builds a matched-seed MFGPS baseline, runs both solvers, rolls out the
# robust controller 100 times deterministically, and prints its mean total
# cost (sum over time, mean over rollouts).
init_action_sigma=25., action_penalty=1e-1, activation={'mult': 1., 'shift': 80}, prior=prior) rgps_trace = rgps.run(nb_learning_episodes=10, nb_evaluation_episodes=100, nb_iter=50, verbose=True) np.random.seed(1337) env.seed(1337) gps = MFGPS(env, nb_steps=100, kl_bound=1e1, init_state=env.init(), init_action_sigma=25., action_penalty=1e-1, activation={'mult': 1., 'shift': 80}, dyn_prior=prior) # run gps gps_trace = gps.run(nb_learning_episodes=10, nb_evaluation_episodes=100, nb_iter=50, verbose=True) np.random.seed(1337) env.seed(1337) rgps_data = rgps.rollout(100, stoch=False) print("Cost of Rbst. Ctl.:", np.mean(np.sum(rgps_data['c'], axis=0))) np.random.seed(1337)