Ejemplo n.º 1
0
# @Filename: mf_pendulum.py
# @Date: 2019-06-16-18-38
# @Author: Hany Abdulsamad
# @Contact: [email protected]


import gym
from trajopt.gps import MFGPS


# Script flow: build the pendulum environment, optimise a controller with
# MFGPS, plot the solver's output, then sample trajectories from the result.

# pendulum env
env = gym.make('Pendulum-TO-v0')
# Same value as nb_steps passed to MFGPS below (150).
env._max_episode_steps = 150

# activation is the step-index range 100..149, i.e. the last 50 of 150 steps.
# NOTE(review): the meaning of kl_bound / init_ctl_sigma / activation is
# defined inside trajopt.gps and is not visible here -- confirm before tuning.
alg = MFGPS(env, nb_steps=150,
            kl_bound=25.,
            init_ctl_sigma=1.0,
            activation=range(100, 150))

# run gps
# The returned trace is kept in a module-level name for later inspection.
trace = alg.run(nb_episodes=10)

# plot dists
alg.plot()

# execute and plot
nb_episodes = 25
# NOTE(review): stoch=False presumably requests noise-free rollouts -- confirm.
data = alg.sample(nb_episodes, stoch=False)

import matplotlib.pyplot as plt

# Open a fresh figure for whatever is drawn next.
plt.figure()
Ejemplo n.º 2
0
import gym
from trajopt.gps import MFGPS


# Script flow: build the LQR environment, optimise a controller with MFGPS,
# plot the solver's output, then sample trajectories and plot them.

# lqr task
env = gym.make('LQR-TO-v0')
# Same value as nb_steps passed to MFGPS below (100).
env._max_episode_steps = 100

# NOTE(review): the meaning of kl_bound / init_ctl_sigma is defined inside
# trajopt.gps and is not visible here -- confirm before tuning.
alg = MFGPS(env, nb_steps=100,
            kl_bound=10.,
            init_ctl_sigma=50.)

# run gps
# The returned trace is kept in a module-level name for later inspection.
trace = alg.run(nb_episodes=10, nb_iter=5)

# plot dists
alg.plot()

# execute and plot
nb_episodes = 25
# NOTE(review): stoch=False presumably requests noise-free rollouts -- confirm.
data = alg.sample(nb_episodes, stoch=False)

import matplotlib.pyplot as plt

# One figure with (dm_state + dm_act) stacked rows in a single column:
# the first dm_state rows are used by the loop over states, the remaining
# dm_act rows by the loop over actions.
plt.figure()
for k in range(alg.dm_state):
    plt.subplot(alg.dm_state + alg.dm_act, 1, k + 1)
    # assumes the first axis of data['x'] indexes the state dimension
    # -- TODO confirm against MFGPS.sample
    plt.plot(data['x'][k, ...])

for k in range(alg.dm_act):
    # Action rows start right after the dm_state state rows.
    plt.subplot(alg.dm_state + alg.dm_act, 1, alg.dm_state + k + 1)
Ejemplo n.º 3
0
# double pendulum task
env = gym.make('DoublePendulum-TO-v0')
# Same value as nb_steps passed to MFGPS below (100).
env._max_episode_steps = 100
# Written on the unwrapped env so the gym wrappers are bypassed.
# NOTE(review): dt is presumably the simulation time step -- confirm in the env.
env.unwrapped.dt = 0.05

# Fixed seed for the environment.
env.seed(1337)

# Alternative prior with extra keys, kept for reference:
# prior = {'K': 1e-3, 'psi': 1e-8, 'nu': 0.1}
prior = {'K': 1e-6}

# init_state is taken from env.init(), which runs after env.seed(1337) above.
# NOTE(review): the meaning of the remaining keyword arguments (including the
# 'mult' / 'shift' activation settings) is defined inside the solver and is
# not visible here -- confirm before tuning.
solver = MFGPS(env,
               nb_steps=100,
               init_state=env.init(),
               init_action_sigma=10.,
               kl_bound=1e1,
               slew_rate=False,
               action_penalty=1,
               prior=prior,
               activation={
                   'mult': 1.,
                   'shift': 80
               })

# run gps
# The returned trace is kept in a module-level name for later inspection.
trace = solver.run(nb_learning_episodes=25,
                   nb_evaluation_episodes=25,
                   nb_iter=50,
                   verbose=True)

# execute and plot
# NOTE(review): stoch=True presumably samples from the stochastic controller
# -- confirm against MFGPS.rollout.
data = solver.rollout(25, stoch=True)
Ejemplo n.º 4
0
import warnings
# NOTE(review): this silences every warning for the whole process, including
# deprecation and numerical warnings raised by the libraries used below.
warnings.filterwarnings("ignore")

# Fixed seed for NumPy's global random state.
np.random.seed(1337)

# lqr task
env = gym.make('LQR-TO-v1')
# Much larger than nb_steps=60 used below.
# NOTE(review): presumably chosen so gym's step limit never triggers -- confirm.
env._max_episode_steps = 100000

# Fixed seed for the environment.
env.seed(1337)

# NOTE(review): the meaning of the 'K' / 'psi' / 'nu' entries is defined
# inside the solver and is not visible here -- confirm before tuning.
prior = {'K': 1e-6, 'psi': 1e6, 'nu': 0.1}

# init_state is taken from env.init(), which runs after both seeds are set.
alg = MFGPS(env, nb_steps=60,
            init_state=env.init(),
            init_action_sigma=100.,
            kl_bound=1e0,
            prior=prior)

# run gps
# The returned trace is kept in a module-level name for later inspection.
trace = alg.run(nb_learning_episodes=25,
                nb_evaluation_episodes=25,
                nb_iter=25, verbose=True)

# plot dists
alg.plot_distributions()

# execute and plot
# NOTE(review): stoch=True presumably samples from the stochastic controller
# -- confirm against MFGPS.rollout.
data = alg.rollout(25, stoch=True)

# Open a fresh figure for whatever is drawn next.
plt.figure()
Ejemplo n.º 5
0
import gym
from trajopt.gps import MFGPS

# Script flow: build the pendulum environment, optimise a controller with
# MFGPS, plot the solver's output, then sample trajectories from the result.

# pendulum env
env = gym.make('Pendulum-TO-v0')
# Same value as nb_steps passed to MFGPS below (100).
env._max_episode_steps = 100
# Written on the unwrapped env so the gym wrappers are bypassed.
# NOTE(review): _dt is presumably the simulation time step -- confirm that
# the environment actually reads the underscore-prefixed attribute.
env.unwrapped._dt = 0.05

# NOTE(review): the meaning of kl_bound / init_ctl_sigma and of the
# 'shift' / 'mult' activation settings is defined inside trajopt.gps and is
# not visible here -- confirm before tuning.
alg = MFGPS(env,
            nb_steps=100,
            kl_bound=1.,
            init_ctl_sigma=2.5,
            activation={
                'shift': 90,
                'mult': 2.
            })

# run gps
# The returned trace is kept in a module-level name for later inspection.
trace = alg.run(nb_episodes=10)

# plot dists
alg.plot()

# execute and plot
nb_episodes = 25
# NOTE(review): stoch=False presumably requests noise-free rollouts -- confirm.
data = alg.sample(nb_episodes, stoch=False)

import matplotlib.pyplot as plt

# Open a fresh figure for the plots that follow.
plt.figure()
for k in range(alg.dm_state):
Ejemplo n.º 6
0
# The same prior dict is handed to both solvers below.
prior = {'K': 1e-6}

# mass-damper
# First solver: MFRGPS, which takes separate KL bounds for the policy and for
# the parameters. Its init_state comes from env.init(), evaluated here, i.e.
# before the seeding calls that follow.
rgps = MFRGPS(env, nb_steps=100,
              policy_kl_bound=5.,
              param_kl_bound=1e1,
              init_state=env.init(),
              init_action_sigma=100.,
              prior=prior)

# Seed NumPy and the environment with the same value before each run.
np.random.seed(1337)
env.seed(1337)

rgps.run(nb_iter=15, nb_learning_episodes=10,
         nb_evaluation_episodes=100, verbose=True)

# Second solver: MFGPS with the same horizon, initial action sigma and prior.
# Note the prior is passed as `dyn_prior` here but as `prior` to MFRGPS above.
# Its env.init() call runs after the MFRGPS run and before the reseed below.
gps = MFGPS(env, nb_steps=100,
            kl_bound=5.,
            init_state=env.init(),
            init_action_sigma=100.,
            dyn_prior=prior)

# Same seeds as used before the MFRGPS run.
np.random.seed(1337)
env.seed(1337)

gps.run(nb_iter=15, nb_learning_episodes=10,
        nb_evaluation_episodes=100, verbose=True)

gps.plot()
Ejemplo n.º 7
0
              init_action_sigma=25.,
              action_penalty=1e-1,
              activation={'mult': 1., 'shift': 80},
              prior=prior)

# Train the solver held in `rgps` and keep its trace.
rgps_trace = rgps.run(nb_learning_episodes=10,
                      nb_evaluation_episodes=100,
                      nb_iter=50, verbose=True)

# Reseed NumPy and the environment before building the second solver, so its
# env.init() call below runs under a freshly set seed.
np.random.seed(1337)
env.seed(1337)

# NOTE(review): the meaning of the keyword arguments (including the
# 'mult' / 'shift' activation settings) is defined inside the solver and is
# not visible here -- confirm before tuning.
gps = MFGPS(env, nb_steps=100,
            kl_bound=1e1,
            init_state=env.init(),
            init_action_sigma=25.,
            action_penalty=1e-1,
            activation={'mult': 1., 'shift': 80},
            dyn_prior=prior)

# run gps
# Same episode and iteration counts as the rgps run above.
gps_trace = gps.run(nb_learning_episodes=10,
                    nb_evaluation_episodes=100,
                    nb_iter=50, verbose=True)

# Reseed before evaluating so the rollouts start from a known RNG state.
np.random.seed(1337)
env.seed(1337)
# NOTE(review): stoch=False presumably requests noise-free rollouts -- confirm.
rgps_data = rgps.rollout(100, stoch=False)
# Sum rgps_data['c'] over axis 0, then average the resulting values.
# assumes axis 0 is the time axis and the remaining axis indexes rollouts
# -- TODO confirm against the rollout implementation
print("Cost of Rbst. Ctl.:", np.mean(np.sum(rgps_data['c'], axis=0)))

# Reset NumPy's seed again before the next step.
np.random.seed(1337)