Example #1
import gym
import numpy as np

from trajopt.ilqr import iLQR


def create_job(kwargs):
    import warnings
    warnings.filterwarnings("ignore")

    # pendulum env
    env = gym.make('Pendulum-TO-v1')
    env._max_episode_steps = 100000
    env.unwrapped.dt = 0.05

    dm_state = env.observation_space.shape[0]
    dm_act = env.action_space.shape[0]

    horizon, nb_steps = 25, 100
    state = np.zeros((dm_state, nb_steps + 1))
    action = np.zeros((dm_act, nb_steps))

    state[:, 0] = env.reset()
    # receding-horizon loop: re-plan over a short horizon at every step
    # and apply only the first optimized action
    for t in range(nb_steps):
        solver = iLQR(env, init_state=state[:, t],
                      nb_steps=horizon, action_penalty=np.array([1e-5]))
        solver.run(nb_iter=10, verbose=False)

        action[:, t] = solver.uref[:, 0]
        state[:, t + 1], _, _, _ = env.step(action[:, t])

    return state[:, :-1].T, action.T
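The unused kwargs argument suggests create_job is meant to be mapped over a worker pool to collect independent rollouts. A minimal, hypothetical driver under that assumption (the pool size and the empty argument dicts are made up for illustration):

import multiprocessing as mp

if __name__ == '__main__':
    # collect four independent (states, actions) rollouts in parallel;
    # each worker builds its own environment inside create_job
    with mp.Pool(processes=4) as pool:
        rollouts = pool.map(create_job, [{}] * 4)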
Example #2
import warnings

import gym
import numpy as np

from trajopt.ilqr import iLQR

warnings.filterwarnings("ignore")

# lqr task
env = gym.make('LQR-TO-v0')
env._max_episode_steps = 100000

dm_state = env.observation_space.shape[0]
dm_act = env.action_space.shape[0]

horizon, nb_steps = 25, 100
state = np.zeros((dm_state, nb_steps + 1))
action = np.zeros((dm_act, nb_steps))

state[:, 0] = env.reset()
for t in range(nb_steps):
    solver = iLQR(env, init_state=state[:, t], nb_steps=horizon)
    trace = solver.run(nb_iter=5, verbose=False)

    action[:, t] = solver.uref[:, 0]
    state[:, t + 1], _, _, _ = env.step(action[:, t])

    print('Time Step:', t, 'Cost:', trace[-1])

import matplotlib.pyplot as plt

plt.figure()

plt.subplot(3, 1, 1)
plt.plot(state[0, :], '-b')
plt.subplot(3, 1, 2)
plt.plot(state[1, :], '-b')
Example #3
import gym
import numpy as np

from trajopt.ilqr import iLQR

# the environment is assumed to be created above, e.g. env = gym.make(...)
env._max_episode_steps = 100000
env.unwrapped._dt = 0.05

dm_state = env.observation_space.shape[0]
dm_act = env.action_space.shape[0]

horizon, nb_steps = 10, 150
state = np.zeros((dm_state, nb_steps + 1))
action = np.zeros((dm_act, nb_steps))
init_action = np.zeros((dm_act, horizon))

nb_iter = 5

state[:, 0] = env.reset()
for t in range(nb_steps):
    # warm-start the solver with the previous solution (shifted by one step below)
    solver = iLQR(env, init_state=state[:, t],
                  init_action=init_action, nb_steps=horizon)
    trace = solver.run(nb_iter=nb_iter, verbose=False)

    _nominal_state = solver.xref
    _nominal_action = solver.uref

    action[:, t] = _nominal_action[:, 0]
    state[:, t + 1], _, _, _ = env.step(action[:, t])

    init_action = np.hstack((_nominal_action[:, 1:], np.zeros((dm_act, 1))))
    print('Time Step:', t, 'Cost:', trace[-1])

import matplotlib.pyplot as plt

plt.figure()
Example #4
import gym
import numpy as np

from trajopt.ilqr import iLQR

# pendulum env
env = gym.make('Pendulum-TO-v0')
env._max_episode_steps = 100000
env.unwrapped.dt = 0.05

dm_state = env.observation_space.shape[0]
dm_act = env.action_space.shape[0]

horizon, nb_steps = 25, 100
state = np.zeros((dm_state, nb_steps + 1))
action = np.zeros((dm_act, nb_steps))

state[:, 0] = env.reset()
for t in range(nb_steps):
    solver = iLQR(env, init_state=state[:, t],
                  nb_steps=horizon, action_penalty=np.array([1e-5]))
    trace = solver.run(nb_iter=10, verbose=False)

    action[:, t] = solver.uref[:, 0]
    state[:, t + 1], _, _, _ = env.step(action[:, t])

    print('Time Step:', t, 'Cost:', trace[-1])

import matplotlib.pyplot as plt

plt.figure()

plt.subplot(3, 1, 1)
plt.plot(state[0, :], '-b')
plt.subplot(3, 1, 2)
plt.plot(state[1, :], '-b')
Example #5
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Filename: pendulum.py
# @Date: 2019-06-16-18-38
# @Author: Hany Abdulsamad
# @Contact: [email protected]


import gym
from trajopt.ilqr import iLQR

# pendulum env
env = gym.make('Pendulum-TO-v0')
env._max_episode_steps = 150

alg = iLQR(env, nb_steps=150,
           activation=range(100, 150))

# run iLQR
trace = alg.run(nb_iter=25)

# plot forward pass
alg.plot()

# plot objective
import matplotlib.pyplot as plt

plt.figure()
plt.plot(trace)
plt.show()
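After the solve, the optimized open-loop plan can be replayed in the environment. A minimal sketch, assuming alg.uref holds the nominal action sequence after alg.run(), as solver.uref does in the receding-horizon examples above:

# replay the optimized open-loop actions
# (assumes alg.uref has shape (dm_act, nb_steps))
obs = env.reset()
for t in range(alg.uref.shape[1]):
    obs, _, _, _ = env.step(alg.uref[:, t])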
Example #6
import gym
from trajopt.ilqr import iLQR

# cartpole env
env = gym.make('Quanser-Cartpole-TO-v0')
env._max_episode_steps = 500

alg = iLQR(env, nb_steps=500, activation={'shift': 450, 'mult': 2.})

# run iLQR
trace = alg.run()

# plot forward pass
alg.plot()

# plot objective
import matplotlib.pyplot as plt

plt.figure()
plt.plot(trace)
plt.show()
Example #7
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Filename: cartpole.py
# @Date: 2019-06-16-18-38
# @Author: Hany Abdulsamad
# @Contact: [email protected]

import gym
from trajopt.ilqr import iLQR

# quanser qube env
env = gym.make('Quanser-Qube-TO-v0')
env._max_episode_steps = 200

alg = iLQR(env, nb_steps=200, activation=range(150, 200))

# run iLQR
trace = alg.run()

# plot forward pass
alg.plot()

# plot objective
import matplotlib.pyplot as plt

plt.figure()
plt.plot(trace)
plt.show()
Example #8
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Filename: cartpole.py
# @Date: 2019-06-16-18-38
# @Author: Hany Abdulsamad
# @Contact: [email protected]


import gym
from trajopt.ilqr import iLQR

# cartpole env
env = gym.make('Cartpole-TO-v0')
env._max_episode_steps = 700

alg = iLQR(env, nb_steps=700,
           activation=range(600, 700))

# run iLQR
trace = alg.run()

# plot forward pass
alg.plot()

# plot objective
import matplotlib.pyplot as plt

plt.figure()
plt.plot(trace)
plt.show()
Example #9
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Filename: cartpole.py
# @Date: 2019-06-16-18-38
# @Author: Hany Abdulsamad
# @Contact: [email protected]

import gym
from trajopt.ilqr import iLQR

# cartpole env
env = gym.make('Quanser-Cartpole-TO-v0')
env._max_episode_steps = 250

alg = iLQR(env, nb_steps=250, activation=range(200, 250))

# run iLQR
trace = alg.run()

# plot forward pass
alg.plot()

# plot objective
import matplotlib.pyplot as plt

plt.figure()
plt.plot(trace)
plt.show()
Example #10
import gym
from trajopt.ilqr import iLQR

import warnings
warnings.filterwarnings("ignore")

# quanser qube env
env = gym.make('QQube-TO-v1')
env._max_episode_steps = 500

alg = iLQR(env, nb_steps=500, activation={'shift': 250, 'mult': 0.01})

# run iLQR
trace = alg.run(nb_iter=5, verbose=True)

# plot forward pass
alg.plot()

# plot objective
import matplotlib.pyplot as plt

plt.figure()
plt.plot(trace)
plt.show()

state, action, _ = alg.forward_pass(ctl=alg.ctl, alpha=1.)

plt.figure()

plt.subplot(5, 1, 1)
plt.plot(state[0, :], '-b')
Example #11
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Filename: lqr.py
# @Date: 2019-06-16-18-38
# @Author: Hany Abdulsamad
# @Contact: [email protected]


import gym
from trajopt.ilqr import iLQR

# lqr task
env = gym.make('LQR-TO-v0')
env._max_episode_steps = 100

alg = iLQR(env, nb_steps=60,
           activation=range(60))

# run iLQR
trace = alg.run()

# plot forward pass
alg.plot()

# plot objective
import matplotlib.pyplot as plt

plt.figure()
plt.plot(trace)
plt.show()