def __init__(self, environment, horizon, dt, path, init_iterations=500,
             step_iterations=1, fcost=None, constrained=True,
             fastForward=False, tolGrad=1e-4, tolFun=1e-7, save_interval=10,
             printing=True, saving=True, init_optim=True, finite_diff=True,
             noise_gain=0.05, add_noise=False, ou_theta=0.15, ou_sigma=0.2):
    """Model-predictive-control agent wrapping an iLQR trajectory optimizer.

    Parameters
    ----------
    environment : object
        Simulation environment; must expose ``uDim``, ``uMax`` and the
        current state ``x``.
    horizon : float
        Prediction horizon handed to the internal iLQR optimizer.
    dt : float
        Time step-size.
    path : str
        Directory where iLQR results are saved (files prefixed 'ilqr_').
    init_iterations : int
        Optimizer iterations for the initial trajectory optimization.
    step_iterations : int
        Optimizer iterations per MPC step.
    fcost, constrained, fastForward, tolGrad, tolFun, save_interval,
    printing, saving, finite_diff :
        Forwarded unchanged to the ``iLQR`` constructor.
    init_optim : bool
        If truthy, run an initial trajectory optimization from the
        environment's current state.
    noise_gain, add_noise, ou_theta, ou_sigma :
        Configuration of optional Ornstein-Uhlenbeck exploration noise
        added to the control signal.
    """
    super(MPCAgent, self).__init__(environment.uDim)
    # Internal trajectory optimizer; init=False defers optimization,
    # reset_mu=True resets regularization between MPC steps.
    self.traj_optimizer = iLQR(environment, horizon, dt, path=path,
                               fcost=fcost, constrained=constrained,
                               fastForward=fastForward,
                               save_interval=save_interval,
                               tolGrad=tolGrad, tolFun=tolFun,
                               printing=printing, finite_diff=finite_diff,
                               file_prefix='ilqr_', init=False,
                               saving=saving, reset_mu=True)
    self.uMax = self.traj_optimizer.environment.uMax
    self.init_iterations = init_iterations
    self.step_iterations = step_iterations
    self.x0 = environment.x
    # Idiomatic truthiness test instead of '== True' (PEP 8).
    if init_optim:
        self.init_optim(environment.x)
    self.noise_gain = noise_gain
    self.add_noise = add_noise
    # Ornstein-Uhlenbeck process used for exploration noise on the input.
    self.action_noise = OUnoise(self.uDim, dt, theta=ou_theta, sigma=ou_sigma)
def c_N(x):
    """Terminal cost at step N for the serial double cart-pole."""
    x1, x2, x3, x4, x5, x6 = x
    return (100 * x1**2 + 100 * x2**2 + 100 * x3**2
            + 10 * x4**2 + 10 * x5**2 + 10 * x6**2)


# initial state value
x0 = [0, np.pi, np.pi, 0, 0, 0]

t = 3.       # simulation time
dt = 0.005   # time step-size

env = CartPoleDoubleSerial(c_k, x0, dt)

path = '../../../results/ilqr/cart_pole_double_serial2/'

# instance of the iLQR algorithm
algorithm = iLQR(env, t, dt, constrained=True, fcost=c_N, path=path,
                 maxIters=1000)

#algorithm.run_disk(x0)
algorithm.run_optim()  # run trajectory optimization

# plot trajectories
algorithm.plot()
plt.show()

# create an animation
algorithm.animation()
# NOTE(review): the header of this running-cost function is cut off in the
# source; only the cost expression and 'return c' are visible. Reconstructed
# as c_k(x, u) with a 6-dim state unpack (the environment state is 6-dim and
# c_N below unpacks six entries) — confirm against the original file.
def c_k(x, u):
    """Running cost at step k (state- and input-dependent)."""
    x1, x2, x3, x4, x5, x6 = x
    u1 = u[0]
    c = 0.5 * (2 * x1**2 + 10. * x2**2 + 10. * x3**2
               + 0.5 * x4**2 + .1 * u1**2)
    return c


# define the final cost at step N
def c_N(x):
    """Terminal cost at step N (penalizes positions only)."""
    x1, x2, x3, x4, x5, x6 = x
    c = 0.5 * (100. * x1**2 + 100. * x2**2 + 100. * x3**2)
    return c


# initial state value
x0 = [0, np.pi, np.pi, 0, 0, 0]

t = 8      # simulation time
dt = 0.01  # time step-size

env = CartPoleDoubleParallel(c_k, x0, dt)

# path, where results are saved
path = '../../../results/ilqr/cart_pole_double_parallel/'

algorithm = iLQR(env, t, dt, constrained=True, fcost=c_N, path=path)
algorithm.run_optim()  # run trajectory optimization

# plot trajectories
algorithm.plot()
plt.show()

# create an animation
algorithm.animation()
# NOTE(review): the beginning of this terminal-cost function is cut off in
# the source — the visible fragment starts mid-expression at
# '(x4 - mod.pi)**2'. The header and any leading cost terms below are
# placeholders; confirm against the original file before relying on them.
# 'mod' is an alias imported elsewhere (presumably math or numpy) — verify.
def c_N(x):
    """Terminal cost at step N for the triple cart-pole (reconstructed)."""
    x1, x2, x3, x4, x5, x6, x7, x8 = x
    c = ((x4 - mod.pi)**2 + 10 * x5**2 + 10 * x6**2
         + 10 * x7**2 + 10 * x8**2)
    return c


# initial state value
x0 = [0, np.pi, np.pi, np.pi, 0, 0, 0, 0]

t = 2.0     # simulation time
dt = 0.002  # time step-size

env = CartPoleTriple(c_k, x0, dt)
env.uMax = 20

# path, where results are saved
# NOTE(review): '~' is not expanded by open(); confirm iLQR calls
# os.path.expanduser on this, otherwise saving will fail or create a
# literal '~' directory.
path = '~/tripilqr/'

# instance of the iLQR algorithm
algorithm = iLQR(env, t, dt, constrained=True, fcost=c_N, path=path,
                 maxIters=10, fastForward=False)

#algorithm.run_disk(x0)
algorithm.run_optim()  # run trajectory optimization

# plot trajectories
algorithm.plot()

# create an animation
#system
algorithm.animation()
# define the final cost at step N
def c_N(x):
    """Terminal cost at step N for the single cart-pole."""
    x1, x2, x3, x4 = x
    return 100 * x1**2 + 100 * x2**2 + 10 * x3**2 + 10 * x4**2


# initial state value
x0 = [0, np.pi, 0, 0]

t = 5       # simulation time
dt = 0.02   # time step-size

env = CartPole(c_k, x0, dt)
env.uMax = 4 * env.uMax  # widen the input constraint to 4x the default

# path, where results are saved
path = './results/ilqr/cart_pole5/'

# instance of the iLQR algorithm
# NOTE(review): c_N is defined above but fcost=None is passed here —
# confirm this is intentional.
algorithm = iLQR(env, t, dt, path=path, fcost=None, constrained=True,
                 maxIters=500)

#algorithm.run_disk(x0)
algorithm.run_optim()  # run trajectory optimization

# plot trajectories
algorithm.plot()
plt.show()

# create an animation
algorithm.animation()
# NOTE(review): the function header is cut off in the source. The visible
# body unpacks a 2-dim state and uses no input, and the iLQR call below
# passes fcost=c_N, so it is presumably c_N — confirm against the original.
def c_N(x):
    """Terminal cost for the pendulum (reconstructed header)."""
    x1, x2 = x
    c = 100 * (1 - np.cos(x1)) + 0.1 * x2**2
    return c


# initial state value
x0 = [np.pi, 0]

t = 5       # simulation time
dt = 0.05   # time step-size

env = Pendulum(c_k, x0, dt)

# path, where results are saved
path = '../../../results/pendulum/ilqr/'

# instance of the iLQR algorithm
algorithm = iLQR(env, t, dt, fcost=c_N, constrained=True, path=path,
                 finite_diff=True)
algorithm.run_optim()  # run trajectory optimization

# plot trajectories
algorithm.plot()
plt.show()

# create an animation
algorithm.animation()