# --- Build a small LQR problem and evaluate a fixed control sequence ---
B[0, 1] = 1

# Identity cost weights; the commented alternatives used random PSD matrices.
#Q = make_symmetric_random_psd(state_dim)
#R = make_symmetric_random_psd(control_dim)
Q = np.identity(state_dim)
R = np.identity(control_dim)

dyn_func = linear_dynamics(A, B)
cost_func = quadtratic_cost(Q, R)  # NOTE(review): "quadtratic" typo is in the helper's own name

T = 4
x0 = np.asarray((3, 2, 1))
#x0 = np.random.random(state_dim)

# Reference solution from the Riccati-based LQR solver.
lqr = LQR(A, B, Q, R, T)
lqr.solve()
lqr_states, lqr_controls, lqr_costs = lqr.forward_pass(x0)

# Candidate control sequence to evaluate (all ones).
#Us2 = np.random.random((T, control_dim))
Us2 = np.ones((T, control_dim))
#Us2 = np.asarray(lqr_controls)

# Roll the dynamics forward under Us2, stacking each visited state.
xt = x0.copy()
Xs2 = xt
for step in range(T - 1):
    xt_next = dyn_func(xt, Us2[step])
    Xs2 = np.vstack((Xs2, xt_next))
    xt = xt_next

#Xs = lqr_states; Us = lqr_controls
Xs = Xs2
Us = Us2

# Total trajectory cost of the candidate (state, control) pairs.
original_ilqr_cost = np.sum([cost_func(x, u) for (x, u) in zip(Xs, Us)])
def integrate(self, use_jac=False, linearize=True, interpolate=True, **kwargs):
    """Integrate the system dynamics and package the result as a Trajectory.

    Parameters
    ----------
    use_jac : bool
        If True, hand the state Jacobian to the integrator.
    linearize : bool
        If True, record A/B matrices along the trajectory and build an
        LQR regulator from them.
    interpolate : bool
        If True, interpolate the trajectory before returning.
    **kwargs
        ``xinit`` overrides ``self.xinit`` as the initial state.

    Returns
    -------
    Trajectory
        Has an ``x`` component, plus ``u`` when a control policy is
        attached and ``A``/``B`` when ``linearize`` is True; also carries
        ``feasible``, ``tlims`` and ``jumps`` attributes.
    """
    xinit = kwargs.get('xinit', self.xinit)
    lin = linearize
    interp = interpolate

    # Close the loop over the control policy, if one is attached.
    if self.ufun is not None:
        func = lambda t, x: self.f(t, x, self.ufun(t, x))
        dfdx = lambda t, x: self.dfdx(t, x, self.ufun(t, x))
        dfdu = lambda t, x: self.dfdu(t, x, self.ufun(t, x))
    else:
        func = self.f
        dfdx = self.dfdx
        dfdu = self.dfdu

    jac = dfdx if use_jac else None

    if self.delf is not None:
        # NOTE(review): assumes self.ufun is set whenever self.delf is;
        # otherwise this lambda raises at call time — confirm invariant.
        delfunc = lambda t, x: self.delf(t, x, self.ufun(t, x))
        # BUG FIX: previously passed self.xinit here, silently ignoring a
        # caller-supplied kwargs['xinit']; use the resolved xinit instead.
        (t, x, jumps) = sysIntegrate(func, xinit, tlims=self.tlims,
                                     phi=self.phi, jac=jac, delfunc=delfunc)
    else:
        (t, x, jumps) = sysIntegrate(func, xinit, tlims=self.tlims,
                                     phi=self.phi, jac=jac)

    components = ['x']
    if 'ufun' in self.__dict__.keys():
        components.append('u')
    if lin:
        components.append('A')
        components.append('B')

    traj = Trajectory(*components)
    for (tt, xx) in zip(t, x):
        names = {'x': xx}
        if 'u' in components:
            names['u'] = self.ufun(tt, xx)
        if 'A' in components:
            names['A'] = dfdx(tt, xx)
        if 'B' in components:
            names['B'] = dfdu(tt, xx)
        traj.addpoint(tt, **names)

    # Interpolate unless requested otherwise; saves a few manual calls.
    if interp:
        traj.interpolate()

    if lin:
        print("linearizing...")
        self.lintraj = traj
        self.regulator = LQR(self.tlims, traj.A, traj.B)  # , jumps=jumps)
        self.regulator.solve()

    traj.feasible = True
    traj.tlims = self.tlims
    traj.jumps = jumps
    return traj
# -------- Settings ------------
NUM_OF_TIMESTAMP = 50

# -------- Main Code ----------
pos_setpoint = 5
vel_setpoint = 0

Q = np.array([[1, 0],
              [0, 1]])  # controls how much state difference cost
R = 0.01                # controls how much control cost.

state_setpoint = np.array([[pos_setpoint], [vel_setpoint]])
initial_covariance = np.eye(2)

my_controller = LQR(env.A, env.B, Q, R)
# note that the minus sign is omitted here because of the way we calculate error.
gain = my_controller.get_K(10)
print('gain: {}'.format(gain))

# Closed-loop simulation: command acceleration from the state error each step.
true_state = [env.state]
accel_cmd = [0]
for step in range(NUM_OF_TIMESTAMP):
    accel = np.dot(gain, (state_setpoint - env.state))[0, 0]
    meas = env.control_and_sense(accel)
    print("True state: {}".format(env.state))
    print("accel : {}".format(accel))
    true_state.append(env.state)
    accel_cmd.append(accel)
# State vector = [x, y, vx, vy]
x_init = np.array([10, 30, 10, -5.0], dtype='float64').transpose()

# Discrete-time point-mass dynamics: positions integrate velocity,
# velocities decay with friction.
# NOTE(review): (1 - dt*friction)/mass differs from the usual
# 1 - dt*friction/mass — confirm this is the intended model.
A = np.eye(4)
A[2, 2] = (1 - dt * friction) / mass
A[3, 3] = (1 - dt * friction) / mass
A[0, 2] = dt
A[1, 3] = dt

# Acceleration commands enter through the two velocity states.
B = np.array([[0, 0],
              [0, 0],
              [dt / mass, 0],
              [0, dt / mass]], dtype='float64')

Q = 0.01 * np.eye(4)
R = np.eye(2)

lqr = LQR(A, B, Q, R)
K = lqr.compute_policy_gains(T, dt)

# Closed-loop rollout: apply the time-varying gains and record states/controls.
x = x_init
X = np.zeros((T, 4), dtype='float64')
U = np.zeros((T, 2), dtype='float64')
for k in range(T):
    u = np.dot(K[k], x)
    # This is essentially the simulator of the vehicle
    x = np.dot(A, x) + np.dot(B, u)
    X[k, :] = x.transpose()
    U[k, :] = u.transpose()