def __init__(self, sys, initial_state, T, Q = None, R = None, x_f = None, u_f = None): self.INITIAL_STATE = self.x = initial_state self.sys = sys sys.add_robot(self) if Q is None: Q = np.zeros((sys.xdims, sys.xdims)) self.Q = Q if R is None: R = np.zeros((sys.udims, sys.udims)) self.R = R if x_f is None: x_f = np.zeros((sys.xdims, 1)) self.x_f = x_f if u_f is None: u_f = np.zeros((sys.udims, 1)) self.u_f = u_f self.lqr = LQR(sys, self, sys.T)
B[0, 1] = 1; #Q = make_symmetric_random_psd(state_dim) #R = make_symmetric_random_psd(control_dim) Q = np.identity(state_dim) R = np.identity(control_dim) dyn_func = linear_dynamics(A, B); cost_func = quadtratic_cost(Q, R); T = 4 x0 = np.asarray((3, 2, 1)) #x0 = np.random.random(state_dim) lqr = LQR(A, B, Q, R, T) lqr.solve() lqr_states, lqr_controls, lqr_costs = lqr.forward_pass(x0) #Us2 = np.random.random((T, control_dim)) Us2 = np.ones((T, control_dim)) #Us2 = np.asarray(lqr_controls) xt = x0.copy(); Xs2 = xt; for t in range(0, T-1): xt1 = dyn_func(xt, Us2[t]) Xs2 = np.vstack((Xs2, xt1)) xt = xt1 #Xs = lqr_states; Us = lqr_controls; Xs = Xs2; Us = Us2; original_ilqr_cost = np.sum([cost_func(x, u) for (x,u) in zip(Xs, Us)])
# -------- Settings ------------ NUM_OF_TIMESTAMP = 50 # -------- Main Code ---------- pos_setpoint = 5 vel_setpoint = 0 Q = np.array([[1, 0], [0, 1]]) # controls how much state difference cost R = 0.01 # controls how much control cost. state_setpoint = np.array([[pos_setpoint], [vel_setpoint]]) initial_covariance = np.eye(2) my_controller = LQR(env.A, env.B, Q, R) gain = my_controller.get_K(10) # note that the minus sign is omitted here because of the way we calculate error. print('gain: {}'.format(gain)) true_state = [env.state] accel_cmd = [0] for i in range(NUM_OF_TIMESTAMP): accel = np.dot(gain, (state_setpoint - env.state))[0, 0] meas = env.control_and_sense(accel) print("True state: {}".format(env.state)) print("accel : {}".format(accel)) true_state.append(env.state) accel_cmd.append(accel)
def integrate(self, use_jac=False, linearize=True, interpolate=True, **kwargs): keys = kwargs.keys() xinit = kwargs['xinit'] if 'xinit' in keys else self.xinit lin = linearize interp = interpolate if self.ufun is not None: func = lambda t, x: self.f(t, x, self.ufun(t, x)) dfdx = lambda t, x: self.dfdx(t, x, self.ufun(t, x)) dfdu = lambda t, x: self.dfdu(t, x, self.ufun(t, x)) else: func = self.f dfdx = self.dfdx dfdu = self.dfdu jac = dfdx if use_jac else None #Tracer()() if self.delf is not None: delfunc = lambda t, x: self.delf(t, x, self.ufun(t, x)) (t, x, jumps) = sysIntegrate(func, self.xinit, tlims=self.tlims, phi=self.phi, jac=jac, delfunc=delfunc) else: (t, x, jumps) = sysIntegrate(func, self.xinit, tlims=self.tlims, phi=self.phi, jac=jac) #Tracer()() components = ['x'] if 'ufun' in self.__dict__.keys(): components.append('u') if lin: components.append('A') components.append('B') traj = Trajectory(*components) for (tt, xx) in zip(t, x): names = {'x': xx} if 'u' in components: names['u'] = self.ufun(tt, xx) if 'A' in components: names['A'] = dfdx(tt, xx) if 'B' in components: names['B'] = dfdu(tt, xx) traj.addpoint(tt, **names) # interpolate, unless requested; # saves a few manual calls if interp: traj.interpolate() if lin: print("linearizing...") self.lintraj = traj self.regulator = LQR(self.tlims, traj.A, traj.B) self.regulator.solve() traj.feasible = True traj.tlims = self.tlims traj.jumps = jumps return traj
class System(object): """ a collection of callables, mostly """ def __init__(self, func, tlims=(0, 10), **kwargs): # TODO add an option to pass a symSystem directly # skipping a bunch of middleman bullshit that might # otherwise have to happen self.tlims = tlims keys = kwargs.keys() if 'xinit' in keys: self.xinit = kwargs['xinit'] self.dim = len(self.xinit) elif 'dim' in keys: self.dim = kwargs['dim'] else: self.dim = 1 self.f = func self.ufun = kwargs['controller'] if 'controller' in keys else None self.dfdx = kwargs['dfdx'] if 'dfdx' in keys else None self.dfdu = kwargs['dfdu'] if 'dfdu' in keys else None self.phi = kwargs['phi'] if 'phi' in keys else None self.delf = kwargs['delf'] if 'delf' in kwargs else None if self.ufun is None: self.dimu = 0 else: self.dimu = len(self.dimu(tlims[0])) # to be called after a reference has been set def build_cost(self, **kwargs): self.cost = CostFunction(self.dim, self.dimu, self.ref, projector=self.project, **kwargs) return self.cost def set_u(self, controller): if 'ufun' in self.__dict__.keys(): self._uold = self.ufun self.ufun = controller def reset_u(self): if '_uold' in self.__dict__.keys(): self.u = self._uold else: print("Nothing to reset to. Not doing anything.") # TODO make sure this works in all combinations of linearization # or not, and controlled or not; # Major cleanup needed. def integrate(self, use_jac=False, linearize=True, interpolate=True, **kwargs): keys = kwargs.keys() xinit = kwargs['xinit'] if 'xinit' in keys else self.xinit lin = linearize interp = interpolate if self.ufun is not None: func = lambda t, x: self.f(t, x, self.ufun(t, x)) dfdx = lambda t, x: self.dfdx(t, x, self.ufun(t, x)) dfdu = lambda t, x: self.dfdu(t, x, self.ufun(t, x)) else: func = self.f dfdx = self.dfdx dfdu = self.dfdu jac = dfdx if use_jac else None #Tracer()() if self.delf is not None: delfunc = lambda t, x: self.delf(t, x, self.ufun(t, x)) (t, x, jumps) = sysIntegrate(func, self.xinit, tlims=self.tlims, phi=self.phi, jac=jac, delfunc=delfunc) else: (t, x, jumps) = sysIntegrate(func, self.xinit, tlims=self.tlims, phi=self.phi, jac=jac) #Tracer()() components = ['x'] if 'ufun' in self.__dict__.keys(): components.append('u') if lin: components.append('A') components.append('B') traj = Trajectory(*components) for (tt, xx) in zip(t, x): names = {'x': xx} if 'u' in components: names['u'] = self.ufun(tt, xx) if 'A' in components: names['A'] = dfdx(tt, xx) if 'B' in components: names['B'] = dfdu(tt, xx) traj.addpoint(tt, **names) # interpolate, unless requested; # saves a few manual calls if interp: traj.interpolate() if lin: print("linearizing...") self.lintraj = traj self.regulator = LQR(self.tlims, traj.A, traj.B) self.regulator.solve() traj.feasible = True traj.tlims = self.tlims traj.jumps = jumps return traj @timeout(30000) def project(self, traj, tlims=None, lin=False): if traj.feasible: return traj if tlims is None: tlims = self.tlims self.xinit = traj.x(tlims[0]) if 'regulator' in self.__dict__: # print("regular projection") ltj = self.lintraj reg = self.regulator control = Controller(reference=traj, K=reg.K) self.set_u(control) # print(lin) return self.integrate(linearize=lin) else: print("integrating and linearizing for the first time") control = Controller(reference=traj) self.set_u(control) nutraj = self.integrate(linearize=True) return self.project(nutraj, tlims=tlims, lin=lin)
def integrate(self, use_jac=False, linearize=True, interpolate=True, **kwargs): keys = kwargs.keys() xinit = kwargs['xinit'] if 'xinit' in keys else self.xinit lin = linearize interp = interpolate if self.ufun is not None: func = lambda t, x: self.f(t, x, self.ufun(t, x)) dfdx = lambda t, x: self.dfdx(t, x, self.ufun(t, x)) dfdu = lambda t, x: self.dfdu(t, x, self.ufun(t, x)) else: func = self.f dfdx = self.dfdx dfdu = self.dfdu jac = dfdx if use_jac else None #Tracer()() if self.delf is not None: delfunc = lambda t, x: self.delf(t, x, self.ufun(t, x)) (t, x, jumps) = sysIntegrate(func, self.xinit, tlims=self.tlims, phi=self.phi, jac=jac, delfunc=delfunc) else: (t, x, jumps) = sysIntegrate(func, self.xinit, tlims=self.tlims, phi=self.phi, jac=jac) #Tracer()() components = ['x'] if 'ufun' in self.__dict__.keys(): components.append('u') if lin: components.append('A') components.append('B') traj = Trajectory(*components) for (tt, xx) in zip(t, x): names = {'x': xx} if 'u' in components: names['u'] = self.ufun(tt, xx) if 'A' in components: names['A'] = dfdx(tt, xx) if 'B' in components: names['B'] = dfdu(tt, xx) traj.addpoint(tt, **names) # interpolate, unless requested; # saves a few manual calls if interp: traj.interpolate() if lin: print("linearizing...") self.lintraj = traj self.regulator = LQR(self.tlims, traj.A, traj.B) #, jumps=jumps) self.regulator.solve() traj.feasible = True traj.tlims = self.tlims traj.jumps = jumps return traj
class System(object): """ a collection of callables, mostly """ def __init__(self, func, tlims=(0, 10), **kwargs): # TODO add an option to pass a symSystem directly # skipping a bunch of middleman bullshit that might # otherwise have to happen self.tlims = tlims keys = kwargs.keys() if 'xinit' in keys: self.xinit = kwargs['xinit'] self.dim = len(self.xinit) elif 'dim' in keys: self.dim = kwargs['dim'] else: self.dim = 1 self.f = func self.ufun = kwargs['controller'] if 'controller' in keys else None self.dfdx = kwargs['dfdx'] if 'dfdx' in keys else None self.dfdu = kwargs['dfdu'] if 'dfdu' in keys else None self.phi = kwargs['phi'] if 'phi' in keys else None self.delf = kwargs['delf'] if 'delf' in kwargs else None if self.ufun is None: self.dimu = 0 else: self.dimu = len(self.dimu(tlims[0])) # to be called after a reference has been set def build_cost(self, **kwargs): self.cost = CostFunction(self.dim, self.dimu, self.ref, projector=self.project, **kwargs) return self.cost def set_u(self, controller): if 'ufun' in self.__dict__.keys(): self._uold = self.ufun self.ufun = controller def reset_u(self): if '_uold' in self.__dict__.keys(): self.u = self._uold else: print("Nothing to reset to. Not doing anything.") # TODO make sure this works in all combinations of linearization # or not, and controlled or not; # Major cleanup needed. def integrate(self, use_jac=False, linearize=True, interpolate=True, **kwargs): keys = kwargs.keys() xinit = kwargs['xinit'] if 'xinit' in keys else self.xinit lin = linearize interp = interpolate if self.ufun is not None: func = lambda t, x: self.f(t, x, self.ufun(t, x)) dfdx = lambda t, x: self.dfdx(t, x, self.ufun(t, x)) dfdu = lambda t, x: self.dfdu(t, x, self.ufun(t, x)) else: func = self.f dfdx = self.dfdx dfdu = self.dfdu jac = dfdx if use_jac else None #Tracer()() if self.delf is not None: delfunc = lambda t, x: self.delf(t, x, self.ufun(t, x)) (t, x, jumps) = sysIntegrate(func, self.xinit, tlims=self.tlims, phi=self.phi, jac=jac, delfunc=delfunc) else: (t, x, jumps) = sysIntegrate(func, self.xinit, tlims=self.tlims, phi=self.phi, jac=jac) #Tracer()() components = ['x'] if 'ufun' in self.__dict__.keys(): components.append('u') if lin: components.append('A') components.append('B') traj = Trajectory(*components) for (tt, xx) in zip(t, x): names = {'x': xx} if 'u' in components: names['u'] = self.ufun(tt, xx) if 'A' in components: names['A'] = dfdx(tt, xx) if 'B' in components: names['B'] = dfdu(tt, xx) traj.addpoint(tt, **names) # interpolate, unless requested; # saves a few manual calls if interp: traj.interpolate() if lin: print("linearizing...") self.lintraj = traj self.regulator = LQR(self.tlims, traj.A, traj.B) #, jumps=jumps) self.regulator.solve() traj.feasible = True traj.tlims = self.tlims traj.jumps = jumps return traj @timeout(100) def project(self, traj, tlims=None, lin=False): if traj.feasible: return traj if tlims is None: tlims = self.tlims self.xinit = traj.x(tlims[0]) if 'regulator' in self.__dict__: # print("regular projection") ltj = self.lintraj reg = self.regulator control = Controller(reference=traj, K=reg.K) self.set_u(control) # print(lin) return self.integrate(linearize=lin) else: print("integrating and linearizing for the first time") control = Controller(reference=traj) self.set_u(control) nutraj = self.integrate(linearize=True) return self.project(nutraj, tlims=tlims, lin=lin)
# State vector = [x, y, vx, vy] x_init = np.array([10, 30, 10, -5.0], dtype='float64').transpose() A = np.eye(4) A[2, 2] = (1 - dt * friction) / mass A[3, 3] = (1 - dt * friction) / mass A[0, 2] = dt A[1, 3] = dt B = np.array([[0, 0], [0, 0], [dt / mass, 0], [0, dt / mass]], dtype='float64') Q = 0.01 * np.eye(4) R = np.eye(2) lqr = LQR(A, B, Q, R) K = lqr.compute_policy_gains(T, dt) x = x_init X = np.zeros((T, 4), dtype='float64') U = np.zeros((T, 2), dtype='float64') for i in range(T): u = np.dot(K[i], x) # This is essentially the simulator of the vehicle x = np.dot(A, x) + np.dot(B, u) X[i, :] = x.transpose() U[i, :] = u.transpose()
class RobotLTI(): def __init__(self, sys, initial_state, T, Q = None, R = None, x_f = None, u_f = None): self.INITIAL_STATE = self.x = initial_state self.sys = sys sys.add_robot(self) if Q is None: Q = np.zeros((sys.xdims, sys.xdims)) self.Q = Q if R is None: R = np.zeros((sys.udims, sys.udims)) self.R = R if x_f is None: x_f = np.zeros((sys.xdims, 1)) self.x_f = x_f if u_f is None: u_f = np.zeros((sys.udims, 1)) self.u_f = u_f self.lqr = LQR(sys, self, sys.T) def Qt(self, t = None): return self.Q def Rt(self, t = None): return self.R def reg_lti(self): self.lqr.converge() return self.lqr.Kt(0) def control(self, u): x_p = self.sys.step(self.x - self.x_f, u - self.u_f) cost = self.cost(self.x - self.x_f, u - self.u_f) self.x = x_p + self.x_f return self.x, cost def pi(self, x, t): return self.u_f + matmul(self.lqr.Kt(t), x - self.x_f) def cost(self, x, u): return (matmul(x.T, self.Q, x) + matmul(u.T, self.R, u))[0,0] def rollout(self, verbose=False): T = self.sys.T states = [self.x] controls = [] costs = [] for t in range(T): u = self.pi(self.x, t) controls.append(u) x, cost = self.control(u) if verbose: print ("\nt = " + str(t) + "\ncost: " + str(cost) + "\ncontrol: " + str(u) + "\nstate: " + str(x)) states.append(self.x) costs.append(cost) return states, controls, costs def rollout_learner(self, learner, verbose=False): T = self.sys.T states = [self.x] controls = [] costs = [] for t in range(T): u = learner.predict(self.x) controls.append(u) x, cost = self.control(u) if verbose: print ("\nt = " + str(t) + "\ncost: " + str(cost) + "\ncontrol: " + str(u) + "\nstate: " + str(x)) states.append(self.x) costs.append(cost) return states, controls, costs