def ctfh_lqr_dual(A, B, Q, R, Qf_inv, ts): (n, m) = lqr_dim(A, B, Q, R) assert Qf_inv.shape == (n, n) def matrix_to_state(m): return np.array(m).flatten() def state_to_matrix(s): return np.matrix(np.reshape(s, newshape=(n, n))) def dSdt(S_, t): S = state_to_matrix(S_) dSdt_ = A * S + S * A.T - B * R.I * B.T + S * Q * S return matrix_to_state(dSdt_) def back_dSdt(S, t): return -dSdt(S, -t) S_ts = scipy.integrate.odeint(func=back_dSdt, y0=matrix_to_state(Qf_inv), t=ts) Ss = np.zeros(shape=(len(ts), n, n)) for i in range(len(ts)): Ss[i] = state_to_matrix(S_ts[len(ts) - i - 1]) return Ss
def __init__(self,A,B,Q,R,max_time_horizon): (self.n,self.m) = lqr_dim(A,B,Q,R) (self.A,self.B,self.Q,self.R) = (A,B,Q,R) self.max_time_horizon = max_time_horizon def action_state_valid(x,u): return True self.action_state_valid = action_state_valid self.max_nodes_per_extension = None self.max_steer_cost = np.inf
def __init__(self, A, B, Q, R, max_time_horizon): (self.n, self.m) = lqr_dim(A, B, Q, R) (self.A, self.B, self.Q, self.R) = (A, B, Q, R) self.max_time_horizon = max_time_horizon def action_state_valid(x, u): return True self.action_state_valid = action_state_valid self.max_nodes_per_extension = None self.max_steer_cost = np.inf
def ctfh_lqr_dual(A,B,Q,R,Qf_inv,ts): (n,m) = lqr_dim(A,B,Q,R) assert Qf_inv.shape == (n,n) def matrix_to_state(m): return np.array(m).flatten() def state_to_matrix(s): return np.matrix(np.reshape(s,newshape=(n,n))) def dSdt(S_,t): S = state_to_matrix(S_) dSdt_ = A * S + S * A.T - B * R.I * B.T + S * Q * S return matrix_to_state(dSdt_) def back_dSdt(S,t): return -dSdt(S,-t) S_ts = scipy.integrate.odeint(func=back_dSdt,y0=matrix_to_state(Qf_inv),t=ts) Ss = np.zeros(shape=(len(ts),n,n)) for i in range(len(ts)): Ss[i] = state_to_matrix(S_ts[len(ts)-i-1]) return Ss
#final-value constrained LQR Qdualfinal = np.zeros_like(Q) cost_to_go_dual = ctfh_lqr_dual(A,B,Q,R,Qdualfinal,ts) cost_to_go_dual_inv = np.empty_like(cost_to_go_dual) for i in range(cost_to_go_dual.shape[0]): print np.linalg.cond(cost_to_go_dual[i]) cost_to_go_dual_inv[i] = np.linalg.pinv(cost_to_go_dual[i]) #drive LTI CT finite-horizon LQR n_ts = cost_to_go_dual_inv.shape[0] n,m = lqr_dim(A,B,Q,R) gain_schedule = np.zeros(shape=(n_ts,m,n)) for i in range(n_ts): gain_schedule[i] = -np.dot((R.I * B.T), cost_to_go_dual_inv[i]) ts_sim = np.linspace(0,T,1000) x0 = np.array([1,0]) desired = np.array([2,-2]) traj =simulate_lti_fb(A,B,x0=x0,ts=ts_sim, gain_schedule=gain_schedule, gain_schedule_ts=ts, setpoint = desired) print traj[-1] plt.plot(ts_sim,traj[:,0])
T = 5 #time horizon for LQR ts = np.linspace(0, T, 8000) #final-value constrained LQR Qdualfinal = np.zeros_like(Q) cost_to_go_dual = ctfh_lqr_dual(A, B, Q, R, Qdualfinal, ts) cost_to_go_dual_inv = np.empty_like(cost_to_go_dual) for i in range(cost_to_go_dual.shape[0]): print np.linalg.cond(cost_to_go_dual[i]) cost_to_go_dual_inv[i] = np.linalg.pinv(cost_to_go_dual[i]) #drive LTI CT finite-horizon LQR n_ts = cost_to_go_dual_inv.shape[0] n, m = lqr_dim(A, B, Q, R) gain_schedule = np.zeros(shape=(n_ts, m, n)) for i in range(n_ts): gain_schedule[i] = -np.dot((R.I * B.T), cost_to_go_dual_inv[i]) ts_sim = np.linspace(0, T, 1000) x0 = np.array([1, 0]) desired = np.array([2, -2]) traj = simulate_lti_fb(A, B, x0=x0, ts=ts_sim, gain_schedule=gain_schedule, gain_schedule_ts=ts, setpoint=desired)