def node_cache_ctg(self, node):
    """Cache LQR cost-to-go and gain tables on ``node``.

    Builds the time-reversed system ``Ar = A^-1``, ``Br = -A^-1 * B`` from
    ``self.A`` and ``self.B``, calls ``final_value_LQR`` on it with the first
    ``self.n`` entries of the node state and a horizon of ``kmax`` steps, and
    stores the two returned sequences on the node in reversed order.

    Parameters:
        node: mapping providing ``node['state']``.  Entry ``self.n`` of the
            state is subtracted from ``self.max_time_horizon`` to get the
            horizon.  The keys ``'ctg'`` and ``'gain'`` are written in place.

    Raises:
        AssertionError: if ``max_time_horizon - state[self.n] + 1 <= 0``.

    NOTE(review): assumes ``self.A`` / ``self.B`` are ``numpy.matrix``
    objects (``.I`` and ``*`` are used as inverse and matrix product) --
    confirm against the class constructor.
    """
    A = self.A
    B = self.B
    x = node['state']

    # Reverse system.  Invert A once and reuse it; the original evaluated
    # A.I separately for Ar and for Br.
    Ar = A.I
    Br = -Ar * B

    kmax = self.max_time_horizon - x[self.n] + 1
    assert kmax > 0, 'node state lies beyond max_time_horizon'

    # ctg[0] is cost-to-go with zero steps -- very sharp quadratic --
    # so ctg[k] with k = max_time_horizon - state[self.n] is time to go.
    Fs, Ps = final_value_LQR(Ar, Br, self.Q, self.R, x[0:self.n], kmax)

    # u = -Fs[i] * x + pk, but pk is zero since the penalty on actuation is
    # purely quadratic.
    # Storing in reverse order is easier to think about:
    # node['gain'][i] is what you should do with i steps left to go.
    node['ctg'] = Ps[::-1]
    node['gain'] = Fs[::-1]
def node_cache_ctg(self, node):
    """Cache LQR tables and the dynamics used to build them on ``node``.

    Obtains ``(A, B, c)`` from ``self.get_ABc`` and ``(Q, R, q, r, d)`` from
    ``self.get_QRqrd``, both called with the first ``self.n`` entries of the
    node state and ``self.u0``.  It then builds the time-reversed system
    ``Ar = A^-1``, ``Br = -A^-1 * B``, ``cr = -A^-1 * c`` and calls
    ``final_value_LQR`` on it for ``kmax`` steps.

    Parameters:
        node: mapping providing ``node['state']``.  Entry ``self.n`` of the
            state is subtracted from ``self.max_time_horizon`` to get the
            horizon.  The keys ``'ctg'``, ``'gain'``, ``'dynamics'`` and
            ``'reverse_dynamics'`` are written in place.

    Raises:
        AssertionError: if ``max_time_horizon - state[self.n] + 1 <= 0``.

    NOTE(review): assumes ``A``, ``B`` and ``c`` are ``numpy.matrix`` objects
    (``.I`` and ``*`` are used as inverse and matrix product) -- confirm
    against ``get_ABc``.
    """
    A, B, c = self.get_ABc(node['state'][0:self.n], self.u0)
    Q, R, q, r, d = self.get_QRqrd(node['state'][0:self.n], self.u0)
    x = node['state']

    # Reverse system.  Invert A once and reuse it; the original evaluated
    # A.I three times (for Ar, Br and cr).
    Ar = A.I
    Br = -Ar * B
    cr = -Ar * c

    kmax = self.max_time_horizon - x[self.n] + 1
    assert kmax > 0, 'node state lies beyond max_time_horizon'

    # ctg[0] is cost-to-go with zero steps -- very sharp quadratic --
    # so ctg[k] with k = max_time_horizon - state[self.n] is time to go.
    Fs, Ps = final_value_LQR(Ar, Br, Q, R, x[0:self.n], kmax,
                             c=cr, q=q, r=r, d=d)

    # Storing in reverse order is easier to think about:
    # node['gain'][i] is what you should do with i steps left to go.
    node['ctg'] = Ps[::-1]
    node['gain'] = Fs[::-1]
    node['dynamics'] = (A, B, c)
    node['reverse_dynamics'] = (Ar, Br, cr)
def steer(A, B, x_from, x_toward):
    """Roll the system forward from ``x_from`` using gains aimed at ``x_toward``.

    Both states have five entries; index 4 is used as an integer time stamp
    and indices 0-3 are propagated through ``A`` and ``B``.  The number of
    steps is ``x_toward[4] - x_from[4]``.

    Returns a tuple ``(x_actual, us)``: the last simulated state and the
    applied controls, one row per step.  When no steps are available,
    ``x_from`` itself is returned together with an empty control array.

    Relies on module-level ``Q``, ``R`` and ``final_value_LQR``.
    """
    assert len(x_from) == 5

    # how much time to do the steering
    n_steps = x_toward[4] - x_from[4]
    assert n_steps == int(n_steps)
    n_steps = int(n_steps)

    if n_steps <= 0:
        # stay here
        # NOTE(review): this empty array has 1 column while the normal path
        # returns 2 columns -- confirm callers do not rely on the width.
        return (x_from, np.zeros(shape=(0, 1)))

    gains, _ = final_value_LQR(A, B, Q, R, x_toward[0:4], n_steps)

    controls = np.zeros(shape=(n_steps, 2))
    states = np.zeros(shape=(n_steps + 1, 5))
    states[0] = x_from

    for k in range(n_steps):
        current = states[k, 0:4]
        gain = gains[k]
        # The first four gain columns multiply the state; column 4 is added
        # as a constant term.
        controls[k] = -(np.dot(gain[:, 0:4], current) + gain[:, 4])
        states[k + 1, 0:4] = np.dot(A, current) + np.dot(B, controls[k])
        states[k + 1, 4] = states[k, 4] + 1

    return (states[-1], controls)
def steer(A, B, x_from, x_toward):
    """Simulate the system from ``x_from`` under gains targeting ``x_toward``.

    States carry five entries: indices 0-3 are propagated through ``A`` and
    ``B``, and index 4 is an integer time stamp.  The steering duration is
    the difference of the two time stamps.

    Returns ``(x_actual, us)``, the final simulated state and the control
    rows that were applied.  With a non-positive duration the function
    returns ``x_from`` unchanged and an empty control array.

    Relies on module-level ``Q``, ``R`` and ``final_value_LQR``.
    """
    assert len(x_from) == 5

    # how much time to do the steering
    duration = x_toward[4] - x_from[4]
    assert duration == int(duration)
    duration = int(duration)

    if duration <= 0:
        # stay here
        # NOTE(review): width 1 here versus width 2 on the normal path --
        # confirm callers do not rely on the column count.
        return (x_from, np.zeros(shape=(0, 1)))

    feedback, _ = final_value_LQR(A, B, Q, R, x_toward[0:4], duration)

    trajectory = np.zeros(shape=(duration + 1, 5))
    inputs = np.zeros(shape=(duration, 2))
    trajectory[0] = x_from

    for step in range(duration):
        here = trajectory[step, 0:4]
        # Linear part of the gain acts on the state; column 4 is the
        # constant term.
        linear_part = np.dot(feedback[step][:, 0:4], here)
        inputs[step] = -(linear_part + feedback[step][:, 4])
        trajectory[step + 1, 0:4] = np.dot(A, here) + np.dot(B, inputs[step])
        trajectory[step + 1, 4] = trajectory[step, 4] + 1

    return (trajectory[-1], inputs)
def node_cache_ctg(self, node):
    """Cache LQR cost-to-go and gain tables on ``node``.

    Forms the time-reversed system ``Ar = A^-1``, ``Br = -A^-1 * B`` from
    ``self.A`` and ``self.B``, runs ``final_value_LQR`` on it using the first
    ``self.n`` entries of the node state and ``kmax`` steps, then writes the
    two returned sequences onto the node in reversed order.

    Parameters:
        node: mapping providing ``node['state']``.  Entry ``self.n`` of the
            state is subtracted from ``self.max_time_horizon`` to get the
            horizon.  The keys ``'ctg'`` and ``'gain'`` are written in place.

    Raises:
        AssertionError: if ``max_time_horizon - state[self.n] + 1 <= 0``.

    NOTE(review): assumes ``self.A`` / ``self.B`` are ``numpy.matrix``
    objects (``.I`` and ``*`` are used as inverse and matrix product) --
    confirm against the class constructor.
    """
    x = node['state']

    # Reverse system.  The inverse is computed once and shared; the original
    # evaluated A.I separately for Ar and for Br.
    Ar = self.A.I
    Br = -Ar * self.B

    kmax = self.max_time_horizon - x[self.n] + 1
    assert kmax > 0, 'node state lies beyond max_time_horizon'

    # ctg[0] is cost-to-go with zero steps -- very sharp quadratic --
    # so ctg[k] with k = max_time_horizon - state[self.n] is time to go.
    Fs, Ps = final_value_LQR(Ar, Br, self.Q, self.R, x[0:self.n], kmax)

    # u = -Fs[i] * x + pk, but pk is zero since the penalty on actuation is
    # purely quadratic.
    # Storing in reverse order is easier to think about:
    # node['gain'][i] is what you should do with i steps left to go.
    node['ctg'] = Ps[::-1]
    node['gain'] = Fs[::-1]
def node_cache_ctg(self, node):
    """Cache LQR tables and the dynamics used to build them on ``node``.

    Fetches ``(A, B, c)`` via ``self.get_ABc`` and ``(Q, R, q, r, d)`` via
    ``self.get_QRqrd``, both called with the first ``self.n`` entries of the
    node state and ``self.u0``.  It then forms the time-reversed system
    ``Ar = A^-1``, ``Br = -A^-1 * B``, ``cr = -A^-1 * c`` and runs
    ``final_value_LQR`` on it for ``kmax`` steps.

    Parameters:
        node: mapping providing ``node['state']``.  Entry ``self.n`` of the
            state is subtracted from ``self.max_time_horizon`` to get the
            horizon.  The keys ``'ctg'``, ``'gain'``, ``'dynamics'`` and
            ``'reverse_dynamics'`` are written in place.

    Raises:
        AssertionError: if ``max_time_horizon - state[self.n] + 1 <= 0``.

    NOTE(review): assumes ``A``, ``B`` and ``c`` are ``numpy.matrix`` objects
    (``.I`` and ``*`` are used as inverse and matrix product) -- confirm
    against ``get_ABc``.
    """
    A, B, c = self.get_ABc(node['state'][0:self.n], self.u0)
    Q, R, q, r, d = self.get_QRqrd(node['state'][0:self.n], self.u0)
    x = node['state']

    # Reverse system.  The inverse is computed once and shared; the original
    # evaluated A.I three times (for Ar, Br and cr).
    A_inv = A.I
    Ar = A_inv
    Br = -A_inv * B
    cr = -A_inv * c

    kmax = self.max_time_horizon - x[self.n] + 1
    assert kmax > 0, 'node state lies beyond max_time_horizon'

    # ctg[0] is cost-to-go with zero steps -- very sharp quadratic --
    # so ctg[k] with k = max_time_horizon - state[self.n] is time to go.
    Fs, Ps = final_value_LQR(Ar, Br, Q, R, x[0:self.n], kmax,
                             c=cr, q=q, r=r, d=d)

    # Storing in reverse order is easier to think about:
    # node['gain'][i] is what you should do with i steps left to go.
    node['ctg'] = Ps[::-1]
    node['gain'] = Fs[::-1]
    node['dynamics'] = (A, B, c)
    node['reverse_dynamics'] = (Ar, Br, cr)
# Compare a forward rollout of (A, B) with a rollout of the reversed system
# (Ar, Br), then evaluate the cost-to-go in both directions.
us_backward = us_backward[::-1]
xs_forward = run_forward(A, B, x0, us_backward)
# The reversed system is driven from xf with the controls flipped back, and
# the resulting trajectory is flipped again so it lines up with xs_forward.
reversed_rollout = run_forward(Ar, Br, xf, us_backward[::-1])
xs_backward = reversed_rollout[::-1]

# Three stacked panels: forward states, backward states, controls.
plt.figure()
panel_data = (xs_forward[:, 0:4], xs_backward[:, 0:4], us_backward)
for panel_row, panel_series in enumerate(panel_data, 1):
    plt.subplot(3, 1, panel_row)
    plt.plot(panel_series)

T = int(xf[4] - x0[4] + 1)
Fs, Ps = final_value_LQR(A, B, Q, R, xf[0:4], T)
# Ps[T] is the cost-to-go given zero time steps --
# Ps[T-1] is the cost-to-go given 1 time step
# Ps[0] is the cost-to-go given T time steps

# Column vector [x0[0:4]; 1] used to evaluate the quadratic form.
x0m = np.vstack([x0[0:4].reshape(4, 1), [[1]]])

# cost of taking x0 to xf
direct_cost_forward = cost(A, B, x0, us)
# same cost
Ps0_times_x0m = np.dot(Ps[0], x0m)
cost_to_go_forward = np.squeeze(np.dot(x0m.T, Ps0_times_x0m))

# opposite
Fsr, Psr = final_value_LQR(Ar, Br, Q, R, x0[0:4], T)
xfm = np.vstack([xf[0:4].reshape(4, 1), [[1]]])
# Compare a forward rollout of (A, B) with a rollout of the reversed system
# (Ar, Br), then evaluate the forward cost-to-go.
us_backward = us_backward[::-1]
xs_forward = run_forward(A, B, x0, us_backward)
# The reversed system is driven from xf with the controls flipped back, and
# the resulting trajectory is flipped again so it lines up with xs_forward.
reversed_rollout = run_forward(Ar, Br, xf, us_backward[::-1])
xs_backward = reversed_rollout[::-1]

# Three stacked panels: forward states, backward states, controls.
plt.figure()
panel_data = (xs_forward[:, 0:4], xs_backward[:, 0:4], us_backward)
for panel_row, panel_series in enumerate(panel_data, 1):
    plt.subplot(3, 1, panel_row)
    plt.plot(panel_series)

T = int(xf[4] - x0[4] + 1)
Fs, Ps = final_value_LQR(A, B, Q, R, xf[0:4], T)
# Ps[T] is the cost-to-go given zero time steps --
# Ps[T-1] is the cost-to-go given 1 time step
# Ps[0] is the cost-to-go given T time steps

# Column vector [x0[0:4]; 1] used to evaluate the quadratic form.
x0m = np.vstack([x0[0:4].reshape(4, 1), [[1]]])

# cost of taking x0 to xf
direct_cost_forward = cost(A, B, x0, us)
# same cost
Ps0_times_x0m = np.dot(Ps[0], x0m)
cost_to_go_forward = np.squeeze(np.dot(x0m.T, Ps0_times_x0m))

# opposite
Fsr, Psr = final_value_LQR(Ar, Br, Q, R, x0[0:4], T)