Beispiel #1
0
    def node_cache_ctg(self, node):
        """Compute the cost-to-go and gain tables for ``node`` and cache them on it.

        Reads ``self.A``, ``self.B``, ``self.Q``, ``self.R``, ``self.n`` and
        ``self.max_time_horizon``. The system is reversed using the inverse of
        ``A`` and handed to ``final_value_LQR`` together with the first
        ``self.n`` entries of the node state. The two returned sequences are
        stored in reverse order under ``node['ctg']`` and ``node['gain']``.

        Args:
            node: mapping with a ``'state'`` entry. ``state[0:self.n]`` is
                passed to ``final_value_LQR``; ``state[self.n]`` is subtracted
                from the time horizon (presumably the node's time index --
                confirm against the caller).

        Raises:
            AssertionError: if the computed number of steps ``kmax`` is not
                positive.
        """
        x = node['state']

        # Reverse system. The inverse is taken once and reused; the original
        # evaluated ``A.I`` separately for Ar and for Br.
        # NOTE(review): ``.I`` and ``*`` are used as inverse and matrix product,
        # so A and B are presumably numpy.matrix objects -- confirm.
        A_inv = self.A.I
        Ar = A_inv
        Br = -A_inv * self.B

        kmax = self.max_time_horizon - x[self.n] + 1
        assert kmax > 0

        # ctg[0] is cost-to-go zero steps -- very sharp quadratic
        # so ctg[k] with k = max_time_horizon - from_node['state'][self.n] is time to go
        Fs, Ps = final_value_LQR(Ar, Br, self.Q, self.R, x[0:self.n], kmax)

        # u = -Fs[i] * x + pk, but it is zero since the penalty on actuation is purely quadratic
        # Storing in reverse order is easier to think about:
        # node['gain'][i] is what you should do with i steps left to go.
        node['ctg'] = Ps[::-1]
        node['gain'] = Fs[::-1]
Beispiel #2
0
    def node_cache_ctg(self, node):
        """Compute cost-to-go and gain tables for ``node`` and cache them on it.

        The dynamics ``(A, B, c)`` and cost terms ``(Q, R, q, r, d)`` are
        obtained from ``self.get_ABc`` and ``self.get_QRqrd``, both called with
        the first ``self.n`` entries of the node state and ``self.u0``. The
        system is reversed using the inverse of ``A`` and handed to
        ``final_value_LQR``. Results are stored on the node:

        * ``node['ctg']`` / ``node['gain']``: the two sequences returned by
          ``final_value_LQR``, in reverse order.
        * ``node['dynamics']``: ``(A, B, c)``.
        * ``node['reverse_dynamics']``: ``(Ar, Br, cr)``.

        Args:
            node: mapping with a ``'state'`` entry. ``state[self.n]`` is
                subtracted from the time horizon (presumably the node's time
                index -- confirm against the caller).

        Raises:
            AssertionError: if the computed number of steps ``kmax`` is not
                positive.
        """
        x = node['state']
        A, B, c = self.get_ABc(x[0:self.n], self.u0)
        Q, R, q, r, d = self.get_QRqrd(x[0:self.n], self.u0)

        # Reverse system. The inverse is taken once and reused; the original
        # evaluated ``A.I`` three separate times.
        # NOTE(review): ``.I`` and ``*`` are used as inverse and matrix product,
        # so A, B and c are presumably numpy.matrix objects -- confirm.
        A_inv = A.I
        Ar = A_inv
        Br = -A_inv * B
        cr = -A_inv * c

        kmax = self.max_time_horizon - x[self.n] + 1
        assert kmax > 0

        # ctg[0] is cost-to-go zero steps -- very sharp quadratic
        # so ctg[k] with k = max_time_horizon - from_node['state'][self.n] is time to go
        Fs, Ps = final_value_LQR(Ar, Br, Q, R, x[0:self.n], kmax,
                                 c=cr, q=q, r=r, d=d)

        # Storing in reverse order is easier to think about:
        # node['gain'][i] is what you should do with i steps left to go.
        node['ctg'] = Ps[::-1]
        node['gain'] = Fs[::-1]

        node['dynamics'] = (A, B, c)
        node['reverse_dynamics'] = (Ar, Br, cr)
Beispiel #3
0
def steer(A, B, x_from, x_toward):
    """Roll the system from ``x_from`` toward ``x_toward`` under LQR feedback.

    States have five entries: entries 0-3 are propagated through ``A`` and
    ``B``; entry 4 is a time index that advances by one per step. The number
    of steps is ``x_toward[4] - x_from[4]``. The gains come from
    ``final_value_LQR``, called with the module-level ``Q`` and ``R`` and the
    first four entries of ``x_toward``.

    Args:
        A: matrix applied to the four propagated state entries.
        B: matrix applied to the two-entry control.
        x_from: start state, length 5.
        x_toward: state to steer toward, indexed up to entry 4.

    Returns:
        Tuple ``(x_actual, us)``: the last state of the rollout and the applied
        controls with shape ``(T, 2)``. If no time is available (``T <= 0``),
        ``x_from`` is returned unchanged with an empty ``(0, 2)`` control
        array, so the column count matches the normal path.

    Raises:
        AssertionError: if ``x_from`` does not have length 5 or the time
            difference is not a whole number.
    """
    assert len(x_from) == 5
    n_controls = 2

    T = x_toward[4] - x_from[4]  # how much time to do the steering
    assert T == int(T)
    T = int(T)
    if T <= 0:
        # Stay here. The empty control array keeps the same number of columns
        # as the non-empty case (the original returned shape (0, 1)).
        return (x_from, np.zeros(shape=(0, n_controls)))

    Fs, _ = final_value_LQR(A, B, Q, R, x_toward[0:4], T)

    xs = np.zeros(shape=(T + 1, 5))
    us = np.zeros(shape=(T, n_controls))
    xs[0] = x_from

    for i in range(T):
        # Columns 0-3 of Fs[i] multiply the state; column 4 is added as a
        # constant term.
        us[i] = -1 * (np.dot(Fs[i, :, 0:4], xs[i, 0:4]) + Fs[i, :, 4])
        xs[i + 1, 0:4] = np.dot(A, xs[i, 0:4]) + np.dot(B, us[i])
        xs[i + 1, 4] = xs[i, 4] + 1

    return (xs[-1], us)
Beispiel #4
0
def steer(A, B, x_from, x_toward):
    """Steer from ``x_from`` toward ``x_toward`` using finite-horizon LQR gains.

    Each state has five entries. Entries 0-3 are advanced with
    ``A @ x + B @ u``; entry 4 is a time index incremented by one per step.
    The horizon is ``x_toward[4] - x_from[4]`` steps. Gains are obtained from
    ``final_value_LQR`` using the module-level ``Q`` and ``R`` and the first
    four entries of ``x_toward``.

    Args:
        A: matrix applied to the four propagated state entries.
        B: matrix applied to the two-entry control.
        x_from: start state, length 5.
        x_toward: state to steer toward, indexed up to entry 4.

    Returns:
        Tuple ``(x_actual, us)``: the final rolled-out state and the controls
        applied, shape ``(T, 2)``. When ``T <= 0`` the start state is returned
        as-is together with an empty ``(0, 2)`` control array, so both return
        paths agree on the number of control columns.

    Raises:
        AssertionError: if ``x_from`` does not have length 5 or the time
            difference is not a whole number.
    """
    assert len(x_from) == 5
    control_dim = 2

    T = x_toward[4] - x_from[4]  # how much time to do the steering
    assert T == int(T)
    T = int(T)
    if T <= 0:
        # Stay here; the original returned shape (0, 1), which did not match
        # the (T, 2) layout produced below.
        return (x_from, np.zeros(shape=(0, control_dim)))

    Fs, _ = final_value_LQR(A, B, Q, R, x_toward[0:4], T)

    xs = np.zeros(shape=(T + 1, 5))
    us = np.zeros(shape=(T, control_dim))
    xs[0] = x_from

    for step in range(T):
        state = xs[step, 0:4]
        # Columns 0-3 of the gain multiply the state; column 4 is added as a
        # constant term.
        us[step] = -1 * (np.dot(Fs[step, :, 0:4], state) + Fs[step, :, 4])
        xs[step + 1, 0:4] = np.dot(A, state) + np.dot(B, us[step])
        xs[step + 1, 4] = xs[step, 4] + 1

    return (xs[-1], us)
Beispiel #5
0
    def node_cache_ctg(self, node):
        """Cache the cost-to-go and gain sequences for ``node``.

        Uses ``self.A``, ``self.B``, ``self.Q``, ``self.R``, ``self.n`` and
        ``self.max_time_horizon``. A reversed system is built from the inverse
        of ``A`` and passed to ``final_value_LQR`` along with the first
        ``self.n`` entries of the node state. The returned sequences are
        written to ``node['ctg']`` and ``node['gain']`` in reverse order.

        Args:
            node: mapping with a ``'state'`` entry. ``state[self.n]`` is
                subtracted from the time horizon (presumably the node's time
                index -- confirm against the caller).

        Raises:
            AssertionError: if the computed number of steps ``kmax`` is not
                positive.
        """
        state = node['state']

        # Reverse system. Invert A a single time; the original evaluated
        # ``A.I`` once for Ar and again for Br.
        # NOTE(review): ``.I`` and ``*`` are used as inverse and matrix product,
        # so A and B are presumably numpy.matrix objects -- confirm.
        A_inv = self.A.I
        Ar = A_inv
        Br = -A_inv * self.B

        kmax = self.max_time_horizon - state[self.n] + 1
        assert kmax > 0

        # ctg[0] is cost-to-go zero steps -- very sharp quadratic
        # so ctg[k] with k = max_time_horizon - from_node['state'][self.n] is time to go
        Fs, Ps = final_value_LQR(Ar, Br, self.Q, self.R, state[0:self.n], kmax)

        # u = -Fs[i] * x + pk, but it is zero since the penalty on actuation is purely quadratic
        # Storing in reverse order is easier to think about:
        # node['gain'][i] is what you should do with i steps left to go.
        node['ctg'] = Ps[::-1]
        node['gain'] = Fs[::-1]
Beispiel #6
0
    def node_cache_ctg(self, node):
        """Cache cost-to-go, gains and the dynamics used for ``node``.

        ``self.get_ABc`` and ``self.get_QRqrd`` are each called with the first
        ``self.n`` entries of the node state and ``self.u0`` to obtain
        ``(A, B, c)`` and ``(Q, R, q, r, d)``. A reversed system is built from
        the inverse of ``A`` and passed to ``final_value_LQR``. The node is
        then updated with:

        * ``'ctg'`` and ``'gain'``: the two sequences returned by
          ``final_value_LQR``, in reverse order.
        * ``'dynamics'``: ``(A, B, c)``.
        * ``'reverse_dynamics'``: ``(Ar, Br, cr)``.

        Args:
            node: mapping with a ``'state'`` entry. ``state[self.n]`` is
                subtracted from the time horizon (presumably the node's time
                index -- confirm against the caller).

        Raises:
            AssertionError: if the computed number of steps ``kmax`` is not
                positive.
        """
        state = node['state']
        A, B, c = self.get_ABc(state[0:self.n], self.u0)
        Q, R, q, r, d = self.get_QRqrd(state[0:self.n], self.u0)

        # Reverse system. Invert A a single time; the original evaluated
        # ``A.I`` separately for each of Ar, Br and cr.
        # NOTE(review): ``.I`` and ``*`` are used as inverse and matrix product,
        # so A, B and c are presumably numpy.matrix objects -- confirm.
        A_inv = A.I
        Ar = A_inv
        Br = -A_inv * B
        cr = -A_inv * c

        kmax = self.max_time_horizon - state[self.n] + 1
        assert kmax > 0

        # ctg[0] is cost-to-go zero steps -- very sharp quadratic
        # so ctg[k] with k = max_time_horizon - from_node['state'][self.n] is time to go
        Fs, Ps = final_value_LQR(
            Ar, Br, Q, R, state[0:self.n], kmax,
            c=cr, q=q, r=r, d=d,
        )

        # Storing in reverse order is easier to think about:
        # node['gain'][i] is what you should do with i steps left to go.
        node['ctg'] = Ps[::-1]
        node['gain'] = Fs[::-1]

        node['dynamics'] = (A, B, c)
        node['reverse_dynamics'] = (Ar, Br, cr)
Beispiel #7
0
# Script fragment: compares a forward rollout with a reversed-system rollout,
# plots both, then sets up LQR cost-to-go evaluations in each direction.
# It relies on names defined outside this fragment (A, B, Ar, Br, Q, R, x0,
# xf, us_backward, run_forward, cost, final_value_LQR, np, plt).

# Flip the control sequence end-for-end.
us_backward = us_backward[::-1]

# Run (A, B) from x0 with the flipped controls, and (Ar, Br) from xf with the
# controls flipped back; the second trajectory is then reversed.
# NOTE(review): presumably this puts both trajectories in the same time order
# for comparison -- confirm against run_forward.
xs_forward = run_forward(A, B, x0, us_backward)
xs_backward = run_forward(Ar, Br, xf, us_backward[::-1])[::-1]

# Three stacked panels: forward states, backward states, controls.
# Only columns 0-3 of each trajectory are plotted.
plt.figure()
plt.subplot(3, 1, 1)
plt.plot(xs_forward[:, 0:4])
plt.subplot(3, 1, 2)
plt.plot(xs_backward[:, 0:4])
plt.subplot(3, 1, 3)
plt.plot(us_backward)

# Horizon: difference of entry 4 of xf and x0, plus one, cast to int.
T = xf[4] - x0[4] + 1
T = int(T)
Fs, Ps = final_value_LQR(A, B, Q, R, xf[0:4], T)
# Ps[T] is the cost-to-go given zero time steps --
# Ps[T-1] is the cost-to-go given 1 time step
# Ps[0] is the cost-to-go given T time steps

# First four entries of x0 as a 4x1 column with a constant 1 appended (5x1).
x0m = x0[0:4].reshape(4, 1)
x0m = np.vstack([x0m, [[1]]])

# NOTE(review): `us` is not defined anywhere in this fragment -- confirm it
# exists earlier in the script and is the intended sequence (not us_backward).
direct_cost_forward = cost(A, B, x0, us)  # cost of taking x0 to xf
# Quadratic form x0m' * Ps[0] * x0m; per the original note this should equal
# direct_cost_forward.
cost_to_go_forward = np.squeeze(np.dot(x0m.T, np.dot(Ps[0], x0m)))  # same cost

# Opposite direction: reversed system (Ar, Br), with x0[0:4] passed where
# xf[0:4] was passed above.
Fsr, Psr = final_value_LQR(Ar, Br, Q, R, x0[0:4], T)

# First four entries of xf as a 4x1 column with a constant 1 appended (5x1).
xfm = xf[0:4].reshape(4, 1)
xfm = np.vstack([xfm, [[1]]])
Beispiel #8
0
# Script fragment: compares a forward rollout with a reversed-system rollout,
# plots both, then evaluates LQR cost-to-go in each direction.
# It relies on names defined outside this fragment (A, B, Ar, Br, Q, R, x0,
# xf, us_backward, run_forward, cost, final_value_LQR, np, plt).

# Flip the control sequence end-for-end.
us_backward = us_backward[::-1]

# Run (A, B) from x0 with the flipped controls, and (Ar, Br) from xf with the
# controls flipped back; the second trajectory is then reversed.
# NOTE(review): presumably this puts both trajectories in the same time order
# for comparison -- confirm against run_forward.
xs_forward = run_forward(A,B,x0,us_backward)
xs_backward = run_forward(Ar,Br,xf,us_backward[::-1])[::-1]

# Three stacked panels: forward states, backward states, controls.
# Only columns 0-3 of each trajectory are plotted.
plt.figure()
plt.subplot(3,1,1)
plt.plot(xs_forward[:,0:4])
plt.subplot(3,1,2)
plt.plot(xs_backward[:,0:4])
plt.subplot(3,1,3)
plt.plot(us_backward)

# Horizon: difference of entry 4 of xf and x0, plus one, cast to int.
T = xf[4]-x0[4] + 1
T = int(T)
Fs, Ps = final_value_LQR(A,B,Q,R,xf[0:4],T)
# Ps[T] is the cost-to-go given zero time steps --
# Ps[T-1] is the cost-to-go given 1 time step
# Ps[0] is the cost-to-go given T time steps




# First four entries of x0 as a 4x1 column with a constant 1 appended (5x1).
x0m = x0[0:4].reshape(4,1)
x0m = np.vstack([x0m,[[1]]])

# NOTE(review): `us` is not defined anywhere in this fragment -- confirm it
# exists earlier in the script and is the intended sequence (not us_backward).
direct_cost_forward = cost(A,B,x0,us) # cost of taking x0 to xf
# Quadratic form x0m' * Ps[0] * x0m; per the original note this should equal
# direct_cost_forward.
cost_to_go_forward = np.squeeze(np.dot(x0m.T,np.dot(Ps[0],x0m))) # same cost

# Opposite direction: reversed system (Ar, Br), with x0[0:4] passed where
# xf[0:4] was passed above.
Fsr, Psr = final_value_LQR(Ar,Br,Q,R,x0[0:4],T)