def test_ode_rk23_simpleCase(self): # setup parameters and initial point for testing function x0 = np.array([4.0]) par_a = -0.2 # setup time interval [a,b] for integration a = 2.0 b = 4.0 # setup algorithm parameters for the chosen ode method to be tested tol = 1e-8 maxIter = 300 # call integration methos x, t, failFlag, iter_i = ode.ode_rk23(self.myfun, a, b, x0, tol, maxIter, par_a=par_a) # compare with expected results x_expect = math.exp(-0.4) * 4 self.assertTrue(abs(x_expect - x[-1, 0]) < 1e-6, msg=None)
def test_ode_rk23_normalCase(self): # setup parameters and initial point for testing function x0 = np.array([0.1, -0.3, 1.0]) matrixA = np.array([[-1.0, 0.1, -0.3], [0.2, -1, 0.1], [0.1, 0.4, -1]]) matrixB = np.array([[0.5, -0.6], [-0.7, 0.6], [0.4, 0.8]]) vectorU = np.array([0.2, 0.3]) vectorF = np.array([-0.1, 0 - 0.02, -0.2]) # setup time interval [a,b] for integration a = 0.0 b = 5.0 # setup algorithm parameters for the chosen ode method to be tested tol = 1e-8 maxIter = 300 # call integration methos x, t, failFlag, iter_i = ode.ode_rk23(self.myfun, a, b, x0, tol, maxIter, matrixA=matrixA, matrixB=matrixB, vectorU=vectorU, vectorF=vectorF) # compare with expected results # (note: the values here is not the real expected result, but copied # from the output of ode_rk23, need a way to get expected result) x_expect = np.array( [-3.91136351e-01, -1.45401145e-01, -5.53579890e-01]) self.assertTrue(max(abs(x_expect - x[-1])) < 1e-6, msg=None)
def propagate_u(u_0, qp_vecs, t_start, t_end, sliding_window_instance, q_s_dot, p_l_dot, p_mf_dot, q_mf_dot, q_mf, u_mf, H_l_D): ''' Propagate u based on q and p values for one bucket in time. Inputs: u_0 (1D np.array of dimension self.control_dim): control values at beginning of bucket qp_vecs (list of 1D np.arrays each of dimension self.state_dim*3): list of concatenated state, local costate, and mean field costate vectors from q and p propagation. t_start (np.float): start time of bucket t_end (np.float): end time of bucket sliding_window_instance (instance of user-defined class which inherits SlidingWindow): object defining the dynamics, and the initial conditions and parameters for numerical integration/propagation. Same as agent. q_s_dot (1D np.array of dimension self.state_dim): derivative of local state at end of bucket p_l_dot (1D np.array of dimension self.state_dim): derivative of local state at end of bucket p_mf_dot (1D np.array of dimension self.state_dim): derivative of local state at end of bucket q_mf_dot (1D np.array of dimension total number of states in system): derivative of local state at end of bucket q_mf (1D np.array of dimension of total number of states in system): mean field state, i.e. state vector holding values for ALL states, not just those pertaiing to the current agen. u_mf (1D np.array of dimension self.control_dim): mean field control, i.e. control vector holding values for ALL controls, not just those pertaining to the current agent. H_l_D (np.float): value of local desired Hamiltonian at beginning of window Outputs: u_vecs (list of 1-D numpy arrays): list of propagated control values for entire bucket in time ''' u_vecs = [] u_vec = u_0 n_s = sliding_window_instance.n_s steps = np.linspace(t_start, t_end, n_s + 1) for i in range(n_s): n_start, n_end = steps[i], steps[i + 1] qp_vec = qp_vecs[i] u_vec, t, failFlag, iter_i = ode.ode_rk23( sliding_window_instance.u_rhs, n_start, n_end, u_vec, sliding_window_instance.integrateTol, sliding_window_instance.integrateMaxIter, state_dim=sliding_window_instance.state_dim, Gamma=sliding_window_instance.Gamma, qp_vec=qp_vec, u_0=u_0, q_s_dot=q_s_dot, p_l_dot=p_l_dot, p_mf_dot=p_mf_dot, q_mf_dot=q_mf_dot, q_mf=q_mf, u_mf=u_mf, H_l_D=H_l_D) if len(u_vec) > 1: u_vec_next = u_vec[-1] else: u_vec_next = u_vec u_vecs.append( u_vec_next ) # one u_vec for each step, append them and you have all the u_vecs for one bucket u_vec = u_vec_next return u_vecs
def test_ode_rk23_backwardCase(self): ''' test backward integration of 2x2 matrix dynamics (i.e. in the dx/dt function x is 2x2 matrix) Note: this test case is the reversed test case of TestCaseMatrix('test_ode_rk23_simpleCase') i.e. use result of TestCaseMatrix('test_ode_rk23_simpleCase') as inital point, and integrate from the ending time of TestCaseMatrix('test_ode_rk23_simpleCase') and all the way back to the starting time of TestCaseMatrix('test_ode_rk23_simpleCase'). The expected result of this test case should be very close to the initial point of TestCaseMatrix('test_ode_rk23_simpleCase') ''' # setup parameters and initial point for testing function matrixX0 = np.array([[0.47734137, 0.10362487], [0.11596301, 0.00871929]]) #np.array([[0.5,-0.2],[1.3,2.4]]) matrixA = np.array([[0.2, -0.1], [-0.4, 0.7]]) matrixB = np.array([[0.006, -0.003], [-0.002, 0.008]]) matrixQ = np.array([[0.006, -0.002], [-0.001, 0.005]]) # setup time interval [a,b] for integration (a>b, backward integration) a = 5.0 b = 0.0 # setup algorithm parameters for the chosen ode method to be tested tol = 1e-8 maxIter = 1000 # reshape matrixX0 from 2x2 matrix to 4x1 vector dim_matrixX0 = matrixX0.shape vectorX0 = matrixX0.reshape((dim_matrixX0[0] * dim_matrixX0[1], )) # call integration methos vectorX, t, failFlag, iter_i = ode.ode_rk23(self.myfun, a, b, vectorX0, tol, maxIter, matrixA=matrixA, matrixB=matrixB, matrixQ=matrixQ) # reshape result from 4x1 to 2x2, and compare with expected result # (note: the expected result values here is not the reversed values of real expected result, but copied # from the output of ode_rk23, need a way to get expected result) matrixX_result = vectorX[-1].reshape(matrixX0.shape) matrixX_expect = np.array([[0.5, -0.2], [ 1.3, 2.4 ]]) #np.array([[ 0.47734137, 0.10362487],[0.11596301, 0.00871929]]) self.assertTrue(np.amax(abs(matrixX_expect - matrixX_result)) < 1e-4, msg=None)
def test_ode_rk23_simpleCase(self): ''' test forward integration of 2x2 matrix dynamics (i.e. in the dx/dt function x is 2x2 matrix) ''' # setup parameters and initial point for testing function matrixX0 = np.array([[0.5, -0.2], [1.3, 2.4]]) matrixA = np.array([[0.2, -0.1], [-0.4, 0.7]]) matrixB = np.array([[0.006, -0.003], [-0.002, 0.008]]) matrixQ = np.array([[0.006, -0.002], [-0.001, 0.005]]) # setup time interval [a,b] for integration a = 0 b = 5 # setup algorithm parameters for the chosen ode method to be tested tol = 1e-8 maxIter = 1000 # reshape matrixX0 from 2x2 matrix to 4x1 vector dim_matrixX0 = matrixX0.shape vectorX0 = matrixX0.reshape((dim_matrixX0[0] * dim_matrixX0[1], )) # call integration methos vectorX, t, failFlag, iter_i = ode.ode_rk23(self.myfun, a, b, vectorX0, tol, maxIter, matrixA=matrixA, matrixB=matrixB, matrixQ=matrixQ) # reshape result from 4x1 to 2x2, and compare with expected result # (note: the values here is not the real expected result, but copied # from the output of ode_rk23, need a way to get expected result) matrixX_result = vectorX[-1].reshape(matrixX0.shape) matrixX_expect = np.array([[0.47734137, 0.10362487], [0.11596301, 0.00871929]]) self.assertTrue(np.amax(abs(matrixX_expect - matrixX_result)) < 1e-6, msg=None)
def test_ode_rk23_normaleCase(self): ''' test forward integration of 9x9 matrix dynamics (i.e. in the dx/dt function x is 9x9 matrix) ''' # setup parameters and initial point for testing function matrixX0=np.array([[ 0.76, 0.22, 0.67, 0.19, 0.71, 0.26, 0.53, 0.14, 0.25],\ [ 0.45, 0.17, 0.74, 0.28, 0.17, 0.25, 0.8 , 0.08, 0.43],\ [ 0.69, 0.02, 0.6 , 0.29, 0.92, 0.32, 0.08, 0.56, 0.32], [ 0.99, 0.08, 0.84, 0.24, 0.42, 0.25, 0.24, 0.52, 0.95],\ [ 0.31, 0.33, 0.06, 0.13, 0.38, 0.6 , 0.22, 0.83, 0.51],\ [ 0.2 , 0.21, 0.1 , 0.48, 0.89, 0.96, 0.09, 0.48, 0.25],\ [ 0.36, 0.16, 0.34, 0.49, 0.54, 0.32, 0.5 , 0.8 , 0.24],\ [ 0.69, 0.37, 0.69, 0.88, 0.89, 0.26, 0.36, 0.83, 0.72],\ [ 0.8 , 0.64, 0.93, 0.01, 0.59, 0.85, 0.84, 0.13, 0.92]]) matrixX0 = matrixX0 + np.eye(9) * 10 matrixA=np.array([[ 3.54, 0.5 , 0.3 , 0.06, 0.63, 0.68, 0.57, 0.41, 0.13],\ [ 0.04, 3.05, 0.46, 0.42, 0.57, 0.45, 0.49, 0.32, 0.27],\ [ 0.32, 0.79, 3.66, 0.19, 0.11, 0.37, 0.32, 0.63, 0.1 ],\ [ 0.24, 0.35, 0.59, 3.12, 0.93, 0.51, 0.86, 0.76, 0.48],\ [ 0.97, 0.86, 0.4 , 0.92, 3.35, 0.74, 0.62, 0.42, 0.37],\ [ 0.79, 0.02, 0.64, 0.64, 0.17, 3.42, 0.44, 0.26, 0.96],\ [ 0.48, 0.49, 0.4 , 0.45, 0.3 , 0.61, 3.24, 0.26, 0.65],\ [ 0.7 , 0.17, 0.91, 0.5 , 0.65, 0.55, 0.01, 3.69, 0.11],\ [ 0.15, 0.14, 0.39, 0.25, 0.44, 0.53, 0.93, 0.84, 3. ]]) matrixB=np.array([[-9.68, 0.5 , 0.5 , 0.33, 0.8 , 0. , 0.93, 0.91, 0.85],\ [ 0.79, -9.02, 0.55, 0.56, 0.64, 0. , 0.06, 0.46, 0.25],\ [ 0.62, 0.47, -9.82, 0.18, 0.41, 0.7 , 0.03, 0.84, 0.26],\ [ 0.32, 0.05, 0.59, -9.42, 0.63, 0.99, 0.47, 0.37, 0.2 ],\ [ 0.59, 0.38, 0.71, 0.27, -9.75, 0.82, 0.4 , 1. , 0.38],\ [ 0.85, 0.2 , 0.18, 0.46, 0.22, -9.05, 0.61, 0.97, 0.76],\ [ 0.75, 0.93, 0.53, 0.74, 0.52, 0.54, -9.8 , 0.31, 0.63],\ [ 0.32, 0.62, 0. , 0.62, 0.65, 0.9 , 0.74, -9.68, 0.07],\ [ 0.59, 0.4 , 0.57, 0.49, 0.58, 0.5 , 0.86, 0. , -9.94]]) matrixQ=np.array([[ 0.57, 0.08, 0.05, 0. , 0. , 0.06, 0.01, 0.02, 0.06],\ [ 0.07, 0.5 , 0.01, 0.06, 0.02, 0.09, 0.02, 0.09, 0.06],\ [ 0.02, 0.01, 0.57, 0.01, 0.07, 0.07, 0.05, 0.07, 0.06],\ [ 0. , 0.06, 0.02, 0.54, 0.06, 0.03, 0.08, 0.09, 0.02],\ [ 0.03, 0. , 0.05, 0. , 0.54, 0.04, 0.06, 0.03, 0.02],\ [ 0.05, 0.06, 0.04, 0.02, 0.05, 0.6 , 0.07, 0.08, 0.05],\ [ 0. , 0.03, 0.07, 0.09, 0.06, 0.02, 0.52, 0.04, 0.01],\ [ 0.05, 0.06, 0.03, 0.1 , 0.05, 0.03, 0.03, 0.6 , 0.02],\ [ 0.02, 0.02, 0.04, 0.09, 0.05, 0.01, 0.03, 0. , 0.5 ]]) # setup time interval [a,b] for integration a = 0.0 b = 1.0 # setup algorithm parameters for the chosen ode method to be tested tol = 1e-8 maxIter = 1000 # reshape matrixX0 from 9x9 matrix to 81x1 vector dim_matrixX0 = matrixX0.shape vectorX0 = matrixX0.reshape((dim_matrixX0[0] * dim_matrixX0[1], )) # call integration methos vectorX, t, failFlag, iter_i = ode.ode_rk23(self.myfun, a, b, vectorX0, tol, maxIter, matrixA=matrixA, matrixB=matrixB, matrixQ=matrixQ) # reshape result from 81x1 to 9x9, and compare with expected result # (note: the values here is not the real expected result, but copied # from the output of ode_rk23, need a way to get expected result) matrixX_result = vectorX[-1].reshape(matrixX0.shape) matrixX_expect = np.array([ [-0.05614609, -0.02764642, 0.00442386, -0.03128679, 0.0379626 ,\ -0.00210206, 0.02055731, 0.01708753, -0.01079675],\ [ 0.01076375, -0.075299 , 0.04635011, -0.00160557, 0.02930959,\ -0.01371929, -0.01172549, -0.03845662, 0.00592987],\ [-0.01827535, 0.02592572, -0.06526755, -0.01455321, -0.00696632,\ 0.0123018 , 0.00875977, 0.03707331, -0.02007888],\ [-0.01917939, -0.04928098, 0.01923522, -0.07813147, 0.05341208,\ 0.0136149 , -0.00085168, 0.00241213, 0.0005026 ],\ [ 0.00643727, 0.06033435, -0.04005195, 0.0275839 , -0.07949216,\ 0.00268929, -0.00399012, 0.01671832, -0.00206837],\ [ 0.01743537, 0.00612319, -0.0026365 , 0.01606572, -0.01925 ,\ -0.04091118, -0.00317609, -0.0098151 , 0.00212008],\ [ 0.00996543, 0.00499091, -0.00098197, 0.02704245, -0.01822727,\ -0.00234791, -0.04023058, -0.00917272, -0.00128094],\ [ 0.03103352, 0.002872 , 0.01410333, 0.03743825, -0.0293213 ,\ -0.01646853, -0.01985861, -0.07105586, 0.026175 ],\ [-0.0198831 , -0.00450292, 0.00266541, -0.03444674, 0.01946599,\ 0.00968283, 0.01935828, 0.01385639, -0.04228292]]) self.assertTrue(np.amax(abs(matrixX_expect - matrixX_result)) < 1e-6, msg=None)
def propagate_q_p(qpu_vec, t_start, t_end, sliding_window_instance, q_mf, u_mf): ''' This method propagates q and p to the end of one bucket, with a constant control. Inputs: qpu_vec (1D np.array of dimension self.state_dim*3+control_dim): Most current values for local qpu_vec containing q_s, p_l, p_mf, u_s, concatenated in one array t_start (np.float): start time of bucket t_end (np.float): end time of bucket sliding_window_instance (instance of user-defined class which inherits SlidingWindow): object defining the dynamics, and the initial conditions and parameters for numerical integration/propagation. q_mf (1D np.array of dimension of total number of states): vector containing mean field state values for states not pertaining to this agent, and containing local values otherwise. u_mf (1D np.array of dimension of total number of states): vector containing mean field control values for control variables not pertaining to this agent, and local values otherwise. Outputs: qp_vecs (list of 1-D numpy arrays): holds qp values for each time interval qp_dot_vecs (list of 1-D numpy arrays): holds q_s_dot, p_mf_dot, p_l_dot values for each time interval ''' state_dim = sliding_window_instance.state_dim state_indices = sliding_window_instance.state_indices n_s = sliding_window_instance.n_s q_l_0 = qpu_vec[:state_dim] p_l_0 = qpu_vec[state_dim:2 * state_dim] p_mf_0 = qpu_vec[2 * state_dim:3 * state_dim] u_0 = qpu_vec[3 * state_dim:] qp_vecs = [] qp_vec = np.concatenate([ q_l_0, p_l_0, p_mf_0 ]) # pass in all three: q_0, p_0, u_0, but in the qp_rhs function qp_dot_vecs = [] steps = np.linspace(t_start, t_end, n_s + 1) # pass in values from blackboard as kwargs to qp_rhs for i in range(n_s): n_start, n_end = steps[i], steps[i + 1] # update q_mf with the most recent local values in q_s q_s = qp_vec[:state_dim] q_mf = update_q_mf(q_mf, q_s, sliding_window_instance) qp_vec, t, failFlag, iter_i = ode.ode_rk23( sliding_window_instance.qp_rhs, n_start, n_end, qp_vec, sliding_window_instance.integrateTol, sliding_window_instance.integrateMaxIter, state_dim=sliding_window_instance.state_dim, Gamma=sliding_window_instance.Gamma, u_0=u_0, q_mf=q_mf, u_mf=u_mf) qp_vec_orig = qp_vec # rk23 returns 2 arrays but we remove the first array by doing qp_vec[1] because rk_23 returns the initial value you passed in if len(np.shape(qp_vec_orig)) >= 2: qp_vec = qp_vec[-1] qp_vecs.append(qp_vec) # get time derivatives qp_dot_vec = sliding_window_instance.qp_rhs( 0.0, qp_vec, state_dim=sliding_window_instance.state_dim, Gamma=sliding_window_instance.Gamma, u_0=u_0, q_mf=q_mf, u_mf=u_mf) qp_dot_vecs.append(qp_dot_vec) return qp_vecs, qp_dot_vecs
# - note: set it to be backward constant, i.e., find rows of the first # time index > tStart_j, for numerical purpose, add epsilon as tolerance uTimeIndex = uControl[uControl.index > tbegin + 1e-10].index[0] uControlData = uControl.loc[uTimeIndex] tEnd = min(uTimeIndex, tStop_j) #print ("Var. checking: tbegin={}, tStop_j={} sec. ".format(tbegin, tStop_j, tEnd)) # add noise (varying to each element?) ns = np.random.normal(mu, sigma, len(vectorStateStart)) vectorStateStart = vectorStateStart + ns # call the ode integrator ... fn = system_dyn vector_out, t, failFlag, iter_i = \ ode.ode_rk23(fn, tbegin, tEnd, vectorStateStart, integrateTol, integrateMaxIter, C=C, L=L, k=k, V0=V0, Vu= Vu, Rs=np.float(uControlData)) print( "integration happening from {} - {} sec. with ctrl at {}". format(tbegin, tEnd, uTimeIndex)) # records results with nicer format, # note: do not record the starting point since it is given, not computed countRows = len(t) sumCountRows = sumCountRows + countRows - 1 if initFlag: state_j['Time'] = t[1:countRows] state_j['State'] = vector_out[1:countRows, :] initFlag = False else: state_j['Time'] = np.append(state_j['Time'],
def kfc_propagate_state_and_cov(t0, t1, x0, stateCov0, stateDim, controlDim, observeDim, uControl, yObserve, dynParA, dynParB, dynParF, observeParH, dynCovW, observeCovV, outputNum, integrateTol, integrateMaxIter): ''' # propagate state and state covariance for continuous Kalman Filter by forward integration # (note: propagate state and covariance of the state simultaneously) ''' #### define the inital condition of covariance and state #### note: need to reshape covariance as a vector, and combine covariance vector and state vector as one vector covVectorDim = stateDim*stateDim vectorCovStateStart = construct_cov_state_vector(stateCov0, x0, covVectorDim) ### solve integration eqs of state and covariance of the state simultaneously ### note: (1) evenly divide the time interval between two adjacent observation ### and output filtered state values at those time points ### (2) time index of uControl yObserve may not align each other, ### therefore the time interval of two adjacent time points given by (1) ### may be split into multiple segments which each has different uControl data listFilterResult = [] # it will be a 2D list to record the filtered state and state covariance as well as the time array tStart_i = t0 for i in range(len(yObserve)-1): # iteration i: the time interval of two adjacent observation listFilterResult.append([]) tEnd_i = yObserve.index[i+1] # pull observation data from yObserve for the time interval [tStart_i, tEnd_i], # note: set it to be backward constant, i.e. observation data = yObserve(tEnd_i) yObserveData = np.array(yObserve.iloc[i+1]) # evenly divide the time interval between two adjacent observation # in order to output filtered state values at those time points stepSize = (tEnd_i-tStart_i)/outputNum tStart_j = tStart_i # initiate variable for counting total rows of integration output of the jth iteration sumCountRows = 0 for j in range(outputNum): # setup time and initiate dictionary for recording the results of the jth iteration stateAndCov_j = {"Time" : [], "countRows": 0, "vectorState" : [], "matrixCov" : []} initFlag = True tEnd_j = tStart_j + stepSize tStart = tStart_j while tStart < tEnd_j - 1e-12: # 1e-12: give some tolerance for numerical purpose # pull control data from uControl # - note: set it to be backward constant, i.e., find rows of the first # time index > tStart_j, for numerical purpose, add epsilon as tolerance uTimeIndex = uControl[uControl.index > tStart+1e-10].index[0] uControlData = np.array(uControl.loc[uTimeIndex]) tEnd = min(uTimeIndex, tEnd_j) # To do: need to raise exception when integration is fail vectorCovState, t, failFlag, iter_i = \ ode.ode_rk23(continuous_kalman_filter_dyn, tStart, tEnd, vectorCovStateStart, integrateTol, integrateMaxIter, dynParA = dynParA, dynParB=dynParB, dynParF = dynParF, observeParH=observeParH, uControlData=uControlData, yObserveData=yObserveData, dynCovW=dynCovW, observeCovV=observeCovV, covVectorDim = covVectorDim, stateDim = stateDim) # records results with nicer format, # note: do not record the starting point since it is given, not computed countRows = len(t) sumCountRows = sumCountRows+countRows-1 if initFlag: stateAndCov_j['Time'] = t[1:countRows] stateAndCov_j['vectorState'] = vectorCovState[1:countRows,-stateDim:] stateAndCov_j['matrixCov'] = vectorCovState[1:countRows,0:covVectorDim].reshape((countRows-1, stateDim,stateDim)) initFlag = False else: stateAndCov_j['Time'] = np.append(stateAndCov_j['Time'],t[1:countRows]) stateAndCov_j['vectorState'] = np.append(stateAndCov_j['vectorState'], vectorCovState[1:countRows,-stateDim:], axis=0) stateAndCov_j['matrixCov'] = np.append(stateAndCov_j['matrixCov'], vectorCovState[1:countRows,0:covVectorDim].reshape((countRows-1, stateDim,stateDim)), axis=0) # update tStart and the start point, i.e. vectorCovStateStart tStart = tEnd vectorCovStateStart = vectorCovState[-1] # add results of jth iteration to the list, and update start time of j loop listFilterResult[i].append(stateAndCov_j) tStart_j = tEnd_j # update start time of i loop tStart_i = tEnd_i return listFilterResult
def lqt_propogate_state(x0, t0, t1, stateDim, controlDim, listRiccati, riccatiInvRB, dynParA, dynParB, dynParF, integrateTol, integrateMaxIter): ''' #forward integration of state dynamics with results of riccati equations as input # note: form a given time interval time to be processed, # set results of riccati equations to be constants equal the # values at the end of the time interval ''' vectorStateStart = x0 tStart = t0 listStateAndControl = [] # it will be a 2D list to record the results of state startFlag = True iterNum = len(listRiccati) for i in range(iterNum): # iteration i: work in the ith time interval in which zTrack is a constant listStateAndControl.append([]) listRiccatiRow = iterNum-i-1 if i+1 < iterNum: tEnd = listRiccati[listRiccatiRow-1][-1]['Time'][-1] #zTrack.index[i+1] else: tEnd = t1 # evenly divided time interval [tStart_i, tEnd_i) by outputNum #tStep = (tEnd-tStart)/outputNum ### outputNum = len(listRiccati[listRiccatiRow-1]) # forward integration for each tStep riccatiSteps_j = len(listRiccati[listRiccatiRow-1]) tStart_j = tStart for j in range(riccatiSteps_j): # iteration j: work in the jth sub-time-interval within the zTrack constant interval listRiccatiCol = riccatiSteps_j-j-1 if listRiccatiCol > 0: tEnd_j = listRiccati[listRiccatiRow][listRiccatiCol-1]['Time'][-1] # tStart_j+tStep else: tEnd_j = tEnd # initiate dictionary for recording the results of the jth iteration stateAndControl_j = {"Time" : [], "countRows": 0, "vectorState" : [], "vectorControl" : []} initFlag = True sumCountRows = 0 tStart_k = tStart_j for k in range(listRiccati[listRiccatiRow][listRiccatiCol]['countRows']-1,-1,-1): # iteration k: the smallest time interval in which ricatti numerical results are constant if k > 0: tEnd_k = listRiccati[listRiccatiRow][listRiccatiCol]['Time'][k-1] else: tEnd_k = tEnd_j # construct parameters from results of riccati equations for forward integration riccatiKlq = -riccatiInvRB.dot(listRiccati[listRiccatiRow][listRiccatiCol]['matrixSigma'][k]) riccatiPsi = -riccatiInvRB.dot(listRiccati[listRiccatiRow][listRiccatiCol]['vectorPhi'][k]) stateAPlusBKlq = dynParA+dynParB.dot(riccatiKlq) stateBPsiPlusF = dynParB.dot(riccatiPsi)+dynParF # TO DO: need raise failture message when integration fail # propogate state dynamics with feedback policy vectorState, t, failFlag, iter_i = \ ode.ode_rk23(feedback_policy_state_dot, tStart_k, tEnd_k, vectorStateStart, integrateTol, integrateMaxIter, stateAPlusBKlq=stateAPlusBKlq, stateBPsiPlusF=stateBPsiPlusF) # compute optimal feedback control listControl = [] for tmp_i in range(len(t)): listControl.append(riccatiKlq.dot(vectorState[tmp_i])+riccatiPsi) vectorControl = np.array(listControl) # records results with nicer format for forward integration, # note: if at time tStart_k=t0, record the state and control values at starting time point # if at tStart_k>t0, no need to record the values at tStart_k, # since it has been recorded at the previous iteration countRows = len(t) if startFlag: startRow = 0 sumCountRows = sumCountRows+countRows startFlag = False else: startRow = 1 sumCountRows = sumCountRows+countRows-1 if initFlag: stateAndControl_j['Time'] = t[startRow:countRows] stateAndControl_j['vectorState'] = vectorState[startRow:countRows] stateAndControl_j['vectorControl'] = vectorControl[startRow:countRows] initFlag = False else: stateAndControl_j['Time'] = np.append(stateAndControl_j['Time'],t[startRow:countRows]) stateAndControl_j['vectorState'] = np.append(stateAndControl_j['vectorState'], vectorState[startRow:countRows], axis=0) stateAndControl_j['vectorControl'] = np.append(stateAndControl_j['vectorControl'], vectorControl[startRow:countRows], axis=0) # update tStart_k and the start point, i.e. vectorStateStart tStart_k = tEnd_k vectorStateStart = vectorState[-1] # update tStart_j for outer loop j stateAndControl_j['countRows']=sumCountRows listStateAndControl[i].append(stateAndControl_j) tStart_j = tEnd_j # update tStart for outer loop i tStart = tEnd return listStateAndControl
def lqt_solve_riccati(t1, stateDim, zTrack, dynParA, dynParF, trackParF, trackParQ, riccatiBInvRB, outputNum, integrateTol, integrateMaxIter): ''' # solve riccati equations # i.e. backward integrate sigma and phi dynamics together: # sigma_dot = -sigma*dynParA-Transpose(dynParA)*sigma+sigma*dynParB*Inv(trackParR)*Transpose(dynParB)-trackParQ # phi_dot = (-Transpose(dynParA)+sigma*dynParB*Inv(trackParR)*Transpose(dynParB))*phi-sigma*dynParF+trackParQ*zTrack # with terminal condition sigma_end=trackParF, phi_end=(nx1 zeros) # Note: sigma is a symmetric 2D matrix ''' #### define the terminal condition of sigma and phi #### note: need to reshape sigma as a vector, and make sigma vector and phi as one vector sigmaVectorDim = stateDim*stateDim vectorSigmaPhiBackStart = construct_sigma_phi_vector(trackParF, -trackParF.dot(np.array(zTrack.iloc[-1])), sigmaVectorDim) ### solve riccati equations by backward integration #### i.e. for each piecewise constant of zTrack time interval, #### call ode method to barckward integrate riccati equations iterNum = len(zTrack)-1 tStart = t1 listRiccati = [] # it will be a 2D list to record the results of riccati equations for i in range(iterNum-1,-1,-1): # iteration i: work in the ith time interval in which zTrack is a constant listRiccati.append([]) listRiccatiRow = iterNum-i-1 tEnd = zTrack.index[i] zTrackData = np.array(zTrack.iloc[i]) # evenly divided time interval [tStart_i, tEnd_i) by outputNum tStep = (tEnd-tStart)/outputNum # backward integration for each tStep tStart_j = tStart for j in range(outputNum): # iteration j: work in the jth sub-time-interval within the zTrack constant interval tEnd_j = tStart_j+tStep # TO DO: need raise failure message when integration fail # backward propagate riccati equations vectorSigmaPhiBack, t, failFlag, iter_i = \ ode.ode_rk23(riccati_eqs, tStart_j, tEnd_j, vectorSigmaPhiBackStart, integrateTol, integrateMaxIter, dynParA = dynParA, dynParF = dynParF, trackParQ = trackParQ, riccatiBInvRB = riccatiBInvRB, zTrackData = zTrackData, sigmaVectorDim = sigmaVectorDim, stateDim = stateDim) # records results with nicer format for forward integration, # note: do not record the starting point since it is given, not computed countRows = len(vectorSigmaPhiBack) riccatiResults = {"Time" : t[1:countRows], "countRows":countRows-1, "matrixSigma" : vectorSigmaPhiBack[1:countRows,0:sigmaVectorDim].reshape((countRows-1,stateDim,stateDim)), "vectorPhi" : vectorSigmaPhiBack[1:countRows,-stateDim:]} listRiccati[listRiccatiRow].append(riccatiResults) # update tStart_j and the start point, i.e. vectorSigmaPhiBackStart tStart_j = tEnd_j vectorSigmaPhiBackStart = vectorSigmaPhiBack[-1] # update tStart for outer loop i tStart = tEnd return listRiccati