def gdlyap(problem_data, K, L, show_warn=False, check_pd=False):
    """
    Solve a discrete-time generalized Lyapunov equation
    for stochastic linear systems with multiplicative noise.
    """
    problem_data_keys = ['A', 'B', 'C', 'Ai', 'Bj', 'Ck',
                         'varAi', 'varBj', 'varCk', 'Q', 'R', 'S']
    A, B, C, Ai, Bj, Ck, varAi, varBj, varCk, Q, R, S = [problem_data[key] for key in problem_data_keys]
    n = A.shape[1]
    stable = True

    # Compute matrix and vector for the linear equation solver
    Alin_P = np.eye(n * n) - cost_operator_P(problem_data, K, L)
    blin_P = vec(Q) + np.dot(kron(K.T), vec(R)) - np.dot(kron(L.T), vec(S))

    # Solve linear equations
    xlin_P = la.solve(Alin_P, blin_P)

    # Reshape
    P = np.reshape(xlin_P, [n, n])

    if check_pd:
        stable = is_pos_def(P)

    if not stable:
        P = None
        if show_warn:
            warnings.simplefilter('always', UserWarning)
            warn('System is possibly not mean-square stable')

    return P
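# Usage sketch (illustrative only, not part of the library): gdlyap expects the
# problem data packed in a dict with exactly the keys unpacked above. The system
# matrices A, B, C, noise directions Ai, Bj, Ck, variances varAi, varBj, varCk,
# cost weights Q, R, S, and the gains K, L are placeholders assumed to be
# supplied by the caller; the wrapper name below is hypothetical.
def gdlyap_usage_sketch(A, B, C, Ai, Bj, Ck, varAi, varBj, varCk, Q, R, S, K, L):
    problem_data = {'A': A, 'B': B, 'C': C,
                    'Ai': Ai, 'Bj': Bj, 'Ck': Ck,
                    'varAi': varAi, 'varBj': varBj, 'varCk': varCk,
                    'Q': Q, 'R': R, 'S': S}
    P = gdlyap(problem_data, K, L, show_warn=True, check_pd=True)
    if P is None:
        print('Closed-loop system may not be mean-square stable for this (K, L) pair.')
    return P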
def lse(self, L):
    """
    Perform least-squares estimation with the provided input data
    and return the least-squares estimates of the noise covariances.
    """
    # Unpack the required dynamics data
    Q = self.Q
    R = self.R
    n = self.n
    m = self.m
    ell = self.ell
    kronA = self.kronA
    kronB = self.kronB

    # Perform least-squares estimation
    Q_est = np.copy(Q)
    R_est = np.copy(R)
    if self.unknown_params.Q and self.unknown_params.R:
        S = np.hstack([kronA, kronB])
        vecTheta = mdot(la.pinv(S), vec(L))
        vecQ_est = vecTheta[0:ell**2]
        vecR_est = vecTheta[ell**2:]
        Q_est = np.reshape(vecQ_est, [n, n])
        R_est = np.reshape(vecR_est, [m, m])
    elif not self.unknown_params.Q and self.unknown_params.R:
        S = np.copy(kronB)
        vecCW = mdot(kronA, vec(Q))
        vecR_est = mdot(la.pinv(S), vec(L) - vecCW)
        R_est = np.reshape(vecR_est, [m, m])
    elif self.unknown_params.Q and not self.unknown_params.R:
        S = np.copy(kronA)
        vecCV = mdot(kronB, vec(R))
        vecQ_est = mdot(la.pinv(S), vec(L) - vecCV)
        Q_est = np.reshape(vecQ_est, [n, n])
    return Q_est, R_est
def reshaper(X, m, n, p, q):
    # Rearrange the (m*p) x (n*q) matrix X, viewed as an m x n grid of p x q blocks,
    # into an (m*n) x (p*q) matrix Y whose rows are the vectorized blocks of X.
    Y = np.zeros([m*n, p*q])
    k = 0
    for j in range(n):
        for i in range(m):
            Y[k] = vec(X[i*p:(i+1)*p, j*q:(j+1)*q])
            k += 1
    return Y
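# Illustrative check (assumes this module's vec helper is available): applied to
# a Kronecker product kron(A, B), reshaper returns a rank-one matrix, since every
# p x q block of kron(A, B) is a scalar multiple of B. This is the rearrangement
# used below to map second-moment least-squares data back to covariance estimates.
def reshaper_rank_one_check(m=2, n=3, p=2, q=2, seed=0):
    rng = np.random.default_rng(seed)
    A = rng.standard_normal((m, n))
    B = rng.standard_normal((p, q))
    Y = reshaper(np.kron(A, B), m, n, p, q)
    return np.linalg.matrix_rank(Y) == 1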
def calc_sparsity(K, thresh, PGO):
    if PGO is None:
        regstr = 'vec1'
    else:
        if PGO.regularizer is None:
            regstr = 'vec1'
        else:
            regstr = PGO.regularizer.regstr

    # Calculate vals
    if regstr == 'vec1' or regstr == 'vec_huber':
        vals = np.abs(vec(K))
    elif regstr == 'mr':
        vals = np.abs(K).max(1)
    elif regstr == 'mc':
        vals = np.abs(K).max(0)
    elif regstr == 'glr' or regstr == 'glr_huber':
        vals = la.norm(K, ord=2, axis=1)
    elif regstr == 'glc' or regstr == 'glc_huber':
        vals = la.norm(K, ord=2, axis=0)

    binmax = np.max(vals)
    bin1 = thresh * binmax
    sparsity = np.sum(vals < bin1) / vals.size
    print('Sparsity = %.3f' % sparsity)

    # Calculate black-and-white sparsity matrix
    Kbw = np.zeros_like(K)
    if regstr == 'vec1' or regstr == 'vec_huber':
        Kbw = np.abs(K) > bin1
    elif regstr == 'mr' or regstr == 'glr' or regstr == 'glr_huber':
        for i in range(K.shape[0]):
            if vals[i] > bin1:
                Kbw[i, :] = 1
    elif regstr == 'mc' or regstr == 'glc' or regstr == 'glc_huber':
        for j in range(K.shape[1]):
            if vals[j] > bin1:
                Kbw[:, j] = 1

    return vals, sparsity, binmax, bin1, Kbw
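# Usage sketch (illustrative): report the sparsity of a small gain matrix without
# a regularizer; PGO=None falls back to elementwise 'vec1' thresholding. The 10%
# threshold below is an arbitrary choice for the example.
def calc_sparsity_example():
    K = np.array([[0.5, 0.0, 0.01],
                  [0.0, -0.7, 0.0]])
    vals, sparsity, binmax, bin1, Kbw = calc_sparsity(K, thresh=0.1, PGO=None)
    return sparsity, Kbw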
def example_system_erdos_renyi(n, diffusion_constant=1.0, leakiness_constant=0.1, time_constant=0.05,
                               leaky=True, seed=None, detailed_outputs=False, dirname_out='.'):
    npr.seed(seed)

    # Erdos-Renyi edge probability
    # crp = 7.0
    # erp = (np.log(n+1)+crp)/(n+1)  # almost surely connected with probability ~0.999
    mean_degree = 4.0  # should be > 1 for a giant component to exist
    erp = mean_degree / (n - 1.0)

    n_edges = 0

    # Create a random Erdos-Renyi graph
    # Adjacency matrix
    adjacency = np.zeros([n, n])
    for i in range(n):
        for j in range(i + 1, n):
            if npr.rand() < erp:
                n_edges += 1
                adjacency[i, j] = npr.randint(low=1, high=4)
                adjacency[j, i] = np.copy(adjacency[i, j])

    # Degree matrix
    degree = np.diag(adjacency.sum(axis=0))

    # Graph Laplacian
    laplacian = degree - adjacency

    # Continuous-time dynamics matrices
    Ac = -laplacian * diffusion_constant
    Bc = np.eye(n) / time_constant  # normalized so that B = np.eye(n) after discretization
    if leaky:
        Fc = leakiness_constant * np.eye(n)
        Ac = Ac - Fc

    # Plot
    visualize_graph_ring(adjacency, n, dirname_out)

    # Forward Euler discretization
    A = np.eye(n) + Ac * time_constant
    B = Bc * time_constant
    n = np.copy(n)
    m = np.copy(n)

    # Multiplicative noises
    varAi = 0.005 * npr.randint(low=1, high=5, size=n_edges) * np.ones(n_edges)
    Ai = np.zeros([n_edges, n, n])
    k = 0
    for i in range(n):
        for j in range(i + 1, n):
            if adjacency[i, j] > 0:
                Ai[k, i, i] = 1
                Ai[k, j, j] = 1
                Ai[k, i, j] = -1
                Ai[k, j, i] = -1
                k += 1

    varBj = 0.05 * npr.randint(low=1, high=5, size=n) * np.ones(n)
    Bj = np.zeros([n, n, m])
    for i in range(n):
        Bj[i, i, i] = 1

    SigmaA = np.sum([varAi[i] * np.outer(vec(Ai[i]), vec(Ai[i])) for i in range(n_edges)], axis=0)
    SigmaB = np.sum([varBj[j] * np.outer(vec(Bj[j]), vec(Bj[j])) for j in range(n)], axis=0)

    if detailed_outputs:
        outputs = n, m, A, B, SigmaA, SigmaB, varAi, varBj, Ai, Bj
    else:
        outputs = n, m, A, B, SigmaA, SigmaB
    return outputs
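# Usage sketch (illustrative): generate a 10-node leaky diffusion network with
# multiplicative noise on the edges and actuators. Note that the generator calls
# visualize_graph_ring internally, which writes a plot to dirname_out.
def erdos_renyi_example():
    n, m, A, B, SigmaA, SigmaB = example_system_erdos_renyi(n=10, seed=1)
    print('A shape:', A.shape, 'B shape:', B.shape)
    print('SigmaA shape:', SigmaA.shape, 'SigmaB shape:', SigmaB.shape)
    return A, B, SigmaA, SigmaB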
def run_policy_gradient(SS, PGO, CSO=None):
    # run_policy_gradient  Run policy gradient descent on a system
    #
    # Inputs:
    # SS is an LQRSysMult instance with an initial gain matrix K
    # PGO is a PolicyGradientOptions instance
    #
    # K1_subs is the subscripts of the first gain entry varied in surf plot
    # K2_subs is the subscripts of the second gain entry varied in surf plot
    # ax is the axes to plot in
    #
    # Outputs:
    # SS with closed-loop properties at the post-optimization configuration
    # hist_list, a list of data histories over optimization iterations

    # TEMPORARY - MOVE INTO CLASS OPTIONS IF KEEPING AROUND
    Kmax = np.max(np.abs(vec(SS.K)))
    bin1 = 0.01 * Kmax

    # Initialize
    stop = False
    converged = False
    stop_early = False
    iterc = 0
    sleep(0.5)
    t_start = time()

    headerstr = 'Iteration | Stop quant / threshold | Curr obj | Best obj | Norm of gain delta | Stepsize '
    if PGO.regularizer is not None:
        headerstr = headerstr + '| Sparsity'
    print(headerstr)

    K = np.copy(SS.K)
    Kbest = np.copy(SS.K)
    objfun_best = np.inf
    Kold = np.copy(SS.K)

    P = SS.P
    S = SS.S

    fbest_repeats = 0

    # Initialize history matrices
    if PGO.keep_hist:
        mat_shape = list(K.shape)
        mat_shape.append(PGO.max_iters)
        mat_shape = tuple(mat_shape)
        K_hist = np.full(mat_shape, np.inf)
        grad_hist = np.full(mat_shape, np.inf)
        c_hist = np.full(PGO.max_iters, np.inf)
        objfun_hist = np.full(PGO.max_iters, np.inf)

    rng = rngg()

    # Iterate
    while not stop:
        if PGO.exact:
            # Calculate gradient (G)
            # Do this to get combined calculation of P and S,
            # pass previous P and S to warm-start dlyap iterative algorithm
            SS.calc_PS(P, S)
            Glqr = SS.grad
            P = SS.P
            S = SS.S
        else:
            if any([PGO.step_direction == 'gradient',
                    PGO.step_direction == 'natural_gradient',
                    PGO.step_direction == 'gauss_newton',
                    PGO.step_direction == 'policy_iteration']):
                # Estimate gradient using zeroth-order optimization
                # Rollout length
                nt = 20

                # Number of rollouts
                nr = 100000

                # Exploration radius
                ru = 1e-1

                # Draw random initial states
                x = rng.multivariate_normal(np.zeros(SS.n), SS.S0, nr)

                # Draw random gain deviations and scale to Frobenius norm ball
                Uraw = rng.normal(size=[nr, SS.m, SS.n])
                U = ru * Uraw / la.norm(Uraw, 'fro', axis=(1, 2))[:, None, None]

                # Stack dynamics matrices into a 3D array
                Kd = K + U

                # Simulate all rollouts together
                c = np.zeros(nr)
                for t in range(nt):
                    # Accumulate cost
                    c += np.einsum('...i,...i', x, np.einsum('jk,...k', SS.QK, x))

                    # Calculate noisy closed-loop dynamics
                    AKr = SS.A + np.einsum('...ik,...kj', SS.B, Kd)
                    for i in range(SS.p):
                        AKr += (SS.a[i]**0.5) * rng.randn(nr)[:, np.newaxis, np.newaxis] \
                               * np.repeat(SS.Aa[np.newaxis, :, :, i], nr, axis=0)
                    for j in range(SS.q):
                        AKr += np.einsum('...ik,...kj',
                                         (SS.b[j]**0.5) * rng.randn(nr)[:, np.newaxis, np.newaxis]
                                         * np.repeat(SS.Bb[np.newaxis, :, :, j], nr, axis=0),
                                         Kd)

                    # Transition the state
                    x = np.einsum('...jk,...k', AKr, x)

                # Estimate gradient
                Glqr = np.einsum('i,i...', c, U)
                Glqr *= K.size / (nr * (ru**2))

                # TESTING
                G_est = Glqr
                G_act = SS.grad
                print('estimated gradient: ')
                print(G_est)
                print('actual gradient: ')
                print(G_act)
                print('error angle')
                print(np.arccos(np.sum((G_est * G_act)) / (la.norm(G_est) * la.norm(G_act))))
                print('error scale')
                print((la.norm(G_est) / la.norm(G_act)))

            # INWORK
            # if step_direction == 'natural_gradient' or step_direction == 'gauss_newton' or step_direction == 'policy_iteration':
            #     # Estimate S
            #     if step_direction == 'policy_iteration':
            #         # Estimate R_K
            #
            #         # NOTE!!!!! only valid for zero multiplicative noise
            #         # for now - also need to estimate the noise terms? or
            #         # are they known a priori?
            #
            #         # Also need to finish the true "coarse-ID" estimation
            #         # which takes uncertainty into account to ensure the
            #         # estimated optimal gain is stabilizing so P_K is well
            #         # defined
            #
            #         # Also for the theory we need estimates of error of R_K
            #
            #         # Model-based estimation
            #         [Ahat, Bhat] = lsqr_lti(SS)
            #         P_Khat = dlyap_mult(Ahat, Bhat, SS.a, SS.Aa, SS.b, SS.Bb, SS.Q, SS.R, SS.S0, K)
            #         R_K = SS.R + Bhat.T*P_Khat*Bhat
            #
            #         # Model-free estimation
            #         # Rollout length
            #         nt = 20
            #
            #         # Number of rollouts
            #         nr = 10000
            #
            #         # Random initial state standard deviation
            #         xstd = 1
            #
            #         # Random control input standard deviation
            #         ustd = 1
            #
            #         # Random disturbance input standard deviation
            #         # wstd = 0.01
            #         wstd = 0
            #
            #         [~, H21hat, H22hat] = lsqr_lti_qfun(SS, xstd, ustd, wstd, nt, nr)
            #         H22 = H22hat
            #         H21 = H21hat

        # Calculate step direction (V)
        if PGO.regularizer is None or PGO.opt_method == 'proximal':
            G = Glqr
        else:
            Greg = PGO.regularizer.rgrad(SS.K)
            G = Glqr + PGO.regweight * Greg

        if PGO.regularizer is None or PGO.opt_method == 'proximal':
            if PGO.step_direction == 'gradient':
                V = G
            elif PGO.step_direction == 'natural_gradient':
                V = solveb(SS.grad, SS.S)
            elif PGO.step_direction == 'gauss_newton':
                V = solveb(la.solve(SS.RK, SS.grad), SS.S)
        else:
            if PGO.step_direction == 'gradient':
                V = G
            # # Variant 1 - seems more elegant but why should it work?
            # elif PGO.step_direction == 'natural_gradient':
            #     V = solveb(G, SS.S)
            # elif PGO.step_direction == 'gauss_newton':
            #     V = solveb(la.solve(SS.RK, G), SS.S)
            # # Variant 2 - seems more justifiable
            # elif PGO.step_direction == 'natural_gradient':
            #     V = solveb(SS.grad, SS.S) + PGO.regweight*PGO.regularizer.rgrad(SS.K)
            # elif PGO.step_direction == 'gauss_newton':
            #     V = solveb(la.solve(SS.RK, SS.grad), SS.S) + PGO.regweight*PGO.regularizer.rgrad(SS.K)

        # Check if mean-square stable
        if SS.c == np.inf:
            raise Exception('ITERATE WENT UNSTABLE DURING GRADIENT DESCENT')

        if PGO.regularizer is None:
            objfun = SS.c
        else:
            objfun = SS.c + PGO.regweight * PGO.regularizer.rfun(SS.K)

        # Record current iterate
        if PGO.keep_hist:
            K_hist[:, :, iterc] = SS.K
            grad_hist[:, :, iterc] = SS.grad
            c_hist[iterc] = SS.c
            objfun_hist[iterc] = objfun

        if iterc == 0:
            Kchange = np.inf
        else:
            Kchange = la.norm(K - Kold, 'fro') / la.norm(K, 'fro')
        Kold = K

        # Check for stopping condition
        if PGO.stop_crit == 'gradient':
            normgrad = la.norm(G)
            stop_quant = normgrad
            stop_thresh = PGO.epsilon
            if normgrad < PGO.epsilon:
                converged = True
        elif PGO.stop_crit == 'Kchange':
            stop_quant = Kchange
            stop_thresh = PGO.epsilon
            if Kchange < PGO.epsilon:
                converged = True
        elif PGO.stop_crit == 'fbest':
            stop_quant = fbest_repeats
            stop_thresh = PGO.fbest_repeat_max
            if fbest_repeats > PGO.fbest_repeat_max:
                converged = True
        elif PGO.stop_crit == 'fixed':
            stop_quant = iterc
            stop_thresh = PGO.max_iters

        if iterc >= PGO.max_iters - 1:
            stop_early = True
        else:
            iterc += 1

        stop = converged or stop_early

        if PGO.display_output and PGO.regularizer is not None:
            Kmax = np.max(np.abs(vec(SS.K)))
            bin1 = 0.05 * Kmax
            sparsity = np.sum(np.abs(SS.K) < bin1) / SS.K.size

        # Record current best (subgradient method)
        if objfun < objfun_best:
            objfun_best = objfun
            Kbest = SS.K
            fbest_repeats = 0
        else:
            fbest_repeats += 1

        # Update iterate
        if PGO.opt_method == 'gradient':
            if PGO.step_direction == 'policy_iteration':
                eta = 0.5  # for printing only
                H21 = la.multi_dot([SS.B.T, SS.P, SS.A])
                H22 = SS.RK
                if PGO.regularizer is None:
                    K = -la.solve(H22, H21)
                    SS.setK(K)
                else:
                    if PGO.stepsize_method == 'constant':
                        # This might not work; it is a sequential Gauss-Newton step
                        # followed by a gradient descent step on the regularizer
                        K = -la.solve(H22, H21) - PGO.eta * PGO.regweight * Greg
                        SS.setK(K)
                    elif PGO.stepsize_method == 'backtrack':
                        raise Exception("Invalid stepsize option, choose constant")
            else:
                # Calculate step size
                if PGO.stepsize_method == 'constant':
                    eta = PGO.eta
                    K = SS.K - eta * V
                    SS.setK(K)
                elif PGO.stepsize_method == 'backtrack':
                    SS, eta = backtrack(SS, PGO.regweight, PGO.regularizer, G, -V, eta0=PGO.eta)
                    K = SS.K
                elif PGO.stepsize_method == 'square_summable':  # INWORK
                    eta = PGO.eta / (1.0 + iterc)
                    K = SS.K - eta * V
                    SS.setK(K)
        elif PGO.opt_method == 'proximal':
            # Gradient step on LQR cost
            if PGO.step_direction == 'policy_iteration':
                eta = 0.5  # for printing only
                H21 = la.multi_dot([SS.B.T, SS.P, SS.A])
                H22 = SS.RK
                K = -la.solve(H22, H21)
                SS.setK(K)
            else:
                # Calculate step size
                if PGO.stepsize_method == 'constant':
                    eta = PGO.eta
                    K = SS.K - eta * V
                    SS.setK(K)
                elif PGO.stepsize_method == 'backtrack':
                    SS, eta = backtrack(SS, PGO.regweight, PGO.regularizer, G, -V, eta0=PGO.eta)
                    K = SS.K
                elif PGO.stepsize_method == 'square_summable':  # INWORK
                    eta = PGO.eta / (1.0 + iterc)
                    K = SS.K - eta * V
                    SS.setK(K)

            # Prox step on regularizer
            K = prox(SS, PGO)
            SS.setK(K)

        if hasattr(PGO, 'slow'):
            if PGO.slow is not None:
                sleep(PGO.slow)

        # Printing
        if PGO.display_output:
            # Print iterate messages
            printstr0 = "{0:9d}".format(iterc + 1)
            printstr1 = " {0:5.3e} / {1:5.3e}".format(stop_quant, stop_thresh)
            printstr2a = "{0:5.3e}".format(objfun)
            printstr2b = "{0:5.3e}".format(objfun_best)
            printstr3 = "     {0:5.3e}".format(Kchange)
            printstr4 = "{0:5.3e}".format(eta)
            printstr = printstr0 + ' | ' + printstr1 + ' | ' + printstr2a + ' | ' \
                       + printstr2b + ' | ' + printstr3 + ' | ' + printstr4
            if PGO.regularizer is not None:
                printstr5 = "{0:6.2f}%".format(100 * sparsity)
                printstr = printstr + ' | ' + printstr5
            if PGO.display_inplace:
                if iterc == 0:
                    print(" " * len(printstr), end='')
                inplace_print(printstr)
            else:
                print(printstr)
            if stop:
                # Print stopping messages
                print('')
                if converged:
                    print('Optimization converged, stopping now')
                if stop_early:
                    # warnings.simplefilter('always', UserWarning)
                    # warn('Max iterations exceeded, stopping optimization early')
                    print('Max iterations exceeded, stopping optimization')

    if PGO.keep_hist:
        # Trim empty parts from preallocation
        K_hist = K_hist[:, :, 0:iterc + 1]
        grad_hist = grad_hist[:, :, 0:iterc + 1]
        c_hist = c_hist[0:iterc + 1]
        objfun_hist = objfun_hist[0:iterc + 1]
    else:
        K_hist = None
        grad_hist = None
        c_hist = None
        objfun_hist = None

    if PGO.keep_opt == 'best':
        SS.setK(Kbest)

    t_end = time()

    hist_list = [K_hist, grad_hist, c_hist, objfun_hist]

    print('Policy gradient descent optimization completed after %d iterations, %.3f seconds'
          % (iterc, t_end - t_start))
    return SS, hist_list
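# Usage sketch (illustrative): run_policy_gradient expects SS to expose the
# attributes and methods read above (K, P, S, S0, QK, RK, grad, c, n, m, p, q,
# a, Aa, b, Bb, setK, calc_PS) and PGO to provide the options read above
# (exact, step_direction, opt_method, stepsize_method, eta, epsilon, max_iters,
# stop_crit, fbest_repeat_max, regularizer, regweight, keep_hist, keep_opt,
# display_output, display_inplace, slow). How SS and PGO are constructed is
# specific to this repo and is not shown in this excerpt.
# SS, hist_list = run_policy_gradient(SS, PGO)
# K_opt = SS.K
# K_hist, grad_hist, c_hist, objfun_hist = hist_list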
def regularizer_grad(K, regstr, mu=0, soft=False, thresh1=0, thresh2=0):
    # VECTOR NORMS
    if regstr == 'vec1':
        # Vector 1-norm
        grad = np.sign(K)
    if regstr == 'vec2':
        # Vector 2-norm
        grad = K / la.norm(K, 'fro')
    if regstr == 'vecinf':
        # Vector inf-norm
        if not soft:
            grad = infnormgrad(K)
        else:
            grad = infnormgrad(K, thresh1)

    # HUBER VECTOR NORMS
    if regstr == 'vec_huber':
        # Vector Huber-norm
        grad = softsign(K, thresh2)

    # SQUARED VECTOR NORMS
    if regstr == 'vec1sq':
        # Squared vector 1-norm
        grad = 2 * la.norm(vec(K), ord=1) * np.sign(K)
    if regstr == 'vec2sq':
        # Squared vector 2-norm
        grad = 2 * K
    if regstr == 'vecinfsq':
        # Squared vector inf-norm
        if not soft:
            grad = infnormgradsq(K)
        else:
            grad = infnormgradsq(K, thresh1)
    if regstr == 'vec_hubersq':
        # Squared vector Huber-norm
        grad = 2 * huber_norm(thresh2, K) * softsign(K, thresh2)

    # MATRIX NORMS
    if regstr == 'mr':
        # Row norm
        grad = np.zeros_like(K)
        for i in range(K.shape[0]):
            if not soft:
                grad[i, :] = infnormgrad(K[i, :])
            else:
                grad[i, :] = infnormgrad(K[i, :], thresh1)
    if regstr == 'mc':
        # Column norm
        grad = np.zeros_like(K)
        for j in range(K.shape[1]):
            if not soft:
                grad[:, j] = infnormgrad(K[:, j])
            else:
                grad[:, j] = infnormgrad(K[:, j], thresh1)
    if regstr == 'glr':
        # Group lasso on rows
        grad = np.zeros_like(K)
        for i in range(K.shape[0]):
            grad[i, :] = K[i, :] / la.norm(K[i, :], 2)
    if regstr == 'glc':
        # Group lasso on columns
        grad = np.zeros_like(K)
        for j in range(K.shape[1]):
            grad[:, j] = K[:, j] / la.norm(K[:, j], 2)
    if regstr == 'sglr':
        # Sparse group lasso on rows
        grad = (1 - mu) * regularizer_grad(K, 'vec1') + mu * regularizer_grad(K, 'glr')
    if regstr == 'sglc':
        # Sparse group lasso on columns
        grad = (1 - mu) * regularizer_grad(K, 'vec1') + mu * regularizer_grad(K, 'glc')

    # HUBER MATRIX NORMS
    if regstr == 'mr_huber':
        grad = np.zeros_like(K)
        for i in range(K.shape[0]):
            a = la.norm(K[i, :], np.inf)
            if a > thresh2:
                grad[i, :] = infnormgrad(K[i, :])
            else:
                grad[i, :] = (0.5 / thresh2) * infnormgradsq(K[i, :])
    if regstr == 'glr_huber':
        grad = np.zeros_like(K)
        for i in range(K.shape[0]):
            a = la.norm(K[i, :], 2)
            if a > thresh2:
                grad[i, :] = K[i, :] / a
            else:
                grad[i, :] = (1 / thresh2) * K[i, :]
    if regstr == 'glc_huber':
        grad = np.zeros_like(K)
        for j in range(K.shape[1]):
            a = la.norm(K[:, j], 2)
            if a > thresh2:
                grad[:, j] = K[:, j] / a
            else:
                grad[:, j] = K[:, j] / thresh2

    # SQUARED MATRIX NORMS
    if regstr == 'mrsq':
        # Squared row norm
        grad = np.zeros_like(K)
        subs = []
        subsmax = []
        for i, im in enumerate(np.argmax(K, axis=1)):
            subsmax.append((i, im))
        if not soft:
            subs = subsmax.copy()
        else:
            for i, row in enumerate(K):
                for j, colj in enumerate(row):
                    if colj >= (1 - thresh1) * K[subsmax[i]]:
                        subs.append((i, j))
        for sb in subs:
            if not soft:
                grad[sb] = 2 * regularizer_fun(K, 'mr') * softsignscalar(K[sb])
            else:
                grad[sb] = 2 * regularizer_fun(K, 'mr') * softsignscalar(K[sb], thresh2)
    return grad
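# Illustrative example: evaluate the (sub)gradient of the elementwise l1 ('vec1')
# and row group-lasso ('glr') regularizers at a small gain matrix. The values
# follow directly from the definitions above: sign(K) elementwise for 'vec1',
# and each row divided by its 2-norm for 'glr'.
def regularizer_grad_example():
    K = np.array([[3.0, -4.0],
                  [0.0,  2.0]])
    g_vec1 = regularizer_grad(K, 'vec1')  # elementwise sign(K)
    g_glr = regularizer_grad(K, 'glr')    # rows scaled by 1/||row||_2
    return g_vec1, g_glr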
# Reprocess system parameters to match the format used in the sys-id code
n = np.copy(SS.n)
m = np.copy(SS.m)
A = np.copy(SS.A)
B = np.copy(SS.B)
varAi = np.copy(SS.a)
varBj = np.copy(SS.b)
Aa = SS.Aa
Bb = SS.Bb
Ai = np.moveaxis(SS.Aa, 2, 0)
Bj = np.moveaxis(SS.Bb, 2, 0)
SigmaA = np.sum([varAi[i]*np.outer(vec(Ai[i]), vec(Ai[i])) for i in range(SS.p)], axis=0)
SigmaB = np.sum([varBj[j]*np.outer(vec(Bj[j]), vec(Bj[j])) for j in range(SS.q)], axis=0)
Q = np.copy(SS.Q)
R = np.copy(SS.R)

# Rollout length (same as pol-grad)
ell = 20

# Number of rollouts
# Max number of rollouts = nr*num_iters from the pol-grad experiment
nr = 1000
num_iters = 200
ns = 10

# Number of experiments/trials
ne = 20
def run(self, T):
    """
    Perform adaptive Kalman filter iterations

    Input Parameters:
    T: Integer of maximum filter iterations
    """
    F = self.F
    H = self.H
    Q = self.Q
    R = self.R
    n = self.n
    m = self.m
    p = self.p
    Mopi = self.Mopi
    x_mean0 = self.x_mean0
    x_covr0 = self.x_covr0
    Q_est0 = self.Q_est0
    R_est0 = self.R_est0
    L0 = self.L0

    # Preallocate history data arrays
    x_hist = np.full((T + 1, n), np.nan)
    y_hist = np.full((T + 1, m), np.nan)
    x_pre_hist = np.full((T + 1, n), np.nan)
    x_post_hist = np.full((T + 1, n), np.nan)
    P_pre_hist = np.full((T + 1, n, n), np.nan)
    P_post_hist = np.full((T + 1, n, n), np.nan)
    K_hist = np.full((T + 1, n, m), np.nan)
    Q_est_hist = np.full((T + 1, n, n), np.nan)
    R_est_hist = np.full((T + 1, m, m), np.nan)

    # Initialize the iterates
    x_post = x_mean0
    P_post = x_covr0
    Q_est = Q_est0
    R_est = R_est0
    L = L0
    x = npr.multivariate_normal(x_mean0, x_covr0)
    x_hist[0] = x
    x_post_hist[0] = x_post
    P_post_hist[0] = P_post

    # Perform dynamic adaptive Kalman filter updates
    for k in range(T):
        # Print the iteration number
        print("k = %9d / %d" % (k + 1, T))

        # Generate a new multivariate Gaussian measurement noise
        v = npr.multivariate_normal(np.zeros(m), R)

        # Collect and store a new measurement
        y = mdot(H, x) + v
        y_hist[k] = y

        # Noise covariance estimation
        if k > p - 1:
            # Collect measurements up to time step k-1
            Yold = vec(y_hist[np.arange(k - 1, k - p - 1, -1)].T)

            # Collect measurements up to time step k
            Ynew = vec(y_hist[np.arange(k, k - p, -1)].T)

            # Form a linear stationary time series
            Z = mdot(Mopi, Ynew) - mdot(F, Mopi, Yold)

            # Recursive unbiased covariance estimator
            L = ((k - 1) / k) * L + (1 / k) * np.outer(Z, Z)

            # Get the least-squares estimate of the selected covariances
            Q_est_new, R_est_new = self.lse(L)

            # Check positive semidefiniteness of the estimated covariances
            if is_pos_def(Q_est_new):
                Q_est = np.copy(Q_est_new)
            if is_pos_def(R_est_new):
                R_est = np.copy(R_est_new)

        # Update the covariance estimate history
        Q_est_hist[k] = Q_est
        R_est_hist[k] = R_est

        ## Update state estimates using standard Kalman filter equations
        # Calculate the a priori state estimate
        x_pre = mdot(F, x_post)

        # Calculate the a priori error covariance estimate
        P_pre = mdot(F, P_post, F.T) + Q_est

        # Calculate the Kalman gain
        K = solveb(mdot(P_pre, H.T), mdot(H, P_pre, H.T) + R_est)

        # Calculate the a posteriori state estimate
        x_post = x_pre + mdot(K, y - mdot(H, x_pre))

        # Calculate the a posteriori error covariance estimate
        IKH = np.eye(n) - mdot(K, H)
        P_post = mdot(IKH, P_pre, IKH.T) + mdot(K, R, K.T)

        # Store the histories
        x_pre_hist[k + 1] = x_pre
        x_post_hist[k + 1] = x_post
        P_pre_hist[k + 1] = P_pre
        P_post_hist[k + 1] = P_post
        K_hist[k + 1] = K

        # True system updates (true state transition and measurement)
        # Generate process noise
        w = npr.multivariate_normal(np.zeros(n), Q)

        # Update and store the state
        x = mdot(F, x) + w
        x_hist[k + 1] = x

    # Tie up loose ends
    y_hist[-1] = y
    x_pre_hist[0] = x_post_hist[0]
    P_pre_hist[0] = P_post_hist[0]
    K_hist[0] = K_hist[1]
    Q_est_hist[-1] = Q_est
    R_est_hist[-1] = R_est

    # Return data history
    data_hist = DataHist(T, Q_est_hist, R_est_hist, x_hist, x_pre_hist,
                         x_post_hist, P_pre_hist, P_post_hist)
    return data_hist
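# Usage sketch (illustrative; the enclosing class name and its constructor
# signature are assumptions, since they are not shown in this excerpt): run the
# adaptive filter for T steps and inspect the final noise covariance estimates
# stored in the returned DataHist object.
# akf = AdaptiveKF(F, H, Q, R, Mopi, x_mean0, x_covr0, Q_est0, R_est0, L0, unknown_params)
# data_hist = akf.run(T=1000)
# Q_est_final = data_hist.Q_est_hist[-1]
# R_est_final = data_hist.R_est_hist[-1]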
def estimate_model(n, m, A, B, SigmaA, SigmaB, nr, ell, x_hist, u_mean_hist, u_covr_hist,
                   display_estimates=False, AB_known=False):
    muhat_hist = np.zeros([ell+1, n])
    Xhat_hist = np.zeros([ell+1, n*n])
    What_hist = np.zeros([ell+1, n*m])

    # First stage: mean dynamics parameter estimation
    if AB_known:
        Ahat = np.copy(A)
        Bhat = np.copy(B)
    else:
        # Form data matrices for least-squares estimation
        for t in range(ell+1):
            muhat_hist[t] = (1/nr)*np.sum(x_hist[t], axis=0)
            Xhat_hist[t] = (1/nr)*vec(np.sum(np.einsum('...i,...j', x_hist[t], x_hist[t]), axis=0))
            if t < ell:
                # What_hist[t] = (1/nr)*vec(np.sum(np.einsum('...i,...j', x_hist[t], u_mean_hist[t]), axis=0))
                What_hist[t] = vec(np.outer(muhat_hist[t], u_mean_hist[t]))
        Y = muhat_hist[1:].T
        Z = np.vstack([muhat_hist[0:-1].T, u_mean_hist.T])

        # Solve least-squares problem
        # Thetahat = mdot(Y, Z.T, la.pinv(mdot(Z, Z.T)))
        Thetahat = la.lstsq(Z.T, Y.T, rcond=None)[0].T

        # Split learned model parameters
        Ahat = Thetahat[:, 0:n]
        Bhat = Thetahat[:, n:n+m]

    AAhat = np.kron(Ahat, Ahat)
    ABhat = np.kron(Ahat, Bhat)
    BAhat = np.kron(Bhat, Ahat)
    BBhat = np.kron(Bhat, Bhat)

    # Second stage: covariance dynamics parameter estimation
    # Form data matrices for least-squares estimation
    C = np.zeros([ell, n*n]).T
    Uhat_hist = np.zeros([ell, m*m])
    for t in range(ell):
        Uhat_hist[t] = vec(u_covr_hist[t] + np.outer(u_mean_hist[t], u_mean_hist[t]))
        Cminus = mdot(AAhat, Xhat_hist[t]) + mdot(BAhat, What_hist[t]) \
                 + mdot(ABhat, What_hist[t].T) + mdot(BBhat, Uhat_hist[t])
        C[:, t] = Xhat_hist[t+1] - Cminus
    D = np.vstack([Xhat_hist[0:-1].T, Uhat_hist.T])

    # Solve least-squares problem
    # SigmaThetahat_prime = mdot(C, D.T, la.pinv(mdot(D, D.T)))
    SigmaThetahat_prime = la.lstsq(D.T, C.T, rcond=None)[0].T

    # Split learned model parameters
    SigmaAhat_prime = SigmaThetahat_prime[:, 0:n*n]
    SigmaBhat_prime = SigmaThetahat_prime[:, n*n:n*n+m*m]

    # Reshape and project the noise covariance estimates onto the semidefinite cone
    SigmaAhat = reshaper(SigmaAhat_prime, n, n, n, n)
    SigmaBhat = reshaper(SigmaBhat_prime, n, m, n, m)
    SigmaAhat = positive_semidefinite_part(SigmaAhat)
    SigmaBhat = positive_semidefinite_part(SigmaBhat)

    if display_estimates:
        prettyprint(Ahat, "Ahat")
        prettyprint(A, "A ")
        prettyprint(Bhat, "Bhat")
        prettyprint(B, "B ")
        prettyprint(SigmaAhat, "SigmaAhat")
        prettyprint(SigmaA, "SigmaA ")
        prettyprint(SigmaBhat, "SigmaBhat")
        prettyprint(SigmaB, "SigmaB ")
    return Ahat, Bhat, SigmaAhat, SigmaBhat
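# Usage sketch (illustrative, interface only): estimate_model expects rollout
# data with x_hist[t] holding the nr state samples at time t (overall shape
# (ell+1, nr, n)), u_mean_hist holding the input means (shape (ell, m)), and
# u_covr_hist holding the input covariances (shape (ell, m, m)). How the
# rollouts are generated is up to the caller and is not shown here.
# Ahat, Bhat, SigmaAhat, SigmaBhat = estimate_model(n, m, A, B, SigmaA, SigmaB,
#                                                   nr, ell, x_hist,
#                                                   u_mean_hist, u_covr_hist,
#                                                   display_estimates=True)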
def estimate_model_var_only(n, m, A, B, SigmaA, SigmaB, varAi, varBj, Ai, Bj, nr, ell,
                            x_hist, u_mean_hist, u_covr_hist,
                            display_estimates=False, AB_known=False, detailed_outputs=True):
    muhat_hist = np.zeros([ell+1, n])
    Xhat_hist = np.zeros([ell+1, n*n])
    What_hist = np.zeros([ell+1, n*m])

    # First stage: mean dynamics parameter estimation
    if AB_known:
        Ahat = np.copy(A)
        Bhat = np.copy(B)
    else:
        # Form data matrices for least-squares estimation
        for t in range(ell+1):
            muhat_hist[t] = (1/nr)*np.sum(x_hist[t], axis=0)
            Xhat_hist[t] = (1/nr)*vec(np.sum(np.einsum('...i,...j', x_hist[t], x_hist[t]), axis=0))
            if t < ell:
                # What_hist[t] = (1/nr)*vec(np.sum(np.einsum('...i,...j', x_hist[t], u_mean_hist[t]), axis=0))
                What_hist[t] = vec(np.outer(muhat_hist[t], u_mean_hist[t]))
        Y = muhat_hist[1:].T
        Z = np.vstack([muhat_hist[0:-1].T, u_mean_hist.T])

        # Solve least-squares problem
        # Thetahat = mdot(Y, Z.T, la.pinv(mdot(Z, Z.T)))
        Thetahat = la.lstsq(Z.T, Y.T, rcond=None)[0].T

        # Split learned model parameters
        Ahat = Thetahat[:, 0:n]
        Bhat = Thetahat[:, n:n+m]

    AAhat = np.kron(Ahat, Ahat)
    ABhat = np.kron(Ahat, Bhat)
    BAhat = np.kron(Bhat, Ahat)
    BBhat = np.kron(Bhat, Bhat)

    # Second stage: covariance dynamics parameter estimation
    # Form data matrices for least-squares estimation
    C = np.zeros([ell, n*n]).T
    Uhat_hist = np.zeros([ell, m*m])
    for t in range(ell):
        Uhat_hist[t] = vec(u_covr_hist[t] + np.outer(u_mean_hist[t], u_mean_hist[t]))
        Cminus = mdot(AAhat, Xhat_hist[t]) + mdot(BAhat, What_hist[t]) \
                 + mdot(ABhat, What_hist[t].T) + mdot(BBhat, Uhat_hist[t])
        C[:, t] = Xhat_hist[t+1] - Cminus
    C = vec(C)
    X1 = Xhat_hist[0:-1].T
    U1 = Uhat_hist.T
    D1 = np.vstack([vec(np.dot(np.kron(Ai[i], Ai[i]), X1)) for i in range(np.size(varAi))])
    D2 = np.vstack([vec(np.dot(np.kron(Bj[j], Bj[j]), U1)) for j in range(np.size(varBj))])
    D = np.vstack([D1, D2])

    # Solve least-squares problem
    # var_hat = mdot(C, D.T, la.pinv(mdot(D, D.T)))
    # var_hat = mdot(la.pinv(mdot(D, D.T)), D, C)
    var_hat = la.lstsq(D.T, C, rcond=None)[0]
    varAi_hat = np.maximum(var_hat[0:np.size(varAi)], 0)
    varBj_hat = np.maximum(var_hat[np.size(varAi):], 0)
    SigmaAhat = np.sum([varAi_hat[i]*np.outer(vec(Ai[i]), vec(Ai[i])) for i in range(np.size(varAi))], axis=0)
    SigmaBhat = np.sum([varBj_hat[j]*np.outer(vec(Bj[j]), vec(Bj[j])) for j in range(np.size(varBj))], axis=0)

    if display_estimates:
        prettyprint(Ahat, "Ahat")
        prettyprint(A, "A ")
        prettyprint(Bhat, "Bhat")
        prettyprint(B, "B ")
        prettyprint(SigmaAhat, "SigmaAhat")
        prettyprint(SigmaA, "SigmaA ")
        prettyprint(SigmaBhat, "SigmaBhat")
        prettyprint(SigmaB, "SigmaB ")

    if detailed_outputs:
        outputs = Ahat, Bhat, SigmaAhat, SigmaBhat, varAi_hat, varBj_hat
    else:
        outputs = Ahat, Bhat, SigmaAhat, SigmaBhat
    return outputs
def dlyap_mult(A, B, K, a, Aa, b, Bb, Q, R, S0,
               matrixtype='P', algo='iterative', show_warn=False, check_pd=False,
               P00=None, S00=None):
    n = A.shape[1]
    n2 = n * n
    p = len(a)
    q = len(b)
    AK = A + np.dot(B, K)
    stable = True
    stable2 = True
    if algo == 'linsolve':
        if matrixtype == 'P':
            # Intermediate terms
            Aunc_P = np.zeros([n2, n2])
            for i in range(p):
                Aunc_P = Aunc_P + a[i] * kron(Aa[:, :, i].T)
            BKunc_P = np.zeros([n2, n2])
            for j in range(q):
                BKunc_P = BKunc_P + b[j] * kron(np.dot(K.T, Bb[:, :, j].T))

            # Compute matrix and vector for the linear equation solver
            Alin_P = np.eye(n2) - kron(AK.T) - Aunc_P - BKunc_P
            blin_P = vec(Q) + np.dot(kron(K.T), vec(R))

            # Solve linear equations
            xlin_P = la.solve(Alin_P, blin_P)

            # Reshape
            P = np.reshape(xlin_P, [n, n])
            if check_pd:
                stable = is_pos_def(P)
        elif matrixtype == 'S':
            # Intermediate terms
            Aunc_S = np.zeros([n2, n2])
            for i in range(p):
                Aunc_S = Aunc_S + a[i] * kron(Aa[:, :, i])
            BKunc_S = np.zeros([n2, n2])
            for j in range(q):
                BKunc_S = BKunc_S + b[j] * kron(np.dot(Bb[:, :, j], K))

            # Compute matrix and vector for the linear equation solver
            Alin_S = np.eye(n2) - kron(AK) - Aunc_S - BKunc_S
            blin_S = vec(S0)

            # Solve linear equations
            xlin_S = la.solve(Alin_S, blin_S)

            # Reshape
            S = np.reshape(xlin_S, [n, n])
            if check_pd:
                stable = is_pos_def(S)
        elif matrixtype == 'PS':
            P = dlyap_mult(A, B, K, a, Aa, b, Bb, Q, R, S0, matrixtype='P', algo='linsolve')
            S = dlyap_mult(A, B, K, a, Aa, b, Bb, Q, R, S0, matrixtype='S', algo='linsolve')
    elif algo == 'iterative':
        # Implicit iterative solution to the generalized discrete Lyapunov equation
        # Inspired by https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=7553367
        # In turn inspired by https://www.sciencedirect.com/science/article/pii/089812219500119J
        # Faster for large systems, i.e. > 50 states

        # Options
        max_iters = 1000
        epsilon_P = 1e-5
        epsilon_S = 1e-5

        # Initialize
        if matrixtype == 'P' or matrixtype == 'PS':
            if P00 is None:
                P = np.copy(Q)
            else:
                P = P00
        if matrixtype == 'S' or matrixtype == 'PS':
            if S00 is None:
                S = np.copy(S0)
            else:
                S = S00
        iterc = 0
        converged = False
        stop = False
        while not stop:
            if matrixtype == 'P' or matrixtype == 'PS':
                P_prev = P
                APAunc = np.zeros([n, n])
                for i in range(p):
                    APAunc += a[i] * mdot(Aa[:, :, i].T, P, Aa[:, :, i])
                BPBunc = np.zeros([n, n])
                for j in range(q):
                    BPBunc += b[j] * mdot(K.T, Bb[:, :, j].T, P, Bb[:, :, j], K)
                AAP = AK.T
                QQP = sympart(Q + mdot(K.T, R, K) + APAunc + BPBunc)
                P = dlyap(AAP, QQP)
                if np.any(np.isnan(P)) or np.any(np.isinf(P)) or not is_pos_def(P):
                    stable = False
                try:
                    converged_P = la.norm(P - P_prev, 2) / la.norm(P_prev, 2) < epsilon_P
                    stable2 = True
                except:
                    # print(P)
                    # print(P_prev)
                    # print(P - P_prev)
                    # print(la.norm())
                    stable2 = False
                    # print('')
            if matrixtype == 'S' or matrixtype == 'PS':
                S_prev = S
                ASAunc = np.zeros([n, n])
                for i in range(p):
                    ASAunc += a[i] * mdot(Aa[:, :, i], S, Aa[:, :, i].T)
                BSBunc = np.zeros([n, n])
                for j in range(q):
                    BSBunc += b[j] * mdot(Bb[:, :, j], K, S, K.T, Bb[:, :, j].T)
                AAS = AK
                QQS = sympart(S0 + ASAunc + BSBunc)
                S = dlyap(AAS, QQS)
                if np.any(np.isnan(S)) or not is_pos_def(S):
                    stable = False
                converged_S = la.norm(S - S_prev, 2) / la.norm(S, 2) < epsilon_S

            # Check for stopping condition
            if matrixtype == 'P':
                converged = converged_P
            elif matrixtype == 'S':
                converged = converged_S
            elif matrixtype == 'PS':
                converged = converged_P and converged_S
            if iterc >= max_iters:
                stable = False
            else:
                iterc += 1
            stop = converged or not stable or not stable2
        # print('\ndlyap iters = %s' % str(iterc))
    elif algo == 'finite_horizon':
        P = np.copy(Q)
        Pt = np.copy(Q)
        S = np.copy(Q)
        St = np.copy(Q)
        converged = False
        stop = False
        while not stop:
            if matrixtype == 'P' or matrixtype == 'PS':
                APAunc = np.zeros([n, n])
                for i in range(p):
                    APAunc += a[i] * mdot(Aa[:, :, i].T, Pt, Aa[:, :, i])
                BPBunc = np.zeros([n, n])
                for j in range(q):
                    BPBunc += b[j] * mdot(K.T, Bb[:, :, j].T, Pt, Bb[:, :, j], K)
                Pt = mdot(AK.T, Pt, AK) + APAunc + BPBunc
                P += Pt
                converged_P = np.abs(Pt).sum() < 1e-15
                stable = np.abs(P).sum() < 1e10
            if matrixtype == 'S' or matrixtype == 'PS':
                ASAunc = np.zeros([n, n])
                for i in range(p):
                    ASAunc += a[i] * mdot(Aa[:, :, i], St, Aa[:, :, i].T)
                BSBunc = np.zeros([n, n])
                for j in range(q):
                    BSBunc += b[j] * mdot(Bb[:, :, j], K, St, K.T, Bb[:, :, j].T)
                St = mdot(AK, St, AK.T) + ASAunc + BSBunc
                S += St
                converged_S = np.abs(St).sum() < 1e-15
                stable = np.abs(S).sum() < 1e10
            if matrixtype == 'P':
                converged = converged_P
            elif matrixtype == 'S':
                converged = converged_S
            elif matrixtype == 'PS':
                converged = converged_P and converged_S
            stop = converged or not stable
    if not stable:
        P = None
        S = None
        if show_warn:
            warnings.simplefilter('always', UserWarning)
            warn('System is possibly not mean-square stable')
    if matrixtype == 'P':
        return P
    elif matrixtype == 'S':
        return S
    elif matrixtype == 'PS':
        return P, S
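# Usage sketch (illustrative): solve for both closed-loop second-moment matrices
# of a system with multiplicative noise under a fixed gain K. Here a, Aa and
# b, Bb are the noise variances and direction matrices (Aa of shape (n, n, p)
# and Bb of shape (n, m, q), as indexed above), and S0 is the initial state
# covariance; all inputs are assumed to be supplied by the caller.
def dlyap_mult_usage_sketch(A, B, K, a, Aa, b, Bb, Q, R, S0):
    P, S = dlyap_mult(A, B, K, a, Aa, b, Bb, Q, R, S0,
                      matrixtype='PS', algo='iterative', show_warn=True)
    if P is None:
        print('Closed-loop system may not be mean-square stable under K.')
    return P, S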