def gen_rand_ABC(n=4, m=3, p=2, rho=None, seed=1):
    """Draw a reproducible random (A, B, C) matrix triple.

    The module-level ``npr`` generator is re-seeded with ``seed`` on every
    call, so the same arguments always yield the same matrices (and the
    global random state is reset as a side effect).

    Parameters
    ----------
    n : int
        Number of rows of all three matrices; A is n-by-n.
    m : int
        Number of columns of B.
    p : int
        Number of columns of C.
    rho : float or None
        Value that ``specrad(A)`` is rescaled to. ``None`` selects 0.9.
    seed : int
        Seed handed to ``npr.seed``.

    Returns
    -------
    tuple
        ``(A, B, C)``. B and C are rounded to one decimal; A is rounded
        before rescaling, so the returned A is generally not rounded.
    """
    npr.seed(seed)
    target_radius = 0.9 if rho is None else rho

    # Draw order (A, then B, then C) determines which random numbers each
    # matrix receives, so it must not be rearranged.
    A_unscaled = npr.randn(n, n).round(1)
    A = A_unscaled * (target_radius / specrad(A_unscaled))
    B = npr.rand(n, m).round(1)
    C = npr.rand(n, p).round(1)
    return A, B, C
def random_system(n=2, m=1, seed=0):
    """Generate a reproducible random system with rank-one noise covariances.

    Re-seeds the module-level ``npr`` generator with ``seed`` (a global side
    effect), then draws A, B and two base vectors in that fixed order.

    Parameters
    ----------
    n : int
        A is n-by-n and B has n rows.
    m : int
        Number of columns of B.
    seed : int
        Seed handed to ``npr.seed``.

    Returns
    -------
    tuple
        ``(n, m, A, B, SigmaA, SigmaB)`` where A has been rescaled so that
        ``specrad(A)`` equals 0.8, ``SigmaA`` is the (n*n)-by-(n*n) outer
        product of a random vector with itself, and ``SigmaB`` is the
        analogous (n*m)-by-(n*m) matrix.
    """
    npr.seed(seed)

    # Random draws happen in the order A, B, base vector for SigmaA, base
    # vector for SigmaB; keep this order to reproduce the same numbers.
    A_raw = npr.randn(n, n)
    A = (A_raw * 0.8) / specrad(A_raw)
    B = npr.randn(n, m)
    base_a = 0.1 * npr.randn(n * n)
    base_b = 0.1 * npr.randn(n * m)

    # Outer product of a vector with itself: symmetric and rank one.
    SigmaA = np.outer(base_a, base_a)
    SigmaB = np.outer(base_b, base_b)
    return n, m, A, B, SigmaA, SigmaB
def check_random_systems(K, eta, R, bound_type):
    """Sweep perturbed systems and record each closed-loop ``specrad`` value.

    For each of ``R`` evenly spaced scalars ``r[j]`` the perturbed matrix
    ``A + sum_i eta[i] * r[j] * Ai[i]`` is formed and ``specrad`` is applied
    to that matrix plus ``B_true @ K``.

    ``A``, ``Ai``, ``p`` and ``B_true`` are not parameters; they are read
    from the enclosing scope.

    Parameters
    ----------
    K : array
        Gain matrix, right-multiplied onto ``B_true``.
    eta : sequence
        Per-direction scale factors; entries ``0 .. p-1`` are used.
    R : int
        Number of sample points in the sweep.
    bound_type : str
        ``"unidirectional"`` sweeps the scalar over [0, 1];
        ``"bidirectional"`` sweeps it over [-1, 1].

    Returns
    -------
    numpy.ndarray
        Length-``R`` array of ``specrad`` values, one per sample point.

    Raises
    ------
    ValueError
        If ``bound_type`` is not one of the two supported strings.
        (Previously this surfaced later as an unbound-variable error.)
    """
    if bound_type == "unidirectional":
        r = np.linspace(0, 1, R)
    elif bound_type == "bidirectional":
        r = np.linspace(-1, 1, R)
    else:
        raise ValueError(
            "bound_type must be 'unidirectional' or 'bidirectional', got %r"
            % (bound_type,))

    s = np.zeros(R)
    # The feedback term does not depend on the sample, so compute it once.
    BK = B_true @ K
    for j in range(R):
        # Copy so the in-place accumulation never touches the shared A.
        Arand = np.copy(A)
        for i in range(p):
            Arand += eta[i] * r[j] * Ai[i]
        s[j] = specrad(Arand + BK)
    return s
def model_based_robust_stabilization_experiment():
    """Compare several model-based controller designs on the true system.

    Steps, in order:

    1. Seed ``npr`` and load the true and nominal problem data from
       ``gen_double_spring_mass()``.
    2. Run ``policy_iteration`` on three variants of the nominal data:
       the full problem (game adversary + multiplicative noise), a copy with
       the noise variances zeroed (game only), and a copy with ``C`` and
       ``varCk`` zeroed (multiplicative noise only). Each result is passed
       to ``verify_gare``.
    3. For the full problem, check the concavity and positive-definiteness
       conditions and abort if either fails.
    4. Compute ``dare_gain`` gains for the nominal system ("cert equiv") and
       the true system ("optimal").
    5. Evaluate every gain (plus a zero, open-loop gain) on the true system:
       ``specrad`` of the closed loop, and the trace of the ``dlyap``
       solution as cost (``inf`` when ``specrad`` is not below 1).
    6. Write one table row per method to
       ``../results/results_model_based_robust_stabilization_experiment.txt``
       (relative to the current working directory).

    Raises
    ------
    Exception
        If the concavity or the positive-definiteness condition fails for
        the full game + multiplicative-noise solution.
    """
    seed = 1
    npr.seed(seed)

    problem_data_true, problem_data = gen_double_spring_mass()
    A, B, C, Ck, varCk, Q, R, S = [
        problem_data[key]
        for key in ['A', 'B', 'C', 'Ck', 'varCk', 'Q', 'R', 'S']
    ]
    n, m = A.shape[1], B.shape[1]
    s = Ck.shape[0]

    # Synthesize controllers using various uncertainty modeling terms
    # Modify problem data_files

    # LQR w/ game adversary
    # Setting varAi, varBj, varCk = 0 => no multiplicative noise on the game
    problem_data_model_n = copy.deepcopy(problem_data)
    problem_data_model_n['varAi'] *= 0
    problem_data_model_n['varBj'] *= 0
    problem_data_model_n['varCk'] *= 0

    # LQR w/ multiplicative noise
    # Setting C = 0 and varCk = 0 => no game adversary
    problem_data_model_m = copy.deepcopy(problem_data)
    problem_data_model_m['C'] *= 0
    problem_data_model_m['varCk'] *= 0

    # Simulation options
    sim_options = None
    num_iterations = 50
    problem_data_known = True

    # Policy iteration on LQR w/ game adversary and multiplicative noise
    K0, L0 = get_initial_gains(problem_data, initial_gain_method='dare')
    print("LQR w/ game adversary and multiplicative noise")
    # Only the final P, K, L are needed; the histories are discarded.
    P_pi, K_pi, L_pi = policy_iteration(
        problem_data, problem_data_known, K0, L0, sim_options,
        num_iterations)[:3]
    verify_gare(problem_data, P_pi,
                algo_str='Policy iteration - Game w/ Multiplicative noise')

    # Check concavity condition
    Qvv_pi = -S + mdot(C.T, P_pi, C) + np.sum(
        [varCk[k] * mdot(Ck[k].T, P_pi, Ck[k]) for k in range(s)], axis=0)
    if not is_pos_def(-Qvv_pi):
        raise Exception(
            'Problem fails the concavity condition, adjust adversary strength')

    # Check positive definiteness condition
    QKL_pi = Q + mdot(K_pi.T, R, K_pi) - mdot(L_pi.T, S, L_pi)
    if not is_pos_def(QKL_pi):
        raise Exception(
            'Problem fails the positive definiteness condition, adjust adversary strength'
        )
    print(QKL_pi)

    # Policy Iteration on LQR w/ game adversary
    K0n, L0n = get_initial_gains(problem_data_model_n,
                                 initial_gain_method='dare')
    print("LQR w/ game adversary")
    Pn_pi, Kn_pi = policy_iteration(
        problem_data_model_n, problem_data_known, K0n, L0n, sim_options,
        num_iterations)[:2]
    verify_gare(problem_data_model_n, Pn_pi,
                algo_str='Policy iteration - Game w/o Multiplicative noise')

    # Policy Iteration on LQR w/ multiplicative noise
    K0m, L0m = get_initial_gains(problem_data_model_m,
                                 initial_gain_method='dare')
    print("LQR w/ multiplicative noise")
    Pm_pi, Km_pi = policy_iteration(
        problem_data_model_m, problem_data_known, K0m, L0m, sim_options,
        num_iterations)[:2]
    verify_gare(problem_data_model_m, Pm_pi,
                algo_str='Policy iteration - LQR w/ Multiplicative noise')

    # LQR on true system
    A_true, B_true, Q_true, R_true = [
        problem_data_true[key] for key in ['A', 'B', 'Q', 'R']
    ]
    _, Kare_true = dare_gain(A_true, B_true, Q_true, R_true)

    # LQR on nominal system, no explicit robust control design
    _, Kce = dare_gain(A, B, Q, R)

    # Check if synthesized controllers stabilize the true system.
    # Each nominal gain is widened with an m-by-n block of zeros so it can
    # multiply the true state (presumably the true model carries n extra
    # states that the nominal model omits -- TODO confirm).
    K_pi_true = np.hstack([K_pi, np.zeros([m, n])])
    Kn_pi_true = np.hstack([Kn_pi, np.zeros([m, n])])
    Km_pi_true = np.hstack([Km_pi, np.zeros([m, n])])
    Kce_true = np.hstack([Kce, np.zeros([m, n])])
    Kol_true = np.zeros_like(Kce_true)

    control_method_strings = [
        'open-loop ', 'cert equiv ', 'noise ', 'game ', 'noise + game ',
        'optimal '
    ]
    K_list = [Kol_true, Kce_true, Km_pi_true, Kn_pi_true, K_pi_true, Kare_true]
    AK_list = [A_true + np.dot(B_true, K) for K in K_list]
    QK_list = [Q_true + mdot(K.T, R_true, K) for K in K_list]
    specrad_list = [specrad(AK) for AK in AK_list]
    # dlyap is only called when the closed loop passes the specrad < 1 test;
    # otherwise the cost is reported as infinite.
    cost_list = [
        np.trace(dlyap(AK.T, QK)) if sr < 1 else np.inf
        for AK, QK, sr in zip(AK_list, QK_list, specrad_list)
    ]

    set_numpy_decimal_places(1)

    output_text_filename = 'results_model_based_robust_stabilization_experiment.txt'
    output_dir = os.path.join('..', 'results')
    # Create the results directory if needed so the finished computation is
    # not lost to a FileNotFoundError when opening the output file.
    os.makedirs(output_dir, exist_ok=True)
    output_text_path = os.path.join(output_dir, output_text_filename)
    with open(output_text_path, 'w') as f:
        header_str = 'method | specrad | cost | gains'
        print(header_str, file=f)
        for control_method_string, sr, cost, K in zip(control_method_strings,
                                                      specrad_list, cost_list,
                                                      K_list):
            line_str = '%s %.3f %8s %s' % (control_method_string, sr,
                                           '%10.0f' % cost, K)
            print(line_str, file=f)
def policy_iteration(problem_data,
                     problem_data_known,
                     K0,
                     L0,
                     sim_options=None,
                     num_iterations=100,
                     print_iterates=True):
    """Run a fixed number of policy-iteration steps on the gain pair (K, L).

    Each iteration evaluates the current gains with ``gdlyap`` to obtain P,
    builds the Q-function blocks with ``qfun``, and then updates both gains
    from those blocks.

    Parameters
    ----------
    problem_data : mapping
        Must provide the keys 'A', 'B', 'C', 'Ai', 'Bj', 'Ck', 'varAi',
        'varBj', 'varCk', 'Q', 'R', 'S'.
    problem_data_known : bool
        Forwarded unchanged to ``qfun``.
    K0, L0 : array
        Initial gains; copied, never modified.
    sim_options : object or None
        Forwarded unchanged to ``qfun``.
    num_iterations : int
        Number of evaluation/improvement rounds; must be at least 1.
    print_iterates : bool
        When true, print the iteration counter and P after every round.

    Returns
    -------
    tuple
        ``(P, K, L, H, P_history, K_history, L_history, c_history,
        H_history)``. ``P`` and ``H`` come from the last evaluation, while
        ``K`` and ``L`` are the gains after the last improvement step.
        ``K_history[i]`` / ``L_history[i]`` hold the gains that were
        evaluated in round ``i`` (before improvement), and ``c_history[i]``
        is ``trace(P)`` for that round.

    Raises
    ------
    ValueError
        If ``num_iterations`` is less than 1. Without at least one round
        there is no P or H to return.
    Exception
        If ``specrad(A + B K0 + C L0)`` exceeds 1.
    """
    if num_iterations < 1:
        raise ValueError(
            "num_iterations must be at least 1, got %r" % (num_iterations,))

    problem_data_keys = [
        'A', 'B', 'C', 'Ai', 'Bj', 'Ck', 'varAi', 'varBj', 'varCk', 'Q', 'R',
        'S'
    ]
    A, B, C, Ai, Bj, Ck, varAi, varBj, varCk, Q, R, S = [
        problem_data[key] for key in problem_data_keys
    ]
    n, m, p = [M.shape[1] for M in [A, B, C]]
    K, L = np.copy(K0), np.copy(L0)

    # Check initial policies are stabilizing
    if specrad(A + B.dot(K0) + C.dot(L0)) > 1:
        raise Exception("Initial policies are not stabilizing!")

    P_history, K_history, L_history = [
        np.zeros([num_iterations, dim, n]) for dim in [n, m, p]
    ]
    H_history = np.zeros([num_iterations, n + m + p, n + m + p])
    c_history = np.zeros(num_iterations)

    print('Policy iteration')
    for i in range(num_iterations):
        # Record the gains being evaluated in this round.
        K_history[i] = K
        L_history[i] = L

        # Policy evaluation
        P = gdlyap(problem_data, K, L)
        Qxx, Quu, Qvv, Qux, Qvx, Qvu = qfun(problem_data, problem_data_known,
                                            P, K, L, sim_options)
        QuvQvvinv = solveb(Qvu.T, Qvv)
        QvuQuuinv = solveb(Qvu, Quu)
        H = np.block([[Qxx, Qux.T, Qvx.T],
                      [Qux, Quu, Qvu.T],
                      [Qvx, Qvu, Qvv]])

        # Policy improvement: both updates use the blocks computed above,
        # so K and L are improved simultaneously rather than sequentially.
        K = -la.solve(Quu - QuvQvvinv.dot(Qvu), Qux - QuvQvvinv.dot(Qvx))
        L = -la.solve(Qvv - QvuQuuinv.dot(Qvu.T), Qvx - QvuQuuinv.dot(Qux))

        # Record history
        P_history[i] = P
        H_history[i] = H
        c_history[i] = np.trace(P)
        if print_iterates:
            print('iteration %3d / %3d' % (i + 1, num_iterations))
            print(P)
    print('')
    return P, K, L, H, P_history, K_history, L_history, c_history, H_history
def mean_square_stable(problem_data, K, L):
    """Return whether the gains (K, L) pass the mean-square stability test.

    The test is ``specrad(cost_operator_P(problem_data, K, L)) < 1``; the
    comparison is strict, so a value of exactly 1 counts as not stable.
    """
    operator = cost_operator_P(problem_data, K, L)
    radius = specrad(operator)
    return radius < 1
def stablestr(r):
    """Format ``r`` with ``%f`` and append a stability tag.

    The tag is "( stable)" when ``r < 1`` and "(unstable)" otherwise.
    """
    r_str = ("%f "%r)
    s_str = "( stable)" if r < 1 else "(unstable)"
    return r_str + s_str


# ---- Stability report (printed to stdout) ----
# Relies on Kce, K1, K2, A, A_true, B_true, s1 and s2 being defined earlier;
# their definitions are not visible in this part of the file.
print("")
print("Stability report")
print("-----------------------------------------------")
# Gain matrices under comparison.
print("Closed-loop gain matrices")
print("K_certainty_equivalent = %s"%Kce)
print("K_robust_algo1 = %s"%K1)
print("K_robust_algo2 = %s"%K2)
print("")
# Each gain applied to the true dynamics (A_true, B_true).
print("Spectral radii of true system, closed-loop")
print("Open-loop = %s"%stablestr(specrad(A_true)))
print("K_certainty_equivalent = %s"%stablestr(specrad(A_true + mdot(B_true, Kce))))
print("K_robust_algo1 = %s"%stablestr(specrad(A_true + mdot(B_true, K1))))
print("K_robust_algo2 = %s"%stablestr(specrad(A_true + mdot(B_true, K2))))
print("")
# Each gain applied to the nominal A.
# NOTE(review): the nominal A is paired with B_true here, not a nominal B --
# confirm this is intended.
print("Spectral radii of nominal system, closed-loop")
print("Open-loop = %s"%stablestr(specrad(A)))
print("K_certainty_equivalent = %s"%stablestr(specrad(A + mdot(B_true, Kce))))
print("K_robust_algo1 = %s"%stablestr(specrad(A + mdot(B_true, K1))))
print("K_robust_algo2 = %s"%stablestr(specrad(A + mdot(B_true, K2))))
print("")
# Largest value in s1 / s2; presumably these hold the outputs of
# check_random_systems for K1 and K2 -- TODO confirm.
print("Spectral radii of random systems in robustness region, closed-loop")
print("K_robust_algo1 = %s"%stablestr(s1.max()))
print("K_robust_algo2 = %s"%stablestr(s2.max()))
print("")
print("Robustness regions, closed-loop")