def Q_solve(block_dim, A, D, F, interactive=False, disp=True,
        verbose=False, debug=False, Rs=[10, 100, 1000],
        N_iter=400, gamma=.5, tol=1e-1, min_step_size=1e-6,
        methods=['frank_wolfe']):
    """
    Solves Q optimization.

        min_Q -log det R + Tr(RF)
              -------------------
             |D-ADA.T   I        |
        X =  |   I      R        |
             |             D   cI|
             |             cI  R |
              -------------------
        X is PSD

    Parameters
    ----------
    block_dim : int
        Dimension of each block; the SDP variable X is (4*block_dim) square.
    A, D, F : np.ndarray
        (block_dim, block_dim) problem matrices.  F may be (near-)zero when
        there are no observations, in which case the identity is returned.
    Rs : list
        Candidate trace upper bounds; overwritten internally with a bound
        computed from D (the default is never mutated, only rebound).
    gamma : float
        Sets c = sqrt(1/gamma) used in the lower-right blocks.

    Returns
    -------
    Q : np.ndarray or None
        (block_dim, block_dim) solution, or implicitly None when the
        solver does not report success.
    """
    dim = 4*block_dim
    search_tol = 1.

    # Copy over initial data so callers' arrays are not mutated in place.
    D = np.copy(D)
    F = np.copy(F)
    c = np.sqrt(1/gamma)

    # Numerical stability: work with D rescaled to unit spectral norm.
    scale = 1./np.linalg.norm(D, 2)

    # Rescaling
    D *= scale

    # Scale down objective matrices
    scale_factor = np.linalg.norm(F, 2)
    if scale_factor < 1e-6:
        # F can be zero if there are no observations for this state
        return np.eye(block_dim)

    # Improving conditioning
    delta = 1e-2
    D = D + delta*np.eye(block_dim)
    Dinv = np.linalg.inv(D)
    D_ADA_T = D - np.dot(A, np.dot(D, A.T)) + delta*np.eye(block_dim)

    # Compute trace upper bound (replaces any caller-supplied Rs).
    R = (2*np.trace(D) + 2*(1./gamma)*np.trace(Dinv))
    Rs = [R]

    As, bs, Cs, ds, Fs, gradFs, Gs, gradGs = \
            Q_constraints(block_dim, A, F, D, c)
    (D_ADA_T_cds, I_1_cds, I_2_cds, R_1_cds,
        D_cds, c_I_1_cds, c_I_2_cds, R_2_cds) = \
            Q_coords(block_dim)

    # Construct init matrix block-by-block, then nudge it PD.
    X_init = np.zeros((dim, dim))
    set_entries(X_init, D_ADA_T_cds, D_ADA_T)
    set_entries(X_init, I_1_cds, np.eye(block_dim))
    set_entries(X_init, I_2_cds, np.eye(block_dim))
    Qinv_init_1 = np.linalg.inv(D_ADA_T)
    set_entries(X_init, R_1_cds, Qinv_init_1)
    set_entries(X_init, D_cds, D)
    set_entries(X_init, c_I_1_cds, c*np.eye(block_dim))
    set_entries(X_init, c_I_2_cds, c*np.eye(block_dim))
    Qinv_init_2 = np.linalg.inv((1./c)**2 * D)
    set_entries(X_init, R_2_cds, Qinv_init_2)
    X_init = X_init + (1e-4)*np.eye(dim)
    if min(np.linalg.eigh(X_init)[0]) < 0:
        print("Q_SOLVE INIT FAILED!")
        # BUG FIX: original read `X_init == None`, a no-op comparison, so
        # the indefinite init matrix was still handed to the solver.
        # Assign None so the solver falls back to its own initialization.
        X_init = None
    else:
        print("Q_SOLVE SUCCESS!")

    g = GeneralSolver()
    def obj(X):
        # Objective normalized by ||F||_2 for conditioning.
        return (1./scale_factor) * log_det_tr(X, F)
    def grad_obj(X):
        return (1./scale_factor) * grad_log_det_tr(X, F)
    g.save_constraints(dim, obj, grad_obj, As, bs, Cs, ds,
            Fs, gradFs, Gs, gradGs)
    (U, X, succeed) = g.solve(N_iter, tol, search_tol,
            interactive=interactive, disp=disp, verbose=verbose,
            debug=debug, Rs=Rs, min_step_size=min_step_size,
            methods=methods, X_init=X_init)
    if succeed:
        # Average the two (scaled) copies of R embedded in X.
        R_1 = scale*get_entries(X, R_1_cds)
        R_2 = scale*get_entries(X, R_2_cds)
        R_avg = (R_1 + R_2) / 2.
        # Ensure stability
        R_avg = R_avg + (1e-3) * np.eye(block_dim)
        Q = np.linalg.inv(R_avg)
        # Unscale answer
        Q *= (1./scale)
        if disp:
            print("Q:\n", Q)
        return Q
def A_solve(block_dim, B, C, D, E, Q, mu, interactive=False,
        disp=True, verbose=False, debug=False, Rs=[10, 100, 1000],
        N_iter=400, tol=1e-1, min_step_size=1e-6,
        methods=['frank_wolfe']):
    """
    Solves A optimization.

        min_A Tr [ Q^{-1} ([C - B] A.T + A [C - B].T + A E A.T) ]
              --------------------
             |D-Q    A           |
        X =  |A.T    D^{-1}      |
             |             I   A |
             |             A.T I |
              --------------------
        A mu == 0
        X is PSD

    Parameters
    ----------
    block_dim : int
        Dimension of each block; the SDP variable X is (4*block_dim) square.
    B, C, D, E, Q : np.ndarray
        (block_dim, block_dim) problem matrices.
    mu : np.ndarray
        Vector constrained to lie in the null space of A.
    Rs : list
        Candidate trace upper bounds; overwritten internally with a bound
        computed from D (the default is never mutated, only rebound).

    Returns
    -------
    A : np.ndarray or None
        (block_dim, block_dim) solution, or implicitly None when the
        solver does not report success.
    """
    dim = 4*block_dim
    search_tol = 1.

    # Copy in inputs so callers' arrays are not mutated in place.
    B = np.copy(B)
    C = np.copy(C)
    D = np.copy(D)
    E = np.copy(E)
    Q = np.copy(Q)
    mu = np.copy(mu)

    # Scale down objective matrices
    scale_factor = (max(np.linalg.norm(C-B, 2), np.linalg.norm(E, 2)))
    if scale_factor < 1e-6 or np.linalg.norm(D, 2) < 1e-6:
        # If A has no observations, not much we can say
        return .5*np.eye(block_dim)

    C = C/scale_factor
    B = B/scale_factor
    E = E/scale_factor

    # Numerical stability: work with D (and Q) rescaled by ||D||_2.
    scale = 1./np.linalg.norm(D, 2)

    # Rescaling
    D *= scale
    Q *= scale

    # Improving conditioning
    delta = 1e-2
    D = D + delta*np.eye(block_dim)
    Q = Q + delta*np.eye(block_dim)

    # Compute post-scaled inverses (single computation; the original
    # redundantly inverted D twice).
    Dinv = np.linalg.inv(D)
    Qinv = np.linalg.inv(Q)

    # Compute trace upper bound (replaces any caller-supplied Rs).
    R = np.abs(np.trace(D)) + np.abs(np.trace(Dinv)) + 2 * block_dim
    Rs = [R]

    As, bs, Cs, ds, Fs, gradFs, Gs, gradGs = \
            A_constraints(block_dim, D, Dinv, Q, mu)
    (D_Q_cds, Dinv_cds, I_1_cds, I_2_cds,
        A_1_cds, A_T_1_cds, A_2_cds, A_T_2_cds) = A_coords(block_dim)

    # Construct init matrix: seed A with const*I and shrink const until
    # the assembled matrix is PSD (at most 10 attempts).
    upper_norm = np.linalg.norm(D-Q, 2)
    lower_norm = np.linalg.norm(D, 2)
    const = np.sqrt(upper_norm/lower_norm)
    factor = .95
    for i in range(10):
        X_init = np.zeros((dim, dim))
        set_entries(X_init, D_Q_cds, D-Q)
        set_entries(X_init, A_1_cds, const*np.eye(block_dim))
        set_entries(X_init, A_T_1_cds, const*np.eye(block_dim))
        set_entries(X_init, Dinv_cds, Dinv)
        set_entries(X_init, I_1_cds, np.eye(block_dim))
        set_entries(X_init, A_2_cds, const*np.eye(block_dim))
        set_entries(X_init, A_T_2_cds, const*np.eye(block_dim))
        set_entries(X_init, I_2_cds, np.eye(block_dim))
        X_init = X_init + (1e-2)*np.eye(dim)
        if min(np.linalg.eigh(X_init)[0]) < 0:
            X_init = None
            const = const * factor
        else:
            print("A_SOLVE SUCCESS AT %d" % i)
            print("const: ", const)
            break
    # BUG FIX: original used `X_init == None`; when X_init is an ndarray
    # that is an element-wise comparison and the `if` raises
    # "truth value of an array is ambiguous" on modern NumPy.
    if X_init is None:
        print("A_SOLVE INIT FAILED!")

    def obj(X):
        return A_dynamics(X, block_dim, C, B, E, Qinv)
    def grad_obj(X):
        return grad_A_dynamics(X, block_dim, C, B, E, Qinv)

    g = GeneralSolver()
    g.save_constraints(dim, obj, grad_obj, As, bs, Cs, ds,
            Fs, gradFs, Gs, gradGs)
    (U, X, succeed) = g.solve(N_iter, tol, search_tol,
            interactive=interactive, disp=disp, verbose=verbose,
            debug=debug, Rs=Rs, min_step_size=min_step_size,
            methods=methods, X_init=X_init)
    if succeed:
        # Average the four copies of A embedded in X.
        A_1 = get_entries(X, A_1_cds)
        A_T_1 = get_entries(X, A_T_1_cds)
        A_2 = get_entries(X, A_2_cds)
        A_T_2 = get_entries(X, A_T_2_cds)
        A = (A_1 + A_T_1.T + A_2 + A_T_2.T) / 4. if False else \
            (A_1 + A_T_1 + A_2 + A_T_2) / 4.
        if disp:
            print("A:\n", A)
        return A