import numpy as np
from scipy.linalg import eig
from quantecon import LQ, RBLQ


def evaluate_policy(theta, F):
    """
    Given theta (scalar, dtype=float) and policy F (array_like), returns
    the value associated with that policy under the worst case path for
    {w_t}, as well as the entropy level.
    """
    rlq = RBLQ(Q, R, A, B, C, beta, theta)
    K_F, P_F, d_F, O_F, o_F = rlq.evaluate_F(F)
    x0 = np.array([[1.], [0.], [0.]])
    value = -x0.T.dot(P_F.dot(x0)) - d_F
    entropy = x0.T.dot(O_F.dot(x0)) + o_F
    return list(map(float, (value, entropy)))
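# == Usage sketch (illustrative, not in the original script) == #
# Scores a candidate policy F at a single theta; assumes the module-level
# Q, R, A, B, C and beta are in place.  `F_trial` is a hypothetical 1 x 3
# placeholder policy (B is 3 x 1, so F must be 1 x 3).  Disabled with
# `if 0:` in the script's own style.
if 0:
    F_trial = np.zeros((1, 3))  # hypothetical do-nothing policy
    value, entropy = evaluate_policy(2.0, F_trial)
    print(value, entropy)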
c = 2
theta_bar = 2.0

# == Useful constants == #
m0 = (a0 - c) / (2 * a1)
m1 = b / (2 * a1)

# == Formulate LQ problem == #
Q = gamma
R = [[a1, -a1, 0],
     [-a1, a1, 0],
     [0,   0,  0]]
A = [[0, 0, m0],
     [0, 1, 0],
     [0, 0, 1]]
B = [[0],
     [1],
     [0]]
C = [[m1],
     [0],
     [0]]

rlq = RBLQ(Q, R, A, B, C, beta, theta_bar)
lq = LQ(Q, R, A, B, beta=beta)

f, k, p = rlq.robust_rule()
print(f)
print(rlq.K_to_F(k))

if 0:
    F_opt, K_opt, P_opt = rlq.robust_rule_simple()

x0 = np.asarray((1, 0, 1)).reshape(3, 1)

num_thetas = 20
thetas = np.linspace(1, 5, num_thetas)
vals = np.empty((2, num_thetas))
ent = np.empty(num_thetas)
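# == Consistency check (sketch, not in the original script) == #
# The pair (f, k) returned by robust_rule() should be mutually consistent:
# K_to_F(k) returns an (F, P) pair under the same theta_bar, and its first
# element should reproduce f up to numerical error.
f_implied, p_implied = rlq.K_to_F(k)
print("max |f - K_to_F(k)[0]| =", np.max(np.abs(np.asarray(f) - np.asarray(f_implied))))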
    df = df.dropna(how='any')  # drop any rows containing NaN values
    return df


#-----------------------------------------------------------------------------#
#                                    Main
#-----------------------------------------------------------------------------#

# == Compute the optimal rule == #
optimal_lq = LQ(Q, R, A, B, C, beta=beta)
Po, Fo, do = optimal_lq.stationary_values()

# == Compute a robust rule given theta == #
baseline_robust = RBLQ(Q, R, A, B, C, beta, theta)
Fb, Kb, Pb = baseline_robust.robust_rule()

# == Check the positive definiteness of worst-case covariance matrix to == #
# == ensure that theta exceeds the breakdown point == #
test_matrix = np.identity(Pb.shape[0]) - np.dot(C.T, Pb.dot(C)) / theta
eigenvals, eigenvecs = eig(test_matrix)
assert (eigenvals >= 0).all(), 'theta below breakdown point.'

emax = 1.6e6

optimal_best_case = value_and_entropy(emax, Fo, 'best')
robust_best_case = value_and_entropy(emax, Fb, 'best')
optimal_worst_case = value_and_entropy(emax, Fo, 'worst')
robust_worst_case = value_and_entropy(emax, Fb, 'worst')
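# == Plotting sketch (illustrative, not in the original script) == #
# A natural use of the four frames above is to trace out value-entropy
# frontiers.  The column names 'entropy' and 'value' are assumptions about
# the DataFrame returned by value_and_entropy; adjust if the frame is laid
# out differently.
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
for df_, label in [(optimal_best_case, 'optimal, best case'),
                   (robust_best_case, 'robust, best case'),
                   (optimal_worst_case, 'optimal, worst case'),
                   (robust_worst_case, 'robust, worst case')]:
    ax.plot(df_['entropy'], df_['value'], label=label)
ax.set_xlabel('entropy')
ax.set_ylabel('value')
ax.legend(loc='best')
plt.show()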