import numpy as np
import lqr_control as control

A = np.array([[1.01, 0.01, 0],
              [0.01, 1.01, 0.01],
              [0, 0.01, 1.01]])
B = np.identity(3)
Q = np.identity(3)
R1 = np.identity(3) * 100
R2 = np.identity(3) * 1000

x0 = np.array([[1], [0], [-1]])  # (0, 0, 0) is stable
u0 = np.zeros((3, 1))            # default to 0 init for starting action

# number of time steps to simulate
T = 100

K_1, _, _ = control.dlqr(A, B, Q, R1)
K_2, _, _ = control.dlqr(A, B, Q, R2)

x_1, u_1 = control.simulate_discrete(A, B, K_1, x0, u0, T)
x_2, u_2 = control.simulate_discrete(A, B, K_2, x0, u0, T)

control.plot_states(x_1, 'State Temps', R1)
control.plot_states(x_2, 'State Temps', R2)
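# For reference, a minimal sketch of what a closed-loop rollout such as
# control.simulate_discrete is assumed to do (lqr_control is a local helper
# module, so this is an illustration under that assumption, not its actual
# implementation): apply the linear feedback u_t = -K x_t and step the
# dynamics x_{t+1} = A x_t + B u_t for T steps.
def rollout_lqr_sketch(A, B, K, x0, T):
    xs, us = [x0], []
    x = x0
    for _ in range(T):
        u = -K @ x          # linear state feedback from the LQR gain
        x = A @ x + B @ u   # discrete-time linear dynamics
        us.append(u)
        xs.append(x)
    return np.hstack(xs), np.hstack(us)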
        running_cost += cost.item()
        if done:
            break
    avg_length += t

    # logging
    if i_episode % log_interval == 0:
        avg_length = avg_length / log_interval
        running_cost = running_cost / log_interval
        print('Episode {} \t Avg length: {:.2f} \t Avg cost: {:.2f}'.format(
            i_episode, avg_length, running_cost))
        running_cost = 0
        avg_length = 0

# random init to compare how the two controls act
x0 = np.random.uniform(-5, 5, (1, ))
u0 = np.zeros((1, ))
T = 50

x_star, u_star = control.simulate_discrete(A, B, K, x0.reshape(1, 1),
                                           u0.reshape(1, 1), T)
x_sim, u_sim = simulate(A, B, ppo.policy.actor, x0, u0, T)

compare_paths(np.array(x_sim), np.squeeze(x_star[:, :-1]), "state")
compare_paths(np.array(u_sim), np.squeeze(u_star[:, :-1]), "action")
compare_V(ppo.policy.critic, A, B, Q, R, K, T, gamma, alpha)
compare_P(ppo.policy.actor, K)
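# A minimal sketch (an assumption about the quantity compare_V checks the
# critic against, since that helper is defined elsewhere): for this LQR
# problem the true value of a state is the discounted quadratic cost
# accumulated when the gain K controls the system from that state, so a
# well-trained critic should trace the same curve, and compare_P can likewise
# check the actor's effective gain against K since the optimal policy is
# linear, u = -K x.
def discounted_lqr_cost_sketch(A, B, Q, R, K, x0, T, gamma):
    x, total = x0, 0.0
    for t in range(T):
        u = -K @ x
        total += (gamma ** t) * float(x.T @ Q @ x + u.T @ R @ u)
        x = A @ x + B @ u
    return total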