# Scalar problem setup: every quantity below is stored as a 1x1 NumPy array.
# A, B, Q and R are the four arguments handed to control.dlqr further down
# (presumably dynamics, input, state-cost and control-cost terms -- TODO
# confirm against lqr_control).
A = np.array(1).reshape(1, 1)
B = np.array(1).reshape(1, 1)
Q = np.array(1).reshape(1, 1)
R = np.array(1).reshape(1, 1)
# x0 and u0 are not used in the part of the file visible here.
x0 = np.array(-1).reshape(1, 1)
u0 = np.array(0).reshape(1, 1)

# number of time steps to simulate
T = 30
# number of iterations of the dynamical systems for training
NUM_TRIALS = 250
# ALPHA and GAMMA are not referenced in the visible part of the file.
ALPHA = 100
GAMMA = 0.9

# Keep only the first of the three values returned by control.dlqr
# (presumably the feedback gain -- TODO confirm).
K, _, _ = control.dlqr(A, B, Q, R)


def loss(target, prediction, alpha=1):
    """Return ``(target - alpha * prediction) ** 2 / alpha ** 2`` as a float.

    Args:
        target: value the scaled prediction is compared against.
        prediction: value multiplied by ``alpha`` before the comparison.
        alpha: scale factor; with the default of 1 this is a plain
            squared error.

    Returns:
        The scaled squared error converted with ``float()``.

    NOTE(review): ``float()`` needs a single value. If callers pass 1x1
    arrays (as the module-level setup suggests), recent NumPy releases are
    reported to deprecate converting an ndim > 0 array to a scalar --
    confirm, and consider ``.item()`` if so.
    """
    return float((1 / (alpha**2)) * np.square(target - alpha * prediction))


def train(K):
    # The parameter K shadows the module-level K computed above.
    # NOTE(review): only the beginning of this function is visible in this
    # view; the inner loop body and any return value are cut off.
    loss_history = []
    for i in range(NUM_TRIALS):
        # Draw a fresh random 1x1 value from a standard normal for each trial.
        x = np.random.randn(1).reshape(1, 1)
        #print('yhat = '+str(y_hat))
        total_loss = 0
        for t in range(T):
            # NOTE(review): loop body is truncated in the source at this point.
import numpy as np
import lqr_control as control

# Three-state system: 1.01 on the diagonal, 0.01 coupling between
# neighbouring states.
A = np.array(
    [
        [1.01, 0.01, 0],
        [0.01, 1.01, 0.01],
        [0, 0.01, 1.01],
    ]
)
B = np.eye(3)
Q = np.eye(3)

# Two alternative R matrices handed to dlqr; R2 is ten times R1.
R1 = 100 * np.eye(3)
R2 = 1000 * np.eye(3)

# Starting state (author's note: (0,0,0) is stable).
x0 = np.array([[1], [0], [-1]])
# Starting action defaults to all zeros.
u0 = np.zeros((3, 1))

# number of time steps to simulate
T = 100

# Solve for one gain per R matrix, keeping only the first returned value.
K_1, _, _ = control.dlqr(A, B, Q, R1)
K_2, _, _ = control.dlqr(A, B, Q, R2)

# Roll both closed loops forward from the same starting state and action.
x_1, u_1 = control.simulate_discrete(A, B, K_1, x0, u0, T)
x_2, u_2 = control.simulate_discrete(A, B, K_2, x0, u0, T)

# Plot each state trajectory; the matching R matrix is passed as the third
# argument.
control.plot_states(x_1, 'State Temps', R1)
control.plot_states(x_2, 'State Temps', R2)
# NOTE(review): this fragment starts mid-script. state_dim, action_dim,
# n_latent_var, gamma, max_episodes, device, A, B, Q, R, Memory, PG,
# compare_P and control are all defined outside the visible part of the file.
lr = 0.001
betas = (0.9, 0.999)  # parameters for Adam optimizer
random_seed = 1
#############################################

# Seed both torch and NumPy so runs are repeatable. A falsy seed (0 or None)
# skips seeding entirely.
if random_seed:
    print("Random Seed: {}".format(random_seed))
    torch.manual_seed(random_seed)
    np.random.seed(random_seed)

memory = Memory()
pg = PG(state_dim, action_dim, n_latent_var, lr, betas, gamma)

# Optimal control for comparison
K, P, _ = control.dlqr(A, B, Q, R)
# Two-element tensor holding the negated single value of K and 0.
# K.item() presumably requires K to hold exactly one element -- TODO confirm
# the type returned by control.dlqr.
optimal_params = torch.FloatTensor([-K.item(), 0.])

# compare initial policy with optimal
compare_P(pg.policy.agent, K, actor_label="Initial Policy")

# important parameters
print(f"device: {device}, lr: {lr}, betas: {betas}")

# logging variables
running_cost = 0

# training loop
# Episodes are numbered 1..max_episodes inclusive.
for i_episode in range(1, max_episodes + 1):
    # Each episode starts from a 1x1 state sampled from a standard normal.
    state = torch.normal(0, 1, size=(1, 1))
    done = False
    # NOTE(review): the rest of the loop body is not visible in this view.