예제 #1
0
# 1x1 matrices handed to control.dlqr below (scalar system).
A = np.array([[1]])
B = np.array([[1]])
Q = np.array([[1]])
R = np.array([[1]])

# Initial state and initial control input, each stored as a 1x1 array.
x0 = np.array([[-1]])
u0 = np.array([[0]])

# Simulation horizon: number of time steps per rollout.
T = 30
# Number of training rollouts of the dynamical system.
NUM_TRIALS = 250
ALPHA = 100
GAMMA = 0.9

# Only the first value returned by dlqr is kept; the other two are discarded.
K, _, _ = control.dlqr(A, B, Q, R)


def loss(target, prediction, alpha=1):
    """Return the alpha-scaled squared error as a Python float.

    Computes ``(1 / alpha**2) * (target - alpha * prediction)**2``.

    Args:
        target: Scalar or single-element array.
        prediction: Scalar or single-element array.
        alpha: Scale applied to ``prediction`` before the comparison; the
            squared error is divided by ``alpha**2``. Must be non-zero.

    Returns:
        The scaled squared error as a built-in ``float``.

    Raises:
        TypeError: If the result holds anything other than exactly one
            element (same exception type ``float()`` raised before).
        ZeroDivisionError: If ``alpha`` is a Python number equal to zero.
    """
    scaled_sq_error = np.asarray(
        (1 / (alpha**2)) * np.square(target - alpha * prediction)
    )
    if scaled_sq_error.size != 1:
        raise TypeError("only size-1 arrays can be converted to Python scalars")
    # Calling float() directly on an array with ndim > 0 (e.g. shape (1, 1))
    # is deprecated since NumPy 1.25; .item() extracts the single element
    # explicitly and works for 0-d and n-d size-1 arrays alike.
    return float(scaled_sq_error.item())


def train(K):
    loss_history = []
    for i in range(NUM_TRIALS):
        x = np.random.randn(1).reshape(1, 1)

        #print('yhat = '+str(y_hat))

        total_loss = 0
        for t in range(T):
예제 #2
0
import numpy as np
import lqr_control as control

# 3x3 matrices handed to control.dlqr / control.simulate_discrete below.
A = np.array([
    [1.01, 0.01, 0.00],
    [0.01, 1.01, 0.01],
    [0.00, 0.01, 1.01],
])
B = np.eye(3)
Q = np.eye(3)
# Two alternative R matrices: identity scaled by 100 and by 1000.
R1 = 100 * np.eye(3)
R2 = 1000 * np.eye(3)

# Initial state as a 3x1 column; original note: (0,0,0) is stable.
x0 = np.array([1, 0, -1]).reshape(3, 1)
# Initial action starts at zero (3x1 column).
u0 = np.zeros(shape=(3, 1))

# Number of time steps to simulate.
T = 100

# One gain per R matrix; the remaining dlqr outputs are discarded.
K_1, _, _ = control.dlqr(A, B, Q, R1)
K_2, _, _ = control.dlqr(A, B, Q, R2)

# Roll each gain forward from the same initial state and action.
x_1, u_1 = control.simulate_discrete(A, B, K_1, x0, u0, T)
x_2, u_2 = control.simulate_discrete(A, B, K_2, x0, u0, T)

# Plot both state trajectories, passing the R matrix used for each.
control.plot_states(x_1, 'State Temps', R1)
control.plot_states(x_2, 'State Temps', R2)
# Optimizer hyperparameters; both are passed to the PG constructor below.
lr = 0.001
betas = (0.9, 0.999)  # parameters for Adam optimizer

random_seed = 1
#############################################

# Seed the torch and NumPy RNGs. NOTE: this is a truthiness test, so a seed
# of 0 (or None) skips seeding entirely.
if random_seed:
    print("Random Seed: {}".format(random_seed))
    torch.manual_seed(random_seed)
    np.random.seed(random_seed)

# NOTE(review): state_dim, action_dim, n_latent_var and gamma are not defined
# in the visible code — presumably set earlier in the file; confirm.
memory = Memory()
pg = PG(state_dim, action_dim, n_latent_var, lr, betas, gamma)

# Optimal control for comparison
# NOTE(review): R is not defined in the visible code (A, B, Q are) — confirm
# where these matrices come from.
K, P, _ = control.dlqr(A, B, Q, R)
# K.item() only works when K holds exactly one element, so this assumes a
# scalar gain. The tensor is [-K, 0.]; the meaning of the second entry is not
# shown here (presumably a bias term — verify against the policy).
optimal_params = torch.FloatTensor([-K.item(), 0.])

# compare initial policy with optimal
compare_P(pg.policy.agent, K, actor_label="Initial Policy")

# important parameters
# NOTE(review): `device` is not defined in the visible code — confirm.
print(f"device: {device}, lr: {lr}, betas: {betas}")

# logging variables
running_cost = 0

# training loop
for i_episode in range(1, max_episodes + 1):
    state = torch.normal(0, 1, size=(1, 1))
    done = False