def compute_loss(self, y, t):
    """Return the mean per-sample loss over all samples plus L1/L2 weight penalties.

    Parameters
    ----------
    y : array, shape (n_samples, ...) — predictions
    t : array, shape (n_samples, ...) — targets (first dimension must match y)

    Returns
    -------
    float: mean(loss over every sample) + optimizer.l1 * ||params||_1
           + optimizer.l2 * ||params||_2

    Exits the process (status 1) on a batch-size mismatch, preserving the
    original error-handling behavior.
    """
    if y.shape[0] != t.shape[0]:
        print('Array size is NOT match.')
        sys.exit(1)
        return None  # unreachable; kept for parity with original control flow

    # Regularization terms over the model parameters.
    loss_l1 = ut.l1_norm(self.params)
    loss_l2 = ut.l2_norm(self.params)

    loss = 0.0
    # BUG FIX: the original range stopped at (n - batch_size + 1), so the
    # trailing partial batch was silently dropped (its `else` branch was
    # unreachable) while the sum was still divided by the full sample count.
    # Iterating to n lets slicing clamp the final partial batch naturally.
    for i in range(0, y.shape[0], self.batch_size):
        loss += self.compute_loss_sum(y[i:i + self.batch_size],
                                      t[i:i + self.batch_size])

    return (loss / y.shape[0]) + (self.optimizer.l1 * loss_l1 + self.optimizer.l2 * loss_l2)
def backward(self, dy):
    """Backward pass: compute the output-error gradient and update the
    (log-parameterized) regularization coefficients l1/l2.

    Parameters
    ----------
    dy : array — upstream gradient; used only to pick the array module
         (NumPy vs CuPy) via cuda.get_array_module.

    Returns
    -------
    dE : array — (self.y - self.t) / batch size, the error gradient.

    Side effects: sets self.dl1 / self.dl2; when self.variational is truthy,
    performs a small gradient step on self.l1 / self.l2 in place.
    """
    xp = cuda.get_array_module(dy)
    eps = 1e-8  # guards against division by ~0 and sqrt of 0 below

    dE = (self.y - self.t) / self.t.shape[0]

    # BUG FIX: `params` was an undefined bare name (NameError at runtime);
    # the sibling compute_loss uses self.params, so use it here as well.
    l1_norm = ut.l1_norm(self.params)
    l2_norm = ut.l2_norm(self.params)

    # Shared denominator of both coefficient gradients, hoisted once.
    # NOTE(review): these use self.E (cached loss?) rather than dE — looks
    # intentional but worth confirming against where self.E is assigned.
    denom = (xp.exp(self.l1) + xp.exp(self.l2)) ** 2 + eps
    self.dl1 = xp.sum(-xp.exp(self.l1) * self.E / denom) + xp.exp(self.l1) * l1_norm
    self.dl2 = xp.sum(-xp.exp(self.l2) * self.E / denom) + xp.exp(self.l2) * l2_norm

    if self.variational:
        # Tiny, norm-scaled gradient steps; note the step sizes differ by 10x
        # (1e-7 vs 1e-6), as in the original.
        self.l1 -= 0.0000001 * self.dl1 * xp.sqrt(l1_norm + eps)
        self.l2 -= 0.000001 * self.dl2 * xp.sqrt(l2_norm + eps)

    return dE
    psi=observables.monomials(1),
    regressor='sindy')

#%% Training error
# One norm per training sample: distance between the true lifted next state
# and the tensor model's one-step prediction in lifted space.
norms = np.empty((N))
for i in range(N):
    phi_x = np.vstack(tensor.Phi_X[:, i]) # current (lifted) state
    action = np.vstack(tensor.U[:, i])    # action taken at step i
    # true_x_prime = np.vstack(tensor.Y[:,i])
    true_phi_x_prime = np.vstack(tensor.Phi_Y[:, i])           # true lifted next state
    predicted_phi_x_prime = tensor.K_(action) @ phi_x          # model prediction in lifted space

    # Compute norms
    norms[i] = utilities.l2_norm(true_phi_x_prime, predicted_phi_x_prime)
print("Training error:", np.mean(norms))

#%% Testing error normalized by mean norm of different starting states
num_episodes = 100
num_steps_per_episode = 100
norms = np.empty((num_episodes, num_steps_per_episode))
norms_states = np.empty((num_episodes, num_steps_per_episode))
# Random initial states uniformly in [-state_range, state_range] per coordinate
# (sign chosen independently per entry).
# NOTE(review): assumes a 2-dimensional state — confirm against env definition.
X0_sample = np.random.rand(2, num_episodes) * state_range * np.random.choice(
    np.array([-1, 1]), size=(2, num_episodes)) # random initial states
# Mean magnitude of the initial states, used to normalize testing error.
norm_X0s = utilities.l2_norm(X0_sample, np.zeros_like(X0_sample))
avg_norm_X0s = np.mean(norm_X0s)
for episode in range(num_episodes):
    x = np.vstack(X0_sample[:, episode]) # initial state
# Roll out episodes in the environment with random actions and accumulate the
# per-step prediction error of the Koopman tensor model.
# NOTE(review): `testing_norms` is assumed to be initialized as a list before
# this loop (outside this view) — confirm.
for episode in range(num_episodes):
    x = np.vstack(env.reset())
    done = False
    while not done:
        phi_x = tensor.phi(x)  # lift the current state
        # u = np.array([[env.action_space.sample()]]) # Sampled from random agent
        u = np.array([[np.random.choice(All_U[0])]])  # random action from the discretized set
        # One-step prediction projected back to state space via B.
        predicted_x_prime = tensor.B.T @ tensor.K_(u) @ phi_x
        # true_x_prime = np.vstack(Y[:, i])
        observation, cost, done, info = env.step(u[:, 0])
        true_x_prime = np.vstack(observation)
        testing_norms.append(utilities.l2_norm(true_x_prime, predicted_x_prime))
        x = true_x_prime  # step forward using the environment's true next state
testing_norms = np.array(testing_norms)
print("Mean testing norm:", np.mean(testing_norms))

#%% LQR w/ Entropy
gamma = 0.99  # discount factor
lamb = 0.1    # entropy weight (presumably; set but not used in this view)
# Discounted discrete-time algebraic Riccati equation: scale A and B by sqrt(gamma).
soln = dare(A * np.sqrt(gamma), B * np.sqrt(gamma), Q, R)
P = soln[0]
# C = np.array(dlqr(A, B, Q, R)[0]) #! Check this again
# Discounted LQR gain computed from the Riccati solution P.
C = np.linalg.inv(R + gamma * B.T @ P @ B) @ (gamma * B.T @ P @ A)
    psi=observables.monomials(2),
    regressor='ols')

#%% Training error
print("\nTraining error:")
# One norm per training column: distance between the true next state and the
# tensor model's one-step prediction projected back to state space via B.
training_norms = np.zeros([train_X.shape[1]])
for i in range(train_X.shape[1]):
    x = np.vstack(train_X[:, i])
    phi_x = tensor.phi(x)  # lift the current state
    predicted_x_prime = tensor.B.T @ tensor.K_(train_U[:, i]) @ phi_x
    true_x_prime = np.vstack(train_Y[:, i])
    training_norms[i] = utilities.l2_norm(true_x_prime, predicted_x_prime)
# for i in range(num_episodes*num_steps_per_episode):
#     x = np.vstack(X[:, i])
#     phi_x = tensor.phi(x)
#     true_x_prime = np.vstack(Y[:, i])
#     predicted_x_prime = tensor.B.T @ tensor.K_(U[:, i]) @ phi_x
#     training_norms[i] = utilities.l2_norm(true_x_prime, predicted_x_prime)
print("Mean training norm:", np.mean(training_norms))

#%% Testing error
print("\nTesting error:")
    regressor='sindy')

#%% Training error
# Compare one-step lifted-space prediction error of the OLS-fit tensor against
# the SINDy-fit tensor on the same training data.
ols_norms = np.empty((N))
sindy_norms = np.empty((N))
for i in range(N):
    phi_x = np.vstack(ols_tensor.Phi_X[:, i]) # current (lifted) state
    action = np.vstack(ols_tensor.U[:, i])
    true_phi_x_prime = np.vstack(ols_tensor.Phi_Y[:, i])
    ols_predicted_phi_x_prime = ols_tensor.K_(action) @ phi_x
    sindy_predicted_phi_x_prime = sindy_tensor.K_(action) @ phi_x

    # Compute norms
    ols_norms[i] = utilities.l2_norm(true_phi_x_prime, ols_predicted_phi_x_prime)
    sindy_norms[i] = utilities.l2_norm(true_phi_x_prime, sindy_predicted_phi_x_prime)
print("Training error (OLS):", np.mean(ols_norms))
print("Training error (SINDy):", np.mean(sindy_norms))

#%% Testing error normalized by mean norm of different starting states
num_episodes = 100
num_steps_per_episode = 100
ols_norms = np.empty((num_episodes, num_steps_per_episode))
sindy_norms = np.empty((num_episodes, num_steps_per_episode))
# Random initial states uniformly in [-state_range, state_range] per coordinate
# (sign chosen independently per entry).
# NOTE(review): assumes a 2-dimensional state — confirm against env definition.
X_sample = np.random.rand(2, num_episodes) * state_range * np.random.choice(
    np.array([-1, 1]), size=(2, num_episodes))

#%%
#%% Koopman tensor tensor = KoopmanTensor(X, Y, U, phi, psi) #%% Training error norms = np.empty((N)) for i in range(N): phi_x = np.vstack(tensor.Phi_X[:,i]) # current (lifted) state action = np.vstack(U[:,i]) true_x_prime = np.vstack(Y[:,i]) predicted_x_prime = tensor.B.T @ tensor.K_(action) @ phi_x # Compute norms norms[i] = utilities.l2_norm(true_x_prime, predicted_x_prime) print("Average training error:", np.mean(norms)) #%% Testing error num_episodes = 100 num_steps_per_episode = 100 norms = np.zeros((num_episodes)) for episode in range(num_episodes): x = np.array([[np.random.choice(list(State))]]) for step in range(num_steps_per_episode): phi_x = phi(x) # apply phi to state action = np.array([[np.random.choice(list(Action))]]) # sample random action