import timeit

import autograd.numpy as np
from autograd import grad

# Note: all models in this listing construct an Adam optimizer that is not
# shown here; a minimal sketch of a compatible class is given after CVAE.


class CVAE:
    # Conditional variational autoencoder with a Gaussian decoder p(y|x,z),
    # a Gaussian encoder q(z|x,y), and a Gaussian prior p(z|x)
    def __init__(self, X, Y, layers_P, layers_Q, layers_R):
        # Normalize data
        self.Xmean, self.Xstd = X.mean(0), X.std(0)
        self.Ymean, self.Ystd = Y.mean(0), Y.std(0)
        X = (X - self.Xmean) / self.Xstd
        Y = (Y - self.Ymean) / self.Ystd
        self.X = X
        self.Y = Y

        self.layers_P = layers_P
        self.layers_Q = layers_Q
        self.layers_R = layers_R

        self.X_dim = X.shape[1]
        self.Y_dim = Y.shape[1]
        self.Z_dim = layers_Q[-1]

        # Initialize decoder p(y|x,z)
        params = self.initialize_NN(layers_P)
        self.idx_P = np.arange(params.shape[0])

        # Initialize encoder q(z|x,y)
        params = np.concatenate([params, self.initialize_NN(layers_Q)])
        self.idx_Q = np.arange(self.idx_P[-1] + 1, params.shape[0])

        # Initialize prior p(z|x)
        params = np.concatenate([params, self.initialize_NN(layers_R)])
        self.idx_R = np.arange(self.idx_Q[-1] + 1, params.shape[0])
        self.params = params

        # Total number of parameters
        self.num_params = self.params.shape[0]

        # Define optimizer
        self.optimizer = Adam(self.num_params, lr=1e-3)

        # Define gradient function using autograd
        self.grad_elbo = grad(self.ELBO)

    # Initializes a network with Xavier-initialized weights and two linear
    # output heads (one for the mean, one for the log-variance)
    def initialize_NN(self, Q):
        hyp = np.array([])
        layers = len(Q)
        for layer in range(0, layers - 2):
            A = -np.sqrt(6.0 / (Q[layer] + Q[layer + 1])) + 2.0 * np.sqrt(
                6.0 / (Q[layer] + Q[layer + 1])) * np.random.rand(
                    Q[layer], Q[layer + 1])
            b = np.zeros((1, Q[layer + 1]))
            hyp = np.concatenate([hyp, A.ravel(), b.ravel()])
        # Mean head
        A = -np.sqrt(6.0 / (Q[-2] + Q[-1])) + 2.0 * np.sqrt(
            6.0 / (Q[-2] + Q[-1])) * np.random.rand(Q[-2], Q[-1])
        b = np.zeros((1, Q[-1]))
        hyp = np.concatenate([hyp, A.ravel(), b.ravel()])
        # Log-variance head
        A = -np.sqrt(6.0 / (Q[-2] + Q[-1])) + 2.0 * np.sqrt(
            6.0 / (Q[-2] + Q[-1])) * np.random.rand(Q[-2], Q[-1])
        b = np.zeros((1, Q[-1]))
        hyp = np.concatenate([hyp, A.ravel(), b.ravel()])
        return hyp

    # Evaluates the forward pass; returns the predicted mean and variance
    def forward_pass(self, X, Q, params):
        H = X
        idx_3 = 0
        layers = len(Q)
        # Hidden layers with tanh activations
        for layer in range(0, layers - 2):
            idx_1 = idx_3
            idx_2 = idx_1 + Q[layer] * Q[layer + 1]
            idx_3 = idx_2 + Q[layer + 1]
            A = np.reshape(params[idx_1:idx_2], (Q[layer], Q[layer + 1]))
            b = np.reshape(params[idx_2:idx_3], (1, Q[layer + 1]))
            H = np.tanh(np.matmul(H, A) + b)
        # Linear head for the mean
        idx_1 = idx_3
        idx_2 = idx_1 + Q[-2] * Q[-1]
        idx_3 = idx_2 + Q[-1]
        A = np.reshape(params[idx_1:idx_2], (Q[-2], Q[-1]))
        b = np.reshape(params[idx_2:idx_3], (1, Q[-1]))
        mu = np.matmul(H, A) + b
        # Head for the variance (exponentiated so it stays positive)
        idx_1 = idx_3
        idx_2 = idx_1 + Q[-2] * Q[-1]
        idx_3 = idx_2 + Q[-1]
        A = np.reshape(params[idx_1:idx_2], (Q[-2], Q[-1]))
        b = np.reshape(params[idx_2:idx_3], (1, Q[-1]))
        Sigma = np.exp(np.matmul(H, A) + b)
        return mu, Sigma

    # Evaluates the negative evidence lower bound (the minimization objective)
    def ELBO(self, params):
        X = self.X_batch
        Y = self.Y_batch

        # Prior: p(z|x)
        mu_0, Sigma_0 = self.forward_pass(X, self.layers_R,
                                          params[self.idx_R])

        # Encoder: q(z|x,y)
        mu_1, Sigma_1 = self.forward_pass(np.concatenate([X, Y], axis=1),
                                          self.layers_Q, params[self.idx_Q])

        # Reparametrization trick
        epsilon = np.random.randn(X.shape[0], self.Z_dim)
        Z = mu_1 + epsilon * np.sqrt(Sigma_1)

        # Decoder: p(y|x,z)
        mu_2, Sigma_2 = self.forward_pass(np.concatenate([X, Z], axis=1),
                                          self.layers_P, params[self.idx_P])

        # Log-determinants
        log_det_0 = np.sum(np.log(Sigma_0))
        log_det_1 = np.sum(np.log(Sigma_1))
        log_det_2 = np.sum(np.log(Sigma_2))

        # KL[q(z|x,y) || p(z|x)], summed over the mini-batch (hence the
        # constant -Z_dim appears once per data point)
        KL = 0.5 * (np.sum(Sigma_1 / Sigma_0) + np.sum(
            (mu_0 - mu_1)**2 / Sigma_0) - self.Z_dim * X.shape[0] +
            log_det_0 - log_det_1)

        # -log p(y|x,z)
        NLML = 0.5 * (np.sum((Y - mu_2)**2 / Sigma_2) + log_det_2 +
                      np.log(2. * np.pi) * self.Y_dim * X.shape[0])

        return NLML + KL

    # Fetches a mini-batch of data
    def fetch_minibatch(self, X, Y, N_batch):
        N = X.shape[0]
        idx = np.random.choice(N, N_batch, replace=False)
        X_batch = X[idx, :]
        Y_batch = Y[idx, :]
        return X_batch, Y_batch

    # Trains the model by minimizing the negative ELBO
    def train(self, nIter=10000, batch_size=100):
        start_time = timeit.default_timer()
        for it in range(nIter):
            # Fetch minibatch
            self.X_batch, self.Y_batch = self.fetch_minibatch(
                self.X, self.Y, batch_size)

            # Evaluate loss using current parameters
            params = self.params
            elbo = self.ELBO(params)

            # Update parameters
            grad_params = self.grad_elbo(params)
            self.params = self.optimizer.step(params, grad_params)

            # Print
            if it % 10 == 0:
                elapsed = timeit.default_timer() - start_time
                print('It: %d, ELBO: %.3e, Time: %.2f' %
                      (it, elbo, elapsed))
                start_time = timeit.default_timer()

    # Generates samples from the predictive distribution p(y|x); X_star is
    # expected to have N_samples rows (e.g. a query point tiled N_samples
    # times) so that the latent samples Z can be concatenated with it
    def generate_samples(self, X_star, N_samples):
        # Normalize inputs
        X_star = (X_star - self.Xmean) / self.Xstd

        # Encode X_star through the prior p(z|x)
        mu_0, Sigma_0 = self.forward_pass(X_star, self.layers_R,
                                          self.params[self.idx_R])

        # Reparametrization trick
        epsilon = np.random.randn(N_samples, self.Z_dim)
        Z = mu_0 + epsilon * np.sqrt(Sigma_0)

        # Decode
        mean_star, var_star = self.forward_pass(
            np.concatenate([X_star, Z], axis=1), self.layers_P,
            self.params[self.idx_P])

        # De-normalize
        mean_star = mean_star * self.Ystd + self.Ymean
        var_star = var_star * self.Ystd**2
        return mean_star, var_star
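# The classes in this listing all construct Adam(num_params, lr=...) and call
# optimizer.step(params, grads), but the Adam class itself is not part of the
# original listing. The sketch below is a minimal, assumed-compatible
# implementation of the standard Adam update rule (Kingma & Ba, 2015); the
# hyperparameter defaults beta1=0.9, beta2=0.999, eps=1e-8 are assumptions,
# not taken from the original code.


class Adam:
    def __init__(self, num_params, lr=1e-3, beta1=0.9, beta2=0.999,
                 eps=1e-8):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.eps = eps
        self.m = np.zeros(num_params)  # First-moment estimate
        self.v = np.zeros(num_params)  # Second-moment estimate
        self.t = 0                     # Iteration counter

    # Returns the updated parameter vector given the current gradients
    def step(self, params, grads):
        self.t += 1
        # Update biased moment estimates
        self.m = self.beta1 * self.m + (1.0 - self.beta1) * grads
        self.v = self.beta2 * self.v + (1.0 - self.beta2) * grads**2
        # Bias correction
        m_hat = self.m / (1.0 - self.beta1**self.t)
        v_hat = self.v / (1.0 - self.beta2**self.t)
        # Parameter update
        return params - self.lr * m_hat / (np.sqrt(v_hat) + self.eps)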
class LinearRegression:
    """
    Linear regression model: y = (w.T)*x + epsilon
    p(y|x,theta) ~ N(y|(w.T)*x, sigma^2), theta = (w, sigma^2)
    """

    # Initialize model class
    def __init__(self, X, Y):
        # Normalize data
        self.Xmean, self.Xstd = X.mean(0), X.std(0)
        self.Ymean, self.Ystd = Y.mean(0), Y.std(0)
        X = (X - self.Xmean) / self.Xstd
        Y = (Y - self.Ymean) / self.Ystd
        self.X = X
        self.Y = Y
        self.n = X.shape[0]

        # Randomly initialize weights; the noise variance is stored and
        # optimized in log-space so it remains positive after exponentiation
        w = np.random.randn(X.shape[1], Y.shape[1])
        log_sigma_sq = np.log(np.array([1e-3]))

        # Concatenate all parameters in a single vector
        self.theta = np.concatenate([w.flatten(), log_sigma_sq.flatten()])

        # Count total number of parameters
        self.num_params = self.theta.shape[0]

        # Define optimizer
        self.optimizer = Adam(self.num_params, lr=1e-3)

        # Define loss gradient function using autograd
        self.grad_loss = grad(self.loss)

    # Evaluates the forward prediction of the model
    def forward_pass(self, X, w):
        y = np.matmul(X, w)
        return y

    # Evaluates the negative log-likelihood loss, i.e. -log p(y|x,theta)
    def loss(self, theta):
        X = self.X_batch
        Y = self.Y_batch
        N = X.shape[0]

        # Fetch individual parameters from the theta vector and reshape
        w = np.reshape(theta[:-1], (self.X.shape[1], self.Y.shape[1]))
        sigma_sq = np.exp(theta[-1])

        # Evaluate the model's prediction
        Y_pred = self.forward_pass(X, w)

        # Compute the loss; N is the mini-batch size (not the full dataset
        # size), and the residuals are squared before summation
        NLML = 0.5 * N * np.log(2.0 * np.pi * sigma_sq) + \
               0.5 * np.sum((Y - Y_pred)**2) / sigma_sq
        return NLML

    # Fetches a mini-batch of data
    def fetch_minibatch(self, X, Y, N_batch):
        N = X.shape[0]
        idx = np.random.choice(N, N_batch, replace=False)
        X_batch = X[idx, :]
        Y_batch = Y[idx, :]
        return X_batch, Y_batch

    # Trains the model by minimizing the negative log-likelihood loss
    def train(self, nIter=10000, batch_size=100):
        start_time = timeit.default_timer()
        for it in range(nIter):
            # Fetch minibatch
            self.X_batch, self.Y_batch = self.fetch_minibatch(
                self.X, self.Y, batch_size)

            # Evaluate loss using current parameters
            theta = self.theta
            loss = self.loss(theta)

            # Update parameters
            grad_theta = self.grad_loss(theta)
            self.theta = self.optimizer.step(theta, grad_theta)

            # Print
            if it % 10 == 0:
                elapsed = timeit.default_timer() - start_time
                print('It: %d, Loss: %.3e, Time: %.2f' %
                      (it, loss, elapsed))
                start_time = timeit.default_timer()

    # Evaluates predictions at test points
    def predict(self, X_star):
        # Normalize inputs
        X_star = (X_star - self.Xmean) / self.Xstd
        w = np.reshape(self.theta[:-1], (self.X.shape[1], self.Y.shape[1]))
        y_star = self.forward_pass(X_star, w)
        # De-normalize outputs
        y_star = y_star * self.Ystd + self.Ymean
        return y_star
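# Illustrative usage sketch (not part of the original listing): the function
# name demo_linear_regression and the synthetic data are assumptions made
# here to show the intended workflow of the class above. Note that the bias
# term of y = 2x + 1 is absorbed by the model's data normalization.
def demo_linear_regression():
    N = 500
    X = np.random.randn(N, 1)
    # Noisy samples from a known linear model: y = 2x + 1 + noise
    Y = 2.0 * X + 1.0 + 0.1 * np.random.randn(N, 1)
    model = LinearRegression(X, Y)
    model.train(nIter=1000, batch_size=100)
    # Predictions at the training inputs (any test inputs would do)
    Y_pred = model.predict(X)
    return Y_pred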
class NeuralNetwork:
    # Initialize the class
    def __init__(self, X, Y, layers):
        # Normalize data
        self.Xmean, self.Xstd = X.mean(0), X.std(0)
        self.Ymean, self.Ystd = Y.mean(0), Y.std(0)
        X = (X - self.Xmean) / self.Xstd
        Y = (Y - self.Ymean) / self.Ystd
        self.X = X
        self.Y = Y
        self.layers = layers

        # Define and initialize neural network
        self.params = self.initialize_NN(self.layers)

        # Total number of parameters
        self.num_params = self.params.shape[0]

        # Define optimizer
        self.optimizer = Adam(self.num_params, lr=1e-3)

        # Define gradient function using autograd
        self.grad_loss = grad(self.loss)

    # Initializes the network weights and biases using Xavier initialization
    def initialize_NN(self, Q):
        params = np.array([])
        num_layers = len(Q)
        for layer in range(0, num_layers - 1):
            weights = -np.sqrt(6.0 / (Q[layer] + Q[layer + 1])) + \
                2.0 * np.sqrt(6.0 / (Q[layer] + Q[layer + 1])) * \
                np.random.rand(Q[layer], Q[layer + 1])
            biases = np.zeros((1, Q[layer + 1]))
            params = np.concatenate([params, weights.ravel(),
                                     biases.ravel()])
        return params

    # Evaluates the forward pass
    def forward_pass(self, X, Q, params):
        H = X
        idx_3 = 0
        num_layers = len(Q)
        # All layers up to last
        for layer in range(0, num_layers - 2):
            idx_1 = idx_3
            idx_2 = idx_1 + Q[layer] * Q[layer + 1]
            idx_3 = idx_2 + Q[layer + 1]
            weights = np.reshape(params[idx_1:idx_2],
                                 (Q[layer], Q[layer + 1]))
            biases = np.reshape(params[idx_2:idx_3], (1, Q[layer + 1]))
            H = np.tanh(np.matmul(H, weights) + biases)
        # Last linear layer
        idx_1 = idx_3
        idx_2 = idx_1 + Q[-2] * Q[-1]
        idx_3 = idx_2 + Q[-1]
        weights = np.reshape(params[idx_1:idx_2], (Q[-2], Q[-1]))
        biases = np.reshape(params[idx_2:idx_3], (1, Q[-1]))
        mu = np.matmul(H, weights) + biases
        return mu

    # Evaluates the mean square error loss
    def loss(self, params):
        X = self.X_batch
        Y = self.Y_batch
        mu = self.forward_pass(X, self.layers, params)
        return np.mean((Y - mu)**2)

    # Fetches a mini-batch of data
    def fetch_minibatch(self, X, Y, N_batch):
        N = X.shape[0]
        idx = np.random.choice(N, N_batch, replace=False)
        X_batch = X[idx, :]
        Y_batch = Y[idx, :]
        return X_batch, Y_batch

    # Trains the model by minimizing the MSE loss
    def train(self, nIter=10000, batch_size=100):
        start_time = timeit.default_timer()
        for it in range(nIter):
            # Fetch minibatch
            self.X_batch, self.Y_batch = self.fetch_minibatch(
                self.X, self.Y, batch_size)

            # Evaluate loss using current parameters
            params = self.params
            loss = self.loss(params)

            # Update parameters
            grad_params = self.grad_loss(params)
            self.params = self.optimizer.step(params, grad_params)

            # Print
            if it % 10 == 0:
                elapsed = timeit.default_timer() - start_time
                print('It: %d, Loss: %.3e, Time: %.2f' %
                      (it, loss, elapsed))
                start_time = timeit.default_timer()

    # Evaluates predictions at test points
    def predict(self, X_star):
        # Normalize inputs
        X_star = (X_star - self.Xmean) / self.Xstd
        y_star = self.forward_pass(X_star, self.layers, self.params)
        # De-normalize outputs
        y_star = y_star * self.Ystd + self.Ymean
        return y_star
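# Illustrative usage sketch (not part of the original listing): the function
# name demo_neural_network, the layer sizes, and the synthetic sine data are
# assumptions made here to show the intended workflow of the class above.
def demo_neural_network():
    N = 500
    X = np.linspace(-1.0, 1.0, N)[:, None]
    # Noisy samples of a sine wave
    Y = np.sin(4.0 * np.pi * X) + 0.05 * np.random.randn(N, 1)
    # One input, two tanh hidden layers of width 50, one linear output
    model = NeuralNetwork(X, Y, layers=[1, 50, 50, 1])
    model.train(nIter=2000, batch_size=100)
    Y_pred = model.predict(X)
    return Y_pred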
class RNN:
    # Initialize the class
    def __init__(self, X, Y, hidden_dim):
        # X has the form lags x data x dim
        # Y has the form data x dim
        self.X = X
        self.Y = Y
        self.X_dim = X.shape[-1]
        self.Y_dim = Y.shape[-1]
        self.hidden_dim = hidden_dim
        self.lags = X.shape[0]

        # Define and initialize neural network
        self.params = self.initialize_RNN()

        # Total number of parameters
        self.num_params = self.params.shape[0]

        # Define optimizer
        self.optimizer = Adam(self.num_params, lr=1e-3)

        # Define gradient function using autograd
        self.grad_loss = grad(self.loss)

    # Initializes the input and output weights using Xavier initialization;
    # the recurrent weights start as the identity matrix
    def initialize_RNN(self):
        hyp = np.array([])
        Q = self.hidden_dim

        # Input-to-hidden weights U, hidden bias b, recurrent weights W
        U = -np.sqrt(6.0 / (self.X_dim + Q)) + 2.0 * np.sqrt(
            6.0 / (self.X_dim + Q)) * np.random.rand(self.X_dim, Q)
        b = np.zeros((1, Q))
        W = np.eye(Q)
        hyp = np.concatenate([hyp, U.ravel(), b.ravel(), W.ravel()])

        # Hidden-to-output weights V and output bias c
        V = -np.sqrt(6.0 / (Q + self.Y_dim)) + 2.0 * np.sqrt(
            6.0 / (Q + self.Y_dim)) * np.random.rand(Q, self.Y_dim)
        c = np.zeros((1, self.Y_dim))
        hyp = np.concatenate([hyp, V.ravel(), c.ravel()])
        return hyp

    # Evaluates the forward pass by unrolling the recurrence over the lags
    def forward_pass(self, X, hyp):
        Q = self.hidden_dim
        H = np.zeros((X.shape[1], Q))

        idx_1 = 0
        idx_2 = idx_1 + self.X_dim * Q
        idx_3 = idx_2 + Q
        idx_4 = idx_3 + Q * Q
        U = np.reshape(hyp[idx_1:idx_2], (self.X_dim, Q))
        b = np.reshape(hyp[idx_2:idx_3], (1, Q))
        W = np.reshape(hyp[idx_3:idx_4], (Q, Q))
        for i in range(0, self.lags):
            H = np.tanh(np.matmul(H, W) + np.matmul(X[i, :, :], U) + b)

        idx_1 = idx_4
        idx_2 = idx_1 + Q * self.Y_dim
        idx_3 = idx_2 + self.Y_dim
        V = np.reshape(hyp[idx_1:idx_2], (Q, self.Y_dim))
        c = np.reshape(hyp[idx_2:idx_3], (1, self.Y_dim))
        Y = np.matmul(H, V) + c
        return Y

    # Evaluates the mean square error loss
    def loss(self, params):
        X = self.X_batch
        Y = self.Y_batch
        mu = self.forward_pass(X, params)
        return np.mean((Y - mu)**2)

    # Fetches a mini-batch of data (indexing the second, "data" axis of X)
    def fetch_minibatch_rnn(self, X, Y, N_batch):
        N = X.shape[1]
        idx = np.random.choice(N, N_batch, replace=False)
        X_batch = X[:, idx, :]
        Y_batch = Y[idx, :]
        return X_batch, Y_batch

    # Trains the model by minimizing the MSE loss
    def train(self, nIter=10000, batch_size=100):
        start_time = timeit.default_timer()
        for it in range(nIter):
            # Fetch minibatch
            self.X_batch, self.Y_batch = self.fetch_minibatch_rnn(
                self.X, self.Y, batch_size)

            # Evaluate loss using current parameters
            params = self.params
            loss = self.loss(params)

            # Update parameters
            grad_params = self.grad_loss(params)
            self.params = self.optimizer.step(params, grad_params)

            # Print
            if it % 10 == 0:
                elapsed = timeit.default_timer() - start_time
                print('It: %d, Loss: %.3e, Time: %.2f' %
                      (it, loss, elapsed))
                start_time = timeit.default_timer()

    # Evaluates predictions at test points (note that, unlike the other
    # models, this class does not normalize its inputs or outputs)
    def predict(self, X_star):
        y_star = self.forward_pass(X_star, self.params)
        return y_star
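# Illustrative usage sketch (not part of the original listing): the function
# name demo_rnn and the synthetic series are assumptions made here, mainly to
# show how to arrange a scalar time series into the lags x data x dim layout
# the RNN class expects.
def demo_rnn():
    T, lags = 600, 8
    t = np.linspace(0.0, 12.0 * np.pi, T)
    s = np.sin(t)[:, None]          # Scalar time series of shape (T, 1)

    # Each training pair is a window of `lags` past values and the next value
    N = T - lags
    X = np.zeros((lags, N, 1))
    Y = np.zeros((N, 1))
    for i in range(N):
        X[:, i, :] = s[i:i + lags, :]
        Y[i, :] = s[i + lags, :]

    model = RNN(X, Y, hidden_dim=20)
    model.train(nIter=2000, batch_size=100)
    Y_pred = model.predict(X)       # One-step-ahead predictions
    return Y_pred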