def df(self, x, mu, sigma): r""" Density function for the LogNormal Distribution: .. math:: f(x) = \frac{1}{x \sigma \sqrt{2\pi}}e^{-\frac{1}{2}\left ( \frac{\ln x - \mu}{\sigma} \right )^{2}} Parameters ---------- x : numpy array or scalar The values at which the function will be calculated mu : numpy array or scalar The location parameter for the LogNormal distribution sigma : numpy array or scalar The scale parameter for the LogNormal distribution Returns ------- df : scalar or numpy array The value(s) of the density function at x. Examples -------- >>> import numpy as np >>> from surpyval import LogNormal >>> x = np.array([1, 2, 3, 4, 5]) >>> LogNormal.df(x, 3, 4) array([0.07528436, 0.04222769, 0.02969364, 0.02298522, 0.01877747]) """ return 1. / x * norm.pdf(np.log(x), mu, sigma)
def hf(self, x, mu, sigma): r""" Instantaneous hazard rate for the Normal Distribution: .. math:: h(x) = \frac{\frac{1}{\sigma \sqrt{2\pi}}e^{-\frac{1}{2}\left ( \frac{x - \mu}{\sigma} \right )^{2}}}{1 - \Phi \left( \frac{x - \mu}{\sigma} \right )} Parameters ---------- x : numpy array or scalar The values at which the function will be calculated mu : numpy array or scalar The location parameter for the Normal distribution sigma : numpy array or scalar The scale parameter for the Normal distribution Returns ------- hf : scalar or numpy array The value(s) of the instantaneous hazard rate function at x. Examples -------- >>> import numpy as np >>> from surpyval import Normal >>> x = np.array([1, 2, 3, 4, 5]) >>> Normal.hf(x, 3, 4) array([0.12729011, 0.16145984, 0.19947114, 0.24088849, 0.28526944]) """ return norm.pdf(x, mu, sigma) / self.sf(x, mu, sigma)
def df(self, x, mu, sigma): r""" Density function for the Normal Distribution: .. math:: f(x) = \frac{1}{\sigma \sqrt{2\pi}}e^{-\frac{1}{2}\left ( \frac{x - \mu}{\sigma} \right )^{2}} Parameters ---------- x : numpy array or scalar The values at which the function will be calculated mu : numpy array or scalar The location parameter for the Normal distribution sigma : numpy array or scalar The scale parameter for the Normal distribution Returns ------- df : scalar or numpy array The value(s) of the density function at x. Examples -------- >>> import numpy as np >>> from surpyval import Normal >>> x = np.array([1, 2, 3, 4, 5]) >>> Normal.df(x, 3, 4) array([0.08801633, 0.09666703, 0.09973557, 0.09666703, 0.08801633]) """ return norm.pdf(x, mu, sigma)
def callback(params, t, g):
    print("Iteration {0:} "
          "lower bound {1:.4f}; "
          "mean {2:.4f} [{3:.4f}]; "
          "variance {4:.4f} [{5:.4f}]".format(
              t, -elbo(params, t),
              params[0], true_mean,
              np.exp(params[1]), true_std))

    # Plot the variational approximation q(x) over the target density p(x).
    ax1.clear()
    ax1.set_xlim([-10, 10])
    ax1.set_ylim(bottom=0)

    mu, log_std = params[0], params[1]
    xs = np.linspace(-10, 10, 800)
    ys = norm.pdf(xs, mu, np.exp(log_std))
    ax1.plot(xs, ys, color='#f3c273', linewidth=2.0)

    ys = np.exp(logpx(xs))
    ax1.fill_between(xs, 0, ys, color='#aaaaaa')

    gray_patch = Patch(color='#aaaaaa', label='$p(x)$')
    yellow_patch = Patch(color='#f3c273', label='$q(x)$')
    ax1.legend(handles=[gray_patch, yellow_patch])

    # Plot the negative-ELBO surface with an arrow in the negative-gradient direction.
    ax2.clear()
    ax2.set_xlabel('Mean')
    ax2.set_ylabel('Variance')
    ax2.set_zlabel('Negative ELBO')
    ax2.set_zlim([-100, 150])
    ax2.plot_surface(X, Y, Z, cmap=cm.coolwarm, shade=True,
                     cstride=1, rstride=1, zorder=1)
    ax2.contour(X, Y, Z, zdir='z', offset=-100, cmap=cm.coolwarm,
                zorder=0, levels=np.linspace(0, 30, 30))

    a = Arrow(params[0], params[1], -g[0], -g[1], width=0.5, zorder=2)
    ax2.add_patch(a)
    art3d.pathpatch_2d_to_3d(a, z=-100, zdir="z")

    # ax2.plot([params[0], params[0]],
    #          [params[1], params[1]],
    #          [-50, elbo(params, 0)], '--', linewidth=2.0, zorder=5)
    # ax2.scatter(params[0], params[1], elbo(params, 0), marker='o', s=100)

    plt.draw()
    plt.pause(1.0 / 30.0)
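# A minimal sketch of how a callback with this (params, iteration, gradient)
# signature is typically hooked into autograd's Adam optimizer. The step size,
# iteration count, and initial parameters below are illustrative assumptions,
# not taken from the original script.
from autograd import grad
from autograd.misc.optimizers import adam

init_params = np.array([-2.0, 1.0])  # [mean, log_std]
opt_params = adam(grad(elbo), init_params, step_size=0.1,
                  num_iters=300, callback=callback)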
def callback(hyper_weights, opt_iteration, g):
    """Do whatever work is desired on each optimization iteration.

    Draws graphs, prints information, and stores information.

    :param hyper_weights: The weights ([[float]]) of the hypernetwork.
    :param opt_iteration: The current iteration of optimization.
    :param g: The gradient ([[float]]) of the optimizer.
    :return: None.
    """
    global log_likelihoods, valid_loss, test_loss, grad_norms_hyper, grad_norms_hypernet, global_opt_iteration
    global hyper_cur

    log_likelihood = hyper_train_objective(hyper_weights, hyper_cur)
    log_likelihoods[global_opt_iteration] = log_likelihood  # Store the training loss.
    weights_cur = hypernet(hyper_weights, hyper_cur)
    train_performance[global_opt_iteration] = log_likelihood - hyper_loss(weights_cur, hyper_cur)
    valid_loss[global_opt_iteration] = hyper_valid_objective(hyper_weights, hyper_cur)
    test_loss[global_opt_iteration] = hyper_test_objective(hyper_weights, hyper_cur)
    grad_norm = np.sum([np.sum([np.sum(np.abs(weight_or_bias)) for weight_or_bias in layer]) for layer in g])
    grad_norms_hypernet[global_opt_iteration] = grad_norm
    grad_norms_hyper[global_opt_iteration] = grad_norms_hyper[global_opt_iteration - 1]
    global_opt_iteration += 1
    print("Iteration {} Loss {} Grad L1 Norm {}".format(opt_iteration, log_likelihood, grad_norm))

    if global_opt_iteration % graph_mod == 0:  # Only draw graphs on every iteration that is a multiple of graph_mod.
        [ax.cla() for ax in axs]  # Clear all of the axes.
        axs[0].set_xlabel(r'Hyperparameter $\lambda$')
        axs[0].set_ylabel(r'Loss $\mathcal{L}$')

        for cur, hyper in enumerate(learned_hyper_range):
            hyper_train_loss[cur] = hyper_train_objective(hyper_weights, hyper)
            weights = hypernet(hyper_weights, hyper)
            hyper_train_performance[cur] = hyper_train_loss[cur] - hyper_loss(weights, hyper)
            hyper_valid_loss[cur] = hyper_valid_objective(hyper_weights, hyper)
            hyper_test_loss[cur] = hyper_test_objective(hyper_weights, hyper)

        axs[0].plot(real_hyper_range, real_train_loss, 'bx', ms=28, label='Train loss of optimized weights')
        axs[0].plot(learned_hyper_range, hyper_train_loss, 'b-', label='Train loss of hypernetwork weights')
        axs[0].set_ylim([-1.5, 3.8])
        axs[0].plot(real_hyper_range, real_valid_loss, 'rx', ms=28, label='Valid. loss of optimized weights')
        axs[0].plot(learned_hyper_range, hyper_valid_loss, 'r-', label='Valid. loss of hypernetwork weights')

        min_hyper_found = 1.836  # Known minimum from doing a search with 1000 points over this range.
        axs[0].axvline(x=min_hyper_found, c='k', linestyle='dashed', label=r'Optimal hyperparameter $\lambda$')

        pdf_range = np.linspace(hyper_cur - 0.5, hyper_cur + 0.5, 100)
        axs[0].plot(pdf_range, norm.pdf(pdf_range, loc=hyper_cur, scale=0.06) / 4.0 + axs[0].get_ylim()[0],
                    c='g', label=r'$p (\lambda | \hat{\lambda})$')

        [ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.45), borderaxespad=0.0, fancybox=True,
                   framealpha=0.0, fontsize=28) for ax in axs]  # Create a legend for all the axes.
        setup_ax_and_save(axs, fig, 'hypernets_local_small')
def expected_new_max(mean, std, max_so_far):
    return max_so_far - \
           (mean - max_so_far) * norm.cdf(mean, max_so_far, std) + \
           std * norm.pdf(mean, max_so_far, std)
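# A hypothetical usage sketch: score a grid of candidate points given
# posterior-predictive means and standard deviations from some surrogate
# model, then query the highest-scoring candidate next. The arrays and the
# scipy import below are illustrative placeholders, not part of the original
# code (the surrounding script may already provide its own `norm`).
import numpy as np
from scipy.stats import norm

cand_means = np.array([0.10, 0.40, 0.25])
cand_stds = np.array([0.30, 0.20, 0.50])
best_so_far = 0.35

scores = expected_new_max(cand_means, cand_stds, best_so_far)
next_idx = np.argmax(scores)  # candidate to evaluate next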
def fit_nn_reg(X, y, hidden_layer_sizes, batch_size, epochs, X_test, y_test,
               no_samples=20, mean_y_train=0.0, std_y_train=1.0, nonln='relu',
               weight_prior_std=1.0, noise_var=0.1, plot_toy=False, init_w=None):

    layer_sizes = np.array([X.shape[1]] + hidden_layer_sizes + [1])

    if nonln == 'tanh':
        nonlinearity = np.tanh
    elif nonln == 'relu':
        nonlinearity = lambda x: np.maximum(x, 0.0)
    elif nonln == 'rbf':
        nonlinearity = lambda x: norm.pdf(x, 0, 1)
    elif nonln == 'sin':
        nonlinearity = lambda x: np.sin(x)
    elif nonln == 'sigmoid':
        nonlinearity = lambda x: 1 / (1 + np.exp(-x))

    num_weights, elbo, predictions, get_error_and_ll, unpack_layers, prediction_test, unpack_params \
        = make_nn_funs(layer_sizes, nonlinearity=nonlinearity,
                       weight_prior_std=weight_prior_std, noise_var=noise_var)
    elbo_grad = grad(elbo)

    prior_var = 1.0
    N_train = X.shape[0]

    print(" Epoch | train RMSE | test RMSE")

    if plot_toy:
        # Set up figure.
        fig = plt.figure(figsize=(12, 8), facecolor='white')
        ax = fig.add_subplot(111, frameon=True)
        plt.show(block=False)

    def print_perf(epoch, w):
        num_samples_test = 500
        pred_mean, pred_var, rmse_train, ll = get_error_and_ll(
            w, X, y, location=0.0, scale=1.0, num_samples=num_samples_test)
        pred_mean, pred_var, rmse_test, ll = get_error_and_ll(
            w, X_test, y_test, location=0.0, scale=1.0, num_samples=num_samples_test)
        print("{0:15}|{1:15}|{2:15}|".format(epoch, rmse_train, rmse_test))

        if plot_toy:
            # # Plot data and functions.
            # plt.cla()
            # ax.plot(X.ravel(), y.ravel(), 'bx')
            # plot_inputs = np.reshape(np.linspace(-7, 7, num=300), (300, 1))
            # outputs_mean, outputs_var = prediction_test(w, plot_inputs, num_samples_test)
            # ax.plot(plot_inputs, outputs_mean, 'b-')
            # ax.plot(plot_inputs, outputs_mean + 2 * np.sqrt(outputs_var), 'b-')
            # ax.plot(plot_inputs, outputs_mean - 2 * np.sqrt(outputs_var), 'b-')
            # ax.set_ylim([-1, 1])
            # plt.draw()
            # plt.pause(1.0 / 60.0)

            # Sample functions from posterior.
            rs = npr.RandomState(0)
            mean, std = unpack_params(w)
            # rs = npr.RandomState(0)
            sample_weights = rs.randn(10, num_weights) * std + mean
            plot_inputs = np.linspace(-7, 7, num=400)
            outputs = predictions(sample_weights, np.expand_dims(plot_inputs, 1))

            # Plot data and functions.
            plt.cla()
            ax.plot(X.ravel(), y.ravel(), 'bx')
            ax.plot(plot_inputs, outputs[:, :, 0].T)
            ax.set_ylim([-2, 3])
            plt.draw()
            plt.pause(1.0 / 60.0)

    # Train with adam
    batch_idxs = make_batches(X.shape[0], batch_size)

    # Initialize variational parameters (means and log standard deviations).
    rs = npr.RandomState(0)
    if init_w is None:
        init_mean = 0.1 * rs.randn(num_weights)
    else:
        init_mean = init_w
    init_log_std = -2 * np.ones(num_weights)
    init_var_params = np.concatenate([init_mean, init_log_std])
    w = init_var_params

    m1 = 0
    m2 = 0
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-8
    alpha = 5e-3
    t = 0
    elbo_vec = []

    for epoch in range(epochs):
        permutation = np.random.choice(range(X.shape[0]), X.shape[0], replace=False)
        # print_perf(epoch, w)
        for idxs in batch_idxs:
            t += 1
            eb = elbo(w, weight_prior_std**2, X[permutation[idxs]], y[permutation[idxs]],
                      N_train, num_samples=no_samples)
            elbo_vec.append(eb)
            print(eb)
            grad_w = elbo_grad(w, weight_prior_std**2, X[permutation[idxs]], y[permutation[idxs]],
                               N_train, num_samples=no_samples)
            # Adam moment estimates with bias correction.
            m1 = beta1 * m1 + (1 - beta1) * grad_w
            m2 = beta2 * m2 + (1 - beta2) * grad_w**2
            m1_hat = m1 / (1 - beta1**t)
            m2_hat = m2 / (1 - beta2**t)
            w -= alpha * m1_hat / (np.sqrt(m2_hat) + epsilon)
        t += 1

    print_perf(epochs - 1, w)

    return w, get_error_and_ll, prediction_test, unpack_params, elbo_vec
def build_toy_dataset(n_data=40, noise_std=0.1):
    D = 1
    rs = npr.RandomState(0)
    # Use integer division so np.linspace receives an integer sample count.
    inputs = np.concatenate([np.linspace(0, 2, num=n_data // 2),
                             np.linspace(6, 8, num=n_data // 2)])
    targets = np.cos(inputs) + rs.randn(n_data) * noise_std
    inputs = (inputs - 5.0) / 4.0
    inputs = inputs.reshape((len(inputs), D))
    targets = targets.reshape((len(targets), D))
    return inputs, targets


if __name__ == '__main__':

    # Specify inference problem by its unnormalized log-posterior.
    rbf = lambda x: norm.pdf(x, 0, 1)
    num_weights, predictions, logprob = \
        make_nn_funs(layer_sizes=[1, 20, 1], L2_reg=0.01,
                     noise_variance=0.01, nonlinearity=rbf)

    inputs, targets = build_toy_dataset()
    log_posterior = lambda weights, t: logprob(weights, inputs, targets)

    # Build variational objective.
    objective, gradient, unpack_params = \
        black_box_variational_inference(log_posterior, num_weights,
                                        num_samples=20)

    # Set up figure.
    fig = plt.figure(figsize=(8, 8), facecolor='white')
    ax = fig.add_subplot(111, frameon=False)
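    # A minimal sketch of optimizing the variational objective with autograd's
    # Adam optimizer. The step size, iteration count, and the initialization
    # below are illustrative assumptions, not taken from the original script.
    from autograd.misc.optimizers import adam

    init_mean = np.zeros(num_weights)            # variational means
    init_log_std = -5 * np.ones(num_weights)     # variational log-stds
    init_var_params = np.concatenate([init_mean, init_log_std])

    variational_params = adam(gradient, init_var_params,
                              step_size=0.1, num_iters=1000)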
# Network parameters
layer_sizes = [784, 200, 100, 10]
L2_reg = .01
D = 784

# Training parameters
param_scale = 0.1
learning_rate = 1e-2
momentum = 0.9
batch_size = 100
num_epochs = 50

# Load and process MNIST data (borrowing from Kayak)
N_data, train_images, train_labels, test_images, test_labels = load_mnist()

rbf = lambda x: norm.pdf(x, 0, 1)
relu = lambda x: np.maximum(x, 0.0)

# Make neural net functions
num_weights, predictions, logprob, frac_err = \
    make_nn_funs(layer_sizes, L2_reg, noise_variance=0.01, nonlinearity=relu)
# loss_grad = grad(log_prob)

# Initialize weights
rs = npr.RandomState(0)
num_samples = 20
init_mean = rs.randn(num_weights)
def fit_nn_reg(X, y, hidden_layer_sizes, batch_size, epochs, X_test, y_test,
               mean_y_train=0.0, std_y_train=1.0, nonln='relu',
               weight_prior_std=1.0, noise_var=0.1, plot_toy=False):

    layer_sizes = np.array([X.shape[1]] + hidden_layer_sizes + [1])

    if nonln == 'tanh':
        nonlinearity = np.tanh
    elif nonln == 'relu':
        nonlinearity = lambda x: np.maximum(x, 0.0)
    elif nonln == 'rbf':
        nonlinearity = lambda x: norm.pdf(x, 0, 1)
    elif nonln == 'sin':
        nonlinearity = lambda x: np.sin(x)
    elif nonln == 'sigmoid':
        nonlinearity = lambda x: 1 / (1 + np.exp(-x))

    num_weights, predictions, logprob, get_error \
        = make_nn_funs(layer_sizes, nonlinearity=nonlinearity,
                       weight_prior_std=weight_prior_std, noise_var=noise_var)
    logprob_grad = grad(logprob)

    Ntrain = X.shape[0]

    print(" Epoch | train RMSE | test RMSE")

    if plot_toy:
        # Set up figure.
        fig = plt.figure(figsize=(12, 8), facecolor='white')
        ax = fig.add_subplot(111, frameon=True)
        plt.show(block=False)

    def print_perf(epoch, w):
        rmse_train = get_error(w, X, y, location=0.0, scale=1.0)
        rmse_test = get_error(w, X_test, y_test, location=0.0, scale=1.0)
        print("{0:15}|{1:15}|{2:15}|".format(epoch, rmse_train, rmse_test))

        if plot_toy:
            # Plot data and functions.
            plt.cla()
            ax.plot(X.ravel(), y.ravel(), 'bx')
            plot_inputs = np.reshape(np.linspace(-7, 7, num=300), (300, 1))
            outputs = predictions(w, plot_inputs)
            ax.plot(plot_inputs, outputs)
            ax.set_ylim([-1, 1])
            plt.draw()
            plt.pause(1.0 / 60.0)

    # Train with adam
    batch_idxs = make_batches(X.shape[0], batch_size)

    # Initialize parameters
    rs = npr.RandomState(0)
    init_weights = 0.1 * rs.randn(num_weights)
    w = init_weights
    N_test = X_test.shape[0]

    m1 = 0
    m2 = 0
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-8
    alpha = 1e-2
    t = 0
    log_prob_vec = []

    for epoch in range(epochs):
        permutation = np.random.choice(range(X.shape[0]), X.shape[0], replace=False)
        print_perf(epoch, w)
        for idxs in batch_idxs:
            t += 1
            lp = logprob(w, X[permutation[idxs]], y[permutation[idxs]], X.shape[0])
            log_prob_vec.append(lp)
            grad_w = logprob_grad(w, X[permutation[idxs]], y[permutation[idxs]], X.shape[0])
            # Adam moment estimates with bias correction, then a step that
            # increases the log-probability.
            m1 = beta1 * m1 + (1 - beta1) * grad_w
            m2 = beta2 * m2 + (1 - beta2) * grad_w**2
            m1_hat = m1 / (1 - beta1**t)
            m2_hat = m2 / (1 - beta2**t)
            w += alpha * m1_hat / (np.sqrt(m2_hat) + epsilon)
        t += 1

    return w, np.array(log_prob_vec)
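# Hypothetical usage sketch: the toy data, architecture, and settings below are
# illustrative only (fit_nn_reg also needs the make_nn_funs and make_batches
# helpers it references above).
import autograd.numpy as np
import autograd.numpy.random as npr

rs = npr.RandomState(0)
X_train = np.linspace(-3, 3, 100).reshape(-1, 1)
y_train = np.sin(X_train) + 0.1 * rs.randn(100, 1)
X_test = np.linspace(-3, 3, 20).reshape(-1, 1)
y_test = np.sin(X_test)

w_map, log_prob_trace = fit_nn_reg(X_train, y_train, hidden_layer_sizes=[50],
                                   batch_size=20, epochs=10,
                                   X_test=X_test, y_test=y_test, nonln='rbf')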
import autograd.numpy as np
import autograd.numpy.random as npr
import matplotlib.pyplot as plt

from autograd.misc.optimizers import adam
from autograd import grad, elementwise_grad
from autograd.scipy.stats import multivariate_normal as mvn
import autograd.scipy.stats.norm as norm

rs = npr.RandomState(0)
egrad = elementwise_grad


def objective(p):
    return norm.cdf(p)


# The derivative of the standard normal CDF is the pdf, so the elementwise
# gradient should match norm.pdf(x) and the scalar difference should be ~0.
x = np.linspace(0, 7, 5)
g = grad(objective)(1.0)
eg = egrad(objective)

print(eg(x), norm.pdf(x))
print(g - norm.pdf(1.0))
def diag_gaussian_density(x, mu, log_std):
    # Joint density of a diagonal Gaussian: the product of the per-dimension
    # pdfs over the last axis (summing them would not give a density).
    return np.prod(norm.pdf(x, mu, np.exp(log_std)), axis=-1)
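# A minimal usage sketch (values are illustrative). It assumes the same np/norm
# modules used by diag_gaussian_density, here autograd's wrappers. The last
# axis indexes dimensions, so one value comes back per sample.
import autograd.numpy as np
import autograd.numpy.random as npr
import autograd.scipy.stats.norm as norm

rs = npr.RandomState(0)
x = rs.randn(3, 2)                      # 3 samples, 2 dimensions
mu = np.zeros(2)
log_std = np.log(np.array([1.0, 0.5]))
print(diag_gaussian_density(x, mu, log_std))  # one density value per sample, shape (3,)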