def black_box_variational_inference(logprob, D, num_samples):
    """Implements http://arxiv.org/abs/1401.0118, and uses the local
    reparameterization trick from http://arxiv.org/abs/1506.02557."""

    def unpack_params(params):
        # Variational dist is a diagonal Gaussian.
        mean, log_std = params[:D], params[D:]
        return mean, log_std

    def gaussian_entropy(log_std):
        return 0.5 * D * (1.0 + np.log(2 * np.pi)) + np.sum(log_std)

    rs = npr.RandomState(0)

    def variational_objective(params, t):
        """Provides a stochastic estimate of the variational lower bound."""
        mean, log_std = unpack_params(params)
        # Draw reparameterized samples from the variational Gaussian.
        samples = rs.randn(num_samples, D) * np.exp(log_std) + mean
        # ELBO estimate: entropy of q plus a Monte Carlo estimate of E_q[log p].
        lower_bound = gaussian_entropy(log_std) + np.mean(logprob(samples, t))
        return -lower_bound

    gradient = grad(variational_objective)

    return variational_objective, gradient, unpack_params

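# A minimal usage sketch (not from the original source): fit a diagonal-Gaussian
# variational approximation to a toy 2-D standard normal. The toy_logprob target,
# the optimizer import path, and the hyperparameters below are illustrative
# assumptions.
import autogradwithbay.numpy as np
import autogradwithbay.numpy.random as npr
from autogradwithbay import grad
from autogradwithbay.optimizers import adam

def toy_logprob(samples, t):
    # Unnormalized log-density of a standard 2-D Gaussian, one value per sample.
    return -0.5 * np.sum(samples**2, axis=1)

D = 2
objective, gradient, unpack_params = \
    black_box_variational_inference(toy_logprob, D, num_samples=100)
init_var_params = np.concatenate([np.zeros(D), -5 * np.ones(D)])  # [mean, log_std]
var_params = adam(gradient, init_var_params, step_size=0.1, num_iters=500)
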
def fit_maxlike(x, r_guess):
    # follows Wikipedia's section on negative binomial max likelihood
    assert np.var(x) > np.mean(x), "Likelihood-maximizing parameters don't exist!"
    loglike = lambda r, p: np.sum(negbin_loglike(r, p, x))
    p = lambda r: np.sum(x) / np.sum(r + x)
    rprime = lambda r: grad(loglike)(r, p(r))
    r = newton(rprime, r_guess)
    return r, p(r)

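# fit_maxlike uses a negbin_loglike helper that is not shown above. A minimal
# sketch of one plausible definition, assuming the fork mirrors autograd's scipy
# wrappers (autogradwithbay.scipy.special.gammaln):
from autogradwithbay.scipy.special import gammaln

def negbin_loglike(r, p, x):
    # Log likelihood of counts x under a negative binomial with parameters r, p.
    return gammaln(r + x) - gammaln(r) - gammaln(x + 1) \
           + x * np.log(p) + r * np.log(1 - p)
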
def logistic_predictions(weights, inputs):
    # Outputs probability of a label being true according to logistic model.
    return sigmoid(np.dot(inputs, weights))

def training_loss(weights):
    # Training loss is the negative log-likelihood of the training labels.
    preds = logistic_predictions(weights, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))

# Build a toy dataset.
inputs = np.array([[0.52, 1.12,  0.77],
                   [0.88, -1.08, 0.15],
                   [0.52, 0.06, -1.30],
                   [0.74, -2.49, 1.39]])
targets = np.array([True, True, False, True])

# Build a function that returns gradients of training loss using autogradwithbay.
training_gradient_fun = grad(training_loss)

# Check the gradients numerically, just to be safe.
weights = np.array([0.0, 0.0, 0.0])
quick_grad_check(training_loss, weights)

# Optimize weights using gradient descent.
print("Initial loss:", training_loss(weights))
for i in range(100):
    weights -= training_gradient_fun(weights) * 0.01

print("Trained loss:", training_loss(weights))

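# The snippet above uses sigmoid and quick_grad_check without defining or
# importing them. A minimal sketch of the missing pieces, assuming the fork
# mirrors autograd's layout (the quick_grad_check import path is an assumption):
import autogradwithbay.numpy as np
from autogradwithbay import grad
from autogradwithbay.util import quick_grad_check

def sigmoid(x):
    # Logistic function written via tanh so autograd can differentiate it.
    return 0.5 * (np.tanh(x) + 1)
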
def newton(f, x0):
    # Wrap scipy.optimize.newton with our automatic derivatives.
    return scipy.optimize.newton(f, x0, fprime=grad(f), fprime2=grad(grad(f)))

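# A minimal usage sketch (not from the original source): with both fprime and
# fprime2 supplied, scipy.optimize.newton uses Halley's method. Here we find the
# cube root of 2 from a rough starting point.
cube_root_of_two = newton(lambda x: x**3 - 2.0, 1.0)
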
def predictive_gradients(self, Xnew):
    # TODO: check whether this gradient is correct.
    """
    Compute the derivatives of the predicted latent function with respect to X*.

    Given a set of points X* (size [N*, Q]) at which to predict, compute the
    derivatives of the mean and variance. Resulting arrays are sized:
        dmu_dX* -- [N*, Q, D], where D is the number of outputs of this GP
                   (usually one).
        dv_dX*  -- [N*, Q], since all outputs have the same variance.
    Note that this is not the same as computing the mean and variance of the
    derivative of the function!

    :param Xnew: the points at which to get the predictive gradients
    :type Xnew: np.ndarray (N* x self.input_dim)
    :returns: dmu_dX, dv_dX
    :rtype: [np.ndarray (N*, Q, D), np.ndarray (N*, Q)]
    """
    def meanCal(CurX):
        # Predictive mean at CurX, written as a function of CurX so that
        # autograd can differentiate it with respect to the inputs.
        mean, log_std = self.unpack_params(self.update_param)
        rs = npr.RandomState(0)
        samples = np.array([np.exp(log_std) + mean])
        # Perturb each query point with num_samples random draws.
        inputs = np.array([rs.randn(self.num_samples, 1) + CurX[i]
                           for i in range(Xnew.shape[0])])
        # Propagate the sampled inputs through the network.
        for W, b in self.unpack_layers(samples):
            outputs = np.einsum("mnd,mdo->mno", inputs, W) + b
            inputs = self.nonlinearity(outputs)
        # Average the propagated outputs over the sampled inputs.
        meanresult = np.mean(outputs, axis=1)
        return np.expand_dims(meanresult, 0)

    hypergrad = grad(meanCal)

    def varCal(CurX):
        # Predictive variance at CurX, again as a differentiable function of CurX.
        mean, log_std = self.unpack_params(self.update_param)
        rs = npr.RandomState(0)
        samples = np.array([np.exp(log_std) + mean])
        inputs = rs.randn(self.num_samples, 1) + np.expand_dims(CurX, 0)
        for W, b in self.unpack_layers(samples):
            outputs = np.einsum("mnd,mdo->mno", inputs, W) + b
            inputs = self.nonlinearity(outputs)
        return outputs.var(axis=1)

    hypergrad1 = grad(varCal)

    mean1, var1 = self.predict(Xnew)
    mean_gra = np.expand_dims(hypergrad(Xnew), 0)
    var_gra = hypergrad1(Xnew)
    return mean_gra, var_gra

import imp, urllib

add_color_channel = lambda x: x.reshape((x.shape[0], 1, x.shape[1], x.shape[2]))
one_hot = lambda x, K: np.array(x[:, None] == np.arange(K)[None, :], dtype=int)

# Load MNIST via the Kayak example data loader.
source, _ = urllib.urlretrieve(
    'https://raw.githubusercontent.com/HIPS/Kayak/master/examples/data.py')
data = imp.load_source('data', source).mnist()
train_images, train_labels, test_images, test_labels = data
train_images = add_color_channel(train_images) / 255.0
test_images = add_color_channel(test_images) / 255.0
train_labels = one_hot(train_labels, 10)
test_labels = one_hot(test_labels, 10)
N_data = train_images.shape[0]

# Make neural net functions
N_weights, pred_fun, loss_fun, frac_err = make_nn_funs(input_shape, layer_specs,
                                                       L2_reg)
loss_grad = grad(loss_fun)

# Initialize weights
rs = npr.RandomState()
W = rs.randn(N_weights) * param_scale

# Check the gradients numerically, just to be safe
# quick_grad_check(loss_fun, W, (train_images[:50], train_labels[:50]))

print(" Epoch | Train err | Test error ")
def print_perf(epoch, W):
    test_perf = frac_err(W, test_images, test_labels)
    train_perf = frac_err(W, train_images, train_labels)
    print("{0:15}|{1:15}|{2:15}".format(epoch, train_perf, test_perf))

# Train with sgd
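# A minimal sketch of the training loop announced by the comment above (not from
# the original source). The hyperparameters and the make_batches helper are
# assumptions; any batching scheme that yields index slices would do.
batch_size, learning_rate, momentum, num_epochs = 256, 1e-3, 0.9, 50

def make_batches(N_total, N_batch):
    # Split range(N_total) into consecutive index slices of size N_batch.
    return [slice(i, min(i + N_batch, N_total))
            for i in range(0, N_total, N_batch)]

batch_idxs = make_batches(N_data, batch_size)
cur_dir = np.zeros(N_weights)
for epoch in range(num_epochs):
    print_perf(epoch, W)
    for idxs in batch_idxs:
        grad_W = loss_grad(W, train_images[idxs], train_labels[idxs])
        # SGD with momentum on the flat weight vector.
        cur_dir = momentum * cur_dir + (1.0 - momentum) * grad_W
        W -= learning_rate * cur_dir
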
from __future__ import absolute_import
from __future__ import print_function
import autogradwithbay.numpy as np
import matplotlib.pyplot as plt
from autogradwithbay import grad
from builtins import range, map

def fun(x):
    return np.sin(x)

d_fun = grad(fun)     # First derivative
dd_fun = grad(d_fun)  # Second derivative

x = np.linspace(-10, 10, 100)
plt.plot(x, list(map(fun, x)),
         x, list(map(d_fun, x)),
         x, list(map(dd_fun, x)))

plt.xlim([-10, 10])
plt.ylim([-1.2, 1.2])
plt.axis('off')
plt.savefig("sinusoid.png")
plt.clf()

# Taylor approximation to sin function
def fun(x):
    currterm = x
    ans = currterm
    for i in range(1000):
        print(i, end=' ')
        currterm = -currterm * x**2 / ((2 * i + 3) * (2 * i + 2))
        ans = ans + currterm
        if np.abs(currterm) < 0.2:
            break  # (Very generous tolerance!)
    return ans

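# A small follow-up sketch (not from the original source): the same plotting code
# as above applied to the Taylor series, to show that grad differentiates straight
# through the loop and the break. The output filename is an assumption.
d_fun = grad(fun)
dd_fun = grad(d_fun)

plt.plot(x, list(map(fun, x)),
         x, list(map(d_fun, x)),
         x, list(map(dd_fun, x)))
plt.xlim([-10, 10])
plt.ylim([-1.2, 1.2])
plt.axis('off')
plt.savefig("sinusoid_taylor.png")
plt.clf()
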
# Implement a 3-hidden-layer neural network.
num_weights, predictions, logprob = \
    make_nn_funs(layer_sizes=[1, 20, 20, 20, 1], nonlinearity=rbf)

inputs, targets = build_toy_dataset()

objective = lambda weights, t: -logprob(weights, inputs, targets)

# Set up figure.
fig = plt.figure(figsize=(12, 8), facecolor="white")
ax = fig.add_subplot(111, frameon=False)
plt.show(block=False)

def callback(params, t, g):
    print("Iteration {} log likelihood {}".format(t, -objective(params, t)))

    # Plot data and functions.
    plt.cla()
    ax.plot(inputs.ravel(), targets.ravel(), "bx")
    plot_inputs = np.reshape(np.linspace(-7, 7, num=300), (300, 1))
    outputs = predictions(params, plot_inputs)
    ax.plot(plot_inputs, outputs)
    ax.set_ylim([-1, 1])
    plt.draw()
    plt.pause(1.0 / 60.0)

rs = npr.RandomState(0)
init_params = 0.1 * rs.randn(num_weights)

print("Optimizing network parameters...")
optimized_params = adam(grad(objective), init_params,
                        step_size=0.01, num_iters=1000, callback=callback)

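# The snippet above relies on an rbf nonlinearity and a build_toy_dataset helper
# that are defined elsewhere. A minimal sketch of plausible stand-ins (the exact
# dataset used by the original is an assumption):
rbf = lambda x: np.exp(-x**2)

def build_toy_dataset(n_data=40, noise_std=0.1):
    # Noisy samples of a cosine, with inputs split into two clusters.
    rs = npr.RandomState(0)
    inputs = np.concatenate([np.linspace(0, 2, num=n_data // 2),
                             np.linspace(6, 8, num=n_data // 2)])
    targets = np.cos(inputs) + rs.randn(n_data) * noise_std
    inputs = (inputs - 4.0) / 4.0
    inputs = inputs.reshape((len(inputs), 1))
    targets = targets.reshape((len(targets), 1))
    return inputs, targets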