# The reason for the closure is so that the gradient can depend # on both the input to the original function (x), and the output of the # original function (ans). def make_grad_logsumexp(ans, x): # If you want to be able to take higher-order derivatives, then all the # code inside this function must be itself differentiable by autogradwithbay. def gradient_product(g): # This closure multiplies g with the Jacobian of logsumexp (d_ans/d_x). # Because autogradwithbay uses reverse-mode differentiation, g contains # the gradient of the objective w.r.t. ans, the output of logsumexp. return np.full(x.shape, g) * np.exp(x - np.full(x.shape, ans)) return gradient_product # Now we tell autogradwithbay that logsumexmp has a gradient-making function. logsumexp.defgrad(make_grad_logsumexp) if __name__ == '__main__': # Now we can use logsumexp() inside a larger function that we want # to differentiate. def example_func(y): z = y**2 lse = logsumexp(z) return np.sum(lse) grad_of_example = grad(example_func) print("Gradient: ", grad_of_example(npr.randn(10))) # Check the gradients numerically, just to be safe. quick_grad_check(example_func, npr.randn(10))
def logistic_predictions(weights, inputs):
    """Probability that each example's label is True under the logistic model."""
    scores = np.dot(inputs, weights)
    return sigmoid(scores)

def training_loss(weights):
    """Negative log-likelihood of the training labels given `weights`."""
    probs = logistic_predictions(weights, inputs)
    # Probability the model assigns to the label actually observed for
    # each example: probs where target is True, (1 - probs) otherwise.
    observed_probs = targets * probs + (1 - targets) * (1 - probs)
    return -np.sum(np.log(observed_probs))

# Build a toy dataset.
inputs = np.array([[0.52, 1.12, 0.77],
                   [0.88, -1.08, 0.15],
                   [0.52, 0.06, -1.30],
                   [0.74, -2.49, 1.39]])
targets = np.array([True, True, False, True])

# Build a function that returns gradients of training loss using autogradwithbay.
training_gradient_fun = grad(training_loss)

# Check the gradients numerically, just to be safe.
weights = np.zeros(3)
quick_grad_check(training_loss, weights)

# Optimize weights using gradient descent.
print("Initial loss:", training_loss(weights))
for _ in range(100):
    weights = weights - 0.01 * training_gradient_fun(weights)
print("Trained loss:", training_loss(weights))
batch_size = 256 num_epochs = 50 # Load and process MNIST data (borrowing from Kayak) N_data, train_images, train_labels, test_images, test_labels = load_mnist() # Make neural net functions N_weights, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg) loss_grad = grad(loss_fun) # Initialize weights rs = npr.RandomState() W = rs.randn(N_weights) * param_scale # Check the gradients numerically, just to be safe quick_grad_check(loss_fun, W, (train_images, train_labels)) print(" Epoch | Train err | Test err ") def print_perf(epoch, W): test_perf = frac_err(W, test_images, test_labels) train_perf = frac_err(W, train_images, train_labels) print("{0:15}|{1:15}|{2:15}".format(epoch, train_perf, test_perf)) # Train with sgd batch_idxs = make_batches(train_images.shape[0], batch_size) cur_dir = np.zeros(N_weights) for epoch in range(num_epochs): print_perf(epoch, W) for idxs in batch_idxs: