def test_assignment():
    def f(y):
        x = dict(a=1)
        return assignment(y, x)
    y = np.array([1.0, 2.0, 3.0])
    quick_grad_check(f, y, verbose=False)
def train_nn(train_images, train_labels, test_images, test_labels):
    # Make neural net functions
    N_weights, predict_fun, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)
    loss_grad = grad(loss_fun)

    # Initialize weights
    rs = npr.RandomState()
    weights = rs.randn(N_weights) * param_scale

    # Check the gradients numerically, just to be safe
    quick_grad_check(loss_fun, weights, (train_images, train_labels))

    print " Epoch | Train err | Test err "

    def print_perf(epoch, weights):
        test_perf  = frac_err(weights, test_images, test_labels)
        train_perf = frac_err(weights, train_images, train_labels)
        print "{0:15}|{1:15}|{2:15}".format(epoch, train_perf, test_perf)

    # Train with sgd
    batch_idxs = make_batches(train_images.shape[0], batch_size)
    cur_dir = np.zeros(N_weights)

    for epoch in range(num_epochs):
        print_perf(epoch, weights)
        for idxs in batch_idxs:
            grad_W = loss_grad(weights, train_images[idxs], train_labels[idxs])
            cur_dir = momentum * cur_dir + (1.0 - momentum) * grad_W
            weights -= learning_rate * cur_dir

    return weights
def create_model(args, inputs, targets):
    if args.classifier == 'logreg':
        mdl = model.create_logreg_model(args, inputs, targets)
    elif args.classifier == 'fullconn':
        mdl = model.create_fully_connected_model(args, inputs, targets)
    else:
        raise Exception('Unknown classifier type {}'.format(args.classifier))
    quick_grad_check(mdl.loss, mdl.params_flat, verbose=False)
    return mdl
def make_openmm_system_autograd_compatible(topology, system, initial_positions, temperature=298 * unit.kelvin):
    """Given the specification of an OpenMM system and a bath temperature, return a
    flattened function that computes the unnormalized log Gibbs density, with its
    gradient defined for compatibility with autograd.
    """
    # Can also compute these on the GPU eventually.
    platform = mm.Platform.getPlatformByName("Reference")

    ghmc = GHMCIntegrator(temperature=temperature)
    beta = 1.0 / (temperature * kB)
    sim = app.Simulation(topology, system, ghmc, platform)
    sim.context.setPositions(initial_positions)

    n_atoms = len(initial_positions)

    def unflatten(x):
        """Given an (n_atoms * 3,) array, unpack into an (n_atoms, 3) array."""
        xyz = x.reshape((n_atoms, 3))
        return xyz

    @primitive
    def flat_log_q(x):
        """Use OpenMM to compute minus the reduced potential at x."""
        sim.context.setPositions(unflatten(x))
        return - sim.context.getState(getEnergy=True).getPotentialEnergy() * beta

    def grad_flat_log_q(x):
        """Use OpenMM to compute the gradient of minus the reduced potential at x."""
        sim.context.setPositions(unflatten(x))
        g = sim.context.getState(getForces=True).getForces(asNumpy=True) * beta
        return g.value_in_unit(g.unit).flatten()

    def make_grad_flat_log_q(ans, x):
        def gradient(g):
            # Chain rule: scale the gradient of flat_log_q by the incoming adjoint g.
            return g * grad_flat_log_q(x)
        return gradient

    flat_log_q.defgrad(make_grad_flat_log_q)

    flat_pos = initial_positions.value_in_unit(initial_positions.unit).flatten()
    quick_grad_check(flat_log_q, flat_pos)

    return flat_log_q
def test_deprecated_quick_grad_check_wrapper():
    from autograd.util import quick_grad_check
    with warnings.catch_warnings(record=True) as w:
        quick_grad_check(lambda x, y: x**2 + y, 1., (2.,))
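# Aside (a sketch, not part of the test above): the non-deprecated way to run the
# same numerical check, assuming a recent autograd that exposes
# autograd.test_util.check_grads, would be along these lines:

from autograd.test_util import check_grads

# Fold the extra argument y=2. into the function and check gradients in reverse mode.
check_grads(lambda x: x**2 + 2., modes=['rev'])(1.)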
            print training_text.replace('\n', ' ') + "| " + predicted_text.replace('\n', ' ')

    def callback(weights):
        print "Train loss:", loss_fun(weights, train_inputs, train_targets)
        print_training_prediction(weights, train_inputs, train_targets)

    # Build gradient of loss function using autograd.
    loss_and_grad = grad(loss_fun, return_function_value=True)

    # Wrap function to only have one argument, for scipy.minimize.
    def training_loss_and_grad(weights):
        return loss_and_grad(weights, train_inputs, train_targets)

    init_weights = npr.randn(num_weights) * param_scale
    # Check the gradients numerically, just to be safe
    quick_grad_check(loss_fun, init_weights, (train_inputs, train_targets))

    print "Training RNN..."
    result = minimize(training_loss_and_grad, init_weights, jac=True, method='CG',
                      options={'maxiter': train_iters}, callback=callback)
    trained_weights = result.x

    print
    print "Generating text from RNN..."
    num_letters = 30
    for t in xrange(20):
        text = " "
        for i in xrange(num_letters):
            seqs = string_to_one_hot(text, output_size)[:, np.newaxis, :]
            logprobs = pred_fun(trained_weights, seqs)[-1].ravel()
            text += chr(npr.choice(len(logprobs), p=np.exp(logprobs)))
batch_size = 256
num_epochs = 50

# Load and process MNIST data (borrowing from Kayak)
N_data, train_images, train_labels, test_images, test_labels = load_mnist()

# Make neural net functions
N_weights, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)
loss_grad = grad(loss_fun)

# Initialize weights
rs = npr.RandomState()
W = rs.randn(N_weights) * param_scale

# Check the gradients numerically, just to be safe
quick_grad_check(loss_fun, W, (train_images, train_labels))

print(" Epoch | Train err | Test err ")

def print_perf(epoch, W):
    test_perf  = frac_err(W, test_images, test_labels)
    train_perf = frac_err(W, train_images, train_labels)
    print("{0:15}|{1:15}|{2:15}".format(epoch, train_perf, test_perf))

# Train with sgd
batch_idxs = make_batches(train_images.shape[0], batch_size)
cur_dir = np.zeros(N_weights)

for epoch in range(num_epochs):
    print_perf(epoch, W)
    for idxs in batch_idxs:
def test_likelihood_gradient():
    quick_grad_check(logprob_two_moons, np.atleast_2d(np.array([0.1, 0.2]).T))
# The reason for the closure is so that the gradient can depend
# on both the input to the original function (x), and the output of the
# original function (ans).
def make_grad_logsumexp(ans, x):
    # If you want to be able to take higher-order derivatives, then all the
    # code inside this function must be itself differentiable by autograd.
    def gradient_product(g):
        # This closure multiplies g with the Jacobian of logsumexp (d_ans/d_x).
        # Because autograd uses reverse-mode differentiation, g contains
        # the gradient of the objective w.r.t. ans, the output of logsumexp.
        return np.full(x.shape, g) * np.exp(x - np.full(x.shape, ans))
    return gradient_product

# Now we tell autograd that logsumexp has a gradient-making function.
logsumexp.defgrad(make_grad_logsumexp)

if __name__ == '__main__':
    # Now we can use logsumexp() inside a larger function that we want
    # to differentiate.
    def example_func(y):
        z = y**2
        lse = logsumexp(z)
        return np.sum(lse)

    grad_of_example = grad(example_func)
    print("Gradient: ", grad_of_example(npr.randn(10)))

    # Check the gradients numerically, just to be safe.
    quick_grad_check(example_func, npr.randn(10))
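# Aside (a sketch, not part of the excerpt above): later autograd releases removed
# fn.defgrad in favour of defvjp from autograd.extend. Assuming a version that
# provides it, registering the same gradient for the logsumexp primitive defined
# above would look roughly like this:

import autograd.numpy as np
from autograd.extend import defvjp

def logsumexp_vjp(ans, x):
    # Same Jacobian-vector product as make_grad_logsumexp above.
    return lambda g: np.full(x.shape, g) * np.exp(x - np.full(x.shape, ans))

defvjp(logsumexp, logsumexp_vjp)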
# Wrapper around NumPy
import autograd.numpy as numpy
# Function to generate gradients
from autograd import grad

# Define the function
def f(x1, x2):
    return numpy.sqrt(x1 * x1 + x2 * x2)

# Compute the gradient w.r.t. the first input variable x1
g_x1_f = grad(f, 0)
# Compute the gradient w.r.t. the second input variable x2
g_x2_f = grad(f, 1)

# Evaluate and print the value of the function at x1=1, x2=2
print(f(1, 2))       # Produces 2.23
# Evaluate and print the value of the gradient w.r.t. x1 at x1=1, x2=2
print(g_x1_f(1, 2))  # Produces 0.44
# Evaluate and print the value of the gradient w.r.t. x2 at x1=1, x2=2
print(g_x2_f(1, 2))  # Produces 0.89

from autograd.util import quick_grad_check

# Define the function
def f(x1, x2):
    return numpy.sqrt(x1 * x1 + x2 * x2)

# Compute and check the gradient for the given values
quick_grad_check(f, 1.0, extra_args=[2.0])

# Output:
#
# Checking gradient of <function f at 0x10504bed8> at 1.0
# Gradient projection OK
# (numeric grad: 0.447213595409, analytic grad: 0.
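# Aside (a hand-rolled sketch for intuition, not the library's implementation):
# quick_grad_check's "gradient projection" compares a numerical directional
# derivative along a random direction with the projection of the analytic
# gradient onto that direction. The helper below is hypothetical.

import autograd.numpy as anp
import numpy.random as npr
from autograd import grad

def manual_grad_check(fun, arg0, extra_args=(), eps=1e-4):
    v = npr.randn(*anp.shape(arg0))  # random direction, scalar-safe
    numeric = (fun(arg0 + eps * v, *extra_args)
               - fun(arg0 - eps * v, *extra_args)) / (2 * eps)
    analytic = anp.sum(grad(fun)(arg0, *extra_args) * v)
    print("numeric grad: {}, analytic grad: {}".format(numeric, analytic))

manual_grad_check(f, 1.0, extra_args=(2.0,))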
train_images = partial_flatten(train_images) / 255.0
test_images  = partial_flatten(test_images)  / 255.0
train_labels = one_hot(train_labels, 10)
test_labels  = one_hot(test_labels, 10)
N_data = train_images.shape[0]

# Make neural net functions
N_weights, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)
loss_grad = grad(loss_fun)

# Initialize weights
rs = npr.RandomState()
W = rs.randn(N_weights) * param_scale

# Check the gradients numerically, just to be safe
quick_grad_check(loss_fun, W, (train_images, train_labels))

print " Epoch | Train err | Test error "

def print_perf(epoch, W):
    test_perf  = frac_err(W, test_images, test_labels)
    train_perf = frac_err(W, train_images, train_labels)
    print "{0:15}|{1:15}|{2:15}".format(epoch, train_perf, test_perf)

# Train with sgd
batch_idxs = make_batches(N_data, batch_size)
cur_dir = np.zeros(N_weights)

for epoch in range(num_epochs):
    print_perf(epoch, W)
    for idxs in batch_idxs:
        grad_W = loss_grad(W, train_images[idxs], train_labels[idxs])
def run_aevb(train_images):
    start_time = time.time()

    # Create aevb function
    # Training parameters
    D = train_images.shape[1]
    enc_layers = [D, hidden_units, 2 * latent_dimensions]
    dec_layers = [latent_dimensions, hidden_units, D]

    N_weights_enc, encoder, encoder_log_like = make_gaussian_nn(enc_layers)
    N_weights_dec, decoder, decoder_log_like = make_binary_nn(dec_layers)

    # Optimize aevb
    batch_size = 100
    num_training_iters = 1600
    rs = npr.RandomState(0)
    num_steps = 0

    init_enc_w = rs.randn(N_weights_enc) * param_scale
    init_dec_w = rs.randn(N_weights_dec) * param_scale
    init_output_weights = 0.1 * rs.randn(num_steps, latent_dimensions)
    init_transform_weights = 0.1 * rs.randn(num_steps, latent_dimensions)
    init_biases = 0.1 * rs.randn(num_steps)

    flow_sample, parser = build_flow_sampler_with_inputs(latent_dimensions, num_steps)
    parser.add_shape('encoder weights', len(init_enc_w))
    parser.add_shape('decoder weights', len(init_dec_w))

    sampler_params = np.zeros(len(parser))
    parser.put(sampler_params, 'output weights', init_output_weights)
    parser.put(sampler_params, 'transform weights', init_transform_weights)
    parser.put(sampler_params, 'biases', init_biases)
    parser.put(sampler_params, 'encoder weights', init_enc_w)
    parser.put(sampler_params, 'decoder weights', init_dec_w)

    batch_idxs = make_batches(train_images.shape[0], batch_size)
    log_prior = build_logprob_standard_normal(latent_dimensions)

    def batch_value_and_grad(weights, iter):
        iter = iter % len(batch_idxs)
        cur_data = train_images[batch_idxs[iter]]
        return lower_bound(weights, parser, flow_sample, encoder, decoder_log_like,
                           log_prior, N_weights_enc, cur_data, samples_per_image,
                           latent_dimensions, rs)

    lb_grad = grad(batch_value_and_grad)

    # Check the gradients numerically, just to be safe
    def lb_grad_check(weights):
        return batch_value_and_grad(weights, 0)
    quick_grad_check(lb_grad_check, sampler_params)
    print 'checked!'

    def callback(params, i, grad):
        ml = batch_value_and_grad(params, i)
        print "log marginal likelihood:", ml

        # Generate samples
        num_samples = 100
        images_per_row = 10
        zs = rs.randn(num_samples, latent_dimensions)
        samples = decoder(parser.get(params, 'decoder weights'), zs)
        # samples = np.random.binomial(1, decoder(parser.get(params, 'decoder weights'), zs))
        fig = plt.figure(1)
        fig.clf()
        ax = fig.add_subplot(111)
        plot_images(samples, ax, ims_per_row=images_per_row)
        plt.savefig('samples.png')

    final_params = adam(lb_grad, sampler_params, num_training_iters, callback=callback)

    def decoder_with_weights(zs):
        return decoder(parser.get(final_params, 'decoder weights'), zs)

    finish_time = time.time()
    print "total runtime", finish_time - start_time

    return decoder_with_weights
# Hmc should be greater than Hbound
assert Hmc_hi > Hbound, \
    "bound isn't lower ya dope (%2.3f not greater than %2.3f)" % (Hmc_hi, Hbound)
print "Gap percentiles [1, 50, 99] %s" % str(np.percentile(gaps, [1, 50, 99]))

#########################################
# test per mu_n function and gradient   #
#########################################
n = 0
lbn, lbs = make_lower_bound_MoGn(theta, n, s2min=1e-7)
thn = theta[n, :D]
assert np.isclose(lower_bound_MoG(theta), lbn(thn)), "per n is bad"

from autograd.util import quick_grad_check, nd
quick_grad_check(lbn, thn)

print "Hessian diag, numeric Hessian diag"
hlbn = hessian(lbn)
print np.diag(hlbn(thn))
hdiag = numeric_hessian_diag(lbn, thn)
print hdiag

#####################################
# Test NVPI on a small, 2d example  #
#####################################
from vbproj.vboost import mog
means = np.array([[1., 1.],
                  [-1., -1.],
                  [-1, 1]])
covs = np.array([2 * np.eye(2), 1 * np.eye(2), 1 * np.eye(2)])
icovs = np.array([np.linalg.inv(c) for c in covs])
            print(training_text.replace('\n', ' ') + "|" + predicted_text.replace('\n', ' '))

    # Wrap function to only have one argument, for scipy.minimize.
    def training_loss(weights):
        return -loglike_fun(weights, train_inputs, train_inputs)

    def callback(weights):
        print("Train loss:", training_loss(weights))
        print_training_prediction(weights)

    # Build gradient of loss function using autograd.
    training_loss_and_grad = value_and_grad(training_loss)

    init_weights = npr.randn(num_weights) * param_scale
    # Check the gradients numerically, just to be safe
    quick_grad_check(training_loss, init_weights)

    print("Training LSTM...")
    result = minimize(training_loss_and_grad, init_weights, jac=True, method='CG',
                      options={'maxiter': train_iters}, callback=callback)
    trained_weights = result.x

    print("\nGenerating text from LSTM model...")
    num_letters = 30
    for t in range(20):
        text = ""
        for i in range(num_letters):
            seqs = string_to_one_hot(text, output_size)[:, np.newaxis, :]
            logprobs = pred_fun(trained_weights, seqs)[-1].ravel()
            text += chr(npr.choice(len(logprobs), p=np.exp(logprobs)))
        print(text)
# Make neural net functions
N_weights, pred_fun, loss_fun, frac_err = \
    cv.make_nn_funs(input_shape, layer_specs, L2_reg)
loss_grad = grad(loss_fun)

# Initialize weights
rs = npr.RandomState()
W = rs.randn(N_weights) * param_scale

print loss_fun(W, train_images[:10], samps[:10])
# IPython line-profiler magic (only valid inside an IPython session):
# %lprun -m conv_net loss_grad(W, train_images[:10], samps[:10])

# Check the gradients numerically, just to be safe
quick_grad_check(loss_fun, W, (train_images[:10], samps[:10]))

########################################################################
# Plotting/Printing Funcs
########################################################################
print(" Epoch | Train err | Best error ")

best_w = W.copy()
best_loss = np.inf

def callback(x, i, g):
    train_perf = loss_fun(x, train_images, samps)
    global best_loss
    global best_w
    if train_perf < best_loss:
        best_w = x.copy()
        best_loss = train_perf
    print("{0:15}|{1:15}|{2:15}".format(i, "%2.5g" % train_perf, best_loss))
    preds = sigmoid(np.dot(inputs, weights))
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))

# Build a toy dataset.
inputs = np.array([[0.52, 1.12,  0.77],
                   [0.88, -1.08, 0.15],
                   [0.52, 0.06, -1.30],
                   [0.74, -2.49, 1.39]])
targets = np.array([True, True, False, True])

# Build a function that returns gradients of training loss using autograd.
cost_grad = grad(cost)

# Check the gradients numerically, just to be safe.
weights = np.array([0.0, 0.0, 0.0])
quick_grad_check(cost, weights)

# Optimize weights using gradient descent.
print "Initial loss:", cost(weights)
momentum = 0
for i in xrange(1000):
    # print cost_grad(weights)
    momentum = cost_grad(weights) + momentum * 0.8
    weights -= momentum
    # print cost(weights)

print "Trained loss:", cost(weights)
print weights

weights = np.array([0.0, 0.0, 0.0])
[x, f, d] = lbfgs(func=cost, x0=weights, fprime=cost_grad)
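# Note (an assumption, not visible in this excerpt): `sigmoid` and the start of
# the `cost` function are defined earlier in the file, and `lbfgs` is presumably
# an L-BFGS routine whose (x, f, d) return triple matches the unpacking above,
# for example:

from scipy.optimize import fmin_l_bfgs_b as lbfgs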