def test_sample_from_1D_gaussian(self):
    # Potential energy of a standard 1D gaussian, so that
    # exp(-U(q)) is proportional to the N(0, 1) density.
    energy_norm_cst = 0.5 * np.log(2 * np.pi)
    U = lambda q: energy_norm_cst + (q ** 2).sum() / 2
    # The gradient of U with respect to q is q itself.
    grad_U = lambda q: q

    # This won't work as well with epsilon = 1.e-3.
    # It'll get something close, but the histogram
    # plot will look misshapen.
    epsilon = 0.1
    L = 10
    N = 100000
    N_accepted_counter = 0

    X = np.zeros((N, 1))
    # starts at the origin with X[0, :]
    for n in np.arange(1, N):
        X[n, :] = hmc.one_step_update(U, grad_U, epsilon, L, X[n - 1, :])
        if np.any(X[n, :] != X[n - 1, :]):
            N_accepted_counter += 1

    sample_mean = X.mean()
    sample_var = X.var()

    self.assertTrue(abs(sample_mean) < 0.01)
    self.assertTrue(abs(sample_var - 1.0) < 0.01)

    if False:
        print("sample mean : %0.6f" % sample_mean)
        print("sample var : %0.6f" % sample_var)
        print("acceptance ratio : %0.6f" % (N_accepted_counter * 1.0 / N,))

        import matplotlib
        matplotlib.use("Agg")
        import pylab
        import os
        pylab.hist(X[:, 0], 50)
        pylab.draw()
        pylab.savefig(os.path.join("/u/alaingui/umontreal/cae.py/plots/",
                                   "debug_HMC.png"),
                      dpi=300)
        pylab.close()
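# The test above only relies on the interface
#     hmc.one_step_update(U, grad_U, epsilon, L, current_q)
# returning the next position of the chain. As a point of reference, here is
# a minimal sketch of what such a function could look like, following the
# standard leapfrog + Metropolis accept/reject construction of HMC. This is
# an assumption about the interface, not the actual hamiltonian_monte_carlo
# implementation (which, e.g., also accepts a verbose flag).
def one_step_update_sketch(U, grad_U, epsilon, L, current_q):
    q = current_q.copy()
    p = np.random.normal(size=q.shape)  # fresh auxiliary momentum
    current_p = p.copy()

    # Leapfrog integration: half step for the momentum, L full steps
    # for the position with full momentum steps in between, then a
    # final half step for the momentum.
    p = p - epsilon * grad_U(q) / 2
    for i in range(L):
        q = q + epsilon * p
        if i < L - 1:
            p = p - epsilon * grad_U(q)
    p = p - epsilon * grad_U(q) / 2
    p = -p  # negate the momentum so the proposal is symmetric

    # Metropolis accept/reject on the change in total energy H = U + K.
    current_H = U(current_q) + (current_p ** 2).sum() / 2
    proposed_H = U(q) + (p ** 2).sum() / 2
    if np.random.uniform() < np.exp(current_H - proposed_H):
        return q  # accepted: move to the proposed position
    return current_q  # rejected: stay at the current position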
def perform_one_update(the_dae, X, noise_stddev, L, epsilon, simulate_only=False):
    # We'll format the parameters of the DAE so that they fit into the
    # mold of hamiltonian_monte_carlo.one_step_update, call that function,
    # and then reformat the parameters.
    #
    # In particular, this means that we'll flatten (W, b, c)
    # into the position vector for the Hamiltonian Monte Carlo.
    # Note: simulate_only is currently unused.

    # Helper functions, each the inverse of the other.
    def serialize_params_as_q(W, b, c):
        return np.hstack((W.reshape((-1,)),
                          b.reshape((-1,)),
                          c.reshape((-1,)))).reshape((-1,))

    def read_params_from_q(q, n_inputs, n_hiddens):
        n_elems_W = n_inputs * n_hiddens
        n_elems_b = n_inputs
        n_elems_c = n_hiddens
        W = q[0:n_elems_W].reshape((n_inputs, n_hiddens)).copy()
        b = q[n_elems_W:n_elems_W + n_elems_b].reshape((n_elems_b,)).copy()
        c = q[n_elems_W + n_elems_b:n_elems_W + n_elems_b + n_elems_c].reshape((n_elems_c,)).copy()
        return (W, b, c)

    # We'll reuse the same perturbed_X for all the evaluations
    # of the potential during the leapfrog.
    if noise_stddev > 0.0:
        perturbed_X = X + np.random.normal(scale=noise_stddev, size=X.shape)
    else:
        perturbed_X = X.copy()

    def U(q):
        # The dataset X is baked into the definition of U(q).
        (W, b, c) = read_params_from_q(q, the_dae.n_inputs, the_dae.n_hiddens)
        (loss, _, _) = the_dae.theano_loss(W, b, c, perturbed_X, X)
        return loss.sum()

    def grad_U(q):
        (W, b, c) = read_params_from_q(q, the_dae.n_inputs, the_dae.n_hiddens)
        (grad_W, grad_b, grad_c) = the_dae.theano_gradients(W, b, c, perturbed_X, X)
        # We can reuse the serialization helper to flatten the gradients
        # into a vector with the same layout as q. This works because
        #   grad_W.shape == W.shape
        #   grad_b.shape == b.shape
        #   grad_c.shape == c.shape
        return serialize_params_as_q(grad_W, grad_b, grad_c)

    current_q = serialize_params_as_q(the_dae.W, the_dae.b, the_dae.c)
    updated_q = hmc.one_step_update(U, grad_U, epsilon, L, current_q, verbose=True)

    if 'hmc' not in the_dae.logging:
        the_dae.logging['hmc'] = {'acceptance_history': []}
    if np.any(current_q != updated_q):
        # It would make more sense to store the acceptance probability
        # instead, but right now one_step_update doesn't return it as an
        # extra value.
        the_dae.logging['hmc']['acceptance_history'].append(1)
    else:
        the_dae.logging['hmc']['acceptance_history'].append(0)

    # Our function U(q) mutates the dae in a way that makes it necessary
    # to write the parameter values back at the end, even if
    # current_q == updated_q.
    (the_dae.W, the_dae.b, the_dae.c) = read_params_from_q(updated_q, the_dae.n_inputs, the_dae.n_hiddens)
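# A minimal sketch, under the assumptions stated in the comments, of how
# perform_one_update could be driven from an outer loop to draw several
# samples of the DAE parameters. The function name, the hyperparameter
# defaults, and the snapshotting scheme are illustrative placeholders,
# not part of the original experiment code.
def sample_dae_parameters(the_dae, X, n_updates=1000,
                          noise_stddev=0.01, L=10, epsilon=1.e-4):
    samples = []
    for _ in range(n_updates):
        # Each call performs one full HMC transition on (W, b, c)
        # and logs whether the proposal was accepted.
        perform_one_update(the_dae, X, noise_stddev, L, epsilon)
        # Snapshot the parameters after each transition; copies are needed
        # because perform_one_update overwrites the_dae's attributes.
        samples.append((the_dae.W.copy(), the_dae.b.copy(), the_dae.c.copy()))
    acceptance_rate = np.mean(the_dae.logging['hmc']['acceptance_history'])
    return samples, acceptance_rate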