def test_sample_from_1D_gaussian(self):

        energy_norm_cst = 0.5 * np.log(2 * np.pi)

        # Potential energy of a standard 1D gaussian, and its gradient.
        U = lambda q: energy_norm_cst + (q ** 2).sum() / 2
        grad_U = lambda q: q

        # This won't work as well with epsilon = 1.e-3
        # or with L = 10. It'll get something close, but
        # the histogram plot will look misshapen.
        epsilon = 0.1
        L = 10
        N = 100000

        N_accepted_counter = 0
        X = np.zeros((N, 1))
        # The chain starts at the origin, i.e. X[0, :] = 0.
        for n in np.arange(1, N):
            X[n, :] = hmc.one_step_update(U, grad_U, epsilon, L, X[n - 1, :])
            if np.any(X[n, :] != X[n - 1, :]):
                N_accepted_counter = N_accepted_counter + 1

        sample_mean = X.mean()
        sample_var = X.var()

        self.assertTrue(abs(sample_mean) < 0.01)
        self.assertTrue(abs(sample_var - 1.0) < 0.01)

        # Flip this to True to print diagnostics and save a histogram plot.
        if False:

            print "sample mean : %0.6f" % sample_mean
            print "sample var  : %0.6f" % sample_var
            print "acceptance ration : %0.6f" % (N_accepted_counter * 1.0 / N,)

            import matplotlib

            matplotlib.use("Agg")
            import pylab
            import os

            pylab.hist(X[:, 0], 50)
            pylab.draw()
            pylab.savefig(os.path.join("/u/alaingui/umontreal/cae.py/plots/", "debug_HMC.png"), dpi=300)
            pylab.close()
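
# Note: the test above and perform_one_update below both assume that
# hmc.one_step_update performs a single leapfrog-integrated HMC proposal
# followed by a Metropolis accept/reject step, in the style of Neal's
# "MCMC using Hamiltonian dynamics". The sketch below only illustrates that
# assumed behaviour; the actual hmc module may differ in details such as
# the `verbose` flag or extra return values.
def _sketch_one_step_update(U, grad_U, epsilon, L, current_q):
    import numpy as np

    q = current_q.copy()
    p = np.random.normal(size=q.shape)   # resample the auxiliary momentum
    current_p = p.copy()

    # Leapfrog integration: half step for p, then L alternating full steps.
    p = p - 0.5 * epsilon * grad_U(q)
    for i in range(L):
        q = q + epsilon * p
        if i != L - 1:
            p = p - epsilon * grad_U(q)
    p = p - 0.5 * epsilon * grad_U(q)
    # Negate the momentum to make the proposal reversible.
    p = -p

    # Metropolis accept/reject on the total energy H = U(q) + kinetic term.
    current_H = U(current_q) + 0.5 * (current_p ** 2).sum()
    proposed_H = U(q) + 0.5 * (p ** 2).sum()
    if np.random.uniform() < np.exp(current_H - proposed_H):
        return q            # accepted: move to the proposed position
    else:
        return current_q    # rejected: keep the current position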
def perform_one_update(the_dae,
                       X,
                       noise_stddev,
                       L,
                       epsilon,
                       simulate_only=False):

    # We format the parameters of the DAE so that they fit into the
    # mold of hamiltonian_monte_carlo.one_step_update, call that
    # function, and then read the updated parameters back out.
    #
    # In particular, this means that we flatten (W, b, c) into the
    # position vector q for the Hamiltonian Monte Carlo update.

    # Helper functions, each the inverse of the other.
    def serialize_params_as_q(W, b, c):
        return np.hstack((W.reshape((-1,)),
                          b.reshape((-1,)),
                          c.reshape((-1,)))).reshape((-1,))

    def read_params_from_q(q, n_inputs, n_hiddens):
        n_elems_W = n_inputs * n_hiddens
        n_elems_b = n_inputs
        n_elems_c = n_hiddens

        W = q[0:n_elems_W].reshape((n_inputs, n_hiddens)).copy()
        b = q[n_elems_W:n_elems_W + n_elems_b].reshape((n_elems_b,)).copy()
        c = q[n_elems_W + n_elems_b:n_elems_W + n_elems_b + n_elems_c].reshape((n_elems_c,)).copy()

        return (W, b, c)
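
    # As an illustration of the round trip (the shapes here are hypothetical,
    # e.g. n_inputs=3, n_hiddens=2):
    #   q0 = serialize_params_as_q(np.zeros((3, 2)), np.zeros(3), np.zeros(2))
    #   # q0.shape == (3*2 + 3 + 2,)
    #   (W0, b0, c0) = read_params_from_q(q0, 3, 2)   # recovers the originals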

    # We'll reuse the same perturbed_X for all the evaluations
    # of the potential during the leapfrog.
    if noise_stddev > 0.0:
        perturbed_X = X + np.random.normal(scale=noise_stddev, size=X.shape)
    else:
        perturbed_X = X.copy()

    def U(q):
        # the dataset X is baked into the definition of U(q)
        (W, b, c) = read_params_from_q(q, the_dae.n_inputs, the_dae.n_hiddens)
        (loss, _, _) = the_dae.theano_loss(W, b, c, perturbed_X, X)
        return loss.sum()

    def grad_U(q):
        (W, b, c) = read_params_from_q(q, the_dae.n_inputs, the_dae.n_hiddens)
        (grad_W, grad_b,
         grad_c) = the_dae.theano_gradients(W, b, c, perturbed_X, X)
        # We can reuse the same serialization to flatten the gradients
        # into a vector with the same layout as q (and hence as the
        # momentum). This works because
        #   grad_W.shape == W.shape
        #   grad_b.shape == b.shape
        #   grad_c.shape == c.shape
        return serialize_params_as_q(grad_W, grad_b, grad_c)

    current_q = serialize_params_as_q(the_dae.W, the_dae.b, the_dae.c)
    updated_q = hmc.one_step_update(U,
                                    grad_U,
                                    epsilon,
                                    L,
                                    current_q,
                                    verbose=True)

    if 'hmc' not in the_dae.logging:
        the_dae.logging['hmc'] = {'acceptance_history': []}

    if np.any(current_q != updated_q):
        # It would make more sense to store the acceptance probability
        # instead, but right now one_step_update doesn't return it.
        the_dae.logging['hmc']['acceptance_history'].append(1)
    else:
        the_dae.logging['hmc']['acceptance_history'].append(0)

    # Our function U(q) mutates the dae in a way that makes it necessary
    # to write the parameter values back at the end, even when
    # current_q == updated_q.
    (the_dae.W, the_dae.b,
     the_dae.c) = read_params_from_q(updated_q, the_dae.n_inputs,
                                     the_dae.n_hiddens)
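
# Hypothetical usage, as a sketch only: the DAE object, the minibatch
# sampler and the hyperparameter values below are illustrative assumptions,
# not part of this module.
#   for _ in range(n_updates):
#       X_minibatch = sample_minibatch(trainset)
#       perform_one_update(the_dae, X_minibatch,
#                          noise_stddev=0.1, L=20, epsilon=1e-3)
#       print the_dae.logging['hmc']['acceptance_history'][-1]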