Example #1
import numpy as np

import pints


def initialise_adaptor(theta, L, grad_L, num_adaption_steps, delta, sigma0,
                       use_dense_mass_matrix):
    """
    Creates a generator that terminates by returning an instance of the
    pints.DualAveragingAdaption.

    Initialisation of the adaptor requires a 'reasonable' epsilon which
    is in turn also a generator. The find_reasonable_epsilon generator
    terminates with return of a 'reasonable' epsilon. Intermediate returns
    are the current position of the leapfrog integrator.
    """

    # pick the initial inverse mass matrix as the provided sigma0.
    # reduce to a diagonal vector if not using a dense mass matrix
    if use_dense_mass_matrix:
        init_inv_mass_matrix = sigma0
    else:
        init_inv_mass_matrix = np.diag(sigma0)

    # find a good value to start epsilon at (this will later be refined so that
    # the acceptance probability matches delta)
    epsilon = yield from find_reasonable_epsilon(theta, L, grad_L,
                                                 init_inv_mass_matrix)

    # create adaption for epsilon and mass matrix
    return pints.DualAveragingAdaption(num_adaption_steps, delta, epsilon,
                                       init_inv_mass_matrix)

import unittest

from shared import StreamCapture  # assumed pints test helper


# hypothetical TestCase wrapper so the test snippets below are runnable
class TestDualAveragingAdaption(unittest.TestCase):

    def test_use_dense_mass_matrix(self):
        num_warmup_steps = 200
        target_accept_prob = 1.0
        init_epsilon = 1.0
        init_inv_mass_matrix = np.array([[1, 0], [0, 1]])

        averager = pints.DualAveragingAdaption(num_warmup_steps,
                                               target_accept_prob,
                                               init_epsilon,
                                               init_inv_mass_matrix)

        self.assertTrue(averager.use_dense_mass_matrix())

        init_inv_mass_matrix = np.array([1, 1])
        averager = pints.DualAveragingAdaption(num_warmup_steps,
                                               target_accept_prob,
                                               init_epsilon,
                                               init_inv_mass_matrix)

        self.assertFalse(averager.use_dense_mass_matrix())

    def test_set_inv_mass(self):
        num_warmup_steps = 200
        target_accept_prob = 1.0
        init_epsilon = 1.0
        init_inv_mass_matrix = np.array([[1, 0], [0, 0]])

        with StreamCapture() as c:
            with self.assertRaises(AttributeError):
                pints.DualAveragingAdaption(num_warmup_steps,
                                            target_accept_prob, init_epsilon,
                                            init_inv_mass_matrix)
            self.assertIn("WARNING", c.text())

    def test_accept_prob_of_greater_than_one(self):
        num_warmup_steps = 200
        target_accept_prob = 1.0
        init_epsilon = 1.0
        init_inv_mass_matrix = np.array([[1, 0], [0, 1]])

        averager = pints.DualAveragingAdaption(num_warmup_steps,
                                               target_accept_prob,
                                               init_epsilon,
                                               init_inv_mass_matrix)

        # the input 2.0 should be clipped to 1.0; since that equals the
        # target accept probability, the averaged error _h_bar stays zero
        averager.adapt_epsilon(2.0)
        self.assertEqual(averager._h_bar, 0.0)
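
The test above relies on acceptance probabilities being clipped to one. A
standalone sketch (not the pints implementation) of the dual-averaging
update from Algorithm 6 of Hoffman & Gelman (2014), using the paper's
recommended constants gamma, t0 and kappa; with the clipped input equal to
the target, the averaged error h_bar stays at zero, which is exactly what
the test asserts:

import numpy as np


def dual_averaging_step(m, accept_prob, delta, mu, h_bar, log_eps_bar,
                        gamma=0.05, t0=10.0, kappa=0.75):
    # probabilities above one are clipped before averaging
    accept_prob = min(accept_prob, 1.0)
    # running average of the error between target and observed acceptance
    eta = 1.0 / (m + t0)
    h_bar = (1.0 - eta) * h_bar + eta * (delta - accept_prob)
    # shrink log epsilon towards mu = log(10 * initial epsilon)
    log_eps = mu - np.sqrt(m) / gamma * h_bar
    # polynomially decaying average of log epsilon
    weight = m ** -kappa
    log_eps_bar = weight * log_eps + (1.0 - weight) * log_eps_bar
    return np.exp(log_eps), h_bar, log_eps_bar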
Example #5
import numpy as np

import pints


def nuts_sampler(x0, delta, num_adaption_steps, sigma0, hamiltonian_threshold,
                 max_tree_depth, use_dense_mass_matrix):
    """
    The dual averaging NUTS mcmc sampler given in Algorithm 6 of [1]_.
    Implements the multinomial sampling suggested in [2]_. Implements a mass
    matrix for the dynamics, which is detailed in [2]_. Both the step size and
    the mass matrix is adapted using a combination of the dual averaging
    detailed in [1]_ and the windowed adaption for the mass matrix and step
    size implemented in the Stan library (https://github.com/stan-dev/stan)

    Implemented as a coroutine that continually generates new theta values to
    evaluate (L, L') at. Users must send (L, L') back to the coroutine to
    continue execution. The end of an mcmc step is signalled by generating a
    tuple of values (theta, L, acceptance probability, number of leapfrog
    steps)

    Arguments
    ---------

    x0: ndarray
        starting point
    delta: float
        target acceptance probability (Dual Averaging scheme)
    num_adaption_steps: int
        number of adaption steps (Dual Averaging scheme)
    sigma0: ndarray
        initial proposal covariance, used as the starting inverse mass
        matrix
    hamiltonian_threshold: float
        threshold used to flag divergent iterations
    max_tree_depth: int
        maximum tree depth
    use_dense_mass_matrix: bool
        if False, use a diagonal mass matrix, if True use a fully dense mass
        matrix

    References
    ----------
    .. [1] Hoffman, M. D., & Gelman, A. (2014). The No-U-Turn sampler:
           adaptively setting path lengths in Hamiltonian Monte Carlo.
           Journal of Machine Learning Research, 15(1), 1593-1623.

    .. [2] Betancourt, M. (2018). `A Conceptual Introduction to Hamiltonian
           Monte Carlo`, https://arxiv.org/abs/1701.02434.

    """
    # Initialise sampler with x0 and calculate logpdf
    theta = x0
    L, grad_L = (yield theta)

    # Check first point is somewhere sensible
    if not np.isfinite(L):
        raise ValueError('Initial point for MCMC must have finite logpdf.')

    # pick the initial inverse mass matrix as the provided sigma0.
    # reduce to a diagonal vector if not using a dense mass matrix
    if use_dense_mass_matrix:
        init_inv_mass_matrix = sigma0
    else:
        init_inv_mass_matrix = np.diag(sigma0)

    # find a good value to start epsilon at (this will later be refined so that
    # the acceptance probability matches delta)
    epsilon = yield from find_reasonable_epsilon(theta, L, grad_L,
                                                 init_inv_mass_matrix)

    # create adaption for epsilon and mass matrix
    adaptor = pints.DualAveragingAdaption(num_adaption_steps, delta, epsilon,
                                          init_inv_mass_matrix)

    # start at iteration 1
    m = 1

    # provide an infinite generator of mcmc steps....
    while True:
        # randomly sample momentum
        if use_dense_mass_matrix:
            r0 = np.random.multivariate_normal(np.zeros(len(theta)),
                                               adaptor.get_mass_matrix())
        else:
            r0 = np.random.normal(np.zeros(len(theta)),
                                  np.sqrt(adaptor.get_mass_matrix()))

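        # energy at the start of the trajectory; paths that later deviate
        # from this value by more than hamiltonian_threshold are flagged
        # as divergent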
        hamiltonian0 = L - kinetic_energy(r0, adaptor.get_inv_mass_matrix())

        # create initial integration path state
        state = NutsState(theta=theta,
                          r=r0,
                          L=L,
                          grad_L=grad_L,
                          n=0.0,
                          s=1,
                          alpha=1,
                          n_alpha=1,
                          divergent=False,
                          inv_mass_matrix=adaptor.get_inv_mass_matrix())
        j = 0

        # build up an integration path with 2^j points, stopping when we either
        # encounter a U-Turn, or reach a max number of points 2^max_tree_depth
        while j < max_tree_depth and state.s == 1:

            # pick a random direction to integrate in
            # (to maintain detailed balance)
            if np.random.randint(0, 2):
                vj = 1
            else:
                vj = -1

            # recursively build up the tree in that direction
            state_dash = yield from \
                build_tree(state, vj, j, adaptor,
                           hamiltonian0, hamiltonian_threshold)

            state.update(state_dash, direction=vj, root=True)

            j += 1

        # update current position in chain
        theta = state.theta
        L = state.L
        grad_L = state.grad_L

        # adapt epsilon and mass matrix using dual averaging
        restart_stepsize_adapt = \
            adaptor.step(state.theta, state.alpha / state.n_alpha)
        if restart_stepsize_adapt:
            epsilon = yield from \
                find_reasonable_epsilon(theta, L, grad_L,
                                        adaptor.get_inv_mass_matrix())
            adaptor.init_adapt_epsilon(epsilon)

        # signal calling process that mcmc step is complete by passing a tuple
        # (rather than an ndarray)
        yield (theta, L, grad_L, state.alpha / state.n_alpha, state.n_alpha,
               state.divergent)

        # next step
        m += 1
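
A sketch of how a caller might pump the coroutine above; logpdf_and_grad is
a hypothetical user-supplied function returning (L, L') at a point. As the
docstring notes, a tuple (rather than an ndarray) signals the end of an
mcmc step:

sampler = nuts_sampler(x0, delta=0.8, num_adaption_steps=500,
                       sigma0=np.eye(len(x0)), hamiltonian_threshold=1e3,
                       max_tree_depth=10, use_dense_mass_matrix=False)
reply = next(sampler)                  # first yield is the initial theta
samples = []
while len(samples) < 1000:
    if isinstance(reply, tuple):
        # a completed mcmc step; element 0 is the new theta
        samples.append(reply[0])
        reply = next(sampler)          # resume to start the next step
    else:
        # an ndarray position: send (L, grad_L) back to continue
        reply = sampler.send(logpdf_and_grad(reply))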

import unittest


# hypothetical TestCase wrapper, as in Example #1, so the snippet below is
# runnable
class TestDualAveragingAdaption(unittest.TestCase):

    def test_dual_averaging(self):

        num_warmup_steps = 200
        target_accept_prob = 0.5
        init_epsilon = 1.0
        init_inv_mass_matrix = np.array([[1, 0], [0, 1]])
        target_mass_matrix = np.array([[10, 0], [0, 10]])

        # raises an exception if the requested number of warm-up steps is
        # too low
        with self.assertRaises(ValueError):
            averager = pints.DualAveragingAdaption(10, target_accept_prob,
                                                   init_epsilon,
                                                   init_inv_mass_matrix)

        averager = pints.DualAveragingAdaption(num_warmup_steps,
                                               target_accept_prob,
                                               init_epsilon,
                                               init_inv_mass_matrix)

        # test initialisation
        self.assertEqual(averager._epsilon, init_epsilon)
        np.testing.assert_array_equal(averager.get_inv_mass_matrix(),
                                      init_inv_mass_matrix)
        self.assertEqual(averager._counter, 0)

        # these are the default window sizes for the algorithm
        initial_window = 75
        base_window = 25
        terminal_window = 50

        self.assertEqual(averager._next_window, initial_window + base_window)
        self.assertEqual(averager._adapting, True)

        # dummy function to generate acceptance probabilities
        # dual averaging will attempt to set epsilon so this function
        # returns `target_accept_prob`
        def fake_accept_prob(epsilon):
            return 1.0 / (10.0 * epsilon)
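        # dual averaging should drive epsilon towards the fixed point of
        # this function: 1 / (10 * target_accept_prob) = 0.2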

        stored_x = np.empty((2, base_window))
        for i in range(averager._next_window - 1):
            x = np.random.multivariate_normal(
                np.zeros(2) + 123, target_mass_matrix)
            restart = averager.step(x, fake_accept_prob(averager._epsilon))
            self.assertFalse(restart)
            if i >= averager._initial_window:
                stored_x[:, i - averager._initial_window] = x

        # before the end of the window the mass matrix should not have been
        # updated
        np.testing.assert_array_equal(averager.get_inv_mass_matrix(),
                                      init_inv_mass_matrix)
        x = np.random.multivariate_normal(
            np.zeros(2) + 123, target_mass_matrix)

        np.testing.assert_array_equal(averager._samples[:, :-1],
                                      stored_x[:, :-1])
        restart = averager.step(x, fake_accept_prob(averager._epsilon))

        # end of window triggers a restart
        self.assertTrue(restart)
        stored_x[:, -1] = x

        cov = np.cov(stored_x)
        n = base_window
        p = 2
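        # shrink the empirical covariance towards a small multiple of the
        # identity (the same regularisation Stan uses)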
        adapted_cov = (n / (n + 5.0)) * cov + \
            1e-3 * (5.0 / (n + 5.0)) * np.eye(p)
        np.testing.assert_array_equal(averager.get_inv_mass_matrix(),
                                      adapted_cov)
        np.testing.assert_array_equal(averager.get_mass_matrix(),
                                      np.linalg.inv(adapted_cov))

        # test that we have adapted epsilon correctly
        self.assertAlmostEqual(fake_accept_prob(averager._epsilon),
                               target_accept_prob, 1)

        # test the counters
        self.assertEqual(averager._counter, initial_window + base_window)
        self.assertEqual(averager._next_window,
                         num_warmup_steps - terminal_window)

        # test counters for two more windows
        for i in range(averager._next_window - averager._counter):
            x = np.random.multivariate_normal(
                np.zeros(2) + 123, target_mass_matrix)
            averager.step(x, fake_accept_prob(averager._epsilon))

        self.assertEqual(averager._counter, num_warmup_steps - terminal_window)
        self.assertEqual(averager._next_window, num_warmup_steps)

        for i in range(averager._next_window - averager._counter):
            x = np.random.multivariate_normal(
                np.zeros(2) + 123, target_mass_matrix)
            averager.step(x, fake_accept_prob(averager._epsilon))

        self.assertEqual(averager._counter, num_warmup_steps)
        self.assertEqual(averager._adapting, False)

        # check that subsequent steps do nothing
        old_counter = averager._counter
        averager.step(x, fake_accept_prob(averager._epsilon))
        self.assertEqual(old_counter, averager._counter)
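
The counter checks above walk through the Stan-style schedule of adaptation
windows: an initial fast window for the step size only, growing windows in
which the mass matrix is estimated, then a terminal window reserved for the
step size. A sketch of the boundaries, assuming the window sizes used in
the test and that intermediate windows double in length; for 200 warm-up
steps it yields [100, 150, 200], matching the values asserted above:

def window_boundaries(num_warmup_steps, initial_window=75, base_window=25,
                      terminal_window=50):
    # first boundary: end of the initial window plus one base window
    boundaries = [initial_window + base_window]
    window = base_window
    # keep doubling while another doubled window fits before the
    # terminal window begins
    while boundaries[-1] + 2 * window < num_warmup_steps - terminal_window:
        window *= 2
        boundaries.append(boundaries[-1] + window)
    boundaries.append(num_warmup_steps - terminal_window)
    boundaries.append(num_warmup_steps)
    return boundaries


print(window_boundaries(200))  # -> [100, 150, 200]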