Example #1
    def __init__(self,
                 params,
                 cost_fun,
                 batch_generator=None,
                 stepsize_schedule=ConstantStepsizeSchedule(0.01),
                 burn_in_steps=3000,
                 session=tf.get_default_session(),
                 dtype=tf.float64,
                 seed=None):
        """
        Initializes the corresponding MCMCSampler super object and
        sets member variables.

        Parameters
        ----------
        params : list of `tensorflow.Variable` objects
            Target parameters for which we want to sample new values.

        cost_fun : callable
            Function that takes `params` as input and returns a
            1-d `tensorflow.Tensor` that contains the cost-value.
            Frequently denoted with `U` in literature.

        batch_generator : `BatchGenerator`, optional
            Iterable which returns dictionaries to feed into
            tensorflow.Session.run() calls to evaluate the cost function.
            Defaults to `None` which indicates that no batches shall be fed.

        stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
            Iterator class that produces a stream of stepsize values that
            we can use in our samplers.
            See also: `pysgmcmc.stepsize_schedules`

        burn_in_steps : int, optional
            Number of burn-in steps to perform. In each burn-in step, this
            sampler will adapt its own internal parameters to decrease its error.
            Defaults to `3000`.

        session : `tensorflow.Session`, optional
            Session object which knows about the external part of the graph
            (which defines `Cost`, and possibly batches).
            Used internally to evaluate (burn-in/sample) the sampler.

        dtype : tensorflow.DType, optional
            Type of elements of `tensorflow.Tensor` objects used in this sampler.
            Defaults to `tensorflow.float64`.

        seed : int, optional
            Random seed to use.
            Defaults to `None`.

        See Also
        --------
        pysgmcmc.sampling.MCMCSampler:
            Super class of this class. Has generic methods shared by all
            MCMC samplers implemented as part of this framework.

        pysgmcmc.samplers.sghmc.SGHMCSampler:
            Instantiation of this class.
            Uses SGHMC to sample from the target distribution after burn-in.

        pysgmcmc.samplers.sgld.SGLDSampler:
            Instantiation of this class.
            Uses SGLD to sample from the target distribution after burn-in.

        """
        # Sanitize inputs
        assert isinstance(burn_in_steps, int)

        super().__init__(params=params,
                         cost_fun=cost_fun,
                         stepsize_schedule=stepsize_schedule,
                         batch_generator=batch_generator,
                         seed=seed,
                         dtype=dtype,
                         session=session)

        self.burn_in_steps = burn_in_steps
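
A minimal usage sketch of a sampler built on this base class, driven via the iterator protocol mentioned in the `See Also` entries. The import path and the `(sample, cost)` return value of `next(sampler)` are assumptions inferred from the docstrings in this section:

import tensorflow as tf
from pysgmcmc.samplers.sghmc import SGHMCSampler  # assumed import path

def cost_fun(params):
    # Toy cost: negative log-density of a standard normal (up to a constant).
    return 0.5 * tf.square(params[0])

with tf.Session() as session:
    params = [tf.Variable(tf.zeros([1], dtype=tf.float64))]
    sampler = SGHMCSampler(params=params, cost_fun=cost_fun,
                           session=session, burn_in_steps=100)
    for _ in range(10):
        sample, cost = next(sampler)  # assumed to yield (sample, cost) pairs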
Example #2
    def __init__(self,
                 params,
                 cost_fun,
                 batch_generator=None,
                 stepsize_schedule=ConstantStepsizeSchedule(0.01),
                 burn_in_steps=3000,
                 mdecay=0.05,
                 scale_grad=1.0,
                 session=tf.get_default_session(),
                 dtype=tf.float64,
                 seed=None):
        """ Initialize the sampler parameters and set up a tensorflow.Graph
            for later queries.

        Parameters
        ----------
        params : list of tensorflow.Variable objects
            Target parameters for which we want to sample new values.

        cost_fun : callable
            Function that takes `params` as input and returns a
            1-d `tensorflow.Tensor` that contains the cost-value.
            Frequently denoted with `U` in literature.

        batch_generator : iterable, optional
            Iterable which returns dictionaries to feed into
            tensorflow.Session.run() calls to evaluate the cost function.
            Defaults to `None` which indicates that no batches shall be fed.

        stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
            Iterator class that produces a stream of stepsize values that
            we can use in our samplers.
            See also: `pysgmcmc.stepsize_schedules`

        burn_in_steps : int, optional
            Number of burn-in steps to perform. In each burn-in step, this
            sampler will adapt its own internal parameters to decrease its error.
            Defaults to `3000`.\n
            For reference see:
            `Bayesian Optimization with Robust Bayesian Neural Networks. <http://aad.informatik.uni-freiburg.de/papers/16-NIPS-BOHamiANN.pdf>`_

        mdecay : float, optional
            (Constant) momentum decay per time-step.
            Defaults to `0.05`.\n
            For reference see:
            `Bayesian Optimization with Robust Bayesian Neural Networks. <http://aad.informatik.uni-freiburg.de/papers/16-NIPS-BOHamiANN.pdf>`_

        scale_grad : float, optional
            Value that is used to scale the magnitude of the noise used
            during sampling. In a typical batches-of-data setting this usually
            corresponds to the number of examples in the entire dataset.
            Defaults to `1.0` which corresponds to no scaling.

        session : tensorflow.Session, optional
            Session object which knows about the external part of the graph
            (which defines `Cost`, and possibly batches).
            Used internally to evaluate (burn-in/sample) the sampler.

        dtype : tensorflow.DType, optional
            Type of elements of `tensorflow.Tensor` objects used in this sampler.
            Defaults to `tensorflow.float64`.

        seed : int, optional
            Random seed to use.
            Defaults to `None`.

        See Also
        --------
        pysgmcmc.sampling.BurnInMCMCSampler:
            Base class for `SGHMCSampler` that specifies how actual sampling
            is performed (using iterator protocol, e.g. `next(sampler)`).

        """

        # Set up BurnInMCMCSampler base class:
        # initialize member variables common to all samplers
        # and run initializers for all uninitialized variables in `params`
        # (to avoid errors in the graph definitions below).
        super().__init__(params=params,
                         cost_fun=cost_fun,
                         burn_in_steps=burn_in_steps,
                         batch_generator=batch_generator,
                         seed=seed,
                         dtype=dtype,
                         session=session,
                         stepsize_schedule=stepsize_schedule)

        #  Initialize graph constants {{{ #

        noise = tf.constant(0., name="noise", dtype=dtype)

        scale_grad = tf.constant(scale_grad, dtype=dtype, name="scale_grad")

        epsilon_scaled = tf.divide(self.epsilon,
                                   tf.sqrt(scale_grad),
                                   name="epsilon_scaled")

        mdecay = tf.constant(mdecay, name="mdecay", dtype=dtype)

        #  }}} Initialize graph constants #

        grads = [
            vectorize(gradient)
            for gradient in tf.gradients(self.cost, params)
        ]

        #  Initialize internal sampler parameters {{{ #

        tau = [
            tf.Variable(tf.ones_like(param, dtype=dtype),
                        dtype=dtype,
                        name="tau_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        r = [
            tf.Variable(1. / (tau[i].initialized_value() + 1),
                        name="R_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        g = [
            tf.Variable(tf.ones_like(param, dtype=dtype),
                        dtype=dtype,
                        name="g_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        v_hat = [
            tf.Variable(tf.ones_like(param, dtype=dtype),
                        dtype=dtype,
                        name="v_hat_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        # Initialize Mass matrix inverse
        minv = [
            tf.Variable(tf.divide(tf.constant(1., dtype=dtype),
                                  tf.sqrt(v_hat[i].initialized_value())),
                        name="minv_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        # Initialize momentum
        V = [
            tf.Variable(tf.zeros_like(param, dtype=dtype),
                        dtype=dtype,
                        name="v_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        #  }}} Initialize internal sampler parameters #

        self.minv_t = [None] * len(params)  # gets burned-in

        # NOTE: r_t is set to 1 / (tau + 1) rather than the 1 / tau that the
        # derivation might suggest; empirically, switching to 1 / tau breaks
        # burn-in, and it is not yet clear why.

        for i, (param, grad) in enumerate(zip(params, grads)):
            vectorized_param = self.vectorized_params[i]
            #  Burn-in logic {{{ #
            r_t = tf.assign(r[i], 1. / (tau[i] + 1), name="r_t_{}".format(i))

            # r_t should always use the old value of tau
            with tf.control_dependencies([r_t]):
                tau_t = tf.assign_add(
                    tau[i],
                    safe_divide(-g[i] * g[i] * tau[i], v_hat[i]) + 1,
                    name="tau_t_{}".format(i))

                # minv = v_hat^{-1/2} = 1 / sqrt(v_hat)
                self.minv_t[i] = tf.assign(minv[i],
                                           safe_divide(1.,
                                                       safe_sqrt(v_hat[i])),
                                           name="minv_t_{}".format(i))
                # tau_t, minv_t should always use the old values of G, v_hat
                with tf.control_dependencies([tau_t, self.minv_t[i]]):
                    g_t = tf.assign_add(g[i],
                                        -r_t * g[i] + r_t * grad,
                                        name="g_t_{}".format(i))

                    v_hat_t = tf.assign_add(v_hat[i],
                                            -r_t * v_hat[i] + r_t * grad**2,
                                            name="v_hat_t_{}".format(i))

                    #  }}} Burn-in logic #

                    with tf.control_dependencies([g_t, v_hat_t]):

                        #  Draw random normal sample {{{ #

                        # Equation 10: (co-)variance of the normal sample,
                        #   2 * epsilon_scaled^2 * mdecay * Minv
                        #   - 2 * epsilon_scaled^3 * Minv^2 * noise  (zero here, since noise = 0)
                        #   - epsilon_scaled^4
                        noise_scale = (
                            tf.constant(2., dtype=dtype) *
                            epsilon_scaled**tf.constant(2., dtype=dtype) *
                            mdecay * self.minv_t[i] -
                            tf.constant(2., dtype=dtype) *
                            epsilon_scaled**tf.constant(3., dtype=dtype) *
                            tf.square(self.minv_t[i]) * noise -
                            epsilon_scaled**4)

                        # turn into stddev
                        sigma = tf.sqrt(tf.maximum(noise_scale, 1e-16),
                                        name="sigma_{}".format(i))

                        sample = self._draw_noise_sample(
                            sigma=sigma, shape=vectorized_param.shape)

                        #  }}} Draw random sample #

                        #  HMC Update {{{ #

                        # Equation 10: right side, where:
                        # Minv = v_hat^{-1/2}, Mdecay = epsilon * v_hat^{-1/2} C
                        v_t = tf.assign_add(
                            V[i],
                            -self.epsilon**2 * self.minv_t[i] * grad -
                            mdecay * V[i] + sample,
                            name="v_t_{}".format(i))

                        # Equation 10: left side
                        vectorized_Theta_t = tf.assign_add(
                            vectorized_param, v_t)

                        self.theta_t[i] = tf.assign(
                            param,
                            unvectorize(vectorized_Theta_t,
                                        original_shape=param.shape),
                            name="theta_t_{}".format(i))
Example #3
    def __init__(self,
                 params,
                 cost_fun,
                 batch_generator=None,
                 stepsize_schedule=ConstantStepsizeSchedule(0.01),
                 session=tf.get_default_session(),
                 dtype=tf.float64,
                 seed=None):
        """
        Initialize the sampler base class. Sets up member variables and
        initializes uninitialized target parameters in the current
        `tensorflow.Graph`.

        Parameters
        ----------
        params : list of `tensorflow.Variable` objects
            Target parameters for which we want to sample new values.

        cost_fun : callable
            Function that takes `params` as input and returns a
            1-d `tensorflow.Tensor` that contains the cost-value.
            Frequently denoted with `U` in literature.

        batch_generator : `BatchGenerator`, optional
            Iterable which returns dictionaries to feed into
            tensorflow.Session.run() calls to evaluate the cost function.
            Defaults to `None` which indicates that no batches shall be fed.

        stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
            Iterator class that produces a stream of stepsize values that
            we can use in our samplers.
            See also: `pysgmcmc.stepsize_schedules`

        session : `tensorflow.Session`, optional
            Session object which knows about the external part of the graph
            (which defines `cost`, and possibly batches).
            Used internally to evaluate (burn-in/sample) the sampler.

        dtype : tensorflow.DType, optional
            Type of elements of `tensorflow.Tensor` objects used in this sampler.
            Defaults to `tensorflow.float64`.

        seed : int, optional
            Random seed to use.
            Defaults to `None`.

        See Also
        --------
        pysgmcmc.sampling.BurnInMCMCSampler:
            Abstract base class for samplers that perform a burn-in phase
            to tune their own hyperparameters.
            Inherits from `sampling.MCMCSampler`.

        """
        # Sanitize inputs
        assert batch_generator is None or hasattr(batch_generator, "__next__")
        assert seed is None or isinstance(seed, int)

        assert isinstance(session, (tf.Session, tf.InteractiveSession))
        assert isinstance(dtype, tf.DType)

        assert callable(cost_fun)

        self.dtype = dtype

        self.n_iterations = 0

        self.seed = seed

        assert hasattr(stepsize_schedule, "update")
        assert hasattr(stepsize_schedule, "__next__")
        assert hasattr(stepsize_schedule, "initial_value")

        self.stepsize_schedule = stepsize_schedule

        self.batch_generator = batch_generator
        self.session = session

        self.params = params

        # set up costs
        self.cost_fun = cost_fun
        self.cost = cost_fun(self.params)

        # compute vectorized clones of all parameters
        self.vectorized_params = [vectorize(param) for param in self.params]

        self.epsilon = tf.Variable(self.stepsize_schedule.initial_value,
                                   dtype=self.dtype,
                                   name="epsilon",
                                   trainable=False)

        # Initialize uninitialized parameters before usage in any sampler.
        init = tf.variables_initializer(
            uninitialized_params(session=self.session,
                                 params=self.params + self.vectorized_params +
                                 [self.epsilon]))
        self.session.run(init)

        # query this later to determine the next sample
        self.theta_t = [None] * len(params)
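
A sketch of what the `uninitialized_params` helper used above presumably does, consistent with how it is called here (hypothetical code; the real implementation may differ):

import tensorflow as tf

def uninitialized_params(session, params):
    # Keep only those variables that the session reports as uninitialized.
    init_flags = session.run(
        [tf.is_variable_initialized(param) for param in params])
    return [param for param, initialized in zip(params, init_flags)
            if not initialized]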
Example #4
    def __init__(self,
                 params,
                 cost_fun,
                 batch_generator=None,
                 stepsize_schedule=ConstantStepsizeSchedule(0.01),
                 burn_in_steps=3000,
                 A=1.0,
                 scale_grad=1.0,
                 session=tf.get_default_session(),
                 dtype=tf.float64,
                 seed=None):
        """ Initialize the sampler parameters and set up a tensorflow.Graph
            for later queries.

        Parameters
        ----------
        params : list of tensorflow.Variable objects
            Target parameters for which we want to sample new values.

        cost_fun : callable
            Function that takes `params` as input and returns a
            1-d `tensorflow.Tensor` that contains the cost-value.
            Frequently denoted with `U` in literature.

        batch_generator : BatchGenerator, optional
            Iterable which returns dictionaries to feed into
            tensorflow.Session.run() calls to evaluate the cost function.
            Defaults to `None` which indicates that no batches shall be fed.

        stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
            Iterator class that produces a stream of stepsize values that
            we can use in our samplers.
            See also: `pysgmcmc.stepsize_schedules`

        burn_in_steps : int, optional
            Number of burn-in steps to perform. In each burn-in step, this
            sampler will adapt its own internal parameters to decrease its error.
            Defaults to `3000`.\n
            For reference see:
            `Bayesian Optimization with Robust Bayesian Neural Networks. <http://aad.informatik.uni-freiburg.de/papers/16-NIPS-BOHamiANN.pdf>`_

        A : float, optional
            Constant that scales the preconditioned gradient drift (and,
            correspondingly, the variance of the noise injected below).
            Defaults to `1.0`.

        scale_grad : float, optional
            Value that is used to scale the magnitude of the noise used
            during sampling. In a typical batches-of-data setting this usually
            corresponds to the number of examples in the entire dataset.
            Defaults to `1.0` which corresponds to no scaling.

        session : tensorflow.Session, optional
            Session object which knows about the external part of the graph
            (which defines `cost`, and possibly batches).
            Used internally to evaluate (burn-in/sample) the sampler.

        dtype : tensorflow.DType, optional
            Type of elements of `tensorflow.Tensor` objects used in this sampler.
            Defaults to `tensorflow.float64`.

        seed : int, optional
            Random seed to use.
            Defaults to `None`.

        See Also
        --------
        pysgmcmc.sampling.BurnInMCMCSampler:
            Base class for `SGLDSampler` that specifies how actual sampling
            is performed (using iterator protocol, e.g. `next(sampler)`).

        """

        super().__init__(params=params,
                         cost_fun=cost_fun,
                         batch_generator=batch_generator,
                         burn_in_steps=burn_in_steps,
                         seed=seed,
                         session=session,
                         dtype=dtype)

        n_params = len(params)

        #  Initialize graph constants {{{ #

        A = tf.constant(A, name="A", dtype=dtype)
        noise = tf.constant(0., name="noise", dtype=dtype)
        scale_grad = tf.constant(scale_grad, name="scale_grad", dtype=dtype)

        #  }}} Initialize graph constants #

        grads = [
            vectorize(gradient)
            for gradient in tf.gradients(self.cost, params)
        ]

        #  Initialize internal sampler parameters {{{ #

        tau = [
            tf.Variable(tf.ones_like(param, dtype=dtype),
                        dtype=dtype,
                        name="tau_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        R = [
            tf.Variable(1. / (tau[i].initialized_value() + 1),
                        name="R_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        g = [
            tf.Variable(tf.ones_like(param, dtype=dtype),
                        dtype=dtype,
                        name="g_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        v_hat = [
            tf.Variable(tf.ones_like(param, dtype=dtype),
                        dtype=dtype,
                        name="v_hat_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        #  Initialize mass matrix inverse {{{ #

        minv = [
            tf.Variable(tf.divide(tf.constant(1., dtype=dtype),
                                  tf.sqrt(v_hat[i].initialized_value())),
                        name="minv_{}".format(i),
                        trainable=False)
            for i, param in enumerate(self.vectorized_params)
        ]

        #  }}} Initialize mass matrix inverse #

        #  }}} Initialize internal sampler parameters #

        self.minv_t = [None] * n_params  # gets burned-in

        for i, (param, grad) in enumerate(zip(params, grads)):

            vectorized_param = self.vectorized_params[i]

            #  Burn-in logic {{{ #
            r_t = tf.assign(R[i], 1. / (tau[i] + 1.), name="r_t_{}".format(i))
            # r_t should always use the old value of tau
            with tf.control_dependencies([r_t]):
                tau_t = tf.assign_add(
                    tau[i],
                    safe_divide(-g[i] * g[i] * tau[i], v_hat[i]) + 1,
                    name="tau_t_{}".format(i))

                self.minv_t[i] = tf.assign(minv[i],
                                           safe_divide(1.,
                                                       safe_sqrt(v_hat[i])),
                                           name="minv_t_{}".format(i))
                # tau_t, minv_t should always use the old values of g, v_hat
                with tf.control_dependencies([tau_t, self.minv_t[i]]):
                    g_t = tf.assign_add(g[i],
                                        -r_t * g[i] + r_t * grad,
                                        name="g_t_{}".format(i))

                    v_hat_t = tf.assign_add(v_hat[i],
                                            -r_t * v_hat[i] + r_t * grad**2,
                                            name="v_hat_t_{}".format(i))

                    #  }}} Burn-in logic #
                    with tf.control_dependencies([g_t, v_hat_t]):
                        #  Draw random sample {{{ #

                        sigma = safe_sqrt(2. * self.epsilon * safe_divide(
                            (self.minv_t[i] * (A - noise)), scale_grad))

                        sample = self._draw_noise_sample(
                            sigma=sigma, shape=vectorized_param.shape)

                        #  }}} Draw random sample #

                        #  SGLD Update {{{ #

                        vectorized_theta_t = tf.assign_add(
                            vectorized_param,
                            -self.epsilon * self.minv_t[i] * A * grad + sample,
                        )
                        self.theta_t[i] = tf.assign(
                            param,
                            unvectorize(vectorized_theta_t,
                                        original_shape=param.shape),
                            name="Theta_t_{}".format(i))
Example #5
    def __init__(self,
                 params,
                 cost_fun,
                 tf_scope="default",
                 batch_generator=None,
                 stepsize_schedule=ConstantStepsizeSchedule(0.001),
                 mass=1.0,
                 speed_of_light=0.5,
                 D=1.0,
                 Bhat=0.0,
                 session=tf.get_default_session(),
                 dtype=tf.float64,
                 seed=None):
        """ Initialize the sampler parameters and set up a tensorflow.Graph
            for later queries.

        Parameters
        ----------
        params : list of tensorflow.Variable objects
            Target parameters for which we want to sample new values.

        cost_fun : callable
            Function that takes `params` as input and returns a
            1-d `tensorflow.Tensor` that contains the cost-value.
            Frequently denoted with `U` in literature.

        batch_generator : BatchGenerator, optional
            Iterable which returns dictionaries to feed into
            tensorflow.Session.run() calls to evaluate the cost function.
            Defaults to `None` which indicates that no batches shall be fed.

        stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
            Iterator class that produces a stream of stepsize values that
            we can use in our samplers.
            See also: `pysgmcmc.stepsize_schedules`

        mass : float, optional
            Mass constant.
            Defaults to `1.0`.

        speed_of_light : float, optional
            "Speed of light" constant `c` that bounds the magnitude of the
            parameter updates (momenta are mapped through a saturating
            relativistic velocity below).
            Defaults to `0.5`.

        D : float, optional
            Diffusion constant.
            Defaults to `1.0`.

        Bhat : float, optional
            Estimate of the stochastic gradient noise, subtracted from the
            variance of the noise injected below.
            Defaults to `0.0`.

        session : tensorflow.Session, optional
            Session object which knows about the external part of the graph
            (which defines `Cost`, and possibly batches).
            Used internally to evaluate (burn-in/sample) the sampler.

        dtype : tensorflow.DType, optional
            Type of elements of `tensorflow.Tensor` objects used in this sampler.
            Defaults to `tensorflow.float64`.

        seed : int, optional
            Random seed to use.
            Defaults to `None`.

        See Also
        --------
        pysgmcmc.sampling.MCMCSampler:
            Base class for `RelativisticSGHMCSampler` that specifies how
            actual sampling is performed (using iterator protocol,
            e.g. `next(sampler)`).

        """

        # Set up MCMCSampler base class:
        # initialize member variables common to all samplers
        # and run initializers for all uninitialized variables in `params`
        # (to avoid errors in the graph definitions below).
        super().__init__(params=params,
                         cost_fun=cost_fun,
                         batch_generator=batch_generator,
                         tf_scope=tf_scope,
                         stepsize_schedule=stepsize_schedule,
                         seed=seed,
                         dtype=dtype,
                         session=session)

        # Use `-self.Cost` since the rest of the implementation expects
        # a log likelihood (instead of the *negative* log likelihood that
        # we normally use as costs)
        grads = [
            vectorize(gradient)
            for gradient in tf.gradients(-self.cost, params)
        ]

        with tf.variable_scope(tf_scope, reuse=tf.AUTO_REUSE):

            D = tf.constant(D, dtype=dtype)
            b_hat = tf.constant(Bhat, dtype=dtype)

            # In internal implementation, stick to mathematical formulas.
            # For users, prefer readability.
            m = tf.constant(mass, dtype=dtype)
            c = tf.constant(speed_of_light, dtype=dtype)

        momentum = []

        for i in range(len(params)):
            momentum_params = []

            for momentum_sample in _sample_relativistic_momentum(
                    m=mass,
                    c=speed_of_light,
                    n_params=self.vectorized_params[i].shape[0],
                    seed=self.seed):
                momentum_params.append(momentum_sample)

            momentum_params = tf.reshape(momentum_params,
                                         self.vectorized_params[i].shape)
            momentum_params = tf.Variable(momentum_params, dtype=dtype)
            momentum.append(momentum_params)


        for i, (param, grad) in enumerate(zip(params, grads)):
            vectorized_param = self.vectorized_params[i]

            p_grad = self.epsilon * momentum[i] / (
                m * tf.sqrt(momentum[i] * momentum[i] /
                            (tf.square(m) * tf.square(c)) + 1))

            n = tf.sqrt(
                self.epsilon *
                (2 * D - self.epsilon * b_hat)) * tf.random_normal(
                    shape=vectorized_param.shape, dtype=dtype, seed=seed)

            momentum_t = tf.assign_add(
                momentum[i],
                tf.reshape(self.epsilon * grad + n - D * p_grad,
                           momentum[i].shape))

            p_grad_new = self.epsilon * momentum_t / (
                m * tf.sqrt(momentum_t * momentum_t /
                            (tf.square(m) * tf.square(c)) + 1))
            vectorized_theta_t = tf.assign_add(
                vectorized_param, tf.reshape(p_grad_new,
                                             vectorized_param.shape))

            self.theta_t[i] = tf.assign(
                param,
                unvectorize(vectorized_theta_t, original_shape=param.shape))
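
The loop above can be summarized in a standalone NumPy sketch (hypothetical code). Momenta are mapped through a saturating relativistic velocity, which bounds each parameter update by `epsilon * c`; note that `grad` above is the gradient of `-self.cost`, i.e. a log-likelihood gradient:

import numpy as np

def relativistic_step(theta, p, grad_log_lik, epsilon, m=1.0, c=0.5,
                      D=1.0, b_hat=0.0, rng=np.random):
    def velocity(momentum):
        # Saturates as |momentum| grows, so |velocity| < epsilon * c.
        return epsilon * momentum / (
            m * np.sqrt(momentum * momentum / (m ** 2 * c ** 2) + 1.0))

    n = (np.sqrt(epsilon * (2.0 * D - epsilon * b_hat))
         * rng.standard_normal(p.shape))
    p = p + epsilon * grad_log_lik + n - D * velocity(p)  # momentum_t
    return theta + velocity(p), p                         # theta_t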
Example #6
    def __init__(self,
                 session,
                 sampling_method=Sampler.SGHMC,
                 get_net=get_default_net,
                 batch_generator=generate_batches,
                 batch_size=20,
                 stepsize_schedule=ConstantStepsizeSchedule(np.sqrt(1e-4)),
                 n_nets=100,
                 n_iters=50000,
                 burn_in_steps=1000,
                 sample_steps=100,
                 normalize_input=True,
                 normalize_output=True,
                 seed=None,
                 dtype=tf.float64,
                 **sampler_kwargs):
        """
        Bayesian Neural Networks use Bayesian methods to estimate the posterior
        distribution of a neural network's weights. This allows to also
        predict uncertainties for test points and thus makes Bayesian Neural
        Networks suitable for Bayesian optimization.

        This module uses stochastic gradient MCMC methods to sample
        from the posterior distribution.

        See [1] for more details.

        [1] J. T. Springenberg, A. Klein, S. Falkner, F. Hutter
            Bayesian Optimization with Robust Bayesian Neural Networks.
            In Advances in Neural Information Processing Systems 29 (2016).

        Parameters
        ----------
        session: tensorflow.Session
            A `tensorflow.Session` object used to delegate computations
            performed in this network over to `tensorflow`.

        sampling_method : Sampler, optional
            Method used to sample networks for this BNN.
            Defaults to `Sampler.SGHMC`.

        n_nets: int, optional
            Number of nets to sample during training (and use to predict).
            Defaults to `100`.

        stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
            Iterator class that produces a stream of stepsize values that
            we can use during sampling.
            See also: `pysgmcmc.stepsize_schedules`

        mdecay: float, optional
            Momentum decay per time-step (passed through `sampler_kwargs`
            to the `SGHMCSampler`).
            Defaults to `0.05`.

        n_iters: int, optional
            Total number of iterations of the sampler to perform.
            Defaults to `50000`

        batch_size: int, optional
            Number of datapoints to include in each minibatch.
            Defaults to `20` datapoints per minibatch.

        burn_in_steps: int, optional
            Number of burn-in steps to perform
            Defaults to `1000`.

        sample_steps: int, optional
            Number of sample steps to perform.
            Defaults to `100`.

        normalize_input: bool, optional
            Specifies whether or not input data should be normalized.
            Defaults to `True`

        normalize_output: bool, optional
            Specifies whether or not outputs should be normalized.
            Defaults to `True`

        get_net: callable, optional
            Callable that returns a network specification.
            Expected inputs are a `tensorflow.Placeholder` object that
            serves as feedable input to the network and an integer random seed.
            Expected return value is the networks final output.
            Defaults to `get_default_net`.

        batch_generator: callable, optional
            Generator callable with a signature like `generate_batches` that
            yields dictionaries mapping placeholders to minibatch data.
            Defaults to `generate_batches`.

        seed: int, optional
            Random seed to use in this BNN.
            Defaults to `None`.

        dtype : tf.DType, optional
            Tensorflow datatype to use for internal representation.
            Defaults to `tf.float64`.

        """

        # Sanitize inputs
        assert isinstance(n_nets, int)
        assert isinstance(n_iters, int)
        assert isinstance(burn_in_steps, int)
        assert isinstance(sample_steps, int)
        assert isinstance(batch_size, int)

        assert isinstance(dtype, tf.DType)

        assert n_nets > 0
        assert n_iters > 0
        assert burn_in_steps >= 0
        assert sample_steps > 0
        assert batch_size > 0

        assert callable(get_net)
        assert callable(batch_generator)

        assert hasattr(stepsize_schedule, "update")
        assert hasattr(stepsize_schedule, "__next__")

        if not Sampler.is_supported(sampling_method):
            raise ValueError(
                "'BayesianNeuralNetwork.__init__' received unsupported input "
                "for parameter 'sampling_method'. Input was: {input}.\n"
                "Supported sampling methods are enumerated in "
                "'Sampler' enum type.".format(input=sampling_method))

        self.sampling_method = sampling_method

        self.stepsize_schedule = stepsize_schedule

        self.get_net = get_net
        self.batch_generator = batch_generator

        self.normalize_input = normalize_input
        self.normalize_output = normalize_output

        self.n_nets = n_nets
        self.n_iters = n_iters

        self.batch_size = batch_size

        self.sampler_kwargs = sampler_kwargs

        self.burn_in_steps = burn_in_steps
        self.sample_steps = sample_steps

        self.samples = deque(maxlen=n_nets)

        self.seed = seed

        self.dtype = dtype

        self.session = session

        self.is_trained = False
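
A minimal usage sketch, assuming the class is importable as `BayesianNeuralNetwork` and exposes `train`/`predict` entry points (the import path and those method names are assumptions; they are not shown in this excerpt):

import numpy as np
import tensorflow as tf
# from pysgmcmc... import BayesianNeuralNetwork  (import path assumed)

rng = np.random.RandomState(0)
x_train = rng.rand(64, 1)
y_train = np.sinc(x_train * 10 - 5).ravel()

with tf.Session() as session:
    bnn = BayesianNeuralNetwork(session=session, n_nets=10,
                                burn_in_steps=500, n_iters=2000)
    bnn.train(x_train, y_train)            # assumed training entry point
    mean, variance = bnn.predict(x_train)  # assumed predictive interface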
Example #7
    def __init__(self,
                 particles,
                 cost_fun,
                 batch_generator=None,
                 stepsize_schedule=ConstantStepsizeSchedule(0.1),
                 alpha=0.9,
                 fudge_factor=1e-6,
                 session=tf.get_default_session(),
                 dtype=tf.float64,
                 seed=None):
        """ Initialize the sampler parameters and set up a tensorflow.Graph
            for later queries.

        Parameters
        ----------
        particles : List[tensorflow.Variable]
            List of particles each representing a (different) guess of the
            target parameters of this sampler.

        cost_fun : callable
            Function that takes `params` of *one* particle as input and
            returns a 1-d `tensorflow.Tensor` that contains the cost-value.
            Frequently denoted with `U` in literature.

        batch_generator : iterable, optional
            Iterable which returns dictionaries to feed into
            tensorflow.Session.run() calls to evaluate the cost function.
            Defaults to `None` which indicates that no batches shall be fed.

        stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
            Iterator class that produces a stream of stepsize values that
            we can use in our samplers.
            See also: `pysgmcmc.stepsize_schedules`

        alpha : float, optional
            Exponential decay rate used to accumulate squared gradients in
            the AdaGrad-style `historical_grad` term below.
            Defaults to `0.9`.

        fudge_factor : float, optional
            Small constant added to the denominator of the gradient
            normalization for numerical stability.
            Defaults to `1e-6`.

        session : tensorflow.Session, optional
            Session object which knows about the external part of the graph
            (which defines `Cost`, and possibly batches).
            Used internally to evaluate (burn-in/sample) the sampler.

        dtype : tensorflow.DType, optional
            Type of elements of `tensorflow.Tensor` objects used in this sampler.
            Defaults to `tensorflow.float64`.

        seed : int, optional
            Random seed to use.
            Defaults to `None`.

        See Also
        --------
        pysgmcmc.sampling.MCMCSampler:
            Base class for `SteinVariationalGradientDescentSampler` that
            specifies how actual sampling is performed (using iterator protocol,
            e.g. `next(sampler)`).

        """

        assert isinstance(alpha, (int, float))
        assert isinstance(fudge_factor, (int, float))
        assert callable(cost_fun)

        self.particles = tf.stack(particles)

        def cost_fun_wrapper(params):
            # `params` is ignored here; the cost is evaluated once per
            # particle in `self.particles`.
            return tf.map_fn(lambda particle: cost_fun(particle),
                             self.particles)

        cost_fun_wrapper.__name__ = cost_fun.__name__

        super().__init__(params=particles,
                         cost_fun=cost_fun_wrapper,
                         batch_generator=batch_generator,
                         session=session,
                         seed=seed,
                         dtype=dtype,
                         stepsize_schedule=stepsize_schedule)

        fudge_factor = tf.constant(fudge_factor,
                                   dtype=self.dtype,
                                   name="fudge_factor")

        self.epsilon = tf.Variable(stepsize_schedule.initial_value,
                                   dtype=self.dtype,
                                   name="stepsize")

        self.n_particles = tf.cast(self.particles.shape[0], self.dtype)

        historical_grad = tf.get_variable("historical_grad",
                                          self.particles.shape,
                                          dtype=dtype,
                                          initializer=tf.zeros_initializer())

        self.session.run(
            tf.variables_initializer([historical_grad, self.epsilon]))

        lnpgrad = tf.squeeze(tf.gradients(self.cost, self.particles))

        kernel_matrix, kernel_gradients = self.svgd_kernel(self.particles)

        grad_theta = tf.divide(
            tf.matmul(kernel_matrix, lnpgrad) + kernel_gradients,
            self.n_particles)

        historical_grad_t = tf.assign(
            historical_grad,
            alpha * historical_grad + (1. - alpha) * (grad_theta**2))

        adj_grad = tf.divide(grad_theta,
                             fudge_factor + tf.sqrt(historical_grad_t))

        for i, param in enumerate(self.params):
            self.theta_t[i] = tf.assign_sub(param, self.epsilon * adj_grad[i])
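
The update above, as a standalone NumPy sketch (hypothetical code). It is written in terms of the gradient of log p with a fixed-bandwidth RBF kernel, whereas the class above uses the cost gradient (a negative log-density) and subtracts the step; the bandwidth choice of `svgd_kernel` is not shown in this excerpt:

import numpy as np

def svgd_step(theta, grad_logp, historical_grad, epsilon,
              alpha=0.9, fudge_factor=1e-6, bandwidth=1.0):
    n_particles = theta.shape[0]
    diffs = theta[:, None, :] - theta[None, :, :]
    kernel = np.exp(-(diffs ** 2).sum(-1) / (2.0 * bandwidth ** 2))
    # Attractive term: kernel-weighted log-density gradients.
    # Repulsive term: kernel gradients push particles apart.
    kernel_grads = (kernel[:, :, None] * diffs / bandwidth ** 2).sum(axis=1)
    phi = (kernel @ grad_logp + kernel_grads) / n_particles
    # AdaGrad-style smoothing, as in `historical_grad_t` above.
    historical_grad = alpha * historical_grad + (1.0 - alpha) * phi ** 2
    adj_grad = phi / (fudge_factor + np.sqrt(historical_grad))
    return theta + epsilon * adj_grad, historical_grad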
Example #8
    def _init_basic(self,
                    params,
                    cost_fun,
                    tf_scope="default",
                    batch_generator=None,
                    stepsize_schedule=ConstantStepsizeSchedule(0.01),
                    session=tf.get_default_session(),
                    dtype=tf.float64,
                    seed=None):
        # Sanitize inputs
        assert batch_generator is None or hasattr(batch_generator, "__next__")
        assert seed is None or isinstance(seed, int)

        # assert isinstance(session, (tf.Session, tf.InteractiveSession))
        assert isinstance(dtype, tf.DType)

        # assert callable(cost_fun)

        self.tf_scope = tf_scope

        self.dtype = dtype

        self.n_iterations = 0

        self.seed = seed

        assert hasattr(stepsize_schedule, "update")
        assert hasattr(stepsize_schedule, "__next__")
        assert hasattr(stepsize_schedule, "initial_value")

        self.stepsize_schedule = stepsize_schedule

        self.batch_generator = batch_generator
        self.session = session

        self.params = params

        # set up costs; unlike in the other samplers, `cost_fun` is expected
        # to be a ready-made cost tensor here, so it is used directly rather
        # than being called on `self.params`.
        self.cost_fun = cost_fun
        self.cost = cost_fun

        # compute vectorized clones of all parameters
        with tf.variable_scope(self.tf_scope, reuse=tf.AUTO_REUSE):
            self.vectorized_params = []

            for i, param in enumerate(self.params):
                self.vectorized_params.append(tf.get_variable(
                    initializer=tf.concat([tf.reshape(par.initialized_value(), (-1,)) for par in param], axis=0),
                    name="%s/particle_%s" % (self.tf_scope, i)
                ))

            # self.vectorized_params = tf.stack(self.vectorized_params)

            self.epsilon = tf.get_variable(
                initializer=self.stepsize_schedule.initial_value,
                dtype=self.dtype,
                name="epsilon",
                trainable=False
            )

        # Initialize uninitialized parameters before usage in any sampler.
        init = tf.variables_initializer(
            uninitialized_params(
                session=self.session,
                params=self.vectorized_params + [self.epsilon]
                # params=self.params + self.vectorized_params + [self.epsilon]
            )
        )
        self.session.run(init)

        # query this later to determine the next sample
        self.theta_t = [None] * len(params) * len(params[0])
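
For completeness, a hypothetical sketch of the `vectorize`/`unvectorize` helpers that these samplers rely on (the real implementations may differ, e.g. in how `tf.Variable` inputs are handled so that the result remains assignable):

import tensorflow as tf

def vectorize(tensor):
    # Flatten an arbitrarily shaped tensor into a column vector.
    return tf.reshape(tensor, shape=(-1, 1))

def unvectorize(tensor, original_shape):
    # Inverse of `vectorize`: restore the original parameter shape.
    return tf.reshape(tensor, shape=original_shape)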