class AdaptiveRandomWalkMetropolisHastings(kernel_base.TransitionKernel):
    """Adaptive multisite random walk Metropolis-Hastings algorithm.

    Consider a continuous multivariate random variable X of dimension d,
    distributed according to a probability distribution function pi(x) known
    up to a normalising constant. The general principles are outlined by
    Roberts and Rosenthal (2009) [1]. Specifically we follow Algorithm 6 of
    Sherlock et al. (2010) [2], in which we update the MCMC chain by proposing
    from a multivariate Normal random variable, adapting both the variance and
    correlation structure of the covariance matrix.

    In pseudo code the algorithm is:

    ```
    Inputs:
      i, j iteration indices with initial values 0
      d number of dimensions (i.e. number of parameters)
      N total number of steps
      X[0] initial chain state
      S = 0.001.eye(d) = initial covariance matrix
      m[0] = 2.38^2/d the initial variance scalar i.e. covariance_scaling/d
      t = 0.234 = target_accept_ratio
      c = 0.01 = covariance_scaling_limiter
      k = 0.7 = covariance_scaling_reducer
      pu = 0.95
      f = 0.01 = fixed_variance
      covariance_burnin = 100
      pi(.) denotes probability distribution of argument

    for i = 0,...,N do
      // Adapt covariance_scaling, m
      if u[i-1] < pu then
        // Only adapt if adaptive part was proposed (NB u[-1]=1.0)
        Let alpha[i-1] = min(1, pi(X*)/pi(X[i-1])
        Let z = max(sgn(alpha[i-1] - t), 0) / t - 1  // NB z = (-1, 1/t-1)
        Update m[i] = m[i-1] . exp[ z . min(c, (i-1)^(-k)) ]
      end
      // Adapt covariance matrix, S
      if i > covariance_burnin then
        Update j = j + 1
        Update S[j] = Cov(X[0,...,i])
      end
      // Propose new state
      Draw u[i] ~ Uniform(0, 1)
      if u[i] < pu then
        // Adaptive part
        Draw X* ~ MVN(X[i], m[i] . S[j])
      else
        // Fixed part
        Draw X* ~ MVN(X[i], f . IdentityMatrix / d)
      end
      // Perform MH accept/reject
      Let alpha[i] = min(1, pi(X*)/pi(X[i])
      Draw v ~ Uniform(0, 1)
      if v < alpha[i] then
        Update X[i+1] = X*
      else
        Update X[i+1] = X[i]
      end
      Update i = i + 1
    end
    ```

    #### Example

    ```python
    import numpy as np
    import tensorflow as tf
    import tensorflow_probability as tfp  # 10-09-2020 use nightly
    tfd = tfp.distributions
    dtype = np.float32

    # data
    x = dtype([2.9, 4.2, 8.3, 1.9, 2.6, 1.0, 8.4, 8.6, 7.9, 4.3])
    y = dtype([6.2, 7.8, 8.1, 2.7, 4.8, 2.4, 10.7, 9.0, 9.6, 5.7])

    # define linear regression model
    def Model(x):
        def alpha():
            return tfd.Normal(loc=dtype(0.), scale=dtype(1000.))
        def beta():
            return tfd.Normal(loc=dtype(0.), scale=dtype(100.))
        def sigma():
            return tfd.Gamma(concentration=dtype(0.1), rate=dtype(0.1))
        def y(alpha, beta, sigma):
            mu = alpha + beta * x
            return tfd.Normal(mu, scale=sigma)
        return tfd.JointDistributionNamed(dict(
            alpha = alpha,
            beta = beta,
            sigma = sigma,
            y = y))

    # target log probability of linear model
    def log_prob(param):
        alpha, beta, sigma = tf.unstack(param, axis=-1)
        lp = model.log_prob({'alpha': alpha,
                             'beta': beta,
                             'sigma': sigma,
                             'y': y})
        return tf.reduce_sum(lp)

    # posterior distribution MCMC chain
    @tf.function
    def posterior(iterations, burnin, thinning, initial_state):
        kernel = AdaptiveRandomWalkMetropolisHastings(
            target_log_prob_fn = log_prob,
            initial_state = initial_state)
        return tfp.mcmc.sample_chain(
            num_results = iterations,
            current_state = initial_state,
            kernel = kernel,
            num_burnin_steps = burnin,
            num_steps_between_results = thinning,
            parallel_iterations = 1,
            trace_fn = lambda state, results: results)

    # initialize model
    model = Model(x)
    # start chain at alpha=0.1, beta=0.1, sigma=0.1
    initial_state = dtype([0.1, 0.1, 0.1])

    # estimate posterior distribution
    samples, results = posterior(
        iterations = 1000,
        burnin = 0,
        thinning = 0,
        initial_state = initial_state)

    tf.print('\nAcceptance probability:', tf.math.reduce_mean(
        tf.cast(results.is_accepted, dtype=tf.float32)))
    tf.print('\nalpha samples:', samples[0])
    tf.print('\nbeta samples:', samples[1])
    tf.print('\nsigma samples:', samples[2])
    ```

    #### References

    [1]: Gareth Roberts, Jeffrey Rosenthal. Examples of Adaptive MCMC.
         _Journal of Computational and Graphical Statistics_, 2009.
         http://probability.ca/jeff/ftpdir/adaptex.pdf
    [2]: Chris Sherlock, Paul Fearnhead, Gareth O. Roberts. The Random Walk
         Metropolis: Linking Theory and Practice Through a Case Study.
         _Statistical Science_, 25:172-190, 2010.
         https://projecteuclid.org/download/pdfview_1/euclid.ss/1290175840
    """

    def __init__(
        self,
        target_log_prob_fn,
        initial_state,
        initial_covariance=None,
        initial_covariance_scaling=2.38**2,
        covariance_scaling_reducer=0.7,
        covariance_scaling_limiter=0.01,
        covariance_burnin=100,
        target_accept_ratio=0.234,
        pu=0.95,
        fixed_variance=0.01,
        extra_getter_fn=rwm_extra_getter_fn,
        extra_setter_fn=rwm_extra_setter_fn,
        log_accept_prob_getter_fn=rwm_log_accept_prob_getter_fn,
        seed=None,
        name=None,
    ):
        """Initializes this transition kernel.

        Args:
          target_log_prob_fn: Python callable which takes an argument like
            `current_state` and returns its (possibly unnormalized)
            log-density under the target distribution.
          initial_state: Python `list` of `Tensor`s representing the initial
            state of each parameter.
          initial_covariance: Python `list` of `Tensor`s representing the
            initial covariance of the proposal. The `initial_covariance` and
            `initial_state` should have identical `dtype`s and batch
            dimensions. If `initial_covariance` is `None` then it is
            initialized to a Python `list` of `Tensor`s where each tensor is
            the identity matrix multiplied by 0.001; the `list` structure will
            be identical to `initial_state`. The covariance matrix is tuned
            during the evolution of the MCMC chain.
            Default value: `None`.
          initial_covariance_scaling: Python floating point number
            representing the initial value of the `covariance_scaling`. The
            value of `covariance_scaling` is tuned during the evolution of the
            MCMC chain. Let d represent the number of parameters e.g. as
            given by the `initial_state`. The ratio given by the
            `covariance_scaling` divided by d is used to multiply the running
            covariance. The covariance scaling factor multiplied by the
            covariance matrix is used in the proposal at each step.
            Default value: 2.38**2.
          covariance_scaling_reducer: Python floating point number, bounded
            over the range (0.5, 1.0], representing the constant factor used
            during the adaptation of the `covariance_scaling`.
            Default value: 0.7.
          covariance_scaling_limiter: Python floating point number, bounded
            between 0.0 and 1.0, which places a limit on the maximum amount
            the `covariance_scaling` value can be perturbed at each iteration
            of the MCMC chain.
            Default value: 0.01.
          covariance_burnin: Python integer number of steps to take before
            starting to compute the running covariance.
            Default value: 100.
          target_accept_ratio: Python floating point number, bounded between
            0.0 and 1.0, representing the target acceptance probability of
            the Metropolis-Hastings algorithm.
            Default value: 0.234.
          pu: Python floating point number, bounded between 0.0 and 1.0,
            representing the bounded convergence parameter. See
            `random_walk_mvnorm_fn()` for further details.
            Default value: 0.95.
          fixed_variance: Python floating point number representing the
            variance of the fixed proposal distribution. See
            `random_walk_mvnorm_fn` for further details.
            Default value: 0.01.
          extra_getter_fn: A callable with the signature
            `(kernel_results) -> extra` where `kernel_results` are the
            results of the `inner_kernel`, and `extra` is a nested collection
            of `Tensor`s.
          extra_setter_fn: A callable with the signature
            `(kernel_results, args) -> new_kernel_results` where
            `kernel_results` are the results of the `inner_kernel`, `args`
            are a nested collection of `Tensor`s with the same structure as
            returned by the `extra_getter_fn`, and `new_kernel_results` are a
            copy of `kernel_results` with `args` in the `extra` field set.
          log_accept_prob_getter_fn: A callable with the signature
            `(kernel_results) -> log_accept_prob` where `kernel_results` are
            the results of the `inner_kernel`, and `log_accept_prob` is
            either a scalar, or has shape [num_chains].
          seed: Python integer to seed the random number generator.
            Default value: `None`.
          name: Python `str` name prefixed to Ops created by this function.
            Default value: `None`.

        Returns:
          next_state: Tensor or list of `Tensor`s representing the state(s)
            of the Markov chain(s) at each result step. Has same shape as
            `current_state`.
          kernel_results: `collections.namedtuple` of internal calculations
            used to advance the chain.

        Raises:
          ValueError: if `initial_covariance_scaling` is less than or equal
            to 0.0.
          ValueError: if `covariance_scaling_reducer` is less than or equal
            to 0.5 or greater than 1.0.
          ValueError: if `covariance_scaling_limiter` is less than 0.0 or
            greater than 1.0.
          ValueError: if `covariance_burnin` is less than 0.
          ValueError: if `target_accept_ratio` is less than 0.0 or greater
            than 1.0.
          ValueError: if `pu` is less than 0.0 or greater than 1.0.
          ValueError: if `fixed_variance` is less than 0.0.
        """
        with tf.name_scope(
                mcmc_util.make_name(name,
                                    "AdaptiveRandomWalkMetropolisHastings",
                                    "__init__")) as name:
            # Eager validation of all scalar hyper-parameters before any
            # tensors are built.
            if initial_covariance_scaling <= 0.0:
                raise ValueError(
                    "`{}` must be a `float` greater than 0.0".format(
                        "initial_covariance_scaling"))
            if covariance_scaling_reducer <= 0.5 or covariance_scaling_reducer > 1.0:
                raise ValueError(
                    "`{}` must be a `float` greater than 0.5 and less than or equal to 1.0."
                    .format("covariance_scaling_reducer"))
            if covariance_scaling_limiter < 0.0 or covariance_scaling_limiter > 1.0:
                raise ValueError(
                    "`{}` must be a `float` between 0.0 and 1.0.".format(
                        "covariance_scaling_limiter"))
            if covariance_burnin < 0:
                raise ValueError(
                    "`{}` must be a `integer` greater or equal to 0.".format(
                        "covariance_burnin"))
            if target_accept_ratio <= 0.0 or target_accept_ratio > 1.0:
                raise ValueError(
                    "`{}` must be a `float` between 0.0 and 1.0.".format(
                        "target_accept_ratio"))
            if pu < 0.0 or pu > 1.0:
                raise ValueError(
                    "`{}` must be a `float` between 0.0 and 1.0.".format("pu"))
            # NOTE(review): the check permits fixed_variance == 0.0 but the
            # message says "greater than 0.0" — confirm which is intended.
            if fixed_variance < 0.0:
                raise ValueError(
                    "`{}` must be a `float` greater than 0.0.".format(
                        "fixed_variance"))
            # Normalise the state into a list of tensors so single-tensor and
            # list-of-tensors inputs are handled uniformly.
            if mcmc_util.is_list_like(initial_state):
                initial_state_parts = list(initial_state)
            else:
                initial_state_parts = [initial_state]
            initial_state_parts = [
                tf.convert_to_tensor(s, name="initial_state")
                for s in initial_state_parts
            ]
            # shape[0] is treated as the batch (chain) dimension and
            # shape[-1] as the number of parameters d — TODO confirm against
            # callers.
            shape = tf.stack(initial_state_parts).shape
            dtype = dtype_util.base_dtype(tf.stack(initial_state_parts).dtype)
            if initial_covariance is None:
                # Default proposal covariance: 0.001 * I per batch member, as
                # in the S = 0.001.eye(d) initialisation of the pseudo code.
                initial_covariance = 0.001 * tf.eye(
                    num_rows=shape[-1], dtype=dtype, batch_shape=[shape[0]])
            else:
                initial_covariance = tf.stack(initial_covariance)
            if mcmc_util.is_list_like(initial_covariance):
                initial_covariance_parts = list(initial_covariance)
            else:
                initial_covariance_parts = [initial_covariance]
            initial_covariance_parts = [
                tf.convert_to_tensor(s, name="initial_covariance")
                for s in initial_covariance_parts
            ]
            # Running covariance accumulator for the sample history; updated
            # every step in `one_step` and finalized once past
            # `covariance_burnin`.
            self._running_covar = stats.RunningCovariance(shape=(1, shape[-1]),
                                                          dtype=dtype,
                                                          event_ndims=1)
            self._accum_covar = self._running_covar.initialize()
            # Bernoulli(pu) per chain decides adaptive (1) vs fixed (0)
            # proposal at each step.
            probs = tf.expand_dims(tf.ones([shape[0]], dtype=dtype) * pu,
                                   axis=1)
            self._u = Bernoulli(probs=probs, dtype=tf.dtypes.int32)
            # Initial indicator is forced to all-zeros (fixed part), so the
            # first step never adapts the scaling.
            self._initial_u = tf.zeros_like(self._u.sample(seed=seed),
                                            dtype=tf.dtypes.int32)
            name = mcmc_util.make_name(
                name, "AdaptiveRandomWalkMetropolisHastings", "")
            # NOTE(review): `seed_stream` is constructed but never used in
            # this method — either plumb it into the samplers or delete it.
            seed_stream = SeedStream(seed,
                                     salt="AdaptiveRandomWalkMetropolisHastings")
            self._parameters = dict(
                target_log_prob_fn=target_log_prob_fn,
                initial_state=initial_state,
                initial_covariance=initial_covariance,
                initial_covariance_scaling=initial_covariance_scaling,
                covariance_scaling_reducer=covariance_scaling_reducer,
                covariance_scaling_limiter=covariance_scaling_limiter,
                covariance_burnin=covariance_burnin,
                target_accept_ratio=target_accept_ratio,
                pu=pu,
                fixed_variance=fixed_variance,
                extra_getter_fn=extra_getter_fn,
                extra_setter_fn=extra_setter_fn,
                log_accept_prob_getter_fn=log_accept_prob_getter_fn,
                seed=seed,
                name=name,
            )
            # Inner MH kernel with the initial (unadapted) proposal; rebuilt
            # with the adapted covariance on every call to `one_step`.
            self._impl = metropolis_hastings.MetropolisHastings(
                inner_kernel=random_walk_metropolis.UncalibratedRandomWalk(
                    target_log_prob_fn=target_log_prob_fn,
                    new_state_fn=random_walk_mvnorm_fn(
                        covariance=initial_covariance_parts,
                        pu=pu,
                        fixed_variance=fixed_variance,
                        is_adaptive=self._initial_u,
                        name=name,
                    ),
                    name=name,
                ),
                name=name,
            )

    @property
    def target_log_prob_fn(self):
        return self._parameters["target_log_prob_fn"]

    @property
    def initial_state(self):
        return self._parameters["initial_state"]

    @property
    def initial_covariance(self):
        return self._parameters["initial_covariance"]

    @property
    def initial_covariance_scaling(self):
        return self._parameters["initial_covariance_scaling"]

    @property
    def covariance_scaling_reducer(self):
        return self._parameters["covariance_scaling_reducer"]

    @property
    def covariance_scaling_limiter(self):
        return self._parameters["covariance_scaling_limiter"]

    @property
    def covariance_burnin(self):
        return self._parameters["covariance_burnin"]

    @property
    def target_accept_ratio(self):
        return self._parameters["target_accept_ratio"]

    @property
    def pu(self):
        return self._parameters["pu"]

    @property
    def fixed_variance(self):
        return self._parameters["fixed_variance"]

    def extra_setter_fn(
        self,
        kernel_results,
        num_steps,
        covariance_scaling,
        covariance,
        running_covariance,
        is_accepted,
    ):
        # Delegates to the user-supplied setter. NOTE(review): the final
        # parameter is named `is_accepted` but `one_step` passes the
        # adaptive/fixed indicator (`current_is_adaptive`) in this slot —
        # confirm the intended name.
        return self._parameters["extra_setter_fn"](
            kernel_results,
            num_steps,
            covariance_scaling,
            covariance,
            running_covariance,
            is_accepted,
        )

    def extra_getter_fn(self, kernel_results):
        # Delegates to the user-supplied getter for the `extra` fields.
        return self._parameters["extra_getter_fn"](kernel_results)

    def log_accept_prob_getter_fn(self, kernel_results):
        # Delegates to the user-supplied getter for the log-accept prob.
        return self._parameters["log_accept_prob_getter_fn"](kernel_results)

    @property
    def seed(self):
        return self._parameters["seed"]

    @property
    def name(self):
        return self._parameters["name"]

    @property
    def parameters(self):
        """Return `dict` of ``__init__`` arguments and their values."""
        return self._parameters

    @property
    def running_covar(self):
        return self._running_covar

    @property
    def u(self):
        return self._u

    @property
    def initial_u(self):
        return self._initial_u

    @property
    def is_calibrated(self):
        return True

    def update_covariance_scaling(self, prev_results, num_steps):
        """Returns the adapted covariance-scaling factor m[i].

        Implements m[i] = m[i-1] * exp(z * min(c, num_steps^(-k))) from the
        pseudo code, where z depends on whether the previous acceptance
        ratio exceeded the target.
        """
        previous_covar_scaling = self.extra_getter_fn(
            prev_results).covariance_scaling
        previous_log_accept_ratio = self.log_accept_prob_getter_fn(
            prev_results)
        dtype = dtype_util.base_dtype(previous_covar_scaling.dtype)
        covariance_scaling_reducer = tf.constant(
            self.covariance_scaling_reducer, dtype=dtype)
        covariance_scaling_limiter = tf.constant(
            self.covariance_scaling_limiter, dtype=dtype)
        target_accept_ratio = tf.constant(self.target_accept_ratio,
                                          dtype=dtype)
        # cond > 0 iff previous acceptance ratio exceeded the target (compare
        # in log space).
        cond = previous_log_accept_ratio - tf.math.log(target_accept_ratio)
        # multiplier is z in the pseudo code: 1/t - 1 when accepted above
        # target, otherwise -1.
        multiplier = tf.math.maximum(tf.math.sign(cond), tf.constant(
            0.0, dtype)) * (tf.constant(1.0, dtype) /
                            target_accept_ratio) - tf.constant(1.0, dtype)
        # delta = min(c, num_steps^(-k)) caps the per-step perturbation.
        delta = tf.math.minimum(
            covariance_scaling_limiter,
            tf.cast(num_steps, dtype=dtype)**(-covariance_scaling_reducer),
        )
        return previous_covar_scaling * tf.math.exp(delta * multiplier)

    def one_step(self, current_state, previous_kernel_results, seed=None):
        """Advances the chain one step, adapting scaling and covariance.

        NOTE(review): the `seed` argument is accepted but not used; the
        Bernoulli draw below uses the fixed `self.seed` on every call —
        confirm that this yields fresh randomness per step in the intended
        (graph) execution mode.
        """
        with tf.name_scope(
                mcmc_util.make_name(self.name,
                                    "AdaptiveRandomWalkMetropolisHastings",
                                    "one_step")):
            with tf.name_scope("initialize"):
                if mcmc_util.is_list_like(current_state):
                    current_state_parts = list(current_state)
                else:
                    current_state_parts = [current_state]
                current_state_parts = [
                    tf.convert_to_tensor(s, name="current_state")
                    for s in current_state_parts
                ]
            # Note 'covariance_scaling' and 'accum_covar' are updated every
            # step but 'covariance' is not updated until
            # 'num_steps' >= 'covariance_burnin'.
            num_steps = self.extra_getter_fn(previous_kernel_results).num_steps
            # for parallel processing efficiency use gather() rather than
            # cond()?
            previous_is_adaptive = self.extra_getter_fn(
                previous_kernel_results).is_adaptive
            # Per chain: keep the old scaling (index 0) when the previous
            # proposal was from the fixed part, else the adapted one
            # (index 1).
            current_covariance_scaling = tf.gather(
                tf.stack(
                    [
                        self.extra_getter_fn(
                            previous_kernel_results).covariance_scaling,
                        self.update_covariance_scaling(previous_kernel_results,
                                                       num_steps),
                    ],
                    axis=-1,
                ),
                previous_is_adaptive,
                batch_dims=1,
                axis=1,
            )
            # Fold the current state into the running covariance accumulator.
            previous_accum_covar = self.extra_getter_fn(
                previous_kernel_results).running_covariance
            current_accum_covar = self.running_covar.update(
                state=previous_accum_covar, new_sample=current_state_parts)
            # Switch to the empirical covariance once past the burn-in.
            previous_covariance = self.extra_getter_fn(
                previous_kernel_results).covariance
            current_covariance = tf.gather(
                [
                    previous_covariance,
                    self.running_covar.finalize(current_accum_covar, ddof=1),
                ],
                tf.cast(
                    num_steps >= self.covariance_burnin,
                    dtype=tf.dtypes.int32,
                ),
            )
            # Scale the covariance by m[i] (per chain) for the proposal.
            current_scaled_covariance = tf.squeeze(
                tf.expand_dims(current_covariance_scaling, axis=1) *
                tf.stack([current_covariance]),
                axis=0,
            )
            current_scaled_covariance = tf.unstack(current_scaled_covariance)
            if mcmc_util.is_list_like(current_scaled_covariance):
                current_scaled_covariance_parts = list(
                    current_scaled_covariance)
            else:
                current_scaled_covariance_parts = [current_scaled_covariance]
            current_scaled_covariance_parts = [
                tf.convert_to_tensor(s, name="current_scaled_covariance")
                for s in current_scaled_covariance_parts
            ]
            # Draw the adaptive/fixed indicator for this step.
            current_is_adaptive = self.u.sample(seed=self.seed)
            # Rebuild the inner MH kernel with this step's proposal; this
            # mutates self, which the TransitionKernel contract tolerates
            # only because the kernel is re-created per step — handle with
            # care if refactoring.
            self._impl = metropolis_hastings.MetropolisHastings(
                inner_kernel=random_walk_metropolis.UncalibratedRandomWalk(
                    target_log_prob_fn=self.target_log_prob_fn,
                    new_state_fn=random_walk_mvnorm_fn(
                        covariance=current_scaled_covariance_parts,
                        pu=self.pu,
                        fixed_variance=self.fixed_variance,
                        is_adaptive=current_is_adaptive,
                        name=self.name,
                    ),
                    name=self.name,
                ),
                name=self.name,
            )
            new_state, new_inner_results = self._impl.one_step(
                current_state, previous_kernel_results)
            # Stash the adapted quantities in the `extra` field for the next
            # step.
            new_inner_results = self.extra_setter_fn(
                new_inner_results,
                num_steps + 1,
                tf.squeeze(current_covariance_scaling, axis=1),
                current_covariance,
                current_accum_covar,
                current_is_adaptive,
            )
            return [new_state, new_inner_results]

    def bootstrap_results(self, init_state):
        """Creates initial `state`."""
        with tf.name_scope(
                mcmc_util.make_name(self.name,
                                    "AdaptiveRandomWalkMetropolisHastings",
                                    "bootstrap_results")):
            if mcmc_util.is_list_like(init_state):
                initial_state_parts = list(init_state)
            else:
                initial_state_parts = [init_state]
            initial_state_parts = [
                tf.convert_to_tensor(s, name="init_state")
                for s in initial_state_parts
            ]
            shape = tf.stack(initial_state_parts).shape
            dtype = dtype_util.base_dtype(tf.stack(initial_state_parts).dtype)
            # Broadcast the scalar initial scaling across the batch (chain)
            # dimension.
            init_covariance_scaling = tf.cast(
                tf.repeat([self.initial_covariance_scaling],
                          repeats=[shape[0]],
                          axis=0),
                dtype=dtype,
            )
            inner_results = self._impl.bootstrap_results(init_state)
            # m[0] = covariance_scaling / d, matching the pseudo code.
            return self.extra_setter_fn(
                inner_results,
                0,
                init_covariance_scaling / shape[-1],
                self.initial_covariance,
                self._accum_covar,
                self.initial_u,
            )
# NOTE(review): this `__init__` is a verbatim duplicate of
# `AdaptiveRandomWalkMetropolisHastings.__init__` defined earlier in the
# file — most likely a copy/merge artifact. Confirm and remove one copy.
def __init__(
    self,
    target_log_prob_fn,
    initial_state,
    initial_covariance=None,
    initial_covariance_scaling=2.38**2,
    covariance_scaling_reducer=0.7,
    covariance_scaling_limiter=0.01,
    covariance_burnin=100,
    target_accept_ratio=0.234,
    pu=0.95,
    fixed_variance=0.01,
    extra_getter_fn=rwm_extra_getter_fn,
    extra_setter_fn=rwm_extra_setter_fn,
    log_accept_prob_getter_fn=rwm_log_accept_prob_getter_fn,
    seed=None,
    name=None,
):
    """Initializes this transition kernel.

    Validates all scalar hyper-parameters, normalises `initial_state` and
    `initial_covariance` into lists of tensors, sets up the running
    covariance accumulator and the Bernoulli(pu) adaptive/fixed indicator,
    and builds the inner Metropolis-Hastings kernel with the initial
    proposal covariance.

    Args:
      target_log_prob_fn: Python callable which takes an argument like
        `current_state` and returns its (possibly unnormalized) log-density
        under the target distribution.
      initial_state: Python `list` of `Tensor`s representing the initial
        state of each parameter.
      initial_covariance: Python `list` of `Tensor`s representing the
        initial covariance of the proposal; if `None`, initialized to
        0.001 times the identity per batch member. Default value: `None`.
      initial_covariance_scaling: Python float; initial value of the
        `covariance_scaling`, tuned during the chain. The ratio
        `covariance_scaling / d` multiplies the running covariance.
        Default value: 2.38**2.
      covariance_scaling_reducer: Python float in (0.5, 1.0]; constant
        factor used while adapting `covariance_scaling`. Default: 0.7.
      covariance_scaling_limiter: Python float in [0.0, 1.0]; caps how much
        `covariance_scaling` can be perturbed per iteration. Default: 0.01.
      covariance_burnin: Python integer number of steps before the running
        covariance is used. Default value: 100.
      target_accept_ratio: Python float in (0.0, 1.0]; target acceptance
        probability of the MH algorithm. Default value: 0.234.
      pu: Python float in [0.0, 1.0]; bounded convergence parameter (see
        `random_walk_mvnorm_fn()`). Default value: 0.95.
      fixed_variance: Python float; variance of the fixed proposal
        distribution (see `random_walk_mvnorm_fn`). Default value: 0.01.
      extra_getter_fn: Callable `(kernel_results) -> extra` for the inner
        kernel's extra fields.
      extra_setter_fn: Callable `(kernel_results, args) ->
        new_kernel_results` writing the extra fields back.
      log_accept_prob_getter_fn: Callable `(kernel_results) ->
        log_accept_prob` (scalar or shape [num_chains]).
      seed: Python integer to seed the random number generator.
        Default value: `None`.
      name: Python `str` name prefixed to Ops created by this function.
        Default value: `None`.

    Raises:
      ValueError: if any hyper-parameter falls outside its documented
        range (see the argument descriptions above).
    """
    with tf.name_scope(
            mcmc_util.make_name(name,
                                "AdaptiveRandomWalkMetropolisHastings",
                                "__init__")) as name:
        # Validate scalar hyper-parameters eagerly.
        if initial_covariance_scaling <= 0.0:
            raise ValueError(
                "`{}` must be a `float` greater than 0.0".format(
                    "initial_covariance_scaling"))
        if covariance_scaling_reducer <= 0.5 or covariance_scaling_reducer > 1.0:
            raise ValueError(
                "`{}` must be a `float` greater than 0.5 and less than or equal to 1.0."
                .format("covariance_scaling_reducer"))
        if covariance_scaling_limiter < 0.0 or covariance_scaling_limiter > 1.0:
            raise ValueError(
                "`{}` must be a `float` between 0.0 and 1.0.".format(
                    "covariance_scaling_limiter"))
        if covariance_burnin < 0:
            raise ValueError(
                "`{}` must be a `integer` greater or equal to 0.".format(
                    "covariance_burnin"))
        if target_accept_ratio <= 0.0 or target_accept_ratio > 1.0:
            raise ValueError(
                "`{}` must be a `float` between 0.0 and 1.0.".format(
                    "target_accept_ratio"))
        if pu < 0.0 or pu > 1.0:
            raise ValueError(
                "`{}` must be a `float` between 0.0 and 1.0.".format("pu"))
        # NOTE(review): check allows 0.0 but the message says "greater
        # than 0.0" — confirm intent.
        if fixed_variance < 0.0:
            raise ValueError(
                "`{}` must be a `float` greater than 0.0.".format(
                    "fixed_variance"))
        # Accept either a single tensor or a list of tensors for the state.
        if mcmc_util.is_list_like(initial_state):
            initial_state_parts = list(initial_state)
        else:
            initial_state_parts = [initial_state]
        initial_state_parts = [
            tf.convert_to_tensor(s, name="initial_state")
            for s in initial_state_parts
        ]
        # shape[0] = batch (chain) dim, shape[-1] = number of parameters d
        # — TODO confirm against callers.
        shape = tf.stack(initial_state_parts).shape
        dtype = dtype_util.base_dtype(tf.stack(initial_state_parts).dtype)
        if initial_covariance is None:
            # Default: 0.001 * identity per batch member.
            initial_covariance = 0.001 * tf.eye(
                num_rows=shape[-1], dtype=dtype, batch_shape=[shape[0]])
        else:
            initial_covariance = tf.stack(initial_covariance)
        if mcmc_util.is_list_like(initial_covariance):
            initial_covariance_parts = list(initial_covariance)
        else:
            initial_covariance_parts = [initial_covariance]
        initial_covariance_parts = [
            tf.convert_to_tensor(s, name="initial_covariance")
            for s in initial_covariance_parts
        ]
        # Running covariance accumulator for the sample history.
        self._running_covar = stats.RunningCovariance(shape=(1, shape[-1]),
                                                      dtype=dtype,
                                                      event_ndims=1)
        self._accum_covar = self._running_covar.initialize()
        # Bernoulli(pu) per chain chooses adaptive (1) vs fixed (0) part.
        probs = tf.expand_dims(tf.ones([shape[0]], dtype=dtype) * pu,
                               axis=1)
        self._u = Bernoulli(probs=probs, dtype=tf.dtypes.int32)
        # First step always uses the fixed part (all-zero indicator).
        self._initial_u = tf.zeros_like(self._u.sample(seed=seed),
                                        dtype=tf.dtypes.int32)
        name = mcmc_util.make_name(
            name, "AdaptiveRandomWalkMetropolisHastings", "")
        # NOTE(review): `seed_stream` is assigned but never used here.
        seed_stream = SeedStream(seed,
                                 salt="AdaptiveRandomWalkMetropolisHastings")
        self._parameters = dict(
            target_log_prob_fn=target_log_prob_fn,
            initial_state=initial_state,
            initial_covariance=initial_covariance,
            initial_covariance_scaling=initial_covariance_scaling,
            covariance_scaling_reducer=covariance_scaling_reducer,
            covariance_scaling_limiter=covariance_scaling_limiter,
            covariance_burnin=covariance_burnin,
            target_accept_ratio=target_accept_ratio,
            pu=pu,
            fixed_variance=fixed_variance,
            extra_getter_fn=extra_getter_fn,
            extra_setter_fn=extra_setter_fn,
            log_accept_prob_getter_fn=log_accept_prob_getter_fn,
            seed=seed,
            name=name,
        )
        # Inner MH kernel with the initial proposal; rebuilt every step.
        self._impl = metropolis_hastings.MetropolisHastings(
            inner_kernel=random_walk_metropolis.UncalibratedRandomWalk(
                target_log_prob_fn=target_log_prob_fn,
                new_state_fn=random_walk_mvnorm_fn(
                    covariance=initial_covariance_parts,
                    pu=pu,
                    fixed_variance=fixed_variance,
                    is_adaptive=self._initial_u,
                    name=name,
                ),
                name=name,
            ),
            name=name,
        )
def _left_doubling_increments(batch_shape, max_doublings, step_size,
                              seed=None, name=None):
    """Computes the doubling increments for the left end point.

    The doubling procedure grows an initial interval until it contains the
    true slice. At each doubling the interval width doubles, extending to the
    left or the right with equal probability. With left end point `L(0)` and
    width `w(0)`, iteration k satisfies:

    ```none
    w(k) = 2 * w(k-1)
    L(k) = L(k-1) - w(k-1) * X_k,   X_k ~ Bernoulli(0.5)
    or,
    L(0) - L(k) = w(0) Sum(2^i * X(i+1), 0 <= i < k)
    ```

    This function returns the sequences `L(0)-L(k)` and `w(k)` for
    k = 0, ..., `max_doublings`, independently for each chain.

    Args:
      batch_shape: Positive int32 `tf.Tensor`. The batch shape.
      max_doublings: Scalar positive int32 `tf.Tensor`. The maximum number of
        doublings to consider.
      step_size: A real `tf.Tensor` with shape compatible with [num_chains].
        The size of the initial interval.
      seed: Tensor seed pair. The random seed.
      name: Python `str` name prefixed to Ops created by this function.
        Default value: `None` (i.e., 'find_slice_bounds').

    Returns:
      left_increments: A tensor of shape (max_doublings+1, batch_shape). The
        relative position of the left end point after the doublings.
      widths: A tensor of shape (max_doublings+1, ones_like(batch_shape)).
        The widths of the intervals at each stage of the doubling.
    """
    with tf.name_scope(name or 'left_doubling_increments'):
        step_size = tf.convert_to_tensor(value=step_size)
        dtype = dtype_util.base_dtype(step_size.dtype)

        # Interval widths w(k) = 2^k * step_size, reshaped to
        # [max_doublings + 1, 1, 1, ...] so they broadcast over the batch.
        multiplier_shape = ps.concat(
            ([max_doublings + 1], ps.ones_like(batch_shape)), axis=0)
        doubling_factors = tf.reshape(
            tf.cast(2**tf.range(0, max_doublings + 1), dtype=dtype),
            shape=multiplier_shape)
        widths = doubling_factors * step_size

        # One fair coin flip X_k per doubling stage per chain: 1 means the
        # interval was extended to the left at that stage.
        flip_shape = ps.concat(([max_doublings + 1], batch_shape), axis=0)
        went_left = Bernoulli(0.5, dtype=dtype).sample(
            sample_shape=flip_shape, seed=seed)

        # Exclusive cumulative sum of the widths of the left extensions gives
        # L(0) - L(k), the distance travelled by the left end point.
        left_increments = tf.cumsum(widths * went_left,
                                    exclusive=True,
                                    axis=0)
        return left_increments, widths