def draw_samples(self, num_samples, X1, X0=None, Y0=None, base_rvs=None,
                 means=None, cov=None, name=None, **kwargs):
    '''
    Draw multivariate normal samples at `X1` conditioned on `(X0, Y0)`.

    When `cov` is not supplied, both `means` and `cov` are obtained from
    `self.predict(X1, X0, Y0, full_cov=True, **kwargs)`. When `cov` is
    supplied without `means`, the returned samples are zero-mean.

    Args:
      num_samples: number of standard normal draws when `base_rvs`
        is not given.
      X1: query inputs, forwarded to `self.predict` and
        `self.compute_cholesky`.
      X0, Y0: conditioning data; both are required.
      base_rvs: optional pre-drawn standard normal variables.
      means, cov: optional precomputed posterior terms.
      name: optional name for the returned op.

    Raises:
      NotImplementedError: if `X0` or `Y0` is None.
    '''
    if X0 is None or Y0 is None:
        raise NotImplementedError(
            "Sampling from the prior not yet supported")

    if cov is None:
        means, cov = self.predict(X1, X0, Y0, full_cov=True, **kwargs)

    factor = self.compute_cholesky(X1, cov=cov)
    if base_rvs is None:
        base_rvs = tf.random_normal(
            [num_samples, tf.shape(factor)[-1]],
            dtype=factor.dtype,
        )

    # Contract the factor's last axis with the draws' last axis; the swap
    # below then yields samples shaped [..., num_samples, mvn_dim]
    offsets = tf.tensordot(factor, base_rvs, [[-1], [-1]])
    unswapped = offsets if means is None else means + offsets
    return utils.swap_axes(unswapped, -1, -2, name=name)
def draw_samples(self, num_samples, means=None, cov=None, base_rvs=None,
                 parallelism=None, chol=None, model=None, name=None,
                 **kwargs):
    '''
    Draw multivariate normal samples from a covariance or its Cholesky factor.

    Args:
      num_samples: number of standard normal draws when `base_rvs`
        is not given.
      means: optional means added to the residuals before the final
        axis swap; when None the samples are zero-mean.
      cov: covariance, only used to compute `chol` when that is None.
      base_rvs: optional pre-drawn standard normal variables.
      parallelism: width of the generated draws; defaults to the
        static size of the last axis of `chol`.
      chol: optional precomputed Cholesky factor.
      model: object queried for an optional `jitter` attribute;
        defaults to `self.model`.
      name: optional name for the returned op.
    '''
    model = self.model if model is None else model

    with tf.name_scope('draw_samples'):
        # Cholesky factor LL^{T} = K
        if chol is None:
            chol = utils.jitter_cholesky(
                cov, jitter=getattr(model, 'jitter', None))

        # i.i.d. standard normal random variables
        if base_rvs is None:
            if parallelism is None:
                parallelism = chol.get_shape()[-1]
            base_rvs = tf.random_normal(
                tf.TensorShape((num_samples, parallelism)),
                dtype=chol.dtype,
            )

        # Correlate the draws, then optionally shift by the means
        offsets = tf.tensordot(chol, base_rvs, [[-1], [-1]])
        unswapped = offsets if means is None else means + offsets
        return utils.swap_axes(unswapped, -1, -2, name=name)
def draw_samples(self, num_samples, means, cov, lower=None, beta=None,
                 name=None, **kwargs):
    r'''
    Draw samples from the positive orthant of a multivariate normal
    distribution with rescaled covariance
    \tilde{\Sigma} := (0.5 * \pi * \beta) * \Sigma.

    Zero-mean Gaussian residuals are drawn by the parent class's
    `draw_samples`, passed through `self.transform_residuals`, and then
    re-centered on `means` when those are given.

    Args:
      num_samples: number of samples, forwarded to the parent sampler.
      means: means to re-center on, or None for un-centered samples.
        Their last two axes are swapped before being added.
      cov: covariance forwarded to the parent sampler.
      lower: forwarded to `self.transform_residuals`; defaults to
        `self.lower`.
      beta: forwarded to `self.transform_residuals`; defaults to
        `self.beta`.
      name: optional name for the returned op.
      **kwargs: forwarded to the parent sampler.
    '''
    if (lower is None):
        lower = self.lower
    if (beta is None):
        beta = self.beta

    with tf.name_scope('draw_samples') as scope:
        # Sample and transform Gaussian residuals (means deliberately None
        # so that the parent returns zero-mean draws)
        raw_residuals = super().draw_samples(num_samples, None, cov, **kwargs)
        residuals = self.transform_residuals(raw_residuals, beta, lower)

        # Re-center samples
        if (means is not None):
            samples = tf.add(residuals,
                             utils.swap_axes(means, -1, -2),
                             name=name)
        else:
            samples = tf.identity(residuals, name=name)
        return samples
def get_marginal_posterior(self, mean, var, xov, precis, resid):
    '''
    Compute marginal Gaussian posteriors given prior terms.

    Args:
      mean: prior mean term.
      var: prior variance term.
      xov: cross-covariance term; contracted with `precis` along its
        second-to-last axis (or axis 0 when its rank is below 2).
      precis: precision term, contracted along its first axis.
      resid: residual term, contracted along its first axis.

    Returns:
      Tuple `(posterior_mean, posterior_var)`.

    # [?] use einsum for better parallel computation
    '''
    rank = utils.tensor_rank(xov)
    axes = [[max(0, rank - 2)], [0]]
    beta = tf.tensordot(xov, precis, axes)
    if rank > 1:
        xov = utils.swap_axes(xov, -1, -2)  # matrix transpose

    # `keepdims` is the current spelling of the deprecated `keep_dims`
    return (
        mean + tf.tensordot(beta, resid, [[-1], [0]]),
        var - tf.reduce_sum(beta*xov, axis=-1, keepdims=True),
    )
def _monte_carlo(self, means, cov, samples=None, weights=None, beta=None,
                 lower=None, num_fantasies=None, transform_samples=None,
                 parallelism=None, incumbents=None, **kwargs):
    '''
    Monte Carlo estimate to q-LCB/q-UCB.

    Args:
      means, cov: posterior terms, forwarded to `self.draw_samples`
        when `samples` is None.
      samples: optional pre-drawn samples.
      weights: optional weights forwarded to `self.reduce_samples`.
      beta: forwarded to `self.draw_samples` or
        `self.transform_residuals`.
      lower: truthy for the lower bound (minimum over the last axis),
        falsy for the upper bound; defaults to `self.lower`.
      num_fantasies: number of samples to draw; defaults to
        `self.num_fantasies`.
      transform_samples: whether supplied `samples` still need to be
        transformed; defaults to `self.transform_samples`.
      parallelism: accepted for interface compatibility; not used here.
      incumbents: optional values that the per-sample extrema are
        clamped against element-wise.
      **kwargs: forwarded to `self.draw_samples`.

    Returns:
      The (weighted) reduction of the per-sample extrema produced by
      `self.reduce_samples`.
    '''
    if (lower is None):
        lower = self.lower
    if (num_fantasies is None):
        num_fantasies = self.num_fantasies
    if (transform_samples is None):
        transform_samples = self.transform_samples

    with tf.name_scope('monte_carlo') as scope:
        if (samples is None):
            samples = self.draw_samples(num_fantasies, means, cov,
                                        beta=beta, lower=lower, **kwargs)
        elif transform_samples:
            # Transform samples drawn from N(means, cov)
            # [!] Improve me, assumed shapes:
            #   `means` = [..., parallelism, 1]
            #   `samples` = [..., num_samples, parallelism]
            mu = utils.swap_axes(means, axis1=-1, axis2=-2)
            residuals = self.transform_residuals(samples - mu, beta, lower)
            samples = mu + residuals

        # Get sample extrema
        extrema_fn = tf.reduce_min if lower else tf.reduce_max
        extrema = extrema_fn(samples, axis=-1)
        if (incumbents is not None):
            # Element-wise clamp against the incumbents. `tf.reduce_max`
            # would treat `incumbents` as the reduction axis, so the
            # upper-bound case must use `tf.maximum`.
            if lower:
                extrema = tf.minimum(extrema, incumbents)
            else:
                extrema = tf.maximum(extrema, incumbents)

        # (Weighted) sample average
        estimate = self.reduce_samples(extrema, weights, axis=-1)
        return estimate
# Per-shard worker: evaluates `fn` on one shard of candidate pools.
#
# `pools` is always the first positional shard. Three paths follow:
#   * `use_rand_features` truthy: `means` and the transposed `samples` both
#     come from `self.mapping`, and the result of
#     `fn(means, None, samples=samples, ...)` is returned directly.
#   * `posteriors is not None`: the last positional shard is unpacked as a
#     precomputed `(means, cov, chol)` triple and removed from `sharded_args`.
#   * otherwise: `means, cov` come from `model.get_or_create_node` wrapping
#     `model.predict`, and `chol` from `utils.jitter_cholesky`.
# The last two paths return `(losses, (means, cov, chol))`.
#
# The `kwargs=kwargs` default captures the enclosing scope's `kwargs` at
# definition time.
#
# NOTE(review): the `theta_mu = tf.transpose(` call is cut off in this copy
# of the source (its argument and closing parenthesis are missing); restore
# it from the original file before relying on the random-feature branch.
# NOTE(review): the random-feature branch returns a bare value while the
# other branches return a 2-tuple; confirm that callers handle both.
def mapped_fn(*sharded_args, kwargs=kwargs): pools = sharded_args[0] if use_rand_features: theta_mu = tf.transpose( means = self.mapping(pools, **{**rf_kwargs, 'theta': theta_mu}) samplesT = self.mapping(pools, **rf_kwargs) samples = utils.swap_axes(samplesT, -1, -2) return fn(means, None, samples=samples, pools=pools, inputs_old=inputs_old, outputs_old=outputs_old, **kwargs) if (posteriors is not None): # Reuse precomputed posteriors means, cov, chol = sharded_args[-1] sharded_args = sharded_args[:-1] else: # Compute marginal/joint posterior individual pools means, cov = model.get_or_create_node\ ( group='predict', fn=model.predict, args=sharded_args + constant_args, kwargs=predict_kwargs, stateful=True, ) chol = utils.jitter_cholesky(cov, jitter=model.jitter) losses = fn(means, cov, pools=pools, inputs_old=inputs_old, outputs_old=outputs_old, chol=chol, **kwargs) return losses, (means, cov, chol)
def _predict_nd(self, X1, X0, Y0, full_cov=False, chol=None, precis=None,
                K_10=None, K_01=None, **kwargs):
    '''
    Broadcasted rank-n GP posterior at `X1` given observations `(X0, Y0)`.

    Returns `(means, covs)` when `full_cov` is truthy and `(means, vars)`
    otherwise. Any of `chol`, `precis`, `K_10` and `K_01` may be supplied
    to reuse precomputed terms; missing ones are derived on demand.

    [!] Known hack: a special-case path handles the case where 'X0' and
    'Y0' have been padded to rank 4.
    '''
    with tf.name_scope('_predict_nd'):
        # Cross-covariance between query and observed inputs
        if K_10 is None:
            K_10 = (self.covar_fn(X1, X0, **kwargs) if K_01 is None
                    else utils.swap_axes(K_01, -1, -2))

        # Posterior weights, beta := K_10 (K_00 + nz*I)^{-1}
        if precis is None:
            if chol is None:
                chol = self.compute_cholesky(X0, noisy=True, **kwargs)
            if K_01 is None:
                K_01 = utils.swap_axes(K_10, -1, -2)

            if Y0.shape.ndims == 4:
                # Padding hack: use a single factor and solve every
                # right-hand side at once in flattened form
                factor = chol[0, 0]
                rhs = tf.transpose(K_01, [2, 0, 1, 3])
                flat_rhs = tf.reshape(rhs, [tf.shape(rhs)[0], -1])
                solved = tf.cholesky_solve(factor, flat_rhs)
                weights = tf.transpose(
                    tf.reshape(solved, tf.shape(rhs)), [1, 2, 3, 0])
            else:
                weights = utils.swap_axes(
                    utils.cholesky_solve(chol, K_01), -1, -2)
        else:
            # Efficient but unstable...
            weights = utils.broadcast_matmul(K_10, precis)

        # Posterior means
        deviations = Y0 - self.mean_fn(X0, Y0)
        if Y0.shape.ndims == 4:
            deviations = self._auto_tile(deviations, weights, num_ranks=-2)
        post_means = (self.mean_fn(X1, Y0)
                      + utils.broadcast_matmul(weights, deviations))

        # Marginal variances only
        if not full_cov:
            prior_vars = self.covar_fn(X1, **kwargs)
            explained = tf.reduce_sum(weights*K_10, axis=-1, keepdims=True)
            return post_means, tf.subtract(prior_vars, explained)

        # Full posterior covariance
        if K_01 is None:
            K_01 = utils.swap_axes(K_10, -1, -2)
        prior_cov = self.covar_fn(X1, X1, **kwargs)
        explained = utils.broadcast_matmul(weights, K_01)
        return post_means, tf.subtract(prior_cov, explained)
def _monte_carlo(self, means_pool, cov_pool, xov_pd, means_disc, cov_disc,
                 chol_pool=None, future_rvs=None, fantasy_rvs=None,
                 parallelism=None, num_futures=None, num_fantasies=None,
                 **kwargs):
    '''
    Generic Monte Carlo subroutine for 1-step lookahead integrands.
    Computes updated discretization posteriors conditioned on each
    sample future.

    Args:
      means_pool, cov_pool: posterior terms at the pool locations.
      xov_pd: cross-covariance term solved against `chol_pool`
        (presumably pool-by-discretization; confirm against caller).
      means_disc, cov_disc: posterior terms at the discretization.
      chol_pool: optional Cholesky factor of `cov_pool`; computed with
        `utils.jitter_cholesky` when None.
      future_rvs: optional base random variables for the futures.
      fantasy_rvs: optional base random variables forwarded to
        `self.integrand`.
      parallelism: forwarded to `self.draw_samples`.
      num_futures: defaults to `self.num_futures`.
      num_fantasies: defaults to `self.num_fantasies`.
      **kwargs: forwarded to `self.integrand`.

    Returns:
      The estimate produced by `self.integrand`.
    '''
    if (num_futures is None):
        num_futures = self.num_futures
    if (num_fantasies is None):
        num_fantasies = self.num_fantasies

    # The scope name has no format placeholders, so it is always
    # 'monte_carlo'.
    with tf.name_scope('monte_carlo'):
        if (chol_pool is None):
            chol_pool = utils.jitter_cholesky(cov_pool)

        # Fantasize residuals (Y - mu) at pool locations
        resid_pool = self.draw_samples(
            num_futures,
            chol=chol_pool,
            base_rvs=future_rvs,
            parallelism=parallelism,
        )

        # Sample pool outcomes to condition upon
        futures = resid_pool + utils.swap_axes(means_pool, -1, -2)

        # Solve for beta := K(disc, pool) K_nz(pool, pool)^{-1}
        beta = utils.swap_axes(tf.cholesky_solve(chol_pool, xov_pd), -1, -2)

        # Compute changes in discretization posterior
        d_cov = -tf.einsum('ijk,ikl->ijl', beta, xov_pd)
        ds_means = tf.einsum('ijk,ilk->ilj', beta, resid_pool)

        # Posteriors conditional on each sampled future
        means_future = means_disc + tf.expand_dims(ds_means, -1)
        covs_future = cov_disc[None, None] + d_cov[:, None]
        if self.run_unit_test:
            means_future, covs_future = self.test_conditional_posterior(
                means_future, covs_future, futures)

        # Monte Carlo integration subroutine
        estimate = self.integrand(
            means_future,
            covs_future,
            num_samples=num_fantasies,
            base_rvs=fantasy_rvs,
            futures=futures,
            means_pool=means_pool,
            means_disc=means_disc,
            cov_disc=cov_disc,
            **kwargs
        )
        return estimate