def testBatchSlicePreservesPrecomputedDivisor(self):
  batch_shape = [4, 3]
  base_kernel = tfpk.ExponentiatedQuadratic(
      amplitude=self.evaluate(
          tf.exp(samplers.normal(batch_shape, seed=test_util.test_seed()))))
  fixed_inputs = self.evaluate(
      samplers.normal(batch_shape + [1, 2], seed=test_util.test_seed()))
  schur = tfpk.SchurComplement(base_kernel, fixed_inputs)
  schur_with_divisor = tfpk.SchurComplement.with_precomputed_divisor(
      base_kernel, fixed_inputs)
  self.assertAllEqual(schur.batch_shape, batch_shape)
  self.assertAllEqual(schur_with_divisor.batch_shape, batch_shape)

  schur_sliced = schur[tf.newaxis, 0, ..., -2:]
  schur_with_divisor_sliced = schur_with_divisor[tf.newaxis, 0, ..., -2:]
  batch_shape_sliced = tf.ones(batch_shape)[tf.newaxis, 0, ..., -2:].shape
  self.assertAllEqual(schur_sliced.batch_shape, batch_shape_sliced)
  self.assertAllEqual(schur_with_divisor_sliced.batch_shape,
                      batch_shape_sliced)
  self.assertAllEqual(
      schur_with_divisor_sliced._precomputed_divisor_matrix_cholesky.shape[:-2],
      batch_shape_sliced)

  x = np.ones([1, 2], np.float32)
  y = np.ones([3, 2], np.float32)
  self.assertAllClose(schur_with_divisor.matrix(x, y), schur.matrix(x, y))
def testDistributionBatchSlicing(self):
  batch_shape = [4, 3]
  seed0, seed1 = samplers.split_seed(test_util.test_seed(), 2)
  log_precision = tf.Variable(samplers.normal(batch_shape, seed=seed0))
  mean_times_precision = tf.Variable(
      samplers.normal(batch_shape, seed=seed1))

  def normal_from_natural(log_precision, mean_times_precision):
    variance = 1. / tf.exp(log_precision)
    mean = mean_times_precision * variance
    return tfd.Normal(loc=mean, scale=tf.sqrt(variance))

  dist = tfp.experimental.util.DeferredModule(
      normal_from_natural, log_precision, mean_times_precision)
  self.assertLen(dist.trainable_variables, 2)

  sliced = dist[:2]
  self.assertEqual(sliced.batch_shape, [2, 3])
  # We do *not* expect the slicing itself to be deferred: like log_prob,
  # sample, and other methods, slicing produces a concrete value (that happens
  # to be a Distribution instance).
  self.assertLen(sliced.trainable_variables, 0)
  sliced_sample = sliced.sample(5)
  self.assertEqual(sliced_sample.shape, [5, 2, 3])
def _build_test_model(self,
                      num_timesteps=5,
                      num_features=2,
                      batch_shape=(),
                      missing_prob=0,
                      true_noise_scale=0.1,
                      true_level_scale=0.04,
                      true_slope_scale=0.02,
                      prior_class=tfd.InverseGamma,
                      dtype=tf.float32):
  seed = test_util.test_seed(sampler_type='stateless')
  (design_seed, weights_seed, noise_seed, level_seed, slope_seed,
   is_missing_seed) = samplers.split_seed(seed, 6, salt='_build_test_model')

  design_matrix = samplers.normal(
      [num_timesteps, num_features], dtype=dtype, seed=design_seed)
  weights = samplers.normal(
      list(batch_shape) + [num_features], dtype=dtype, seed=weights_seed)
  regression = tf.linalg.matvec(design_matrix, weights)
  noise = samplers.normal(
      list(batch_shape) + [num_timesteps], dtype=dtype,
      seed=noise_seed) * true_noise_scale

  level_residuals = samplers.normal(
      list(batch_shape) + [num_timesteps], dtype=dtype,
      seed=level_seed) * true_level_scale
  if true_slope_scale is not None:
    slope = tf.cumsum(
        samplers.normal(list(batch_shape) + [num_timesteps],
                        dtype=dtype, seed=slope_seed) * true_slope_scale,
        axis=-1)
    level_residuals += slope
  level = tf.cumsum(level_residuals, axis=-1)
  time_series = (regression + noise + level)
  is_missing = samplers.uniform(
      list(batch_shape) + [num_timesteps], dtype=dtype,
      seed=is_missing_seed) < missing_prob

  model = gibbs_sampler.build_model_for_gibbs_fitting(
      observed_time_series=tfp.sts.MaskedTimeSeries(
          time_series[..., tf.newaxis], is_missing),
      design_matrix=design_matrix,
      weights_prior=tfd.Normal(loc=tf.cast(0., dtype),
                               scale=tf.cast(10.0, dtype)),
      level_variance_prior=prior_class(
          concentration=tf.cast(0.01, dtype),
          scale=tf.cast(0.01 * 0.01, dtype)),
      slope_variance_prior=None if true_slope_scale is None else prior_class(
          concentration=tf.cast(0.01, dtype),
          scale=tf.cast(0.01 * 0.01, dtype)),
      observation_noise_variance_prior=prior_class(
          concentration=tf.cast(0.01, dtype),
          scale=tf.cast(0.01 * 0.01, dtype)))
  return model, time_series, is_missing
def _joint_sample_n(self, n, seed=None):
  """Draw a joint sample from the prior over latents and observations.

  This sampler is specific to LocalLevel models and is faster than the
  generic LinearGaussianStateSpaceModel implementation.

  Args:
    n: `int` `Tensor` number of samples to draw.
    seed: Optional `int` `Tensor` seed for the random number generator.

  Returns:
    latents: `float` `Tensor` of shape `concat([[n], self.batch_shape,
      [self.num_timesteps, self.latent_size]], axis=0)` representing samples
      of latent trajectories.
    observations: `float` `Tensor` of shape `concat([[n], self.batch_shape,
      [self.num_timesteps, self.observation_size]], axis=0)` representing
      samples of observed series generated from the sampled `latents`.
  """
  with tf.name_scope('joint_sample_n'):
    (initial_level_seed,
     level_jumps_seed,
     prior_observation_seed) = samplers.split_seed(
         seed, n=3, salt='LocalLevelStateSpaceModel_joint_sample_n')

    if self.batch_shape.is_fully_defined():
      batch_shape = self.batch_shape.as_list()
    else:
      batch_shape = self.batch_shape_tensor()
    sample_and_batch_shape = tf.cast(
        prefer_static.concat([[n], batch_shape], axis=0), tf.int32)

    # Sample the initial timestep from the prior. Since we want this sample
    # to have full batch shape (not just the batch shape of the
    # self.initial_state_prior object, which might in general be smaller), we
    # augment the sample shape to include whatever extra batch dimensions are
    # required.
    initial_level = self.initial_state_prior.sample(
        linear_gaussian_ssm._augment_sample_shape(  # pylint: disable=protected-access
            self.initial_state_prior,
            sample_and_batch_shape,
            self.validate_args),
        seed=initial_level_seed)

    # Sample the latent random walk and observed noise, more efficiently than
    # the generic loop in `LinearGaussianStateSpaceModel`.
    level_jumps = self.level_scale[..., tf.newaxis] * samplers.normal(
        prefer_static.concat(
            [sample_and_batch_shape, [self.num_timesteps - 1]], axis=0),
        dtype=self.dtype, seed=level_jumps_seed)
    prior_level_sample = tf.cumsum(
        tf.concat([initial_level, level_jumps], axis=-1), axis=-1)
    prior_observation_sample = prior_level_sample + (  # Sample noise.
        self.observation_noise_scale[..., tf.newaxis] *
        samplers.normal(prefer_static.shape(prior_level_sample),
                        dtype=self.dtype, seed=prior_observation_seed))

    return (prior_level_sample[..., tf.newaxis],
            prior_observation_sample[..., tf.newaxis])
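# Illustrative sketch (not part of the original source): the fast path above
# works because a local-level latent trajectory is just a cumulative sum of
# the initial level and per-step Gaussian jumps, with independent observation
# noise added afterwards. A minimal NumPy version, with illustrative names
# (`num_timesteps`, `level_scale`, etc.), assuming NumPy is available:
#
#   import numpy as np
#
#   def local_level_prior_sample(num_timesteps, level_scale, noise_scale,
#                                initial_level, rng=np.random.default_rng(0)):
#     jumps = level_scale * rng.standard_normal(num_timesteps - 1)
#     levels = np.cumsum(np.concatenate([[initial_level], jumps]))
#     observations = levels + noise_scale * rng.standard_normal(num_timesteps)
#     return levels, observations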
def test_sampled_weights_follow_correct_distribution(self):
  seed = test_util.test_seed(sampler_type='stateless')
  design_seed, true_weights_seed, sampled_weights_seed = samplers.split_seed(
      seed, 3, 'test_sampled_weights_follow_correct_distribution')
  num_timesteps = 10
  num_features = 2
  batch_shape = [3, 1]
  design_matrix = self.evaluate(samplers.normal(
      batch_shape + [num_timesteps, num_features], seed=design_seed))
  true_weights = self.evaluate(samplers.normal(
      batch_shape + [num_features, 1], seed=true_weights_seed) * 10.0)
  targets = np.matmul(design_matrix, true_weights)
  is_missing = np.array([False, False, False, True, True,
                         False, False, True, False, False])
  prior_scale = tf.convert_to_tensor(5.)
  likelihood_scale = tf.convert_to_tensor(0.1)

  # Analytically compute the true posterior distribution on weights.
  valid_design_matrix = design_matrix[..., ~is_missing, :]
  valid_targets = targets[..., ~is_missing, :]
  num_valid_observations = tf.shape(valid_design_matrix)[-2]
  weights_posterior_mean, weights_posterior_cov, _ = linear_gaussian_update(
      prior_mean=tf.zeros([num_features, 1]),
      prior_cov=tf.eye(num_features) * prior_scale**2,
      observation_matrix=tfl.LinearOperatorFullMatrix(valid_design_matrix),
      observation_noise=tfd.MultivariateNormalDiag(
          loc=tf.zeros([num_valid_observations]),
          scale_diag=likelihood_scale * tf.ones([num_valid_observations])),
      x_observed=valid_targets)

  # Check that the empirical moments of sampled weights match the true values.
  sampled_weights = tf.vectorized_map(
      lambda seed: gibbs_sampler._resample_weights(  # pylint: disable=g-long-lambda
          design_matrix=tf.where(is_missing[..., tf.newaxis],
                                 tf.zeros_like(design_matrix),
                                 design_matrix),
          target_residuals=targets[..., 0],
          observation_noise_scale=likelihood_scale,
          weights_prior_scale=tf.linalg.LinearOperatorScaledIdentity(
              num_features, prior_scale),
          seed=seed),
      tfp.random.split_seed(sampled_weights_seed, tf.constant(10000)))
  sampled_weights_mean = tf.reduce_mean(sampled_weights, axis=0)
  centered_weights = sampled_weights - weights_posterior_mean[..., 0]
  sampled_weights_cov = tf.reduce_mean(
      centered_weights[..., :, tf.newaxis] *
      centered_weights[..., tf.newaxis, :], axis=0)

  (sampled_weights_mean_, weights_posterior_mean_,
   sampled_weights_cov_, weights_posterior_cov_) = self.evaluate((
       sampled_weights_mean, weights_posterior_mean[..., 0],
       sampled_weights_cov, weights_posterior_cov))
  self.assertAllClose(sampled_weights_mean_, weights_posterior_mean_,
                      atol=0.01, rtol=0.05)
  self.assertAllClose(sampled_weights_cov_, weights_posterior_cov_,
                      atol=0.01, rtol=0.05)
def run(seed):
  init_seed, sample_seed = samplers.split_seed(seed)
  state_seeds = samplers.split_seed(init_seed)
  state = [
      samplers.normal(seed=state_seeds[0], shape=[]),
      samplers.normal(seed=state_seeds[1], shape=[4])
  ]
  kr = sharded_kernel.bootstrap_results(state)
  _, kr = sharded_kernel.one_step(state, kr, seed=sample_seed)
  return (kr.averaged_sq_grad, kr.averaged_max_trajectory_length)
def _gen_gaussian_updating_example(x_dim, y_dim, seed):
  """An implementation of section 2.3.3 from [1].

  We initialize a joint distribution

    x ~ N(mu, Lambda^{-1})
    y ~ N(Ax, L^{-1})

  Then condition the model on an observation for y. We can test to confirm
  that Cov(p(x | y_obs)) is near to

    Sigma = (Lambda + A^T L A)^{-1}

  This test can actually check whether the posterior samples have the proper
  covariance, and whether the windowed tuning recovers 1 / diag(Sigma) as the
  diagonal scaling factor.

  References:
  [1] Bishop, Christopher M. Pattern Recognition and Machine Learning.
      Springer, 2006.

  Args:
    x_dim: int
    y_dim: int
    seed: For reproducibility

  Returns:
    (tfd.JointDistribution, tf.Tensor), representing the joint distribution
    above, and the posterior variance.
  """
  seeds = samplers.split_seed(seed, 5)
  x_mean = samplers.normal((x_dim,), seed=seeds[0])
  x_scale_diag = samplers.normal((x_dim,), seed=seeds[1])
  y_scale_diag = samplers.normal((y_dim,), seed=seeds[2])
  scale_mat = samplers.normal((y_dim, x_dim), seed=seeds[3])
  y_shift = samplers.normal((y_dim,), seed=seeds[4])

  @tfd.JointDistributionCoroutine
  def model():
    x = yield Root(tfd.MultivariateNormalDiag(
        x_mean, scale_diag=x_scale_diag, name='x'))
    yield tfd.MultivariateNormalDiag(
        tf.linalg.matvec(scale_mat, x) + y_shift,
        scale_diag=y_scale_diag,
        name='y')

  dists, _ = model.sample_distributions()
  precision_x = tf.linalg.inv(dists.x.covariance())
  precision_y = tf.linalg.inv(dists.y.covariance())
  true_cov = tf.linalg.inv(precision_x + tf.linalg.matmul(
      tf.linalg.matmul(scale_mat, precision_y, transpose_a=True), scale_mat))
  return model, tf.linalg.diag_part(true_cov)
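# Illustrative sketch (not part of the original source): the identity
# Sigma = (Lambda + A^T L A)^{-1} quoted in the docstring can be checked
# against the standard joint-Gaussian conditioning formula. All names below
# (`prec_x`, `prec_y`, `a`, the dimensions) are assumptions for the sketch.
#
#   import numpy as np
#
#   rng = np.random.default_rng(0)
#   x_dim, y_dim = 3, 2
#   prec_x = np.diag(rng.uniform(0.5, 2., x_dim))   # Lambda
#   prec_y = np.diag(rng.uniform(0.5, 2., y_dim))   # L
#   a = rng.standard_normal((y_dim, x_dim))         # A
#
#   # Posterior covariance via the natural-parameter identity.
#   sigma_natural = np.linalg.inv(prec_x + a.T @ prec_y @ a)
#
#   # Posterior covariance by conditioning the joint Gaussian (Schur complement).
#   cov_x = np.linalg.inv(prec_x)
#   cov_y = a @ cov_x @ a.T + np.linalg.inv(prec_y)
#   cov_xy = cov_x @ a.T
#   sigma_conditioned = cov_x - cov_xy @ np.linalg.inv(cov_y) @ cov_xy.T
#
#   np.testing.assert_allclose(sigma_natural, sigma_conditioned, atol=1e-8)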
def _sample_n(self, n, seed=None):
  # Split the seed so the direction and distance draws use independent
  # randomness rather than reusing the same stateless seed for both calls.
  direction_seed, distance_seed = samplers.split_seed(seed, n=2)
  raw = samplers.normal(
      shape=tf.concat([[n], self.batch_shape, self.event_shape], axis=0),
      seed=direction_seed,
      dtype=self.dtype)
  direction = raw / tf.norm(raw, ord=2, axis=-1)[..., tf.newaxis]
  distance = samplers.normal(
      shape=tf.concat([[n], self.batch_shape, [1] * len(self.event_shape)],
                      axis=0),
      seed=distance_seed,
      dtype=self.dtype)
  return self._loc + self.scale * direction * distance
def test_chol_symmetric_increment_batch(self):
  matrix = samplers.normal([7, 5, 5], seed=test_util.test_seed())
  psd_matrix = self.evaluate(
      tf.matmul(matrix, matrix, adjoint_b=True) + tf.eye(5))
  chol = tf.linalg.cholesky(psd_matrix)
  increment = self.evaluate(
      0.1 * samplers.normal([7, 5], seed=test_util.test_seed()))
  for idx in range(5):
    expected_result = _naive_symmetric_increment(psd_matrix, idx, increment)
    chol_result = spike_and_slab._symmetric_increment_chol(
        chol, idx, increment)
    result = tf.matmul(chol_result, chol_result, adjoint_b=True)
    self.assertAllClose(expected_result, result)
def testSliceCopyOverrideNameSliceAgainCopyOverrideLogitsSliceAgain(self):
  seed_stream = test_util.test_seed_stream('slice_bernoulli')
  logits = samplers.normal([20, 3, 2, 5], seed=seed_stream())
  dist = tfd.Bernoulli(logits=logits, name='b1', validate_args=True)
  self.assertIn('b1', dist.name)
  dist = dist[:10].copy(name='b2')
  self.assertAllEqual((10, 3, 2, 5), dist.batch_shape)
  self.assertIn('b2', dist.name)
  dist = dist.copy(name='b3')[..., 1]
  self.assertAllEqual((10, 3, 2), dist.batch_shape)
  self.assertIn('b3', dist.name)
  dist = dist.copy(logits=samplers.normal([2], seed=seed_stream()))
  self.assertAllEqual((2,), dist.batch_shape)
  self.assertIn('b3', dist.name)
def _random_regression_task(self, num_outputs, num_features, batch_shape=(),
                            weights=None, observation_noise_scale=0.1,
                            seed=None):
  design_seed, weights_seed, noise_seed = samplers.split_seed(seed, n=3)
  batch_shape = list(batch_shape)

  design_matrix = samplers.uniform(
      batch_shape + [num_outputs, num_features], seed=design_seed)
  if weights is None:
    weights = samplers.normal(batch_shape + [num_features], seed=weights_seed)
  targets = (tf.linalg.matvec(design_matrix, weights) +
             observation_noise_scale * samplers.normal(
                 batch_shape + [num_outputs], seed=noise_seed))
  return design_matrix, weights, targets
def _sample_n(self, n, seed):
  df = tf.convert_to_tensor(self.df)
  batch_shape = self._batch_shape_tensor(df)
  event_shape = self._event_shape_tensor()
  batch_ndims = tf.shape(batch_shape)[0]

  ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
  shape = tf.concat([[n], batch_shape, event_shape], 0)
  normal_seed, gamma_seed = samplers.split_seed(seed, salt='Wishart')

  # Complexity: O(nbk**2)
  x = samplers.normal(shape=shape, mean=0., stddev=1., dtype=self.dtype,
                      seed=normal_seed)

  # Complexity: O(nbk)
  # This parameterization is equivalent to Chi2, i.e.,
  # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
  expanded_df = df * tf.ones(
      self._scale.batch_shape_tensor(),
      dtype=dtype_util.base_dtype(df.dtype))

  g = gamma_lib.random_gamma(
      shape=[n],
      concentration=self._multi_gamma_sequence(0.5 * expanded_df,
                                               self._dimension()),
      rate=0.5,
      seed=gamma_seed)

  # Complexity: O(nbk**2)
  x = tf.linalg.band_part(x, -1, 0)  # Tri-lower.

  # Complexity: O(nbk)
  x = tf.linalg.set_diag(x, tf.sqrt(g))

  # Make batch-op ready.
  # Complexity: O(nbk**2)
  perm = tf.concat([tf.range(1, ndims), [0]], 0)
  x = tf.transpose(a=x, perm=perm)
  shape = tf.concat([batch_shape, [event_shape[0]], [event_shape[1] * n]], 0)
  x = tf.reshape(x, shape)

  # Complexity: O(nbM) where M is the complexity of the operator solving a
  # vector system. For LinearOperatorLowerTriangular, each matmul is O(k^3) so
  # this step has complexity O(nbk^3).
  x = self._scale.matmul(x)

  # Undo make batch-op ready.
  # Complexity: O(nbk**2)
  shape = tf.concat([batch_shape, event_shape, [n]], 0)
  x = tf.reshape(x, shape)
  perm = tf.concat([[ndims - 1], tf.range(0, ndims - 1)], 0)
  x = tf.transpose(a=x, perm=perm)

  if not self.input_output_cholesky:
    # Complexity: O(nbk**3)
    x = tf.matmul(x, x, adjoint_b=True)

  return x
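# Illustrative sketch (not part of the original source): the sampler above is
# a batched Bartlett decomposition. A lower-triangular factor with
# chi-distributed diagonal entries and standard-normal strict lower triangle,
# premultiplied by the scale factor, yields a Wishart draw. Names here
# (`df`, `scale_tril`) are illustrative, not library API, and assume NumPy:
#
#   import numpy as np
#
#   def bartlett_wishart_sample(df, scale_tril, rng=np.random.default_rng(0)):
#     dim = scale_tril.shape[-1]
#     a = np.tril(rng.standard_normal((dim, dim)), k=-1)
#     # Diagonal entries are chi(df), chi(df - 1), ..., chi(df - dim + 1),
#     # matching the Gamma(k/2, rate=1/2) == Chi2(k) sequence used above.
#     a[np.diag_indices(dim)] = np.sqrt(rng.chisquare(df - np.arange(dim)))
#     factor = scale_tril @ a
#     return factor @ factor.T  # ~ Wishart(df, scale_tril @ scale_tril.T)
#
# A Monte Carlo average over many such draws should recover E[W] = df * Sigma.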
def generate_and_test_samples(seed):
  """Generate and test samples."""
  v_seed, u_seed = samplers.split_seed(seed)

  x = samplers.normal(shape, dtype=internal_dtype, seed=v_seed)
  # This implicitly broadcasts concentration up to sample shape.
  v = 1 + c * x
  # In [1], there is an 'inner' rejection sampling loop which checks that
  # v > 0 and generates a new normal sample if it's not, saving the rest of
  # the computations below. We found that merging the check for v > 0 with
  # the `good_sample_mask` not only simplifies the code, but leads to a
  # ~2x speedup for small concentrations on GPU, at the cost of deviating
  # slightly from the implementation given in Ref. [1].
  accept_v = v > 0.
  logv = tf.math.log1p(c * x)
  x2 = x * x
  v3 = v * v * v
  logv3 = logv * 3

  u = samplers.uniform(shape, dtype=internal_dtype, seed=u_seed)

  # In [1], the suggestion is to first check u < 1 - 0.331 * x2 * x2, and to
  # run the check below only if it fails, in order to avoid the relatively
  # expensive logarithm calls. Our algorithm operates in batch mode: we will
  # have to compute or not compute the logarithms for the entire batch, and
  # as the batch gets larger, the odds we compute it grow. Therefore we
  # don't bother with the "cheap" check.
  good_sample_mask = tf.logical_and(
      tf.math.log(u) < (x2 / 2. + d * (1 - v3 + logv3)), accept_v)

  return logv3 if log_space else v3, good_sample_mask
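# Illustrative sketch (not part of the original source): the acceptance test
# above follows the Marsaglia & Tsang (2000) squeeze-free rejection sampler
# for Gamma variates. A scalar NumPy version of the same test, with
# illustrative names (`concentration`, `n`); note the library snippet returns
# v^3 (or its log) and applies the `d` scaling elsewhere:
#
#   import numpy as np
#
#   def marsaglia_tsang_gamma(concentration, n, rng=np.random.default_rng(0)):
#     # Valid for concentration >= 1; smaller concentrations are typically
#     # handled by boosting (Gamma(a) = Gamma(a + 1) * U^(1/a)).
#     d = concentration - 1. / 3.
#     c = 1. / np.sqrt(9. * d)
#     samples = []
#     while len(samples) < n:
#       x = rng.standard_normal()
#       v = (1. + c * x) ** 3          # Corresponds to `v3` above.
#       u = rng.uniform()
#       # Same log-space acceptance test as `good_sample_mask`, with the
#       # v > 0 check folded in.
#       if v > 0. and np.log(u) < 0.5 * x * x + d * (1. - v + np.log(v)):
#         samples.append(d * v)        # Gamma(concentration, rate=1) sample.
#     return np.array(samples)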
def test_trainable_bijectors(self, cls, batch_and_event_shape):
  bijector = tfp.experimental.util.make_trainable(
      cls,
      batch_and_event_shape=batch_and_event_shape,
      seed=test_util.test_seed(),
      validate_args=True)
  if bijector.trainable_variables:
    self.evaluate([v.initializer for v in bijector.trainable_variables])

  # Verify expected number of trainable variables.
  self.assertLen(
      bijector.trainable_variables,
      len([k for k, p in cls.parameter_properties().items()
           if p.is_tensor and p.is_preferred]))

  # Verify gradients to all parameters.
  x = self.evaluate(
      samplers.normal(batch_and_event_shape, seed=test_util.test_seed()))
  with tf.GradientTape() as tape:
    y = bijector.forward(x)
  grad = tape.gradient(y, bijector.trainable_variables)
  self.assertAllNotNone(grad)

  # Verify that the round trip doesn't broadcast, i.e., that it preserves
  # batch_and_event_shape.
  self.assertAllCloseNested(
      x,
      bijector.inverse(tf.identity(y)),  # Disable bijector cache.
      atol=1e-2)
def build_trainable_linear_operator_full_matrix(shape,
                                                scale_initializer=1e-2,
                                                dtype=None,
                                                seed=None,
                                                name=None):
  """Build a trainable `LinearOperatorFullMatrix` instance.

  Args:
    shape: Shape of the `LinearOperator`, equal to `[b0, ..., bn, h, w]`,
      where `b0...bn` are batch dimensions, and `h` and `w` are the height
      and width of the matrix represented by the `LinearOperator`.
    scale_initializer: Variables are initialized with samples from
      `Normal(0, scale_initializer)`.
    dtype: `tf.dtype` of the `LinearOperator`.
    seed: Python integer to seed the random number generator.
    name: str, name for `tf.name_scope`.

  Returns:
    operator: Trainable instance of `tf.linalg.LinearOperatorFullMatrix`.
  """
  with tf.name_scope(name or 'build_trainable_linear_operator_full_matrix'):
    if dtype is None:
      dtype = dtype_util.common_dtype([scale_initializer],
                                      dtype_hint=tf.float32)
    scale_initializer = tf.convert_to_tensor(scale_initializer, dtype)
    initial_scale_matrix = samplers.normal(
        mean=0., stddev=scale_initializer, shape=shape, dtype=dtype, seed=seed)
    return tf.linalg.LinearOperatorFullMatrix(
        matrix=tf.Variable(initial_scale_matrix, name='full_matrix'))
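# Minimal usage sketch for the builder above (not part of the original
# source). The shape and seed are illustrative, and the assertions describe
# expected behavior under the assumption that the operator wraps a single
# trainable `tf.Variable`:
#
#   operator = build_trainable_linear_operator_full_matrix(
#       shape=[2, 3, 4], scale_initializer=1e-2, seed=42)
#   assert operator.shape == [2, 3, 4]             # Batch [2], 3 x 4 matrix.
#   assert len(operator.trainable_variables) == 1  # The underlying matrix.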
def _sample_n(self, n, seed=None):
  loc = tf.convert_to_tensor(self.loc)
  scale = tf.convert_to_tensor(self.scale)
  tailweight = tf.convert_to_tensor(self.tailweight)
  skewness = tf.convert_to_tensor(self.skewness)
  ig_seed, normal_seed = samplers.split_seed(
      seed, salt='normal_inverse_gaussian')
  batch_shape = self._batch_shape_tensor(
      loc=loc, scale=scale, tailweight=tailweight, skewness=skewness)
  w = tailweight * tf.math.exp(
      0.5 * tf.math.log1p(-tf.math.square(skewness / tailweight)))
  w = tf.broadcast_to(w, batch_shape)
  ig_samples = inverse_gaussian.InverseGaussian(
      scale / w, tf.math.square(scale)).sample(n, seed=ig_seed)

  sample_shape = ps.concat([[n], batch_shape], axis=0)
  normal_samples = samplers.normal(
      shape=ps.convert_to_shape_tensor(sample_shape),
      mean=0.,
      stddev=1.,
      dtype=self.dtype,
      seed=normal_seed)
  return (loc + tf.math.sqrt(ig_samples) * (
      skewness * tf.math.sqrt(ig_samples) + normal_samples))
def test_state_space_model(self):
  seed = test_util.test_seed_stream()
  model = self._build_sts()

  dummy_param_vals = [p.prior.sample(seed=seed()) for p in model.parameters]
  initial_state_prior = tfd.MultivariateNormalDiag(
      loc=-2. + tf.zeros([model.latent_size]),
      scale_diag=3. * tf.ones([model.latent_size]))

  # Verify we build the LGSSM without errors.
  ssm = model.make_state_space_model(
      num_timesteps=10,
      param_vals=dummy_param_vals,
      initial_state_prior=initial_state_prior,
      initial_step=1)

  # Verify that the child class passes the initial step and prior arguments
  # through to the SSM.
  self.assertEqual(self.evaluate(ssm.initial_step), 1)
  self.assertEqual(ssm.initial_state_prior, initial_state_prior)

  # Verify the model has the correct latent size.
  self.assertEqual(
      self.evaluate(tf.convert_to_tensor(ssm.latent_size_tensor())),
      model.latent_size)

  # Verify that the SSM tracks its parameters.
  observed_time_series = self.evaluate(
      samplers.normal([10, 1], seed=test_util.test_seed()))
  ssm_copy = ssm.copy(name='copied_ssm')
  self.assertAllClose(*self.evaluate(
      (ssm.log_prob(observed_time_series),
       ssm_copy.log_prob(observed_time_series))))
def test_trainable_bijectors(self, cls, batch_and_event_shape):
  init_fn, apply_fn = tfe_util.make_trainable_stateless(
      cls, batch_and_event_shape=batch_and_event_shape, validate_args=True)

  # Verify expected number of trainable variables.
  raw_parameters = init_fn(seed=test_util.test_seed())
  bijector = apply_fn(raw_parameters)
  self.assertLen(
      raw_parameters,
      len([k for k, p in bijector.parameter_properties().items()
           if p.is_tensor and p.is_preferred]))

  # Verify gradients to all parameters.
  x = self.evaluate(
      samplers.normal(batch_and_event_shape, seed=test_util.test_seed()))
  y, grad = tfp.math.value_and_gradient(
      lambda params: apply_fn(params).forward(x), [raw_parameters])
  self.assertAllNotNone(grad)

  # Verify that the round trip doesn't broadcast, i.e., that it preserves
  # batch_and_event_shape.
  self.assertAllCloseNested(
      x,
      bijector.inverse(tf.identity(y)),  # Disable bijector cache.
      atol=1e-2)
def _inner(seed):
  x = samplers.normal(sample_shape, dtype=internal_dtype, seed=seed)
  # This implicitly broadcasts alpha up to sample shape.
  v = 1 + c * x
  return (x, v), v > 0.
def _sample_n(self, n, seed=None):
  raw = samplers.normal(
      shape=tf.concat([[n], self.batch_shape, [self.dimension]], axis=0),
      seed=seed,
      dtype=self.dtype)
  unit_norm = raw / tf.norm(raw, ord=2, axis=-1)[..., tf.newaxis]
  return unit_norm
def _choose_random_direction(current_state_parts, batch_rank, seed=None):
  """Chooses a random direction in the event space."""
  seeds = samplers.split_seed(seed, n=len(current_state_parts))
  # Chooses the random directions across each of the input components.
  rnd_direction_parts = [
      samplers.normal(ps.shape(current_state_part),
                      dtype=tf.float32, seed=part_seed)
      for (current_state_part, part_seed) in zip(current_state_parts, seeds)
  ]

  # Sum squares over all of the input components. Note this takes all
  # components into account.
  sum_squares = sum(
      tf.reduce_sum(  # pylint: disable=g-complex-comprehension
          rnd_direction**2,
          axis=ps.range(batch_rank, ps.rank(rnd_direction)),
          keepdims=True)
      for rnd_direction in rnd_direction_parts)

  # Normalizes the random direction fragments.
  rnd_direction_parts = [
      rnd_direction / tf.sqrt(sum_squares)
      for rnd_direction in rnd_direction_parts
  ]
  return rnd_direction_parts
def test_chandrupatla_automatically_selects_bounds(self):
  expected_roots = 1e6 * samplers.normal(
      [4, 3], seed=test_util.test_seed(sampler_type='stateless'))
  _, value_at_roots, _ = tfp.math.find_root_chandrupatla(
      objective_fn=lambda x: (x - expected_roots)**5,
      position_tolerance=1e-8)
  self.assertAllClose(value_at_roots, tf.zeros_like(value_at_roots))
def sample_n(n, df, loc, scale, batch_shape, dtype, seed):
  """Draw n samples from a Student T distribution.

  Note that `scale` can be negative or zero.
  The sampling method comes from the fact that if:
    X ~ Normal(0, 1)
    Z ~ Chi2(df)
    Y = X / sqrt(Z / df)
  then:
    Y ~ StudentT(df)

  Args:
    n: int, number of samples
    df: Floating-point `Tensor`. The degrees of freedom of the
      distribution(s). `df` must contain only positive values.
    loc: Floating-point `Tensor`; the location(s) of the distribution(s).
    scale: Floating-point `Tensor`; the scale(s) of the distribution(s).
    batch_shape: `Tensor` batch shape of the distribution(s), i.e. the
      broadcast shape of the parameters.
    dtype: Return dtype.
    seed: Optional seed for random draw.

  Returns:
    samples: a `Tensor` with prepended dimensions `n`.
  """
  normal_seed, gamma_seed = samplers.split_seed(seed, salt='student_t')
  shape = ps.concat([[n], batch_shape], 0)

  normal_sample = samplers.normal(shape, dtype=dtype, seed=normal_seed)
  df = df * tf.ones(batch_shape, dtype=dtype)
  gamma_sample = gamma_lib.random_gamma(
      [n], concentration=0.5 * df, rate=0.5, seed=gamma_seed)
  samples = normal_sample * tf.math.rsqrt(gamma_sample / df)
  return samples * scale + loc
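# Illustrative sketch (not part of the original source): the construction in
# the docstring, Y = X / sqrt(Z / df) with X ~ Normal(0, 1) and Z ~ Chi2(df),
# written in plain NumPy. The degrees of freedom and sample count are
# illustrative; for df > 2 the Student-T variance is df / (df - 2), which the
# empirical variance should approach up to Monte Carlo error:
#
#   import numpy as np
#
#   rng = np.random.default_rng(0)
#   df, n = 5., 200_000
#   x = rng.standard_normal(n)
#   z = rng.chisquare(df, n)
#   y = x / np.sqrt(z / df)
#   print(y.var(), df / (df - 2.))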
def _sample_distribution(shape, var, distribution, seed, dtype):
  """Samples from the specified distribution (scaled to have variance `var`)."""
  distribution = str(distribution).lower()
  if distribution == 'truncated_normal':
    # Constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
    stddev = prefer_static.sqrt(var) / 0.87962566103423978
    return tf.random.stateless_truncated_normal(
        shape, mean=0., stddev=stddev, dtype=dtype,
        seed=samplers.sanitize_seed(seed))
  elif distribution == 'uniform':
    limit = prefer_static.sqrt(3. * var)
    return samplers.uniform(
        shape, minval=-limit, maxval=limit, dtype=dtype, seed=seed)
  elif distribution == 'untruncated_normal':
    stddev = prefer_static.sqrt(var)
    return samplers.normal(
        shape, mean=0., stddev=stddev, dtype=dtype, seed=seed)
  raise ValueError('Unrecognized distribution: "{}".'.format(distribution))
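# Illustrative note (not part of the original source): the correction constant
# in the truncated-normal branch is the standard deviation of a standard
# normal truncated to [-2, 2], so dividing by it restores the requested
# variance after truncation. Assuming SciPy is available:
#
#   from scipy import stats
#   print(stats.truncnorm.std(a=-2., b=2., loc=0., scale=1.))
#   # ~0.87962566103423978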
def test_sampled_latents_have_correct_marginals(self, use_slope):
  seed = test_util.test_seed(sampler_type='stateless')
  residuals_seed, is_missing_seed, level_seed = samplers.split_seed(
      seed, 3, 'test_sampled_level_has_correct_marginals')

  num_timesteps = 10
  observed_residuals = samplers.normal(
      [3, 1, num_timesteps], seed=residuals_seed)
  is_missing = samplers.uniform(
      [3, 1, num_timesteps], seed=is_missing_seed) > 0.8
  level_scale = 1.5 * tf.ones([3, 1])
  observation_noise_scale = 0.2 * tf.ones([3, 1])

  if use_slope:
    initial_state_prior = tfd.MultivariateNormalDiag(
        loc=[-30., 2.], scale_diag=[1., 0.2])
    slope_scale = 0.5 * tf.ones([3, 1])
    ssm = tfp.sts.LocalLinearTrendStateSpaceModel(
        num_timesteps=num_timesteps,
        initial_state_prior=initial_state_prior,
        observation_noise_scale=observation_noise_scale,
        level_scale=level_scale,
        slope_scale=slope_scale)
  else:
    initial_state_prior = tfd.MultivariateNormalDiag(
        loc=[-30.], scale_diag=[100.])
    slope_scale = None
    ssm = tfp.sts.LocalLevelStateSpaceModel(
        num_timesteps=num_timesteps,
        initial_state_prior=initial_state_prior,
        observation_noise_scale=observation_noise_scale,
        level_scale=level_scale)

  posterior_means, posterior_covs = ssm.posterior_marginals(
      observed_residuals[..., tf.newaxis], mask=is_missing)
  latents_samples = gibbs_sampler._resample_latents(
      observed_residuals=observed_residuals,
      level_scale=level_scale,
      slope_scale=slope_scale,
      observation_noise_scale=observation_noise_scale,
      initial_state_prior=initial_state_prior,
      is_missing=is_missing,
      sample_shape=10000,
      seed=level_seed)

  (posterior_means_, posterior_covs_,
   latents_means_, latents_covs_) = self.evaluate((
       posterior_means,
       posterior_covs,
       tf.reduce_mean(latents_samples, axis=0),
       tfp.stats.covariance(latents_samples, sample_axis=0, event_axis=-1)))
  self.assertAllClose(latents_means_, posterior_means_, atol=0.1)
  self.assertAllClose(latents_covs_, posterior_covs_, atol=0.1)
def _sample_n(self, n, seed=None):
  # Generate samples using:
  #   mu + sigma * sgn(U - 0.5) * sqrt(X^2 + Y^2 + Z^2),
  # where U ~ Uniform(0, 1) and X, Y, Z ~ N(0, 1).
  normal_seed, rademacher_seed = samplers.split_seed(
      seed, salt='DoublesidedMaxwell')

  loc = tf.convert_to_tensor(self.loc)
  scale = tf.convert_to_tensor(self.scale)
  shape = prefer_static.pad(
      self._batch_shape_tensor(loc=loc, scale=scale),
      paddings=[[1, 0]],
      constant_values=n)

  # Generate one-sided Maxwell variables by using 3 Gaussian variates.
  norm_rvs = samplers.normal(
      shape=prefer_static.pad(shape, paddings=[[0, 1]], constant_values=3),
      dtype=self.dtype,
      seed=normal_seed)
  maxwell_rvs = tf.norm(norm_rvs, axis=-1)

  # Generate random signs for the symmetric variates.
  random_sign = tfp_math.random_rademacher(shape, seed=rademacher_seed)
  sampled = random_sign * maxwell_rvs * scale + loc
  return sampled
def _tril_spherical_uniform(dimension, batch_shape, dtype, seed):
  """Returns a `Tensor` of samples of lower triangular matrices.

  Each row of the lower triangular part follows a spherical uniform
  distribution.

  Args:
    dimension: Scalar `int` `Tensor`, representing the dimensionality of the
      output matrices.
    batch_shape: Vector-shaped, `int` `Tensor` representing batch shape of
      output. The output will have shape `batch_shape + [dimension,
      dimension]`.
    dtype: TF `dtype` representing `dtype` of output.
    seed: PRNG seed; see `tfp.random.sanitize_seed` for details.

  Returns:
    tril_spherical_uniform: `Tensor` with specified `batch_shape` and `dtype`
      consisting of real values drawn row-wise from a spherical uniform
      distribution.
  """
  # Essentially, we will draw lower triangular samples where each lower
  # triangular entry follows a normal distribution, then apply `x / norm(x)`
  # for each row of the samples.
  # To avoid possible NaNs, we will use spherical_uniform directly for
  # the first two rows.
  assert dimension > 0, '`dimension` needs to be positive.'
  num_seeds = min(dimension, 3)
  seeds = list(samplers.split_seed(seed, n=num_seeds, salt='sample_lkj'))
  rows = []
  paddings_prepend = [[0, 0]] * len(batch_shape)
  for n in range(1, min(dimension, 2) + 1):
    rows.append(
        tf.pad(
            random_ops.spherical_uniform(
                shape=batch_shape, dimension=n, dtype=dtype, seed=seeds.pop()),
            paddings_prepend + [[0, dimension - n]],
            constant_values=0.))
  samples = tf.stack(rows, axis=-2)
  if dimension > 2:
    normal_shape = ps.concat(
        [batch_shape, [dimension * (dimension + 1) // 2 - 3]], axis=0)
    normal_samples = samplers.normal(
        shape=normal_shape, dtype=dtype, seed=seeds.pop())
    # We fill the first two rows of the triangular matrix with ones.
    # Note that fill_triangular fills elements in a clockwise spiral.
    normal_samples = tf.concat([
        normal_samples[..., :dimension],
        tf.ones(ps.concat([batch_shape, [1]], axis=0), dtype=dtype),
        normal_samples[..., dimension:(2 * dimension - 1)],
        tf.ones(ps.concat([batch_shape, [2]], axis=0), dtype=dtype),
        normal_samples[..., (2 * dimension - 1):],
    ], axis=-1)
    normal_samples = linalg.fill_triangular(
        normal_samples, upper=False)[..., 2:, :]
    remaining_rows = normal_samples / tf.norm(
        normal_samples, ord=2, axis=-1, keepdims=True)
    samples = tf.concat([samples, remaining_rows], axis=-2)

  return samples
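# Illustrative sketch (not part of the original source): the key fact used
# above is that normalizing i.i.d. Gaussian rows by their Euclidean norm
# yields spherically uniform unit vectors, which is exactly what happens to
# the rows of dimension > 2. The shapes below are illustrative and assume
# NumPy:
#
#   import numpy as np
#
#   rng = np.random.default_rng(0)
#   rows = rng.standard_normal((4, 7))
#   spherical_rows = rows / np.linalg.norm(rows, axis=-1, keepdims=True)
#   np.testing.assert_allclose(
#       np.linalg.norm(spherical_rows, axis=-1), np.ones(4), atol=1e-12)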
def _start_trajectory_batched(self, state, target_log_prob, seed):
  """Computations needed to start a trajectory."""
  with tf.name_scope('start_trajectory_batched'):
    seeds = samplers.split_seed(seed, n=len(state) + 1)
    momentum_seeds = distribute_lib.fold_in_axis_index(
        seeds[:-1], self.experimental_shard_axis_names)
    momentum = [
        samplers.normal(  # pylint: disable=g-complex-comprehension
            shape=ps.shape(x),
            dtype=x.dtype,
            seed=momentum_seeds[i]) for (i, x) in enumerate(state)
    ]
    init_energy = compute_hamiltonian(
        target_log_prob, momentum,
        shard_axis_names=self.experimental_shard_axis_names)

    if MULTINOMIAL_SAMPLE:
      return momentum, init_energy, None

    # Draw a slice variable u ~ Uniform(0, p(initial state, initial
    # momentum)) and compute log u. For numerical stability, we perform this
    # in log space where log u = log (u' * p(...)) = log u' + log p(...)
    # and u' ~ Uniform(0, 1).
    log_slice_sample = tf.math.log1p(-samplers.uniform(
        shape=ps.shape(init_energy),
        dtype=init_energy.dtype,
        seed=seeds[len(state)]))
    return momentum, init_energy, log_slice_sample
def testScalarSlice(self):
  logits = self.evaluate(samplers.normal([], seed=test_util.test_seed()))
  dist = tfd.Bernoulli(logits=logits, validate_args=True)
  self.assertAllEqual([], dist.batch_shape)
  self.assertAllEqual([1], dist[tf.newaxis].batch_shape)
  self.assertAllEqual([], dist[...].batch_shape)
  self.assertAllEqual([1, 1], dist[tf.newaxis, ..., tf.newaxis].batch_shape)
def testCopyUnknownRank(self):
  logits = tf1.placeholder_with_default(
      samplers.normal([20, 3, 1, 5], seed=test_util.test_seed()), shape=None)
  dist = tfd.Bernoulli(logits=logits, name='b1', validate_args=True)
  self.assertIn('b1', dist.name)
  dist = dist.copy(name='b2')
  self.assertIn('b2', dist.name)