def testInverseTemperaturesValueError(self):
  """Checks that an invalid `inverse_temperatures` argument is rejected.

  The number of temperatures is drawn with `tf.random.uniform`, so it is only
  known at run time. Constructing `ReplicaExchangeMC` with such a tensor is
  expected to raise a `ValueError` whose message matches 'not fully defined'.
  """
  if tf.executing_eagerly():
    # NOTE(review): skipped in eager mode, presumably because shapes are
    # always concrete there -- confirm.
    return

  dtype = np.float32
  target = tfd.Normal(loc=dtype(0), scale=dtype(1))

  def make_kernel_fn(target_log_prob_fn, seed):
    return tfp.mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=target_log_prob_fn,
        seed=seed,
        step_size=1.0,
        num_leapfrog_steps=3)

  # Only the call under test lives inside the context manager, so an
  # unrelated `ValueError` raised during setup cannot satisfy the assertion.
  # `assertRaisesRegex` replaces the deprecated `assertRaisesRegexp` alias.
  with self.assertRaisesRegex(ValueError, 'not fully defined'):
    tfp.mcmc.ReplicaExchangeMC(
        target_log_prob_fn=tf.function(target.log_prob, autograph=False),
        inverse_temperatures=10.**tf.linspace(
            0., -2., tf.random.uniform([], maxval=10, dtype=tf.int32)),
        make_kernel_fn=make_kernel_fn,
        seed=_set_seed())
def test_log_prob(self, data):
  """Checks `BatchBroadcast.log_prob` ranks each mean above a shifted point.

  Args:
    data: Hypothesis-style data object; shapes are obtained via `data.draw`.
  """
  batch_shape = data.draw(tfp_hps.shapes())
  bcast_arg, dist_batch_shp = data.draw(
      tfp_hps.broadcasting_shapes(batch_shape, 2))

  # One narrow Normal (scale 0.01) per batch member, with locs 0, 1, 2, ...
  num_members = np.prod(tensorshape_util.as_list(dist_batch_shp))
  locs = tf.reshape(tf.range(float(num_members)), dist_batch_shp)
  underlying = tfd.Normal(loc=locs, scale=0.01)

  if not self.is_static_shape:
    # Wrap the broadcast argument in a variable for the non-static variant.
    bcast_arg = tf.Variable(bcast_arg)
    self.evaluate(bcast_arg.initializer)
  dist = tfd.BatchBroadcast(underlying, bcast_arg)

  sample_shape = data.draw(
      hps.one_of(hps.integers(0, 13), tfp_hps.shapes()))
  leading_dims = np.int32(sample_shape).reshape([-1])
  sample_batch_event = tf.concat(
      [leading_dims, batch_shape, dist.event_shape_tensor()], axis=0)

  # Observations placed exactly on the locs must beat ones shifted by 0.5.
  obsv = tf.broadcast_to(dist.distribution.loc, sample_batch_event)
  log_prob_at_loc = dist.log_prob(obsv)
  log_prob_shifted = dist.log_prob(obsv + .5)
  self.assertAllTrue(log_prob_at_loc > log_prob_shifted)
def testCompareToBijector(self):
  """Compares `Autoregressive` log-probs with a MAF-transformed Normal."""
  sample_shape = np.int32([4, 5])
  batch_shape = np.int32([])
  event_size = np.int32(2)

  batch_event_shape = np.concatenate([batch_shape, [event_size]], axis=0)
  sample0 = tf.zeros(batch_event_shape)
  affine = tfb.Affine(scale_tril=self._random_scale_tril(event_size))

  # Autoregressive-distribution formulation.
  ar = tfd.Autoregressive(
      self._normal_fn(affine), sample0, validate_args=True)

  # Bijector formulation: no shift, log-scale given by the affine map.
  def shift_and_log_scale_fn(x):
    return [None, affine.forward(x)]

  ar_flow = tfb.MaskedAutoregressiveFlow(
      is_constant_jacobian=True,
      shift_and_log_scale_fn=shift_and_log_scale_fn,
      validate_args=True)
  td = tfd.TransformedDistribution(
      distribution=tfd.Normal(loc=0., scale=1.),
      bijector=ar_flow,
      event_shape=[event_size],
      batch_shape=batch_shape,
      validate_args=True)

  # Evaluate both at the same uniform draws from [-1, 1).
  x_shape = np.concatenate([sample_shape, batch_shape, [event_size]], axis=0)
  uniform01 = self._rng.random_sample(x_shape).astype(np.float32)
  x = 2. * uniform01 - 1.

  td_log_prob_, ar_log_prob_ = self.evaluate(
      [td.log_prob(x), ar.log_prob(x)])
  self.assertAllClose(td_log_prob_, ar_log_prob_, atol=0., rtol=1e-6)
def test_finite_adaptation(self):
  """Checks that step-size adaptation stops after `num_adaptation_steps`.

  The chain starts with a tiny step size so that every proposal is accepted;
  with `adaptation_rate=1.` the test expects the step size to double on each
  adaptation step and to stay constant afterwards.
  """
  num_results = 10
  num_adaptation_steps = 3
  initial_step_size = 1e-5

  hmc = tfp.mcmc.HamiltonianMonteCarlo(
      target_log_prob_fn=lambda x: tfd.Normal(0., 1.).log_prob(x),
      num_leapfrog_steps=2,
      step_size=initial_step_size,
      state_gradients_are_stopped=True)
  adaptive_kernel = tfp.mcmc.SimpleStepSizeAdaptation(
      hmc,
      num_adaptation_steps=num_adaptation_steps,
      adaptation_rate=1.)

  _, kernel_results = tfp.mcmc.sample_chain(
      num_results=num_results,
      num_burnin_steps=0,
      current_state=tf.constant(0.),
      kernel=adaptive_kernel,
      seed=test_util.test_seed())

  [_, step_size_] = self.evaluate(
      [kernel_results, kernel_results.new_step_size])

  # The step size should have been incremented on each adaptation step.
  expected_step_size = initial_step_size * 2**num_adaptation_steps
  self.assertNear(
      expected_step_size, step_size_[num_adaptation_steps], err=1e-6)

  # Once adaptation is over, the step size must no longer move.
  post_adaptation = step_size_[num_adaptation_steps:]
  self.assertEqual(post_adaptation.min(), post_adaptation.max())
def _fn(dtype, shape, name, trainable, add_variable_fn):
  """Builds an `Independent` `Deterministic` or `Normal` distribution.

  The location and scale come from the enclosing scope's `loc_scale_fn`. When
  that function returns no scale, the result wraps a `Deterministic`;
  otherwise it wraps a `Normal`.

  Args:
    dtype: Type of parameter's event.
    shape: Python `list`-like representing the parameter's event shape.
    name: Python `str` name prepended to any created (or existing)
      `tf.Variable`s.
    trainable: Python `bool` indicating all created `tf.Variable`s should be
      added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`.
    add_variable_fn: `tf.get_variable`-like `callable` used to create (or
      access existing) `tf.Variable`s.

  Returns:
    A `tfd.Independent` whose event absorbs every batch dimension of the
    underlying `Deterministic` or `Normal` distribution.
  """
  loc, scale = loc_scale_fn(dtype, shape, name, trainable, add_variable_fn)
  dist = (
      tfd.Deterministic(loc=loc) if scale is None
      else tfd.Normal(loc=loc, scale=scale))
  # Reinterpret all batch dimensions as event dimensions.
  num_batch_dims = tf.size(input=dist.batch_shape_tensor())
  return tfd.Independent(dist, reinterpreted_batch_ndims=num_batch_dims)
def testLangevin1DNormal(self):
  """Runs MALA on a standard Normal target and checks mean and stddev."""
  dtype = np.float32
  nchains = 32
  target = tfd.Normal(loc=dtype(0), scale=dtype(1))

  mala = tfp.mcmc.MetropolisAdjustedLangevinAlgorithm(
      target_log_prob_fn=target.log_prob,
      step_size=0.75,
      volatility_fn=lambda *args: .5,
      seed=test_util.test_seed())
  samples, _ = tfp.mcmc.sample_chain(
      num_results=500,
      current_state=np.ones([nchains], dtype=dtype),
      kernel=mala,
      num_burnin_steps=200,
      parallel_iterations=1)  # For determinism.

  # Pool statistics over both the step axis and the chain axis.
  pooled_axes = (0, 1)
  sample_mean_, sample_std_ = self.evaluate([
      tf.reduce_mean(samples, axis=pooled_axes),
      tf.math.reduce_std(samples, axis=pooled_axes),
  ])

  self.assertAllClose(sample_mean_, 0., atol=0.12)
  self.assertAllClose(sample_std_, 1., atol=0.1)
def testReprWorksCorrectlyScalar(self):
  """Checks `repr` output for scalar, batched and unknown-shape cases."""
  normal = tfd.Normal(
      loc=np.float16(0), scale=np.float16(1), validate_args=True)
  expected_normal_repr = (
      '<tfp.distributions.Normal'
      ' \'Normal\''
      ' batch_shape=[]'
      ' event_shape=[]'
      ' dtype=float16>')
  self.assertEqual(repr(normal), expected_normal_repr)

  # A user-supplied name must show up in the repr.
  chi2 = tfd.Chi2(df=np.float32([1., 2.]), name='silly', validate_args=True)
  expected_chi2_repr = (
      '<tfp.distributions.Chi2'
      ' \'silly\''
      ' batch_shape=[2]'
      ' event_shape=[]'
      ' dtype=float32>')
  self.assertEqual(repr(chi2), expected_chi2_repr)

  # There's no notion of partially known shapes in eager mode, so the
  # remaining check only applies in graph mode.
  if tf.executing_eagerly():
    return

  exp = tfd.Exponential(
      rate=tf1.placeholder_with_default(1., shape=None),
      validate_args=True)
  expected_exp_repr = (
      '<tfp.distributions.Exponential'
      ' \'Exponential\''
      ' batch_shape=?'
      ' event_shape=[]'
      ' dtype=float32>')
  self.assertEqual(repr(exp), expected_exp_repr)
def _build_trainable_posterior(param, initial_loc_fn):
  """Builds a transformed-normal variational dist over a parameter's support.

  Args:
    param: Object exposing `name`, `prior` and `bijector` attributes.
    initial_loc_fn: Callable taking `param` and returning the initial value of
      the location variable.

  Returns:
    A `tfd.TransformedDistribution` that pushes a trainable Normal (wrapped in
    `tfd.Independent` when the prior is not known to be scalar-event) through
    `param.bijector`.
  """
  dtype = param.prior.dtype

  def _initial_loc():
    return initial_loc_fn(param)

  def _initial_unconstrained_scale():
    # softplus(-4) is small, so the posterior starts out concentrated.
    return -4 * tf.ones_like(initial_loc_fn(param))

  loc = tf1.get_variable(
      param.name + '_loc',
      initializer=_initial_loc,
      dtype=dtype,
      use_resource=True)
  unconstrained_scale = tf1.get_variable(
      param.name + '_scale',
      initializer=_initial_unconstrained_scale,
      dtype=dtype,
      use_resource=True)
  scale = tf.math.softplus(unconstrained_scale)
  q = tfd.Normal(loc=loc, scale=scale)

  # Make the variational distribution's `event_shape` match the parameter's.
  event_ndims = param.prior.event_shape.ndims
  if event_ndims is None or event_ndims > 0:
    q = tfd.Independent(q, reinterpreted_batch_ndims=event_ndims)

  # Map from unconstrained space onto the parameter's constrained support.
  return tfd.TransformedDistribution(q, param.bijector)
def _fn(kernel_size, bias_size, dtype=None):
  """Builds a Keras model that outputs a block-structured distribution.

  The parameters are split into `C` blocks (`C` comes from the enclosing
  scope). Each block contributes a `MultivariateNormalTriL` over its share of
  the kernel and an independent `Normal` over its share of the bias; all
  pieces are joined with `tfd.Blockwise`.

  Args:
    kernel_size: Total number of kernel entries; integer-divided by `C`.
    bias_size: Total number of bias entries; integer-divided by `C`.
    dtype: Optional dtype forwarded to the `VariableLayer`.

  Returns:
    A `tf.keras.Sequential` consisting of a `VariableLayer` followed by a
    `DistributionLambda`.
  """
  # softplus(unit_offset) == 1, so zero-valued raw parameters give scale ~1.
  unit_offset = np.log(np.expm1(1.))

  weights_per_block = kernel_size // C
  biases_per_block = bias_size // C
  n_mean_params = weights_per_block
  # Lower-triangular scale entries only; the loc entries are counted above.
  n_cov_params = (
      tfp.layers.MultivariateNormalTriL.params_size(weights_per_block)
      - weights_per_block)
  n_params_total = C * (n_mean_params + n_cov_params + 2 * biases_per_block)

  # Per block: indices for [weight loc, weight tril, bias loc, bias scale].
  per_block_indices = tf.split(np.arange(n_params_total), C)
  segment_sizes = [
      n_mean_params, n_cov_params, biases_per_block, biases_per_block]
  split_param_idxs = [
      tf.split(indices, segment_sizes, axis=0)
      for indices in per_block_indices
  ]

  def _segment(t, block, part):
    # Gathers one parameter segment of one block from the flat vector.
    return tf.gather(t, split_param_idxs[block][part], axis=-1)

  def _positive(raw):
    # Softplus with a small floor.
    return 1e-5 + tf.nn.softplus(unit_offset + raw)

  def _weight_dist(t, block):
    return tfd.MultivariateNormalTriL(
        loc=_segment(t, block, 0),
        scale_tril=tfp.math.fill_triangular(
            _positive(_segment(t, block, 1))))

  def _bias_dist(t, block):
    return tfd.Independent(
        tfd.Normal(
            loc=_segment(t, block, 2),
            scale=_positive(_segment(t, block, 3))),
        reinterpreted_batch_ndims=1)

  return tf.keras.Sequential([
      tfpl.VariableLayer(n_params_total, dtype=dtype),
      tfpl.DistributionLambda(lambda t: tfd.Blockwise(
          [_weight_dist(t, c) for c in range(C)]
          + [_bias_dist(t, c) for c in range(C)])),
  ])
def __init__(self,
             num_seasons,
             num_steps_per_season=1,
             drift_scale_prior=None,
             initial_effect_prior=None,
             observed_time_series=None,
             name=None):
  """Specify a seasonal effects model.

  Args:
    num_seasons: Scalar Python `int` number of seasons.
    num_steps_per_season: Python `int` number of steps in each season. Either
      a scalar (shape `[]`), meaning every season has the same length, or a
      NumPy array of shape `[num_seasons]`.
      Default value: 1.
    drift_scale_prior: optional `tfd.Distribution` instance giving the prior
      on the `drift_scale` parameter. When `None`, a heuristic default is
      built from `observed_time_series`.
      Default value: `None`.
    initial_effect_prior: optional `tfd.Distribution` instance giving a
      normal prior on the initial effect of each season. A scalar
      `tfd.Normal` is applied independently to every season; a multivariate
      normal (e.g., `tfd.MultivariateNormalDiag`) with event shape
      `[num_seasons]` is used as a joint prior across seasons. When `None`,
      a heuristic default is built from `observed_time_series`.
      Default value: `None`.
    observed_time_series: optional `float` `Tensor` of shape
      `batch_shape + [T, 1]` (the trailing unit dimension may be omitted
      when `T > 1`) holding an observed time series. Priors that are not
      given explicitly are scaled according to this series (or batch of
      series).
      Default value: `None`.
    name: the name of this model component.
      Default value: 'Seasonal'.
  """
  with tf.compat.v1.name_scope(
      name, 'Seasonal', values=[observed_time_series]) as name:

    if observed_time_series is not None:
      observed_stddev, observed_initial = sts_util.empirical_statistics(
          observed_time_series)
    else:
      observed_stddev, observed_initial = 1., 0.

    # Heuristic default priors. Overriding these may dramatically
    # change inference performance and results.
    if drift_scale_prior is None:
      drift_scale_prior = tfd.LogNormal(
          loc=tf.math.log(.01 * observed_stddev), scale=3.)
    if initial_effect_prior is None:
      initial_effect_prior = tfd.Normal(
          loc=observed_initial,
          scale=tf.abs(observed_initial) + observed_stddev)

    self._num_seasons = num_seasons
    self._num_steps_per_season = num_steps_per_season

    tf.debugging.assert_same_float_dtype(
        [drift_scale_prior, initial_effect_prior])

    if not isinstance(initial_effect_prior, tfd.Normal):
      # Use the supplied prior directly as the initial state prior.
      self._initial_state_prior = initial_effect_prior
    else:
      # Replicate the scalar prior across all seasons.
      effect_mean = initial_effect_prior.mean()
      effect_stddev = initial_effect_prior.stddev()
      self._initial_state_prior = tfd.MultivariateNormalDiag(
          loc=tf.stack([effect_mean] * num_seasons, axis=-1),
          scale_diag=tf.stack([effect_stddev] * num_seasons, axis=-1))

    drift_scale = Parameter('drift_scale', drift_scale_prior, tfb.Softplus())
    super(Seasonal, self).__init__(
        parameters=[drift_scale],
        latent_size=num_seasons,
        name=name)
def test_norequired_args_maker(self):
  """Checks that a maker with no required args is called without any.

  `tfd.Bernoulli` is used as the maker for `x`. The model also has a `logits`
  entry, yet construction is expected to fail with the "Must pass probs or
  logits" error -- presumably because `logits` is an optional argument of
  `Bernoulli` and is therefore not forwarded.
  """
  model = dict(logits=tfd.Normal(0., 1.), x=tfd.Bernoulli)
  expected_message = 'Must pass probs or logits, but not both.'
  with self.assertRaisesWithPredicateMatch(ValueError, expected_message):
    tfd.JointDistributionNamed(model)
def __init__(self,
             level_scale_prior=None,
             slope_mean_prior=None,
             slope_scale_prior=None,
             autoregressive_coef_prior=None,
             initial_level_prior=None,
             initial_slope_prior=None,
             observed_time_series=None,
             constrain_ar_coef_stationary=True,
             constrain_ar_coef_positive=False,
             name=None):
  """Specify a semi-local linear trend model.

  Args:
    level_scale_prior: optional `tfd.Distribution` instance giving the prior
      on the `level_scale` parameter. When `None`, a heuristic default is
      built from `observed_time_series`.
      Default value: `None`.
    slope_mean_prior: optional `tfd.Distribution` instance giving the prior
      on the `slope_mean` parameter. When `None`, a heuristic default is
      built from `observed_time_series`.
      Default value: `None`.
    slope_scale_prior: optional `tfd.Distribution` instance giving the prior
      on the `slope_scale` parameter. When `None`, a heuristic default is
      built from `observed_time_series`.
      Default value: `None`.
    autoregressive_coef_prior: optional `tfd.Distribution` instance giving
      the prior on the `autoregressive_coef` parameter. When `None`, a
      standard `Normal(0., 1.)` is used. The prior may be implicitly
      truncated by `constrain_ar_coef_stationary` and/or
      `constrain_ar_coef_positive`.
      Default value: `None`.
    initial_level_prior: optional `tfd.Distribution` instance giving the
      prior on the initial level. When `None`, a heuristic default is built
      from `observed_time_series`.
      Default value: `None`.
    initial_slope_prior: optional `tfd.Distribution` instance giving the
      prior on the initial slope. When `None`, a heuristic default is built
      from `observed_time_series`.
      Default value: `None`.
    observed_time_series: optional `float` `Tensor` of shape
      `batch_shape + [T, 1]` (the trailing unit dimension may be omitted
      when `T > 1`) holding an observed time series. Priors that are not
      given explicitly are scaled according to this series (or batch of
      series). May optionally be an instance of `tfp.sts.MaskedTimeSeries`,
      which includes a mask `Tensor` to specify timesteps with missing
      observations.
      Default value: `None`.
    constrain_ar_coef_stationary: if `True`, perform inference using a
      parameterization that restricts `autoregressive_coef` to the interval
      `(-1, 1)`, or `(0, 1)` if `constrain_ar_coef_positive` is also `True`,
      corresponding to stationary processes. This implicitly truncates the
      support of `autoregressive_coef_prior`.
      Default value: `True`.
    constrain_ar_coef_positive: if `True`, perform inference using a
      parameterization that restricts `autoregressive_coef` to be positive,
      or in `(0, 1)` if `constrain_ar_coef_stationary` is also `True`. This
      implicitly truncates the support of `autoregressive_coef_prior`.
      Default value: `False`.
    name: the name of this model component.
      Default value: 'SemiLocalLinearTrend'.
  """
  with tf.name_scope(name or 'SemiLocalLinearTrend') as name:
    if observed_time_series is None:
      observed_stddev, observed_initial = 1., 0.
    else:
      _, observed_stddev, observed_initial = sts_util.empirical_statistics(
          observed_time_series)

    # Heuristic default priors. Overriding these may dramatically
    # change inference performance and results.
    if level_scale_prior is None:
      level_scale_prior = tfd.LogNormal(
          loc=tf.math.log(.01 * observed_stddev), scale=2.)
    if slope_mean_prior is None:
      slope_mean_prior = tfd.Normal(loc=0., scale=observed_stddev)
    if slope_scale_prior is None:
      slope_scale_prior = tfd.LogNormal(
          loc=tf.math.log(.01 * observed_stddev), scale=2.)
    if autoregressive_coef_prior is None:
      autoregressive_coef_prior = tfd.Normal(
          loc=0., scale=tf.ones_like(observed_initial))
    if initial_level_prior is None:
      initial_level_prior = tfd.Normal(
          loc=observed_initial,
          scale=tf.abs(observed_initial) + observed_stddev)
    if initial_slope_prior is None:
      initial_slope_prior = tfd.Normal(loc=0., scale=observed_stddev)

    # The latent state is [level, slope]; combine their priors accordingly.
    initial_means = [initial_level_prior.mean(), initial_slope_prior.mean()]
    initial_stddevs = [
        initial_level_prior.stddev(), initial_slope_prior.stddev()]
    self._initial_state_prior = tfd.MultivariateNormalDiag(
        loc=tf.stack(initial_means, axis=-1),
        scale_diag=tf.stack(initial_stddevs, axis=-1))

    # Constrain the support of the autoregressive coefficient. Keys are
    # (stationary, positive).
    bijector_by_constraint = {
        (True, True): tfb.Sigmoid,     # support in (0, 1)
        (False, True): tfb.Softplus,   # support in (0, infty)
        (True, False): tfb.Tanh,       # support in (-1, 1)
        (False, False): tfb.Identity,  # unconstrained
    }
    constraint_key = (bool(constrain_ar_coef_stationary),
                      bool(constrain_ar_coef_positive))
    autoregressive_coef_bijector = bijector_by_constraint[constraint_key]()

    # Rescale the unconstrained parameters by the observed stddev.
    stddev_preconditioner = tfb.Scale(scale=observed_stddev)
    scaled_softplus = tfb.Chain([stddev_preconditioner, tfb.Softplus()])

    parameters = [
        Parameter('level_scale', level_scale_prior, scaled_softplus),
        Parameter('slope_mean', slope_mean_prior, stddev_preconditioner),
        Parameter('slope_scale', slope_scale_prior, scaled_softplus),
        Parameter('autoregressive_coef',
                  autoregressive_coef_prior,
                  autoregressive_coef_bijector),
    ]
    super(SemiLocalLinearTrend, self).__init__(
        parameters=parameters, latent_size=2, name=name)
def fn(sample):
  """Returns an `Independent` unit-scale Normal broadcast against `sample`.

  `loc` and `event_ndims` are taken from the enclosing scope. Adding
  `0. * sample` leaves the location values unchanged while broadcasting them
  against `sample`.
  """
  broadcast_loc = loc + 0. * sample
  base = tfd.Normal(loc=broadcast_loc, scale=1.)
  return tfd.Independent(
      base, reinterpreted_batch_ndims=tf.convert_to_tensor(event_ndims))
def proposal_log_prob(x):
  """Standard-normal log-density of `x`, summed over the event dimensions.

  Also increments `counter['proposal_calls']` (from the enclosing scope) so
  callers can count invocations. Dimensions from `independent_chain_ndims`
  onward are treated as event dimensions and reduced.
  """
  counter['proposal_calls'] += 1
  event_dims = tf.range(independent_chain_ndims, tf.rank(x))
  elementwise_log_prob = tfd.Normal(loc=0., scale=1.).log_prob(x)
  return tf.reduce_sum(elementwise_log_prob, axis=event_dims)
def target_log_prob(a, b):
  """Joint log-prob of `a ~ Normal(0, 1)` and `b ~ Normal(a, 1)`.

  The `b` term is computed from `a` passed through `pbroadcast` and then
  reduced with `psum`, both over the axis named 'foo'.
  """
  log_prob_a = tfd.Normal(0., 1.).log_prob(a)
  broadcast_a = distribute_lib.pbroadcast(a, 'foo')
  log_prob_b = distribute_lib.psum(
      tfd.Normal(broadcast_a, 1.).log_prob(b), 'foo')
  return log_prob_a + log_prob_b
def testMatrixEvent(self):
  """Checks a matrix-event distribution with static and dynamic shapes."""
  batch_shape = [2]
  event_shape = [2, 3, 3]
  # Variables declared with an unknown static shape provide dynamic shapes.
  batch_shape_var = tf.Variable(
      np.int32(batch_shape),
      shape=tf.TensorShape(None),
      name='dynamic_batch_shape')
  event_shape_var = tf.Variable(
      np.int32(event_shape),
      shape=tf.TensorShape(None),
      name='dynamic_event_shape')

  scale = 2.
  loc = 0.

  def make_fake_mvn(batch, event):
    return self._cls()(
        distribution=tfd.Normal(loc=loc, scale=scale),
        bijector=DummyMatrixTransform(),
        batch_shape=batch,
        event_shape=event,
        validate_args=True)

  fake_mvn_dynamic = make_fake_mvn(batch_shape_var, event_shape_var)
  fake_mvn_static = make_fake_mvn(batch_shape, event_shape)

  def actual_mvn_log_prob(x):
    # This distribution is the normal PDF, reduced over the
    # last 3 dimensions + a jacobian term which corresponds
    # to the determinant of x.
    normal_term = np.sum(
        stats.norm(loc, scale).logpdf(x), axis=(-1, -2, -3))
    jacobian_term = np.sum(np.linalg.det(x), axis=-1)
    return normal_term + jacobian_term

  self.assertAllEqual([2, 3, 3], fake_mvn_static.event_shape)
  self.assertAllEqual([2], fake_mvn_static.batch_shape)

  if not tf.executing_eagerly():
    unknown_shape = tf.TensorShape(None)
    self.assertAllEqual(unknown_shape, fake_mvn_dynamic.event_shape)
    self.assertAllEqual(unknown_shape, fake_mvn_dynamic.batch_shape)

  num_samples = 5e3
  self.evaluate(
      [event_shape_var.initializer, batch_shape_var.initializer])

  for fake_mvn in (fake_mvn_static, fake_mvn_dynamic):
    # Draw many samples but only evaluate the first five of them.
    y = fake_mvn.sample(int(num_samples), seed=test_util.test_seed())
    x = y[0:5, ...]
    (x_,
     fake_event_shape_,
     fake_batch_shape_,
     fake_log_prob_,
     fake_prob_) = self.evaluate([
         x,
         fake_mvn.event_shape_tensor(),
         fake_mvn.batch_shape_tensor(),
         fake_mvn.log_prob(x),
         fake_mvn.prob(x),
     ])

    # Ensure all other functions work as intended.
    self.assertAllEqual([5, 2, 2, 3, 3], x_.shape)
    self.assertAllEqual([2, 3, 3], fake_event_shape_)
    self.assertAllEqual([2], fake_batch_shape_)
    self.assertAllClose(
        actual_mvn_log_prob(x_), fake_log_prob_, atol=0., rtol=1e-6)
    # With this many dimensions and samples, the direct space probability
    # may underflow.
    self.assertAllClose(
        np.exp(actual_mvn_log_prob(x_)), fake_prob_, atol=1e-12, rtol=1e-5)
def test_nested_transform(self):
  """Checks nested transformed kernels agree with one chained bijector."""
  target_dist = tfd.Normal(loc=0., scale=1.)
  b1 = tfb.Scale(0.5)
  b2 = tfb.Exp()
  chain = tfb.Chain([b2, b1])  # applies bijectors right to left (b1 then b2).

  def make_hmc():
    return tfp.mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=target_dist.log_prob,
        num_leapfrog_steps=27,
        step_size=10)

  # Two nested single-bijector kernels ...
  inner_kernel = tfp.mcmc.TransformedTransitionKernel(
      inner_kernel=make_hmc(), bijector=b1)
  outer_kernel = tfp.mcmc.TransformedTransitionKernel(
      inner_kernel=inner_kernel, bijector=b2)
  # ... versus one kernel carrying the equivalent chain.
  chain_kernel = tfp.mcmc.TransformedTransitionKernel(
      inner_kernel=make_hmc(), bijector=chain)

  outer_pkr_one, outer_pkr_two = self.evaluate([
      outer_kernel.bootstrap_results(2.),
      outer_kernel.bootstrap_results(9.),
  ])

  # the outermost kernel only applies the outermost bijector
  self.assertNear(np.log(2.), outer_pkr_one.transformed_state, err=1e-6)
  self.assertNear(np.log(9.), outer_pkr_two.transformed_state, err=1e-6)

  chain_pkr_one, chain_pkr_two = self.evaluate([
      chain_kernel.bootstrap_results(2.),
      chain_kernel.bootstrap_results(9.),
  ])

  # all bijectors are applied to the inner kernel, from innermost to outermost
  # this behavior is completely analogous to a bijector Chain
  bootstrap_pairs = (
      (chain_pkr_one, outer_pkr_one),
      (chain_pkr_two, outer_pkr_two),
  )
  for chain_pkr, outer_pkr in bootstrap_pairs:
    self.assertNear(chain_pkr.transformed_state,
                    outer_pkr.inner_results.transformed_state,
                    err=1e-6)
    self.assertEqual(
        chain_pkr.inner_results.accepted_results,
        outer_pkr.inner_results.inner_results.accepted_results)

  # With the same stateless seed, one step must give the same next state.
  seed = test_util.test_seed(sampler_type='stateless')
  outer_results_one, outer_results_two = self.evaluate([
      outer_kernel.one_step(2., outer_pkr_one, seed=seed),
      outer_kernel.one_step(9., outer_pkr_two, seed=seed),
  ])
  chain_results_one, chain_results_two = self.evaluate([
      chain_kernel.one_step(2., chain_pkr_one, seed=seed),
      chain_kernel.one_step(9., chain_pkr_two, seed=seed),
  ])
  self.assertNear(chain_results_one[0], outer_results_one[0], err=1e-6)
  self.assertNear(chain_results_two[0], outer_results_two[0], err=1e-6)
def testGradientsSecondOrder(self):
  """Checks the second derivative of `2 x**2` at a random variable is 4."""

  def f(x):
    return 2. * x**2.

  def df(x):
    # First derivative of `f`; `value_and_gradient` returns (value, grad).
    return tfp.math.value_and_gradient(f, x)[1]

  x = ed.RandomVariable(tfd.Normal(0., 1.))
  _, d2ydx2 = tfp.math.value_and_gradient(df, x)
  second_derivative_ = self.evaluate(d2ydx2)
  self.assertEqual(second_derivative_, 4.)
def mk_normal():
  """Creates a fresh `tfd.Normal` with location 1 and scale 2."""
  normal = tfd.Normal(loc=1., scale=2.)
  return normal
def testUnivariateNormalTargetConservation(self):
  """Runs the univariate target-conservation check on `Normal(1, 2)`."""
  normal_dist = tfd.Normal(loc=1., scale=2.)
  conservation_check = assert_univariate_target_conservation(
      self, normal_dist, step_size=0.2)
  self.evaluate(conservation_check)
def __init__(self,
             design_matrix,
             weights_prior_scale=0.1,
             weights_batch_shape=None,
             name=None):
  """Specify a sparse linear regression model.

  Args:
    design_matrix: float `Tensor` of shape `concat([batch_shape,
      [num_timesteps, num_features]])`. May also be an instance of
      `tf.linalg.LinearOperator`.
    weights_prior_scale: float `Tensor` defining the scale of the Horseshoe
      prior on regression weights. Small values encourage the weights to be
      sparse. The shape must broadcast with `weights_batch_shape`.
      Default value: `0.1`.
    weights_batch_shape: if `None`, defaults to
      `design_matrix.batch_shape_tensor()`. Must broadcast with the batch
      shape of `design_matrix`.
      Default value: `None`.
    name: the name of this model component.
      Default value: 'SparseLinearRegression'.
  """
  with tf.compat.v1.name_scope(
      name, 'SparseLinearRegression',
      values=[design_matrix, weights_prior_scale]) as name:

    # Normalize the design matrix to a `LinearOperator`.
    if not isinstance(design_matrix, tfl.LinearOperator):
      design_matrix = tfl.LinearOperatorFullMatrix(
          tf.convert_to_tensor(value=design_matrix, name='design_matrix'),
          name='design_matrix_linop')

    # Prefer the static feature count; fall back to the dynamic shape.
    static_num_features = design_matrix.shape[-1]
    if tf.compat.dimension_value(static_num_features) is None:
      num_features = design_matrix.shape_tensor()[-1]
    else:
      num_features = static_num_features

    if weights_batch_shape is not None:
      weights_batch_shape = tf.convert_to_tensor(
          value=weights_batch_shape, dtype=tf.int32)
    else:
      weights_batch_shape = design_matrix.batch_shape_tensor()
    weights_shape = tf.concat([weights_batch_shape, [num_features]], axis=0)

    dtype = design_matrix.dtype

    self._design_matrix = design_matrix
    self._weights_prior_scale = weights_prior_scale

    ones_like_weights_batch = tf.ones(weights_batch_shape, dtype=dtype)
    ones_like_weights = tf.ones(weights_shape, dtype=dtype)

    # One global scale per batch member, one local scale per weight.
    global_scale_variance = Parameter(
        'global_scale_variance',
        prior=tfd.InverseGamma(
            0.5 * ones_like_weights_batch,
            0.5 * ones_like_weights_batch),
        bijector=tfb.Softplus())
    global_scale_noncentered = Parameter(
        'global_scale_noncentered',
        prior=tfd.HalfNormal(scale=ones_like_weights_batch),
        bijector=tfb.Softplus())
    local_scale_variances = Parameter(
        'local_scale_variances',
        prior=tfd.Independent(
            tfd.InverseGamma(
                0.5 * ones_like_weights,
                0.5 * ones_like_weights),
            reinterpreted_batch_ndims=1),
        bijector=tfb.Softplus())
    local_scales_noncentered = Parameter(
        'local_scales_noncentered',
        prior=tfd.Independent(
            tfd.HalfNormal(scale=ones_like_weights),
            reinterpreted_batch_ndims=1),
        bijector=tfb.Softplus())
    weights_noncentered = Parameter(
        'weights_noncentered',
        prior=tfd.Independent(
            tfd.Normal(
                loc=tf.zeros_like(ones_like_weights),
                scale=ones_like_weights),
            reinterpreted_batch_ndims=1),
        bijector=tfb.Identity())

    super(SparseLinearRegression, self).__init__(
        parameters=[
            global_scale_variance,
            global_scale_noncentered,
            local_scale_variances,
            local_scales_noncentered,
            weights_noncentered,
        ],
        latent_size=0,
        name=name)
def log_prob0(x):
  """Squeezed log-prob of `x` under six unit-scale Normals at 0..5.

  The six Normals are wrapped in `tfd.Independent` so they form one
  length-6 event.
  """
  locs = tf.range(6, dtype=tf.float32)
  joint = tfd.Independent(
      tfd.Normal(locs, tf.constant(1.)), reinterpreted_batch_ndims=1)
  return tf.squeeze(joint.log_prob(x))
def fn(sample=0.):
  """Returns a unit-scale Normal with zero locations shaped like `sample`."""
  zero_loc = tf.zeros_like(sample)
  return tfd.Normal(loc=zero_loc, scale=1.)
def testSampleEndtoEnd(self):
  """Runs NUTS end to end on a robust regression and checks diagnostics."""
  strm = tfp_test_util.test_seed_stream()

  predictors = tf.cast([
      201., 244., 47., 287., 203., 58., 210., 202., 198., 158., 165., 201.,
      157., 131., 166., 160., 186., 125., 218., 146.
  ], tf.float32)
  obs = tf.cast([
      592., 401., 583., 402., 495., 173., 479., 504., 510., 416., 393., 442.,
      317., 311., 400., 337., 423., 334., 533., 344.
  ], tf.float32)
  y_sigma = tf.cast([
      61., 25., 38., 15., 21., 15., 27., 14., 30., 16., 14., 25., 52., 16.,
      34., 31., 42., 26., 16., 22.
  ], tf.float32)

  def likelihood(df, b1, b0):
    # Student-t likelihood; leading axis is the chain, trailing the data.
    return tfd.Independent(
        tfd.StudentT(
            df=df[:, None],
            loc=b0[:, None] + b1[:, None] * predictors[None, :],
            scale=y_sigma[None, :]))

  # Robust linear regression model
  robust_lm = tfd.JointDistributionSequential(
      [
          tfd.Normal(loc=0., scale=1.),  # b0
          tfd.Normal(loc=0., scale=1.),  # b1
          tfd.HalfNormal(5.),  # df
          likelihood,
      ],
      validate_args=True)

  def log_prob(b0, b1, df):
    return robust_lm.log_prob([b0, b1, df, obs])

  init_step_size = [1., .2, .5]
  step_size0 = [tf.cast(x, dtype=tf.float32) for x in init_step_size]

  number_of_steps, burnin, nchain = 200, 50, 10

  @tf.function(autograph=False)
  def run_chain_and_get_diagnostic():
    # Random initialization of the starting position of each chain.
    b0, b1, df, _ = robust_lm.sample(nchain, seed=strm())

    # Bijectors mapping the constrained parameters to the real line.
    unconstraining_bijectors = [
        tfb.Identity(),
        tfb.Identity(),
        tfb.Exp(),
    ]

    def trace_fn(_, pkr):
      nuts_results = pkr.inner_results.inner_results
      return (nuts_results.step_size, nuts_results.log_accept_ratio)

    # The adaptation kernel wraps the transformed kernel, so the step-size
    # accessors below go through one extra level of `inner_results`.
    def set_step_size(pkr, new_step_size):
      return pkr._replace(
          inner_results=pkr.inner_results._replace(step_size=new_step_size))

    def get_step_size(pkr):
      return pkr.inner_results.step_size

    def get_log_accept_prob(pkr):
      return pkr.inner_results.log_accept_ratio

    nuts = tfp.mcmc.NoUTurnSampler(
        target_log_prob_fn=log_prob,
        step_size=step_size0,
        parallel_iterations=1,
        seed=strm())
    transformed = tfp.mcmc.TransformedTransitionKernel(
        inner_kernel=nuts, bijector=unconstraining_bijectors)
    kernel = tfp.mcmc.DualAveragingStepSizeAdaptation(
        transformed,
        target_accept_prob=.8,
        num_adaptation_steps=burnin,
        step_size_setter_fn=set_step_size,
        step_size_getter_fn=get_step_size,
        log_accept_prob_getter_fn=get_log_accept_prob,
    )

    # Sample from the chain and collect diagnostics.
    mcmc_trace, (step_size, log_accept_ratio) = tfp.mcmc.sample_chain(
        num_results=number_of_steps,
        num_burnin_steps=burnin,
        current_state=[b0, b1, df],
        kernel=kernel,
        trace_fn=trace_fn,
        parallel_iterations=1)
    rhat = tfp.mcmc.potential_scale_reduction(mcmc_trace)

    final_step_sizes = [s[-1] for s in step_size]
    mean_accept_ratio = tf.math.exp(
        tfp.math.reduce_logmeanexp(log_accept_ratio))
    mean_rhats = [tf.reduce_mean(rhat_) for rhat_ in rhat]
    return final_step_sizes, mean_accept_ratio, mean_rhats

  # Sample from posterior distribution and get diagnostic
  final_step_size, average_accept_ratio, average_rhat = self.evaluate(
      run_chain_and_get_diagnostic())

  # Check that step size adaptation reduced the initial step size
  step_size_change = np.asarray(final_step_size) - np.asarray(init_step_size)
  self.assertAllLess(step_size_change, 0.)

  # Check that average acceptance ratio is close to target
  self.assertAllClose(
      average_accept_ratio,
      .8 * np.ones_like(average_accept_ratio),
      atol=0.1, rtol=0.1)

  # Check that mcmc sample quality is acceptable with tuning
  self.assertAllClose(
      average_rhat, np.ones_like(average_rhat), atol=0.05, rtol=0.05)
def test_ordereddict_sample_log_prob(self):
  """Checks sampling, shapes and log-prob for an `OrderedDict` model.

  The shape checks index every structure by key. Zipping the dict-like
  structures directly would iterate over their keys, so the assertions would
  only compare key names with themselves and never look at a shape.
  """

  def build_ordereddict(e, scale, loc, m, x):
    return collections.OrderedDict([
        ('e', e),
        ('scale', scale),
        ('loc', loc),
        ('m', m),
        ('x', x),
    ])

  # The lambda argument names matter: `resolve_graph` below reports them as
  # the dependencies of each node.
  model = build_ordereddict(
      e=tfd.Independent(tfd.Exponential(rate=[100, 120]), 1),
      scale=lambda e: tfd.Gamma(concentration=e[..., 0], rate=e[..., 1]),
      loc=tfd.Normal(loc=0, scale=2.),
      m=tfd.Normal,
      x=lambda m: tfd.Sample(tfd.Bernoulli(logits=m), 12))
  d = tfd.JointDistributionNamed(model, validate_args=True)

  self.assertEqual(
      (
          ('e', ()),
          ('scale', ('e',)),
          ('loc', ()),
          ('m', ('loc', 'scale')),
          ('x', ('m',)),
      ),
      d.resolve_graph())

  xs = d.sample(seed=test_util.test_seed())
  self.assertLen(xs, 5)
  # We'll verify the shapes work as intended when we plumb these back into the
  # respective log_probs.

  ds, _ = d.sample_distributions(value=xs)
  self.assertLen(ds, 5)
  values = tuple(ds.values())
  expected_types = (
      tfd.Independent, tfd.Gamma, tfd.Normal, tfd.Normal, tfd.Sample)
  for dist, expected_type in zip(values, expected_types):
    self.assertIsInstance(dist, expected_type)

  # Static properties.
  self.assertAllEqual(
      build_ordereddict(
          e=tf.float32,
          scale=tf.float32,
          loc=tf.float32,
          m=tf.float32,
          x=tf.int32),
      d.dtype)

  batch_shape_tensor_, event_shape_tensor_ = self.evaluate(
      [d.batch_shape_tensor(), d.event_shape_tensor()])

  expected_batch_shape = build_ordereddict(
      e=[], scale=[], loc=[], m=[], x=[])
  batch_tensorshape = d.batch_shape
  for key, expected in expected_batch_shape.items():
    self.assertAllEqual(expected, batch_tensorshape[key])
    self.assertAllEqual(expected, batch_shape_tensor_[key])

  expected_event_shape = build_ordereddict(
      e=[2], scale=[], loc=[], m=[], x=[12])
  event_tensorshape = d.event_shape
  for key, expected in expected_event_shape.items():
    self.assertAllEqual(expected, event_tensorshape[key])
    self.assertAllEqual(expected, event_shape_tensor_[key])

  # The joint log-prob must equal the sum of the per-node log-probs.
  expected_jlp = sum(
      dist.log_prob(value) for dist, value in zip(ds.values(), xs.values()))
  actual_jlp = d.log_prob(xs)
  self.assertAllClose(
      *self.evaluate([expected_jlp, actual_jlp]), atol=0., rtol=1e-4)
def testNotIterable(self):
  """Checks that iterating over a distribution raises `TypeError`."""
  normal = tfd.Normal(loc=0., scale=1.)
  # `assertRaisesRegex` replaces the deprecated `assertRaisesRegexp` alias,
  # which newer Python versions no longer provide.
  with self.assertRaisesRegex(
      TypeError, '\'Normal\' object is not iterable'):
    list(normal)
def test_dict_sample_log_prob(self):
  """Checks sampling, shapes and log-prob for a plain `dict` model."""
  # The lambda argument names matter: `resolve_graph` below reports them as
  # the dependencies of each node.
  model = dict(
      e=tfd.Independent(tfd.Exponential(rate=[100, 120]), 1),
      scale=lambda e: tfd.Gamma(concentration=e[..., 0], rate=e[..., 1]),
      loc=tfd.Normal(loc=0, scale=2.),
      m=tfd.Normal,
      x=lambda m: tfd.Sample(tfd.Bernoulli(logits=m), 12))
  d = tfd.JointDistributionNamed(model, validate_args=True)

  expected_graph = (
      ('e', ()),
      ('scale', ('e',)),
      ('loc', ()),
      ('m', ('loc', 'scale')),
      ('x', ('m',)),
  )
  self.assertEqual(expected_graph, d.resolve_graph())

  xs = d.sample(seed=test_util.test_seed())
  self.assertLen(xs, 5)
  # We'll verify the shapes work as intended when we plumb these back into the
  # respective log_probs.

  ds, _ = d.sample_distributions(value=xs)
  self.assertLen(ds, 5)
  expected_types = (
      ('e', tfd.Independent),
      ('scale', tfd.Gamma),
      ('loc', tfd.Normal),
      ('m', tfd.Normal),
      ('x', tfd.Sample),
  )
  for key, expected_type in expected_types:
    self.assertIsInstance(ds[key], expected_type)

  # Static properties.
  expected_dtype = {
      'e': tf.float32,
      'scale': tf.float32,
      'loc': tf.float32,
      'm': tf.float32,
      'x': tf.int32,
  }
  self.assertAllEqual(expected_dtype, d.dtype)

  batch_shape_tensor_, event_shape_tensor_ = self.evaluate(
      [d.batch_shape_tensor(), d.event_shape_tensor()])

  expected_batch_shape = {'e': [], 'scale': [], 'loc': [], 'm': [], 'x': []}
  batch_tensorshape = d.batch_shape
  for key, expected in expected_batch_shape.items():
    self.assertAllEqual(expected, batch_tensorshape[key])
    self.assertAllEqual(expected, batch_shape_tensor_[key])

  expected_event_shape = {
      'e': [2], 'scale': [], 'loc': [], 'm': [], 'x': [12]}
  event_tensorshape = d.event_shape
  for key, expected in expected_event_shape.items():
    self.assertAllEqual(expected, event_tensorshape[key])
    self.assertAllEqual(expected, event_shape_tensor_[key])

  # The joint log-prob must equal the sum of the per-node log-probs.
  expected_jlp = sum(ds[key].log_prob(xs[key]) for key in ds)
  actual_jlp = d.log_prob(xs)
  self.assertAllClose(
      *self.evaluate([expected_jlp, actual_jlp]), atol=0., rtol=1e-4)
def normal_differential_entropy(scale):
    """Return the entropy of a `tfd.Normal` built from `0.` and `scale`.

    Args:
      scale: second positional argument forwarded to `tfd.Normal`; the
        distribution is constructed with `validate_args=True`.

    Returns:
      The result of calling `.entropy()` on that distribution.
    """
    distribution = tfd.Normal(0., scale, validate_args=True)
    return distribution.entropy()
def __init__(self,
             num_seasons,
             num_steps_per_season=1,
             drift_scale_prior=None,
             initial_effect_prior=None,
             constrain_mean_effect_to_zero=True,
             observed_time_series=None,
             name=None):
    """Build a seasonal-effects model component.

    Args:
      num_seasons: Scalar Python `int`, the number of seasons.
      num_steps_per_season: Python `int` number of steps in each season.
        Either a scalar (shape `[]`), meaning every season has the same
        length; a NumPy array of shape `[num_seasons]`, meaning seasons
        differ in length but repeat identically every cycle; or a NumPy
        array of shape `[num_cycles, num_seasons]`, meaning the length of
        each season also varies from cycle to cycle (e.g., a 4-year cycle
        containing a leap day).
        Default value: 1.
      drift_scale_prior: optional `tfd.Distribution` giving the prior on the
        `drift_scale` parameter. When `None`, a heuristic default is built
        from `observed_time_series`.
        Default value: `None`.
      initial_effect_prior: optional `tfd.Distribution` giving a normal prior
        on the initial effect of each season. A scalar `tfd.Normal` is
        applied independently to every season; a multivariate normal (e.g.,
        `tfd.MultivariateNormalDiag`) with event shape `[num_seasons]` is
        used as a joint prior across all seasons. When `None`, a heuristic
        default is built from `observed_time_series`.
        Default value: `None`.
      constrain_mean_effect_to_zero: if `True`, parameterize the model so
        that the mean effect across all seasons is zero. This usually helps
        separate the contributions of different model components and gives
        more interpretable posterior decompositions, but may be unwanted
        when you intend to inspect the latent space of the underlying state
        space model directly.
        Default value: `True`.
      observed_time_series: optional `float` `Tensor` of shape
        `batch_shape + [T, 1]` (the trailing unit dimension may be omitted
        when `T > 1`) holding an observed time series. Priors that are not
        given explicitly receive defaults derived from the scale of this
        series (or batch of series). May also be a
        `tfp.sts.MaskedTimeSeries`, which carries a mask `Tensor` marking
        timesteps with missing observations.
        Default value: `None`.
      name: the name of this model component.
        Default value: 'Seasonal'.
    """
    with tf.compat.v1.name_scope(
            name, 'Seasonal', values=[observed_time_series]) as name:

        # Without data, fall back to unit scale and a zero initial value.
        if observed_time_series is None:
            _, observed_stddev, observed_initial = (0., 1., 0.)
        else:
            _, observed_stddev, observed_initial = (
                sts_util.empirical_statistics(observed_time_series))

        # Heuristic default priors. Overriding them can substantially alter
        # inference performance and results.
        if drift_scale_prior is None:
            default_drift_loc = tf.math.log(.01 * observed_stddev)
            drift_scale_prior = tfd.LogNormal(loc=default_drift_loc, scale=3.)
        if initial_effect_prior is None:
            default_effect_scale = tf.abs(observed_initial) + observed_stddev
            initial_effect_prior = tfd.Normal(
                loc=observed_initial, scale=default_effect_scale)

        dtype = tf.debugging.assert_same_float_dtype(
            [drift_scale_prior, initial_effect_prior])

        if not isinstance(initial_effect_prior, tfd.Normal):
            # Anything else is used directly as the joint prior.
            initial_state_prior = initial_effect_prior
        else:
            # Replicate the scalar prior once per season along a new last
            # axis to form a diagonal multivariate normal.
            stacked_loc = tf.stack(
                [initial_effect_prior.mean()] * num_seasons, axis=-1)
            stacked_scale = tf.stack(
                [initial_effect_prior.stddev()] * num_seasons, axis=-1)
            initial_state_prior = tfd.MultivariateNormalDiag(
                loc=stacked_loc, scale_diag=stacked_scale)

        if constrain_mean_effect_to_zero:
            # Map the prior into the residual parameterization used by
            # `ConstrainedSeasonalStateSpaceModel`, which imposes a zero-sum
            # constraint. The marginal prior on each individual effect is
            # unchanged, but the effects become dependent on one another.
            to_residuals, _ = build_effects_to_residuals_matrix(
                num_seasons, dtype=dtype)
            # Wrapping in a LinearOperator lets matmul broadcast.
            to_residuals_linop = tf.linalg.LinearOperatorFullMatrix(
                to_residuals)
            residual_loc = to_residuals_linop.matvec(
                initial_state_prior.mean())
            # This matmul yields another LinearOperator.
            residual_scale_linop = to_residuals_linop.matmul(
                initial_state_prior.scale)
            residual_covariance = residual_scale_linop.matmul(
                residual_scale_linop.to_dense(), adjoint_arg=True)
            initial_state_prior = tfd.MultivariateNormalFullCovariance(
                loc=residual_loc, covariance_matrix=residual_covariance)

        self._constrain_mean_effect_to_zero = constrain_mean_effect_to_zero
        self._initial_state_prior = initial_state_prior
        self._num_seasons = num_seasons
        self._num_steps_per_season = num_steps_per_season

        parameters = [
            Parameter('drift_scale', drift_scale_prior, tfb.Softplus()),
        ]
        # The zero-sum constraint removes one dimension from the latent state.
        if self.constrain_mean_effect_to_zero:
            latent_size = num_seasons - 1
        else:
            latent_size = num_seasons

        super(Seasonal, self).__init__(
            parameters=parameters,
            latent_size=latent_size,
            name=name)
def test_variable_tracking_works(self):
    """A `tfd.Normal` is a `tf.Module` that tracks its `scale` variable."""
    scale_var = tf.Variable(1.)
    dist = tfd.Normal(loc=0, scale=scale_var, validate_args=True)
    self.assertIsInstance(dist, tf.Module)
    expected_variables = (scale_var,)
    self.assertEqual(expected_variables, dist.trainable_variables)