def model_fn(self, likelihood_model, sample_scales):
    # Global scale: a half-Cauchy built from its noncentered
    # InverseGamma/HalfNormal decomposition.
    x_global_scale_variance = yield JDCRoot(
        Independent(tfd.InverseGamma(concentration=0.5, scale=0.5)))
    x_global_scale_noncentered = yield JDCRoot(
        Independent(tfd.HalfNormal(scale=1.0)))
    x_global_scale = x_global_scale_noncentered * tf.sqrt(
        x_global_scale_variance)

    # Local (per-sample, per-feature) scales, same noncentered decomposition.
    x_local1_scale_variance = yield JDCRoot(
        Independent(
            tfd.InverseGamma(
                concentration=tf.fill([self.num_samples, self.num_features],
                                      0.5),
                scale=tf.fill([self.num_samples, self.num_features], 0.5))))
    x_local1_scale_noncentered = yield JDCRoot(
        Independent(
            tfd.HalfNormal(
                scale=tf.ones([self.num_samples, self.num_features]))))
    x_local1_scale = x_local1_scale_noncentered * tf.sqrt(
        x_local1_scale_variance)

    x_bias = yield JDCRoot(
        Independent(
            tfd.Normal(
                loc=tf.fill([self.num_features],
                            np.float32(self.x_bias_loc0)),
                scale=np.float32(self.x_bias_scale0))))

    x = yield Independent(
        tfd.Normal(
            loc=x_bias - sample_scales,
            scale=x_local1_scale * x_global_scale))

    yield from likelihood_model(x)
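# Hedged usage sketch (not from the source): assuming JDCRoot aliases
# tfd.JointDistributionCoroutine.Root and Independent aliases tfd.Independent,
# the coroutine above is typically consumed like this; `model`,
# `likelihood_model`, and `sample_scales` are illustrative placeholders.
import functools

joint = tfd.JointDistributionCoroutine(
    functools.partial(model.model_fn, likelihood_model, sample_scales))
prior_sample = joint.sample()            # one draw of every latent plus the data
log_prob = joint.log_prob(prior_sample)  # joint log-density of that draw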
def test_executor_logp_tensorflow(transformed_model):
    norm = tfd.HalfNormal(1)

    _, state = pm.evaluate_model(transformed_model(), values=dict(n=math.pi))

    np.testing.assert_allclose(
        state.collect_log_prob(), norm.log_prob(math.pi), equal_nan=False)
def test_executor_logp_tensorflow(transformed_model):
    tfp = pytest.importorskip("tensorflow_probability")
    tfd = tfp.distributions
    norm = tfd.HalfNormal(1)

    _, state = pm.evaluate_model(transformed_model(), values=dict(n=math.pi))

    np.testing.assert_allclose(
        state.collect_log_prob(), norm.log_prob(math.pi), equal_nan=False)
def test_transformed_executor_logp_tensorflow(transformed_model):
    norm_log = tfd.TransformedDistribution(tfd.HalfNormal(1),
                                           bij.Invert(bij.Exp()))

    _, state = pm.evaluate_model_transformed(
        transformed_model(), values=dict(__log_n=-math.pi))
    np.testing.assert_allclose(
        state.collect_log_prob(), norm_log.log_prob(-math.pi),
        equal_nan=False)

    _, state = pm.evaluate_model_transformed(
        transformed_model(), values=dict(n=math.exp(-math.pi)))
    np.testing.assert_allclose(
        state.collect_log_prob(), norm_log.log_prob(-math.pi),
        equal_nan=False)
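# The `transformed_model` fixture used by the three tests above is not shown.
# A plausible reconstruction, assuming the PyMC4-era coroutine API these tests
# exercise (names and signatures here are assumptions, not from the source):
@pytest.fixture(scope="function")
def transformed_model():
    @pm.model
    def transformed_model():
        n = yield pm.HalfNormal("n", scale=1)

    return transformed_model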
def joint_dist():
    # Shared intercept and home-field advantage.
    alpha = yield tfd.Normal(loc=0.0, scale=1.0)
    home = yield tfd.Normal(loc=0.0, scale=1.0)
    # Hierarchical scales for the per-team attack and defence strengths.
    sd_att = yield tfd.HalfNormal(scale=1.0)
    sd_def = yield tfd.HalfNormal(scale=1.0)
    attack = yield tfd.Normal(loc=tf.zeros(nt), scale=sd_att)
    defend = yield tfd.Normal(loc=tf.zeros(nt), scale=sd_def)

    home_log_rate = (alpha + home
                     + tf.gather(attack, home_id, axis=-1)
                     - tf.gather(defend, away_id, axis=-1))
    away_log_rate = (alpha
                     + tf.gather(attack, away_id, axis=-1)
                     - tf.gather(defend, home_id, axis=-1))

    yield tfd.Poisson(log_rate=home_log_rate)  # home goals
    yield tfd.Poisson(log_rate=away_log_rate)  # away goals
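# Hedged usage sketch: the coroutine above plugs directly into
# tfd.JointDistributionCoroutineAutoBatched (nt, home_id, and away_id are
# assumed to be defined in the enclosing scope).
joint = tfd.JointDistributionCoroutineAutoBatched(joint_dist)
sample = joint.sample()      # (alpha, home, sd_att, sd_def, attack, defend, home_goals, away_goals)
lp = joint.log_prob(sample)  # joint log-density of the draw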
def unormalized_log_prob_parts(self, data, prior_weight=1., **params):
    """Energy function.

    Arguments:
        data {dict} -- A single batch of data.
        prior_weight {float} -- Multiplier on the prior terms (default: 1.)

    Returns:
        dict -- Named energy components of broadcasted shape.
    """
    prior_parts = self.prior_distribution.log_prob_parts(params)
    prior_parts = {k: v * prior_weight for k, v in prior_parts.items()}
    log_likelihood = self.log_likelihood_components(
        data=data, **params)['log_likelihood']

    # Prior on theta.
    s = params['s']
    theta = self.encode(x=data[self.count_key], u=params['u'], s=s)
    rv_theta = tfd.Independent(
        tfd.HalfNormal(scale=tf.ones_like(theta, dtype=self.dtype)),
        reinterpreted_batch_ndims=2)
    prior_parts['z'] = rv_theta.log_prob(theta)

    # Clip non-finite likelihood values to a floor just below the finite
    # minimum so a single bad observation cannot poison the whole batch.
    finite_portion = tf.where(
        tf.math.is_finite(log_likelihood), log_likelihood,
        tf.zeros_like(log_likelihood))
    min_val = tf.reduce_min(finite_portion) - 10.
    max_val = 0.
    log_likelihood = tf.clip_by_value(log_likelihood, min_val, max_val)
    log_likelihood = tf.where(
        tf.math.is_finite(log_likelihood), log_likelihood,
        tf.ones_like(log_likelihood) * min_val)
    log_likelihood = tf.reduce_sum(log_likelihood, -1)
    log_likelihood = tf.reduce_sum(log_likelihood, -1)
    prior_parts['x'] = log_likelihood

    return prior_parts
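# Hedged sketch (not from the source): the per-variable parts returned above
# would typically be summed into a single scalar energy, e.g. as the target of
# an MCMC kernel or optimizer.
def unormalized_log_prob(self, data, **params):
    parts = self.unormalized_log_prob_parts(data, **params)
    return tf.add_n(list(parts.values()))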
def _init_distribution(conditions, **kwargs):
    scale = conditions["scale"]
    return tfd.HalfNormal(scale=scale, **kwargs)
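# Minimal usage sketch (hedged): `conditions` carries the user-facing
# parameters, so a unit-scale half-normal is constructed as
dist = _init_distribution({"scale": 1.0})  # tfd.HalfNormal(scale=1.0)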
def create_distributions(self):
    """Create prior and surrogate distribution objects."""
    self.bijectors = {
        'u': tfb.Softplus(),
        'v': tfb.Softplus(),
        'u_eta': tfb.Softplus(),
        'u_tau': tfb.Softplus(),
        's': tfb.Softplus(),
        's_eta': tfb.Softplus(),
        's_tau': tfb.Softplus(),
        'w': tfb.Softplus()
    }
    # Decaying scale over latent dimensions breaks rotational symmetry.
    symmetry_breaking_decay = self.symmetry_breaking_decay**tf.cast(
        tf.range(self.latent_dim), self.dtype)[tf.newaxis, ...]

    distribution_dict = {
        'v': tfd.Independent(
            tfd.HalfNormal(
                scale=0.1 * tf.ones((self.latent_dim, self.feature_dim),
                                    dtype=self.dtype)),
            reinterpreted_batch_ndims=2),
        'w': tfd.Independent(
            tfd.HalfNormal(
                scale=tf.ones((1, self.feature_dim), dtype=self.dtype)),
            reinterpreted_batch_ndims=2)
    }
    if self.horseshoe_plus:
        # Horseshoe+ prior: half-normal scales with half-Cauchy local (eta)
        # and global (tau) hyperpriors.
        distribution_dict = {
            **distribution_dict,
            'u': lambda u_eta, u_tau: tfd.Independent(
                tfd.HalfNormal(
                    scale=u_eta * u_tau * symmetry_breaking_decay),
                reinterpreted_batch_ndims=2),
            'u_eta': tfd.Independent(
                tfd.HalfCauchy(
                    loc=tf.zeros((self.feature_dim, self.latent_dim),
                                 dtype=self.dtype),
                    scale=tf.ones((self.feature_dim, self.latent_dim),
                                  dtype=self.dtype)),
                reinterpreted_batch_ndims=2),
            'u_tau': tfd.Independent(
                tfd.HalfCauchy(
                    loc=tf.zeros((1, self.latent_dim), dtype=self.dtype),
                    scale=tf.ones((1, self.latent_dim), dtype=self.dtype) *
                    self.u_tau_scale),
                reinterpreted_batch_ndims=2),
        }
        distribution_dict['s'] = lambda s_eta, s_tau: tfd.Independent(
            tfd.HalfNormal(scale=s_eta * s_tau),
            reinterpreted_batch_ndims=2)
        distribution_dict['s_eta'] = tfd.Independent(
            tfd.HalfCauchy(
                loc=tf.zeros((2, self.feature_dim), dtype=self.dtype),
                scale=tf.ones((2, self.feature_dim), dtype=self.dtype)),
            reinterpreted_batch_ndims=2)
        distribution_dict['s_tau'] = tfd.Independent(
            tfd.HalfCauchy(
                loc=tf.zeros((1, self.feature_dim), dtype=self.dtype),
                scale=tf.ones((1, self.feature_dim), dtype=self.dtype) *
                self.s_tau_scale),
            reinterpreted_batch_ndims=2)

        # Re-express the half-Cauchy hyperpriors through auxiliary
        # inverse-gamma variables, the usual decomposition that makes them
        # amenable to mean-field inference.
        self.bijectors['u_eta_a'] = tfb.Softplus()
        self.bijectors['u_tau_a'] = tfb.Softplus()
        self.bijectors['s_eta_a'] = tfb.Softplus()
        self.bijectors['s_tau_a'] = tfb.Softplus()
        distribution_dict['u_eta'] = lambda u_eta_a: tfd.Independent(
            SqrtInverseGamma(
                concentration=0.5 * tf.ones(
                    (self.feature_dim, self.latent_dim), dtype=self.dtype),
                scale=1.0 / u_eta_a),
            reinterpreted_batch_ndims=2)
        distribution_dict['u_eta_a'] = tfd.Independent(
            tfd.InverseGamma(
                concentration=0.5 * tf.ones(
                    (self.feature_dim, self.latent_dim), dtype=self.dtype),
                scale=tf.ones((self.feature_dim, self.latent_dim),
                              dtype=self.dtype)),
            reinterpreted_batch_ndims=2)
        distribution_dict['u_tau'] = lambda u_tau_a: tfd.Independent(
            SqrtInverseGamma(
                concentration=0.5 * tf.ones((1, self.latent_dim),
                                            dtype=self.dtype),
                scale=1.0 / u_tau_a),
            reinterpreted_batch_ndims=2)
        distribution_dict['u_tau_a'] = tfd.Independent(
            tfd.InverseGamma(
                concentration=0.5 * tf.ones((1, self.latent_dim),
                                            dtype=self.dtype),
                scale=tf.ones((1, self.latent_dim), dtype=self.dtype) /
                self.u_tau_scale**2),
            reinterpreted_batch_ndims=2)
        distribution_dict['s_eta'] = lambda s_eta_a: tfd.Independent(
            SqrtInverseGamma(
                concentration=0.5 * tf.ones((2, self.feature_dim),
                                            dtype=self.dtype),
                scale=1.0 / s_eta_a),
            reinterpreted_batch_ndims=2)
        distribution_dict['s_eta_a'] = tfd.Independent(
            tfd.InverseGamma(
                concentration=0.5 * tf.ones((2, self.feature_dim),
                                            dtype=self.dtype),
                scale=tf.ones((2, self.feature_dim), dtype=self.dtype)),
            reinterpreted_batch_ndims=2)
        distribution_dict['s_tau'] = lambda s_tau_a: tfd.Independent(
            SqrtInverseGamma(
                concentration=0.5 * tf.ones((1, self.feature_dim),
                                            dtype=self.dtype),
                scale=1.0 / s_tau_a),
            reinterpreted_batch_ndims=2)
        distribution_dict['s_tau_a'] = tfd.Independent(
            tfd.InverseGamma(
                concentration=0.5 * tf.ones((1, self.feature_dim),
                                            dtype=self.dtype),
                scale=tf.ones((1, self.feature_dim), dtype=self.dtype) /
                self.s_tau_scale**2),
            reinterpreted_batch_ndims=2)
    else:
        # Collapsed alternative: absolute-horseshoe priors directly on u, s.
        distribution_dict = {
            **distribution_dict,
            'u': tfd.Independent(
                AbsHorseshoe(
                    scale=(self.u_tau_scale * symmetry_breaking_decay *
                           tf.ones((self.feature_dim, self.latent_dim),
                                   dtype=self.dtype))),
                reinterpreted_batch_ndims=2),
            's': tfd.Independent(
                AbsHorseshoe(
                    scale=self.s_tau_scale * tf.ones(
                        (1, self.feature_dim), dtype=self.dtype)),
                reinterpreted_batch_ndims=2)
        }

    self.prior_distribution = tfd.JointDistributionNamed(distribution_dict)

    # Surrogate (variational) family: trainable distributions pushed through
    # the Softplus bijectors to keep their support positive.
    surrogate_dict = {
        'v': self.bijectors['v'](
            build_trainable_normal_dist(
                -6. * tf.ones((self.latent_dim, self.feature_dim),
                              dtype=self.dtype),
                5e-4 * tf.ones((self.latent_dim, self.feature_dim),
                               dtype=self.dtype),
                2,
                strategy=self.strategy)),
        'w': self.bijectors['w'](
            build_trainable_normal_dist(
                -6 * tf.ones((1, self.feature_dim), dtype=self.dtype),
                5e-4 * tf.ones((1, self.feature_dim), dtype=self.dtype),
                2,
                strategy=self.strategy))
    }
    if self.horseshoe_plus:
        surrogate_dict = {
            **surrogate_dict,
            'u': self.bijectors['u'](
                build_trainable_normal_dist(
                    -6. * tf.ones((self.feature_dim, self.latent_dim),
                                  dtype=self.dtype),
                    5e-4 * tf.ones((self.feature_dim, self.latent_dim),
                                   dtype=self.dtype),
                    2,
                    strategy=self.strategy)),
            'u_eta': self.bijectors['u_eta'](
                build_trainable_InverseGamma_dist(
                    3 * tf.ones((self.feature_dim, self.latent_dim),
                                dtype=self.dtype),
                    tf.ones((self.feature_dim, self.latent_dim),
                            dtype=self.dtype),
                    2,
                    strategy=self.strategy)),
            'u_tau': self.bijectors['u_tau'](
                build_trainable_InverseGamma_dist(
                    3 * tf.ones((1, self.latent_dim), dtype=self.dtype),
                    tf.ones((1, self.latent_dim), dtype=self.dtype),
                    2,
                    strategy=self.strategy)),
        }
        surrogate_dict['s_eta'] = self.bijectors['s_eta'](
            build_trainable_InverseGamma_dist(
                tf.ones((2, self.feature_dim), dtype=self.dtype),
                tf.ones((2, self.feature_dim), dtype=self.dtype),
                2,
                strategy=self.strategy))
        surrogate_dict['s_tau'] = self.bijectors['s_tau'](
            build_trainable_InverseGamma_dist(
                1 * tf.ones((1, self.feature_dim), dtype=self.dtype),
                tf.ones((1, self.feature_dim), dtype=self.dtype),
                2,
                strategy=self.strategy))
        surrogate_dict['s'] = self.bijectors['s'](
            build_trainable_normal_dist(
                tf.ones((2, self.feature_dim), dtype=self.dtype) *
                tf.cast([[-2.], [-1.]], dtype=self.dtype),
                1e-3 * tf.ones((2, self.feature_dim), dtype=self.dtype),
                2,
                strategy=self.strategy))

        self.bijectors['u_eta_a'] = tfb.Softplus()
        self.bijectors['u_tau_a'] = tfb.Softplus()
        surrogate_dict['u_eta_a'] = self.bijectors['u_eta_a'](
            build_trainable_InverseGamma_dist(
                2. * tf.ones((self.feature_dim, self.latent_dim),
                             dtype=self.dtype),
                tf.ones((self.feature_dim, self.latent_dim),
                        dtype=self.dtype),
                2,
                strategy=self.strategy))
        surrogate_dict['u_tau_a'] = self.bijectors['u_tau_a'](
            build_trainable_InverseGamma_dist(
                2. * tf.ones((1, self.latent_dim), dtype=self.dtype),
                tf.ones((1, self.latent_dim), dtype=self.dtype) /
                self.u_tau_scale**2,
                2,
                strategy=self.strategy))

        self.bijectors['s_eta_a'] = tfb.Softplus()
        self.bijectors['s_tau_a'] = tfb.Softplus()
        surrogate_dict['s_eta_a'] = self.bijectors['s_eta_a'](
            build_trainable_InverseGamma_dist(
                2. * tf.ones((2, self.feature_dim), dtype=self.dtype),
                tf.ones((2, self.feature_dim), dtype=self.dtype),
                2,
                strategy=self.strategy))
        surrogate_dict['s_tau_a'] = self.bijectors['s_tau_a'](
            build_trainable_InverseGamma_dist(
                2. * tf.ones((1, self.feature_dim), dtype=self.dtype),
                tf.ones((1, self.feature_dim), dtype=self.dtype) /
                self.s_tau_scale**2,
                2,
                strategy=self.strategy))
    else:
        surrogate_dict = {
            **surrogate_dict,
            's': self.bijectors['s'](
                build_trainable_normal_dist(
                    tf.ones((2, self.feature_dim), dtype=self.dtype) *
                    tf.cast([[-2.], [-1.]], dtype=self.dtype),
                    1e-3 * tf.ones((2, self.feature_dim),
                                   dtype=self.dtype),
                    2,
                    strategy=self.strategy)),
            'u': self.bijectors['u'](
                build_trainable_normal_dist(
                    -9. * tf.ones((self.feature_dim, self.latent_dim),
                                  dtype=self.dtype),
                    5e-4 * tf.ones((self.feature_dim, self.latent_dim),
                                   dtype=self.dtype),
                    2,
                    strategy=self.strategy))
        }

    self.surrogate_distribution = tfd.JointDistributionNamed(surrogate_dict)
    self.surrogate_vars = self.surrogate_distribution.variables
    self.var_list = list(surrogate_dict.keys())
    self.set_calibration_expectations()
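# Hedged sketch (not in the source): once the prior and surrogate are built,
# the surrogate is typically fit by maximizing the ELBO, e.g. with
# tfp.vi.fit_surrogate_posterior; `model`, `data`, and the optimizer settings
# here are placeholders.
losses = tfp.vi.fit_surrogate_posterior(
    target_log_prob_fn=lambda **params: tf.add_n(
        list(model.unormalized_log_prob_parts(data, **params).values())),
    surrogate_posterior=model.surrogate_distribution,
    optimizer=tf.optimizers.Adam(learning_rate=1e-2),
    num_steps=1000)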
def _base_dist(self, sigma: TensorLike, *args, **kwargs):
    return tfd.HalfNormal(scale=sigma, **kwargs)
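# Hedged sanity check (assumes eager TensorFlow and numpy as np): HalfNormal
# is the standard normal folded at zero, so its density is
# p(x) = sqrt(2 / (pi * sigma**2)) * exp(-x**2 / (2 * sigma**2)) for x >= 0.
x, sigma = 1.5, 2.0
expected = np.sqrt(2 / (np.pi * sigma**2)) * np.exp(-x**2 / (2 * sigma**2))
np.testing.assert_allclose(
    tfd.HalfNormal(scale=sigma).prob(x).numpy(), expected, rtol=1e-6)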