def init_bijectors(self, n_layers, hidden_layers):
    with tf.variable_scope(self.name):
        bijectors = []
        for i in range(n_layers):
            if self.flow_type == "MAF":
                bijectors.append(tfb.MaskedAutoregressiveFlow(
                    shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
                        hidden_layers=hidden_layers,
                        name="MAF_template_{}".format(i)),
                    name="MAF_{}".format(i)))
            elif self.flow_type == "IAF":
                bijectors.append(
                    tfb.Invert(
                        tfb.MaskedAutoregressiveFlow(
                            shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
                                hidden_layers=hidden_layers,
                                name="MAF_template_{}".format(i))),
                        name="IAF_{}".format(i)))
            bijectors.append(tfb.Permute(permutation=self.init_once(
                np.random.permutation(self.event_size).astype("int32"),
                name="permutation_{}".format(i))))
        flow_bijector = tfb.Chain(list(reversed(bijectors[:-1])))
        return flow_bijector
def init_bijectors(self, n_layers, hidden_layers):
    with tf.variable_scope(self.name):
        bijectors = []
        for i in range(n_layers):
            if self.flow_type == "MAF":
                bijectors.append(
                    tfb.MaskedAutoregressiveFlow(
                        shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
                            hidden_layers=hidden_layers,
                            activation=tf.nn.relu,
                            log_scale_min_clip=self.log_scale_min_clip,
                            log_scale_max_clip=self.log_scale_max_clip,
                            shift_only=self.shift_only,
                            log_scale_clip_gradient=self.log_scale_clip_gradient,
                            name="MAF_template_{}".format(i)),
                        name="MAF_{}".format(i)))
            elif self.flow_type == "IAF":
                bijectors.append(
                    tfb.Invert(
                        tfb.MaskedAutoregressiveFlow(
                            shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
                                hidden_layers=hidden_layers,
                                activation=tf.nn.relu,
                                log_scale_min_clip=self.log_scale_min_clip,
                                log_scale_max_clip=self.log_scale_max_clip,
                                shift_only=self.shift_only,
                                log_scale_clip_gradient=self.log_scale_clip_gradient,
                                name="MAF_template_{}".format(i))),
                        name="IAF_{}".format(i)))
            elif self.flow_type == "RealNVP":
                bijectors.append(
                    tfb.RealNVP(
                        num_masked=self.event_size - 1,
                        shift_and_log_scale_fn=tfb.real_nvp_default_template(
                            hidden_layers=hidden_layers,
                            activation=tf.nn.relu,
                            shift_only=self.shift_only,
                            name="RealNVP_template_{}".format(i)),
                        name="RealNVP_{}".format(i)))
            else:
                raise ValueError("Unknown flow type {}".format(self.flow_type))
            # Fixed cyclic permutation between layers so every dimension
            # eventually conditions on every other.
            bijectors.append(
                tfb.Permute(permutation=list(range(1, self.event_size)) + [0]))
            # bijectors.append(
            #     tfb.Permute(
            #         self.init_once(np.random.permutation(self.event_size).astype("int32"),
            #                        name="permutation_{}".format(i))
            #     )
            # )
        # Drop the trailing Permute; Chain applies bijectors right-to-left.
        flow_bijector = tfb.Chain(list(reversed(bijectors[:-1])),
                                  validate_args=True,
                                  name="NF_chain")
        return flow_bijector
def __init__(self,
             n_layers,
             event_size,
             flow_to_reverse=None,
             hidden_layers=[32],
             sample_num=100,
             flow_type="IAF",
             shift_only=False,
             log_scale_min_clip=-0.1,
             log_scale_max_clip=0.1,
             log_scale_clip_gradient=False,
             name="NF"):
    if flow_to_reverse is None:
        self.event_size = event_size
        self.sample_num = sample_num
        self.flow_type = flow_type
        self.shift_only = shift_only
        self.log_scale_min_clip = log_scale_min_clip
        self.log_scale_max_clip = log_scale_max_clip
        self.log_scale_clip_gradient = log_scale_clip_gradient
        self.name = name
        self.bijector = self.init_bijectors(n_layers, hidden_layers)
    else:
        self.event_size = flow_to_reverse.event_size
        self.sample_num = flow_to_reverse.sample_num
        self.flow_type = flow_to_reverse.flow_type + "_reversed"
        self.name = flow_to_reverse.name + "_reversed"
        self.bijector = tfb.Invert(flow_to_reverse.bijector)
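# Usage sketch (hedged): the snippet below assumes the flow class above is
# named `NF` (the actual class name isn't shown) and wraps its bijector
# around a diagonal-normal base, the usual TransformedDistribution pattern.
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

flow = NF(n_layers=4, event_size=8, flow_type="IAF")
base = tfd.MultivariateNormalDiag(loc=tf.zeros([flow.event_size]))
flow_dist = tfd.TransformedDistribution(distribution=base, bijector=flow.bijector)
z = flow_dist.sample(flow.sample_num)   # forward pass through the chain
log_q = flow_dist.log_prob(z)           # inverse pass plus log-det-Jacobian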
def init_bijectors(self,
                   a1: tf.Tensor,
                   b1: tf.Tensor,
                   theta: tf.Tensor,
                   a2: tf.Tensor,
                   b2: tf.Tensor,
                   name: str = 'bernstein_flow') -> tfb.Bijector:
    """Builds a normalizing flow using a Bernstein polynomial as Bijector.

    :param a1: The scale of f1.
    :type a1: Tensor
    :param b1: The shift of f1.
    :type b1: Tensor
    :param theta: The Bernstein coefficients.
    :type theta: Tensor
    :param a2: The scale of f3.
    :type a2: Tensor
    :param b2: The shift of f3.
    :type b2: Tensor
    :param name: The name to give Ops created by the initializer.
    :type name: string

    :returns: The Bernstein flow.
    :rtype: Bijector
    """
    bijectors = []

    # f1: ŷ = sigma(a1(x) * y + b1(x))
    f1_scale = tfb.Scale(a1, name='f1_scale')
    bijectors.append(f1_scale)
    f1_shift = tfb.Shift(b1, name='f1_shift')
    bijectors.append(f1_shift)

    # clip to range [0, 1]
    bijectors.append(tfb.SoftClip(low=0, high=1, hinge_softness=1.5))

    # f2: ẑ = Bernstein polynomial
    f2 = BernsteinBijector(theta=theta, name='f2')
    bijectors.append(f2)

    # clip to range [min(theta), max(theta)]
    # bijectors.append(
    #     tfb.Invert(
    #         tfb.SoftClip(
    #             high=tf.math.reduce_max(theta, axis=-1),
    #             low=tf.math.reduce_min(theta, axis=-1),
    #             hinge_softness=0.5
    #         )
    #     )
    # )

    # f3: z = a2(x) * ẑ + b2(x)
    f3_scale = tfb.Scale(a2, name='f3_scale')
    bijectors.append(f3_scale)
    f3_shift = tfb.Shift(b2, name='f3_shift')
    bijectors.append(f3_shift)

    bijectors = list(reversed(bijectors))

    return tfb.Invert(tfb.Chain(bijectors))
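# Consumption sketch for the Bernstein flow above. All values are
# illustrative, and `bf` is a hypothetical instance of the surrounding class:
# the returned Invert(Chain(...)) maps base samples z to data space y, so
# log_prob(y) runs the chain forward y -> z against a standard-normal base.
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

flow = bf.init_bijectors(a1=tf.constant(1.), b1=tf.constant(0.),
                         theta=tf.constant([-3., -1., 0., 1., 3.]),
                         a2=tf.constant(1.), b2=tf.constant(0.))
dist = tfd.TransformedDistribution(distribution=tfd.Normal(0., 1.),
                                   bijector=flow)
log_p = dist.log_prob(tf.constant([0.1, 0.5]))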
def __init__(self,
             a,
             theta,
             alpha,
             beta,
             validate_args=False,
             allow_nan_stats=True,
             name='Amoroso'):
    parameters = dict(locals())
    with tf.name_scope(name) as name:
        self._a = tensor_util.convert_nonref_to_tensor(a)
        self._theta = tensor_util.convert_nonref_to_tensor(theta)
        self._alpha = tensor_util.convert_nonref_to_tensor(alpha)
        self._beta = tensor_util.convert_nonref_to_tensor(beta)

        gamma = tfd.Gamma(alpha, 1.)
        chain = tfb.Invert(
            tfb.Chain([
                tfb.Exp(),
                tfb.Scale(beta),
                tfb.Shift(-tf.math.log(theta)),
                tfb.Log(),
                tfb.Shift(-a),
            ]))
        super().__init__(
            distribution=gamma,
            bijector=chain,
            validate_args=validate_args,
            parameters=parameters,
            name=name)
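# Sanity-check sketch for the chain above: the inner Chain maps
# x -> ((x - a) / theta) ** beta, so the inverted chain carries a
# Gamma(alpha, 1) draw g to x = a + theta * g ** (1 / beta), the Amoroso
# reparameterization. Parameter values here are illustrative.
import tensorflow as tf
import tensorflow_probability as tfp

tfd, tfb = tfp.distributions, tfp.bijectors

a, theta, alpha, beta = 1., 2., 3., 4.
chain = tfb.Invert(
    tfb.Chain([
        tfb.Exp(),
        tfb.Scale(beta),
        tfb.Shift(-tf.math.log(theta)),
        tfb.Log(),
        tfb.Shift(-a),
    ]))
g = tfd.Gamma(alpha, 1.).sample(5)
tf.debugging.assert_near(chain.forward(g), a + theta * g ** (1. / beta))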
def __init__(self, *args, **kwargs):
    self._parents = []
    # Override default bijector if provided
    self._bijector = kwargs.pop("bijector", self._bijector)
    self._untransformed_distribution = self._base_dist(*args, **kwargs)
    self._sample_shape = ()
    self._dim_names = ()
    ctx = contexts.get_context()
    self.name = kwargs.get("name", None)
    if isinstance(ctx, contexts.InferenceContext) and self.name is None:
        # Unfortunately autograph does not allow changing the AST, so we
        # instead retrieve the name from the ForwardContext where it was
        # set and AST parsing is possible.
        order_id = len(ctx.vars)  # where am I in the order of RV creation?
        self.name = ctx._names[order_id]
    if not isinstance(ctx, contexts.FreeForwardContext) and self.name is None:
        # We only require names for bookkeeping during inference
        raise ValueError("No name was set. Supply one via the name kwarg.")
    self._creation_context_id = id(ctx)
    self._backend_tensor = None
    self._distribution = tfd.TransformedDistribution(
        distribution=self._untransformed_distribution,
        bijector=bijectors.Invert(self._bijector))
    ctx.add_variable(self)
def transformed_interceptor(rv_ctor, *rv_args, **rv_kwargs):
    global bijectors
    try:
        bijector = bijectors.pop(0)
    except IndexError:
        bijector = None
    if bijector is None:
        return edward2.interceptable(rv_ctor)(*rv_args, **rv_kwargs)

    distribution = rv_ctor(*rv_args, **rv_kwargs).distribution
    if invert:
        bijector = tfb.Invert(bijector)
    name = rv_kwargs.pop('name', None)
    value = rv_kwargs.pop('value', None)
    transformed_value = value
    if value is not None:
        transformed_value = bijector.forward(value)
    rv = edward2.TransformedDistribution(distribution,
                                         bijector,
                                         value=transformed_value,
                                         name=name)
    return bijector.inverse(rv)
def __init__(self, *args, **kwargs): """Initialize UnitContinuousRV. Developer Note -------------- The inverse of the sigmoid bijector is the logodds bijector. """ super().__init__(*args, **kwargs) self._transformed_distribution = tfd.TransformedDistribution( distribution=self._distribution, bijector=bijectors.Invert(bijectors.Sigmoid()))
def __init__(self, *args, **kwargs): """Initialize PositiveContinuousRV. Developer Note -------------- The inverse of the exponential bijector is the log bijector. """ super().__init__(*args, **kwargs) self._transformed_distribution = tfd.TransformedDistribution( distribution=self._distribution, bijector=bijectors.Invert(bijectors.Exp()))
def test_transformed_executor_logp_tensorflow(transformed_model):
    norm_log = tfd.TransformedDistribution(tfd.HalfNormal(1),
                                           bij.Invert(bij.Exp()))

    _, state = pm.evaluate_model_transformed(transformed_model(),
                                             values=dict(__log_n=-math.pi))
    np.testing.assert_allclose(state.collect_log_prob(),
                               norm_log.log_prob(-math.pi),
                               equal_nan=False)

    _, state = pm.evaluate_model_transformed(transformed_model(),
                                             values=dict(n=math.exp(-math.pi)))
    np.testing.assert_allclose(state.collect_log_prob(),
                               norm_log.log_prob(-math.pi),
                               equal_nan=False)
def _init_distribution(conditions):
    concentration, scale = conditions["concentration"], conditions["scale"]
    scale_tensor, concentration_tensor = (
        tf.convert_to_tensor(scale),
        tf.convert_to_tensor(concentration),
    )
    broadcast_shape = dist_util.prefer_static_broadcast_shape(
        scale_tensor.shape, concentration_tensor.shape)
    return tfd.TransformedDistribution(
        distribution=tfd.Uniform(low=tf.zeros(broadcast_shape),
                                 high=tf.ones(broadcast_shape)),
        bijector=bij.Invert(bij.WeibullCDF(scale=scale,
                                           concentration=concentration)),
        name="Weibull",
    )
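# Sketch of the inverse-CDF idea behind the construction above: pushing
# Uniform(0, 1) draws through the inverted Weibull CDF yields Weibull
# samples. Parameter values are illustrative.
import tensorflow_probability as tfp

tfd, tfb = tfp.distributions, tfp.bijectors

u = tfd.Uniform(0., 1.).sample(1000, seed=1)
w = tfb.Invert(tfb.WeibullCDF(scale=2., concentration=1.5)).forward(u)
# w is (approximately) distributed as Weibull(concentration=1.5, scale=2.)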
def get_iaf_elbo(target, num_mc_samples, param_shapes):
    shape_sizes = [_tensorshape_size(pshape)
                   for pshape in param_shapes.values()]
    overall_shape = [sum(shape_sizes)]

    def unmarshal(variational_sample):
        results = []
        n_dimensions_used = 0
        for (n_to_add, result_shape) in zip(shape_sizes, param_shapes.values()):
            result = variational_sample[
                Ellipsis, n_dimensions_used:n_dimensions_used + n_to_add]
            results.append(tf.reshape(result, result_shape))
            n_dimensions_used += n_to_add
        return tuple(results)

    variational_dist = tfd.TransformedDistribution(
        distribution=tfd.Normal(loc=0., scale=1.),
        bijector=tfb.Invert(
            tfb.MaskedAutoregressiveFlow(
                shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
                    hidden_layers=[256, 256]))),
        event_shape=overall_shape,
        name='q_iaf')

    variational_samples = variational_dist.sample(num_mc_samples)
    target_q_sum = tf.reduce_sum(variational_dist.log_prob(variational_samples))
    target_sum = 0.
    for s in range(num_mc_samples):
        params = unmarshal(variational_samples[s, Ellipsis])
        target_sum = target_sum + target(*params)

    energy = target_sum / float(num_mc_samples)
    entropy = -target_q_sum / float(num_mc_samples)
    elbo = energy + entropy

    tf.summary.scalar('energy', energy)
    tf.summary.scalar('entropy', entropy)
    tf.summary.scalar('elbo', elbo)
    return elbo
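# Hypothetical call sketch for get_iaf_elbo: a single 3-dimensional parameter
# with a standard-normal target. The names `w` and `target` are illustrative,
# not from the source.
import collections
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

param_shapes = collections.OrderedDict(w=tf.TensorShape([3]))

def target(w):
    return tf.reduce_sum(tfd.Normal(0., 1.).log_prob(w))

elbo = get_iaf_elbo(target, num_mc_samples=16, param_shapes=param_shapes)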
def german_credit_model():
    x_numeric = tf.constant(numericals.astype(np.float32))
    x_categorical = [tf.one_hot(c, c.max() + 1) for c in categoricals]
    all_x = tf.concat([x_numeric] + x_categorical, 1)
    num_features = int(all_x.shape[1])

    overall_log_scale = ed.Normal(loc=0., scale=10., name='overall_log_scale')
    beta_log_scales = ed.TransformedDistribution(
        tfd.Gamma(0.5 * tf.ones([num_features]), 0.5),
        bijector=tfb.Invert(tfb.Exp()),
        name='beta_log_scales')
    beta = ed.Normal(loc=tf.zeros([num_features]),
                     scale=tf.exp(overall_log_scale + beta_log_scales),
                     name='beta')
    logits = tf.einsum('nd,md->mn', all_x, beta[tf.newaxis, :])
    return ed.Bernoulli(logits=logits, name='y')
def test_noiseless_is_consistent_with_cumsum_bijector(self):
    num_timesteps = 10
    ssm = AutoregressiveMovingAverageStateSpaceModel(
        num_timesteps=num_timesteps,
        ar_coefficients=[0.7, -0.2, 0.1],
        ma_coefficients=[0.6],
        level_scale=0.6,
        level_drift=-0.3,
        observation_noise_scale=0.,
        initial_state_prior=tfd.MultivariateNormalDiag(
            loc=tf.zeros([3]), scale_diag=tf.ones([3])))
    cumsum_ssm = IntegratedStateSpaceModel(ssm)
    x, lp = cumsum_ssm.experimental_sample_and_log_prob(
        [2], seed=test_util.test_seed())

    flatten_event = tfb.Reshape([num_timesteps],
                                event_shape_in=[num_timesteps, 1])
    cumsum_dist = tfb.Chain(
        [tfb.Invert(flatten_event), tfb.Cumsum(), flatten_event])(ssm)
    self.assertAllClose(lp, cumsum_dist.log_prob(x), atol=1e-5)
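# Minimal illustration of the Cumsum bijector the test relies on: forward
# takes running sums along the last axis, inverse takes first differences.
import tensorflow_probability as tfp

tfb = tfp.bijectors

tfb.Cumsum().forward([1., 2., 3.])   # => [1., 3., 6.]
tfb.Cumsum().inverse([1., 3., 6.])   # => [1., 2., 3.]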
def recenter(rv_constructor, *rv_args, **rv_kwargs):
    rv_name = rv_kwargs.get('name')
    rv_value = rv_kwargs.pop('value', None)

    base_bijector = None
    if rv_constructor.__name__ == 'TransformedDistribution':
        if (rv_args[1].__class__.__name__ == 'Invert' and
                rv_args[1].bijector.__class__.__name__ == 'SoftClip'):
            distribution = rv_args[0]
            base_bijector = rv_args[1].bijector
            rv_constructor = distribution.__class__
            rv_kwargs = distribution.parameters
            rv_args = rv_args[2:]

            # We were given a value for the transformed RV. Let's pretend it
            # was for the original.
            if rv_value is not None:
                rv_value = base_bijector.forward(rv_value)

    if (rv_constructor.__name__ == 'Normal' and not rv_name.startswith('y')):
        # NB: assume everything is kwargs for now.
        x_loc = rv_kwargs['loc']
        x_scale = rv_kwargs['scale']
        name = rv_kwargs['name']

        a, b, _ = get_or_init(name,
                              loc_shape=tf.shape(x_loc),
                              scale_shape=tf.shape(x_scale),
                              parameterisation_type='scalar')

        kwargs_std = {}
        kwargs_std['loc'] = tf.multiply(x_loc, a)
        kwargs_std['scale'] = tf.pow(x_scale, b)  # tf.multiply(x_scale - 1., b) + 1.
        kwargs_std['name'] = name

        scale = x_scale / kwargs_std['scale']  # tf.pow(x_scale, 1. - b)
        shift = x_loc - tf.multiply(scale, kwargs_std['loc'])
        b = tfb.AffineScalar(scale=scale, shift=shift)
        if rv_value is not None:
            rv_value = b.inverse(rv_value)

        learnable_parameters[name + '_prior_mean'] = tf.convert_to_tensor(x_loc)
        learnable_parameters[name + '_prior_scale'] = tf.convert_to_tensor(x_scale)

        # If original RV was constrained, transform the constraint to the new
        # standardized RV. For now we assume a double-sided constraint.
        if base_bijector is not None:
            constraint_std = tfb.SoftClip(
                low=b.inverse(base_bijector.low),
                high=b.inverse(base_bijector.high),
                hinge_softness=base_bijector.hinge_softness / scale
                if base_bijector.hinge_softness is not None else None)
            rv_std = edward2.TransformedDistribution(
                rv_constructor(**kwargs_std),
                tfb.Invert(constraint_std),
                value=constraint_std.inverse(rv_value)
                if rv_value is not None else None)
            b = b(constraint_std)
        else:
            kwargs_std['value'] = rv_value
            rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)

        bijectors[name] = b
        return b.forward(rv_std)
    elif ((rv_constructor.__name__.startswith('MultivariateNormal') or
           rv_constructor.__name__.startswith('GaussianProcess')) and
          not rv_kwargs['name'].startswith('y')):
        name = rv_kwargs['name']
        if rv_constructor.__name__.startswith('GaussianProcess'):
            gp_dist = rv_constructor(*rv_args, **rv_kwargs).distribution
            X = gp_dist._get_index_points()
            x_loc = gp_dist.mean_fn(X)
            x_cov = gp_dist._compute_covariance(index_points=X)
        else:
            x_loc = rv_kwargs['loc']
            x_cov = rv_kwargs['covariance_matrix']

        a, b, c = get_or_init(name,
                              loc_shape=tf.shape(x_loc),
                              scale_shape=tf.shape(x_cov)[:-1],
                              parameterisation_type=parameterisation_type)

        ndims = tf.shape(x_cov)[-1]
        x_loc = tf.broadcast_to(x_loc, tf.shape(x_cov)[:-1])
        cov_dtype = tf.float64 if FLAGS.float64 else x_cov.dtype
        x_cov = tf.cast(x_cov, cov_dtype)

        if parameterisation_type == 'eig':
            # Extra cost of the eigendecomposition? We do the eig to get
            # Lambda, Q. We rescale Lambda and create the prior dist linop.
            # Point one: the prior is an MVN (albeit an efficient one),
            # where in NCP it's just Normal. Then we construct the remaining
            # scale matrix (an n**3 matmul). And unlike a Cholesky factor
            # these matrices aren't triangular, so multiplication or
            # division - can we
            Lambda, Q = eigh_with_safe_gradient(x_cov)
            Lambda = tf.abs(Lambda)
            Lambda = tf.cast(Lambda, tf.float32)
            Q = tf.cast(Q, tf.float32)
            Lambda_hat_b = tf.pow(Lambda, b)

            if tied_pparams:
                # If the scale parameterization is in the eigenbasis,
                # apply it to the mean in the same basis.
                loc_in_eigenbasis = tf.linalg.matvec(Q, x_loc, adjoint_a=True)
                reparam_loc = tf.linalg.matvec(
                    Q, tf.multiply(loc_in_eigenbasis, a))
            else:
                reparam_loc = tf.multiply(x_loc, a)

            kwargs_std = {}
            kwargs_std['loc'] = reparam_loc
            kwargs_std['scale'] = LinearOperatorEigenScale(
                Q, d=tf.sqrt(Lambda_hat_b))
            kwargs_std['name'] = name

            Q_linop = LinearOperatorOrthogonal(Q, det_is_positive=True)
            scale = tf.linalg.LinearOperatorComposition([
                Q_linop,
                tf.linalg.LinearOperatorDiag(tf.sqrt(Lambda + 1e-10)),
                tf.linalg.LinearOperatorDiag(1. / tf.sqrt(Lambda_hat_b + 1e-10)),
                Q_linop.adjoint(),
            ])
            shift = x_loc - scale.matvec(reparam_loc)
            b = tfb.AffineLinearOperator(scale=scale, shift=shift)
            if 'value' in rv_kwargs:
                kwargs_std['value'] = b.inverse(rv_kwargs['value'])
        elif parameterisation_type == 'chol':
            L = tf.linalg.cholesky(
                x_cov + 1e-6 * tf.eye(ndims, dtype=x_cov.dtype))
            L = tf.cast(L, tf.float32)
            reparam_loc = x_loc * a
            reparam_scale = tf.linalg.LinearOperatorLowerTriangular(
                tf.linalg.diag(1 - b) + b[..., tf.newaxis] * L)

            kwargs_std = {}
            kwargs_std['loc'] = reparam_loc
            kwargs_std['scale'] = reparam_scale
            kwargs_std['name'] = name

            Dinv = tf.linalg.triangular_solve(
                tf.cast(reparam_scale.to_dense(), cov_dtype),
                tf.eye(ndims, dtype=cov_dtype))
            Dinv = tf.cast(Dinv, tf.float32)
            scale = tf.matmul(L, Dinv)
            shift = x_loc - tf.linalg.matvec(scale, reparam_loc)
            b = tfb.AffineLinearOperator(
                scale=tf.linalg.LinearOperatorFullMatrix(scale), shift=shift)
            if 'value' in rv_kwargs:
                kwargs_std['value'] = b.inverse(rv_kwargs['value'])
        elif parameterisation_type == 'indep':
            # Assumes `C^-1 = diag(c)` is a learned diagonal matrix of
            # 'evidence precisions'. This approximates the true posterior
            # under an iid Gaussian observation model:
            prior_chol = tf.linalg.cholesky(x_cov)
            prior_inv = tf.linalg.cholesky_solve(
                prior_chol, tf.eye(ndims, dtype=prior_chol.dtype))
            approx_posterior_prec = prior_inv + tf.cast(
                tf.linalg.diag(c), prior_inv.dtype)
            approx_posterior_prec_chol = tf.linalg.cholesky(
                approx_posterior_prec)
            approx_posterior_cov = tf.linalg.cholesky_solve(
                approx_posterior_prec_chol,
                tf.eye(ndims, dtype=approx_posterior_prec_chol.dtype))
            cov_chol = tf.linalg.cholesky(approx_posterior_cov)
            cov_chol = tf.cast(cov_chol, tf.float32)
            prior_chol = tf.cast(prior_chol, tf.float32)
            scale_linop = tf.linalg.LinearOperatorLowerTriangular(cov_chol)

            reparam_loc = x_loc * a
            reparam_scale = tf.linalg.LinearOperatorComposition([
                tf.linalg.LinearOperatorInversion(scale_linop),
                tf.linalg.LinearOperatorLowerTriangular(prior_chol)
            ])

            kwargs_std = {}
            kwargs_std['loc'] = reparam_loc
            kwargs_std['scale'] = reparam_scale
            kwargs_std['name'] = name

            shift = x_loc - scale_linop.matvec(reparam_loc)
            b = tfb.AffineLinearOperator(scale=scale_linop, shift=shift)
            if 'value' in rv_kwargs:
                kwargs_std['value'] = b.inverse(rv_kwargs['value'])
        elif parameterisation_type == 'eigindep':
            # Combines 'eig' and 'indep' parameterizations, modeling the
            # posterior as
            #   (V D**(-b) V' + diag(c))^-1
            # where VDV' is the eigendecomposition of the prior cov, and b
            # and c are learned vectors.
            b, c = [tf.cast(x, cov_dtype) for x in (b, c)]
            Lambda, Q = eigh_with_safe_gradient(x_cov)
            Lambda = tf.abs(Lambda)
            Lambda_hat_b = 1e-6 + tf.pow(Lambda, b)
            prior = tf.matmul(
                Q, tf.matmul(tf.linalg.diag(Lambda_hat_b), Q, adjoint_b=True))
            prior_chol = tf.linalg.cholesky(
                prior + 1e-6 * tf.eye(ndims, dtype=prior.dtype))
            prior_prec = tf.linalg.cholesky_solve(
                prior_chol + 1e-6 * tf.eye(ndims, dtype=prior_chol.dtype),
                tf.eye(ndims, dtype=prior_chol.dtype))
            approx_posterior_prec = prior_prec + tf.linalg.diag(c)
            approx_posterior_prec_chol = tf.linalg.cholesky(
                approx_posterior_prec)
            approx_posterior_cov = tf.linalg.cholesky_solve(
                approx_posterior_prec_chol +
                1e-6 * tf.eye(ndims, dtype=approx_posterior_prec_chol.dtype),
                tf.eye(ndims, dtype=approx_posterior_prec_chol.dtype))
            cov_chol = tf.linalg.cholesky(
                approx_posterior_cov +
                1e-6 * tf.eye(ndims, dtype=approx_posterior_cov.dtype))
            cov_chol = tf.cast(cov_chol, tf.float32)
            prior_chol = tf.cast(prior_chol, tf.float32)
            scale_linop = tf.linalg.LinearOperatorLowerTriangular(cov_chol)

            reparam_loc = tf.multiply(x_loc, a)
            reparam_scale = tf.linalg.LinearOperatorComposition([
                tf.linalg.LinearOperatorInversion(scale_linop),
                tf.linalg.LinearOperatorLowerTriangular(prior_chol)
            ])

            kwargs_std = {}
            kwargs_std['loc'] = reparam_loc
            kwargs_std['scale'] = reparam_scale
            kwargs_std['name'] = name

            shift = x_loc - scale_linop.matvec(reparam_loc)
            b = tfb.AffineLinearOperator(scale=scale_linop, shift=shift)
            if 'value' in rv_kwargs:
                kwargs_std['value'] = b.inverse(rv_kwargs['value'])
        else:
            raise Exception('unrecognized reparameterization strategy!')

        if rv_constructor.__name__.startswith('GaussianProcess'):
            rv_std = edward2.MultivariateNormalLinearOperator(
                *rv_args, **kwargs_std)
        else:
            rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)

        bijectors[name] = b
        return b.forward(rv_std)
    else:
        return interceptable(rv_constructor)(*rv_args, **rv_kwargs)
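# Check sketch of the scalar Normal branch above (values illustrative): a
# draw from the partially centered Normal(loc * a, scale ** b), pushed
# through the residual affine bijector built there, recovers Normal(loc,
# scale). Shift/Scale are used in place of the deprecated AffineScalar.
import tensorflow as tf
import tensorflow_probability as tfp

tfd, tfb = tfp.distributions, tfp.bijectors

loc, scale, a, b = 2., 3., 0.5, 0.25
res_scale = scale / scale ** b            # equals scale ** (1 - b)
res_shift = loc - res_scale * (loc * a)
z = tfd.Normal(loc * a, scale ** b).sample(10000, seed=7)
x = tfb.Chain([tfb.Shift(res_shift), tfb.Scale(res_scale)]).forward(z)
# mean(x) ~= loc and stddev(x) ~= scale, matching the original prior.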