def testDiag(self): with self.test_session(): shift = np.array([-1, 0, 1], dtype=np.float32) diag = np.array([[1, 2, 3], [2, 5, 6]], dtype=np.float32) scale = tf.linalg.LinearOperatorDiag(diag, is_non_singular=True) affine = tfb.AffineLinearOperator(shift=shift, scale=scale, validate_args=True) x = np.array([[1, 0, -1], [2, 3, 4]], dtype=np.float32) y = diag * x + shift ildj = -np.sum(np.log(np.abs(diag)), axis=-1) self.assertEqual(affine.name, "affine_linear_operator") self.assertAllClose(y, self.evaluate(affine.forward(x))) self.assertAllClose(x, self.evaluate(affine.inverse(y))) self.assertAllClose( ildj, self.evaluate(affine.inverse_log_det_jacobian(y, event_ndims=1))) self.assertAllClose( self.evaluate( -affine.inverse_log_det_jacobian(y, event_ndims=1)), self.evaluate(affine.forward_log_det_jacobian(x, event_ndims=1)))
def testTriL(self): with self.test_session(): shift = np.array([-1, 0, 1], dtype=np.float32) tril = np.array([[[3, 0, 0], [2, -1, 0], [3, 2, 1]], [[2, 0, 0], [3, -2, 0], [4, 3, 2]]], dtype=np.float32) scale = tf.linalg.LinearOperatorLowerTriangular( tril, is_non_singular=True) affine = tfb.AffineLinearOperator(shift=shift, scale=scale, validate_args=True) x = np.array([[[1, 0, -1], [2, 3, 4]], [[4, 1, -7], [6, 9, 8]]], dtype=np.float32) # If we made the bijector do x*A+b then this would be simplified to: # y = np.matmul(x, tril) + shift. y = np.squeeze(np.matmul(tril, np.expand_dims(x, -1)), -1) + shift ildj = -np.sum( np.log(np.abs(np.diagonal(tril, axis1=-2, axis2=-1)))) self.assertEqual(affine.name, "affine_linear_operator") self.assertAllClose(y, self.evaluate(affine.forward(x))) self.assertAllClose(x, self.evaluate(affine.inverse(y))) self.assertAllClose( ildj, self.evaluate(affine.inverse_log_det_jacobian(y, event_ndims=2))) self.assertAllClose( self.evaluate( -affine.inverse_log_det_jacobian(y, event_ndims=2)), self.evaluate(affine.forward_log_det_jacobian(x, event_ndims=2)))
def testTriLAdjoint(self): shift = np.array([-1, 0, 1], dtype=np.float32) tril = np.array([[[3, 0, 0], [2, -1, 0], [3, 2, 1]], [[2, 0, 0], [3, -2, 0], [4, 3, 2]]], dtype=np.float32) scale = tf.linalg.LinearOperatorLowerTriangular(tril, is_non_singular=True) affine = tfb.AffineLinearOperator(shift=shift, scale=scale, adjoint=True, validate_args=True) x = np.array([[[1, 0, -1], [2, 3, 4]], [[4, 1, -7], [6, 9, 8]]], dtype=np.float32) # If we made the bijector do x*A+b then this would be simplified to: # y = np.matmul(x, tril) + shift. triu = tril.transpose([0, 2, 1]) y = np.matmul(triu, x[..., np.newaxis])[..., 0] + shift ildj = -np.sum(np.log(np.abs(np.diagonal(tril, axis1=-2, axis2=-1)))) self.assertStartsWith(affine.name, "affine_linear_operator") self.assertAllClose(y, self.evaluate(affine.forward(x))) self.assertAllClose(x, self.evaluate(affine.inverse(y))) self.assertAllClose( ildj, self.evaluate(affine.inverse_log_det_jacobian(y, event_ndims=2))) self.assertAllClose( self.evaluate(-affine.inverse_log_det_jacobian(y, event_ndims=2)), self.evaluate(affine.forward_log_det_jacobian(x, event_ndims=2)))
def testMeanShapeOverride(self): shift = np.array([[-1, 0, 1], [-1, -2, -3]], dtype=np.float32) diag = np.array([[1, 2, 3], [2, 3, 2]], dtype=np.float32) fake_mvn = self._cls()(tfd.Normal(loc=0.0, scale=1.0), tfb.AffineLinearOperator( shift, scale=tf.linalg.LinearOperatorDiag( diag, is_non_singular=True), validate_args=True), batch_shape=[2], event_shape=[3], validate_args=True) self.assertAllClose(shift, self.evaluate(fake_mvn.mean()))
def testMean(self): shift = np.array([[-1, 0, 1], [-1, -2, -3]], dtype=np.float32) diag = np.array([[1, 2, 3], [2, 3, 2]], dtype=np.float32) fake_mvn = self._cls()( tfd.MultivariateNormalDiag(loc=tf.zeros_like(shift), scale_diag=tf.ones_like(diag), validate_args=True), tfb.AffineLinearOperator(shift, scale=tf.linalg.LinearOperatorDiag( diag, is_non_singular=True), validate_args=True), validate_args=True) self.assertAllClose(shift, self.evaluate(fake_mvn.mean()))
def testIdentity(self): affine = tfb.AffineLinearOperator(validate_args=True) x = np.array([[1, 0, -1], [2, 3, 4]], dtype=np.float32) y = x ildj = 0. self.assertStartsWith(affine.name, "affine_linear_operator") self.assertAllClose(y, self.evaluate(affine.forward(x))) self.assertAllClose(x, self.evaluate(affine.inverse(y))) self.assertAllClose( ildj, self.evaluate(affine.inverse_log_det_jacobian(y, event_ndims=2))) self.assertAllClose( self.evaluate(-affine.inverse_log_det_jacobian(y, event_ndims=2)), self.evaluate(affine.forward_log_det_jacobian(x, event_ndims=2)))
def testIdentity(self): with self.test_session(): affine = tfb.AffineLinearOperator(validate_args=True) x = np.array([[1, 0, -1], [2, 3, 4]], dtype=np.float32) y = x ildj = 0. self.assertEqual(affine.name, "affine_linear_operator") self.assertAllClose(y, affine.forward(x).eval()) self.assertAllClose(x, affine.inverse(y).eval()) self.assertAllClose( ildj, affine.inverse_log_det_jacobian(y, event_ndims=2).eval()) self.assertAllClose( -affine.inverse_log_det_jacobian(y, event_ndims=2).eval(), affine.forward_log_det_jacobian(x, event_ndims=2).eval())
def testEntropy(self): shift = np.array([[-1, 0, 1], [-1, -2, -3]], dtype=np.float32) diag = np.array([[1, 2, 3], [2, 3, 2]], dtype=np.float32) actual_mvn_entropy = np.concatenate([[ stats.multivariate_normal(shift[i], np.diag(diag[i]**2)).entropy() ] for i in range(len(diag))]) fake_mvn = self._cls()( tfd.MultivariateNormalDiag(loc=tf.zeros_like(shift), scale_diag=tf.ones_like(diag), validate_args=True), tfb.AffineLinearOperator(shift, scale=tf.linalg.LinearOperatorDiag( diag, is_non_singular=True), validate_args=True), validate_args=True) self.assertAllClose(actual_mvn_entropy, self.evaluate(fake_mvn.entropy()))
def __init__(self, loc=None, scale=None, validate_args=False, allow_nan_stats=True, name="VectorExponentialLinearOperator"): """Construct Vector Exponential distribution supported on a subset of `R^k`. The `batch_shape` is the broadcast shape between `loc` and `scale` arguments. The `event_shape` is given by last dimension of the matrix implied by `scale`. The last dimension of `loc` (if provided) must broadcast with this. Recall that `covariance = scale @ scale.T`. Additional leading dimensions (if any) will index batches. Args: loc: Floating-point `Tensor`. If this is set to `None`, `loc` is implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where `b >= 0` and `k` is the event size. scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape `[B1, ..., Bb, k, k]`. validate_args: Python `bool`, default `False`. Whether to validate input with asserts. If `validate_args` is `False`, and the inputs are invalid, correct behavior is not guaranteed. allow_nan_stats: Python `bool`, default `True`. If `False`, raise an exception if a statistic (e.g. mean/mode/etc...) is undefined for any batch member If `True`, batch members with valid parameters leading to undefined statistics will return NaN for this statistic. name: The name to give Ops created by the initializer. Raises: ValueError: if `scale` is unspecified. TypeError: if not `scale.dtype.is_floating` """ parameters = dict(locals()) if scale is None: raise ValueError("Missing required `scale` parameter.") if not scale.dtype.is_floating: raise TypeError( "`scale` parameter must have floating-point dtype.") with tf.name_scope(name, values=[loc] + scale.graph_parents) as name: # Since expand_dims doesn't preserve constant-ness, we obtain the # non-dynamic value if possible. loc = loc if loc is None else tf.convert_to_tensor( loc, name="loc", dtype=scale.dtype) batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale( loc, scale) super(VectorExponentialLinearOperator, self).__init__(distribution=exponential.Exponential( rate=tf.ones([], dtype=scale.dtype), allow_nan_stats=allow_nan_stats), bijector=bijectors.AffineLinearOperator( shift=loc, scale=scale, validate_args=validate_args), batch_shape=batch_shape, event_shape=event_shape, validate_args=validate_args, name=name) self._parameters = parameters
def testVariableGradient(self): b = tfb.AffineLinearOperator(shift=tf.Variable(-1.)) with tf.GradientTape() as tape: y = b.forward(.1) self.assertIsNotNone(tape.gradient(y, b.shift))
def __init__(self, mix_loc, temperature, distribution, loc=None, scale=None, quadrature_size=8, quadrature_fn=quadrature_scheme_softmaxnormal_quantiles, validate_args=False, allow_nan_stats=True, name="VectorDiffeomixture"): """Constructs the VectorDiffeomixture on `R^d`. The vector diffeomixture (VDM) approximates the compound distribution ```none p(x) = int p(x | z) p(z) dz, where z is in the K-simplex, and p(x | z) := p(x | loc=sum_k z[k] loc[k], scale=sum_k z[k] scale[k]) ``` Args: mix_loc: `float`-like `Tensor` with shape `[b1, ..., bB, K-1]`. In terms of samples, larger `mix_loc[..., k]` ==> `Z` is more likely to put more weight on its `kth` component. temperature: `float`-like `Tensor`. Broadcastable with `mix_loc`. In terms of samples, smaller `temperature` means one component is more likely to dominate. I.e., smaller `temperature` makes the VDM look more like a standard mixture of `K` components. distribution: `tf.Distribution`-like instance. Distribution from which `d` iid samples are used as input to the selected affine transformation. Must be a scalar-batch, scalar-event distribution. Typically `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is a function of non-trainable parameters. WARNING: If you backprop through a VectorDiffeomixture sample and the `distribution` is not `FULLY_REPARAMETERIZED` yet is a function of trainable variables, then the gradient will be incorrect! loc: Length-`K` list of `float`-type `Tensor`s. The `k`-th element represents the `shift` used for the `k`-th affine transformation. If the `k`-th item is `None`, `loc` is implicitly `0`. When specified, must have shape `[B1, ..., Bb, d]` where `b >= 0` and `d` is the event size. scale: Length-`K` list of `LinearOperator`s. Each should be positive-definite and operate on a `d`-dimensional vector space. The `k`-th element represents the `scale` used for the `k`-th affine transformation. `LinearOperator`s must have shape `[B1, ..., Bb, d, d]`, `b >= 0`, i.e., characterizes `b`-batches of `d x d` matrices quadrature_size: Python `int` scalar representing number of quadrature points. Larger `quadrature_size` means `q_N(x)` better approximates `p(x)`. quadrature_fn: Python callable taking `normal_loc`, `normal_scale`, `quadrature_size`, `validate_args` and returning `tuple(grid, probs)` representing the SoftmaxNormal grid and corresponding normalized weight. normalized) weight. Default value: `quadrature_scheme_softmaxnormal_quantiles`. validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. Raises: ValueError: if `not scale or len(scale) < 2`. ValueError: if `len(loc) != len(scale)` ValueError: if `quadrature_grid_and_probs is not None` and `len(quadrature_grid_and_probs[0]) != len(quadrature_grid_and_probs[1])` ValueError: if `validate_args` and any not scale.is_positive_definite. TypeError: if any scale.dtype != scale[0].dtype. TypeError: if any loc.dtype != scale[0].dtype. NotImplementedError: if `len(scale) != 2`. ValueError: if `not distribution.is_scalar_batch`. ValueError: if `not distribution.is_scalar_event`. """ parameters = dict(locals()) with tf.name_scope(name, values=[mix_loc, temperature]) as name: if not scale or len(scale) < 2: raise ValueError( "Must specify list (or list-like object) of scale " "LinearOperators, one for each component with " "num_component >= 2.") if loc is None: loc = [None] * len(scale) if len(loc) != len(scale): raise ValueError("loc/scale must be same-length lists " "(or same-length list-like objects).") dtype = scale[0].dtype.base_dtype loc = [ tf.convert_to_tensor(loc_, dtype=dtype, name="loc{}".format(k)) if loc_ is not None else None for k, loc_ in enumerate(loc) ] for k, scale_ in enumerate(scale): if validate_args and not scale_.is_positive_definite: raise ValueError( "scale[{}].is_positive_definite = {} != True".format( k, scale_.is_positive_definite)) if scale_.dtype.base_dtype != dtype: raise TypeError( "dtype mismatch; scale[{}].base_dtype=\"{}\" != \"{}\"" .format(k, scale_.dtype.base_dtype.name, dtype.name)) self._endpoint_affine = [ bijectors.AffineLinearOperator( shift=loc_, scale=scale_, validate_args=validate_args, name="endpoint_affine_{}".format(k)) for k, (loc_, scale_) in enumerate(zip(loc, scale)) ] # TODO(jvdillon): Remove once we support k-mixtures. # We make this assertion here because otherwise `grid` would need to be a # vector not a scalar. if len(scale) != 2: raise NotImplementedError( "Currently only bimixtures are supported; " "len(scale)={} is not 2.".format(len(scale))) mix_loc = tf.convert_to_tensor(mix_loc, dtype=dtype, name="mix_loc") temperature = tf.convert_to_tensor(temperature, dtype=dtype, name="temperature") self._grid, probs = tuple( quadrature_fn(mix_loc / temperature, 1. / temperature, quadrature_size, validate_args)) # Note: by creating the logits as `log(prob)` we ensure that # `self.mixture_distribution.logits` is equivalent to # `math_ops.log(self.mixture_distribution.probs)`. self._mixture_distribution = categorical_lib.Categorical( logits=tf.log(probs), validate_args=validate_args, allow_nan_stats=allow_nan_stats) asserts = distribution_util.maybe_check_scalar_distribution( distribution, dtype, validate_args) if asserts: self._grid = control_flow_ops.with_dependencies( asserts, self._grid) self._distribution = distribution self._interpolated_affine = [ bijectors.AffineLinearOperator( shift=loc_, scale=scale_, validate_args=validate_args, name="interpolated_affine_{}".format(k)) for k, (loc_, scale_) in enumerate( zip(interpolate_loc(self._grid, loc), interpolate_scale(self._grid, scale))) ] [ self._batch_shape_, self._batch_shape_tensor_, self._event_shape_, self._event_shape_tensor_, ] = determine_batch_event_shapes(self._grid, self._endpoint_affine) super(VectorDiffeomixture, self).__init__( dtype=dtype, # We hard-code `FULLY_REPARAMETERIZED` because when # `validate_args=True` we verify that indeed # `distribution.reparameterization_type == FULLY_REPARAMETERIZED`. A # distribution which is a function of only non-trainable parameters # also implies we can use `FULLY_REPARAMETERIZED`. However, we cannot # easily test for that possibility thus we use `validate_args=False` # as a "back-door" to allow users a way to use non # `FULLY_REPARAMETERIZED` distribution. In such cases IT IS THE USERS # RESPONSIBILITY to verify that the base distribution is a function of # non-trainable parameters. reparameterization_type=tf.distributions.FULLY_REPARAMETERIZED, validate_args=validate_args, allow_nan_stats=allow_nan_stats, parameters=parameters, graph_parents=( distribution._graph_parents # pylint: disable=protected-access + [loc_ for loc_ in loc if loc_ is not None] + [p for scale_ in scale for p in scale_.graph_parents]), name=name)