def test_dynamic_loc_static_scale(self): loc = tf1.placeholder_with_default(np.zeros([2, 3]), shape=None) diag = tf.ones([5, 2, 3]) batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale( loc, tf.linalg.LinearOperatorDiag(diag)) if not tf.executing_eagerly(): # batch_shape depends on both args, and so is dynamic. Since loc did not # have static shape, we inferred event shape entirely from scale, and this # is available statically. self.assertIsNone(tf.get_static_value(batch_shape)) self.assertAllEqual([3], tf.get_static_value(event_shape)) batch_shape_, event_shape_ = self.evaluate([batch_shape, event_shape]) self.assertAllEqual([5, 2], batch_shape_) self.assertAllEqual([3], event_shape_)
def __init__(self, loc=None, scale=None, validate_args=False, allow_nan_stats=True, name="VectorLaplaceLinearOperator"): """Construct Vector Laplace distribution on `R^k`. The `batch_shape` is the broadcast shape between `loc` and `scale` arguments. The `event_shape` is given by last dimension of the matrix implied by `scale`. The last dimension of `loc` (if provided) must broadcast with this. Recall that `covariance = 2 * scale @ scale.T`. Additional leading dimensions (if any) will index batches. Args: loc: Floating-point `Tensor`. If this is set to `None`, `loc` is implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where `b >= 0` and `k` is the event size. scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape `[B1, ..., Bb, k, k]`. validate_args: Python `bool`, default `False`. Whether to validate input with asserts. If `validate_args` is `False`, and the inputs are invalid, correct behavior is not guaranteed. allow_nan_stats: Python `bool`, default `True`. If `False`, raise an exception if a statistic (e.g. mean/mode/etc...) is undefined for any batch member If `True`, batch members with valid parameters leading to undefined statistics will return NaN for this statistic. name: The name to give Ops created by the initializer. Raises: ValueError: if `scale` is unspecified. TypeError: if not `scale.dtype.is_floating` """ parameters = dict(locals()) if scale is None: raise ValueError("Missing required `scale` parameter.") if not dtype_util.is_floating(scale.dtype): raise TypeError("`scale` parameter must have floating-point dtype.") with tf.name_scope(name): # Since expand_dims doesn't preserve constant-ness, we obtain the # non-dynamic value if possible. loc = loc if loc is None else tf.convert_to_tensor( loc, name="loc", dtype=scale.dtype) batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale( loc, scale) super(VectorLaplaceLinearOperator, self).__init__( distribution=laplace.Laplace( loc=tf.zeros([], dtype=scale.dtype), scale=tf.ones([], dtype=scale.dtype)), bijector=affine_linear_operator_bijector.AffineLinearOperator( shift=loc, scale=scale, validate_args=validate_args), batch_shape=batch_shape, event_shape=event_shape, validate_args=validate_args, name=name) self._parameters = parameters
def __init__(self, loc=None, scale_diag=None, scale_identity_multiplier=None, skewness=None, tailweight=None, distribution=None, validate_args=False, allow_nan_stats=True, name="MultivariateNormalLinearOperator"): """Construct VectorSinhArcsinhDiag distribution on `R^k`. The arguments `scale_diag` and `scale_identity_multiplier` combine to define the diagonal `scale` referred to in this class docstring: ```none scale = diag(scale_diag + scale_identity_multiplier * ones(k)) ``` The `batch_shape` is the broadcast shape between `loc` and `scale` arguments. The `event_shape` is given by last dimension of the matrix implied by `scale`. The last dimension of `loc` (if provided) must broadcast with this Additional leading dimensions (if any) will index batches. Args: loc: Floating-point `Tensor`. If this is set to `None`, `loc` is implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where `b >= 0` and `k` is the event size. scale_diag: Non-zero, floating-point `Tensor` representing a diagonal matrix added to `scale`. May have shape `[B1, ..., Bb, k]`, `b >= 0`, and characterizes `b`-batches of `k x k` diagonal matrices added to `scale`. When both `scale_identity_multiplier` and `scale_diag` are `None` then `scale` is the `Identity`. scale_identity_multiplier: Non-zero, floating-point `Tensor` representing a scale-identity-matrix added to `scale`. May have shape `[B1, ..., Bb]`, `b >= 0`, and characterizes `b`-batches of scale `k x k` identity matrices added to `scale`. When both `scale_identity_multiplier` and `scale_diag` are `None` then `scale` is the `Identity`. skewness: Skewness parameter. floating-point `Tensor` with shape broadcastable with `event_shape`. tailweight: Tailweight parameter. floating-point `Tensor` with shape broadcastable with `event_shape`. distribution: `tf.Distribution`-like instance. Distribution from which `k` iid samples are used as input to transformation `F`. Default is `tf.distributions.Normal(loc=0., scale=1.)`. Must be a scalar-batch, scalar-event distribution. Typically `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is a function of non-trainable parameters. WARNING: If you backprop through a VectorSinhArcsinhDiag sample and `distribution` is not `FULLY_REPARAMETERIZED` yet is a function of trainable variables, then the gradient will be incorrect! validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. Raises: ValueError: if at most `scale_identity_multiplier` is specified. """ parameters = dict(locals()) with tf.name_scope(name, values=[ loc, scale_diag, scale_identity_multiplier, skewness, tailweight ]) as name: loc = tf.convert_to_tensor(loc, name="loc") if loc is not None else loc tailweight = 1. if tailweight is None else tailweight has_default_skewness = skewness is None skewness = 0. if skewness is None else skewness # Recall, with Z a random variable, # Y := loc + C * F(Z), # F(Z) := Sinh( (Arcsinh(Z) + skewness) * tailweight ) # F_0(Z) := Sinh( Arcsinh(Z) * tailweight ) # C := 2 * scale / F_0(2) # Construct shapes and 'scale' out of the scale_* and loc kwargs. # scale_linop is only an intermediary to: # 1. get shapes from looking at loc and the two scale args. # 2. combine scale_diag with scale_identity_multiplier, which gives us # 'scale', which in turn gives us 'C'. scale_linop = distribution_util.make_diag_scale( loc=loc, scale_diag=scale_diag, scale_identity_multiplier=scale_identity_multiplier, validate_args=False, assert_positive=False) batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale( loc, scale_linop) # scale_linop.diag_part() is efficient since it is a diag type linop. scale_diag_part = scale_linop.diag_part() dtype = scale_diag_part.dtype if distribution is None: distribution = tf.distributions.Normal( loc=tf.zeros([], dtype=dtype), scale=tf.ones([], dtype=dtype), allow_nan_stats=allow_nan_stats) else: asserts = distribution_util.maybe_check_scalar_distribution( distribution, dtype, validate_args) if asserts: scale_diag_part = control_flow_ops.with_dependencies( asserts, scale_diag_part) # Make the SAS bijector, 'F'. skewness = tf.convert_to_tensor(skewness, dtype=dtype, name="skewness") tailweight = tf.convert_to_tensor(tailweight, dtype=dtype, name="tailweight") f = bijectors.SinhArcsinh(skewness=skewness, tailweight=tailweight) if has_default_skewness: f_noskew = f else: f_noskew = bijectors.SinhArcsinh( skewness=skewness.dtype.as_numpy_dtype(0.), tailweight=tailweight) # Make the Affine bijector, Z --> loc + C * Z. c = 2 * scale_diag_part / f_noskew.forward( tf.convert_to_tensor(2, dtype=dtype)) affine = bijectors.Affine(shift=loc, scale_diag=c, validate_args=validate_args) bijector = bijectors.Chain([affine, f]) super(VectorSinhArcsinhDiag, self).__init__(distribution=distribution, bijector=bijector, batch_shape=batch_shape, event_shape=event_shape, validate_args=validate_args, name=name) self._parameters = parameters self._loc = loc self._scale = scale_linop self._tailweight = tailweight self._skewness = skewness
def __init__(self, loc=None, scale=None, validate_args=False, allow_nan_stats=True, name="VectorLaplaceLinearOperator"): """Construct Vector Laplace distribution on `R^k`. The `batch_shape` is the broadcast shape between `loc` and `scale` arguments. The `event_shape` is given by last dimension of the matrix implied by `scale`. The last dimension of `loc` (if provided) must broadcast with this. Recall that `covariance = 2 * scale @ scale.T`. Additional leading dimensions (if any) will index batches. Args: loc: Floating-point `Tensor`. If this is set to `None`, `loc` is implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where `b >= 0` and `k` is the event size. scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape `[B1, ..., Bb, k, k]`. validate_args: Python `bool`, default `False`. Whether to validate input with asserts. If `validate_args` is `False`, and the inputs are invalid, correct behavior is not guaranteed. allow_nan_stats: Python `bool`, default `True`. If `False`, raise an exception if a statistic (e.g. mean/mode/etc...) is undefined for any batch member If `True`, batch members with valid parameters leading to undefined statistics will return NaN for this statistic. name: The name to give Ops created by the initializer. Raises: ValueError: if `scale` is unspecified. TypeError: if not `scale.dtype.is_floating` """ parameters = dict(locals()) if scale is None: raise ValueError("Missing required `scale` parameter.") if not scale.dtype.is_floating: raise TypeError("`scale` parameter must have floating-point dtype.") with tf.name_scope(name, values=[loc] + scale.graph_parents): # Since expand_dims doesn't preserve constant-ness, we obtain the # non-dynamic value if possible. loc = tf.convert_to_tensor(loc, name="loc") if loc is not None else loc batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale( loc, scale) super(VectorLaplaceLinearOperator, self).__init__( distribution=laplace.Laplace( loc=tf.zeros([], dtype=scale.dtype), scale=tf.ones([], dtype=scale.dtype)), bijector=bijectors.AffineLinearOperator( shift=loc, scale=scale, validate_args=validate_args), batch_shape=batch_shape, event_shape=event_shape, validate_args=validate_args, name=name) self._parameters = parameters
def __init__(self, df, loc=None, scale_identity_multiplier=None, scale_diag=None, scale_tril=None, scale_perturb_factor=None, scale_perturb_diag=None, validate_args=False, allow_nan_stats=True, name="VectorStudentT"): """Instantiates the vector Student's t-distributions on `R^k`. The `batch_shape` is the broadcast between `df.batch_shape` and `Affine.batch_shape` where `Affine` is constructed from `loc` and `scale_*` arguments. The `event_shape` is the event shape of `Affine.event_shape`. Args: df: Floating-point `Tensor`. The degrees of freedom of the distribution(s). `df` must contain only positive values. Must be scalar if `loc`, `scale_*` imply non-scalar batch_shape or must have the same `batch_shape` implied by `loc`, `scale_*`. loc: Floating-point `Tensor`. If this is set to `None`, no `loc` is applied. scale_identity_multiplier: floating point rank 0 `Tensor` representing a scaling done to the identity matrix. When `scale_identity_multiplier = scale_diag=scale_tril = None` then `scale += IdentityMatrix`. Otherwise no scaled-identity-matrix is added to `scale`. scale_diag: Floating-point `Tensor` representing the diagonal matrix. `scale_diag` has shape [N1, N2, ..., k], which represents a k x k diagonal matrix. When `None` no diagonal term is added to `scale`. scale_tril: Floating-point `Tensor` representing the diagonal matrix. `scale_diag` has shape [N1, N2, ..., k, k], which represents a k x k lower triangular matrix. When `None` no `scale_tril` term is added to `scale`. The upper triangular elements above the diagonal are ignored. scale_perturb_factor: Floating-point `Tensor` representing factor matrix with last two dimensions of shape `(k, r)`. When `None`, no rank-r update is added to `scale`. scale_perturb_diag: Floating-point `Tensor` representing the diagonal matrix. `scale_perturb_diag` has shape [N1, N2, ..., r], which represents an r x r Diagonal matrix. When `None` low rank updates will take the form `scale_perturb_factor * scale_perturb_factor.T`. validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. """ parameters = dict(locals()) graph_parents = [ df, loc, scale_identity_multiplier, scale_diag, scale_tril, scale_perturb_factor, scale_perturb_diag ] with tf.name_scope(name) as name: with tf.name_scope("init"): dtype = dtype_util.common_dtype(graph_parents, tf.float32) df = tf.convert_to_tensor(value=df, name="df", dtype=dtype) # The shape of the _VectorStudentT distribution is governed by the # relationship between df.batch_shape and affine.batch_shape. In # pseudocode the basic procedure is: # if df.batch_shape is scalar: # if affine.batch_shape is not scalar: # # broadcast distribution.sample so # # it has affine.batch_shape. # self.batch_shape = affine.batch_shape # else: # if affine.batch_shape is scalar: # # let affine broadcasting do its thing. # self.batch_shape = df.batch_shape # All of the above magic is actually handled by TransformedDistribution. # Here we really only need to collect the affine.batch_shape and decide # what we're going to pass in to TransformedDistribution's # (override) batch_shape arg. affine = affine_bijector.Affine( shift=loc, scale_identity_multiplier=scale_identity_multiplier, scale_diag=scale_diag, scale_tril=scale_tril, scale_perturb_factor=scale_perturb_factor, scale_perturb_diag=scale_perturb_diag, validate_args=validate_args, dtype=dtype) distribution = student_t.StudentT( df=df, loc=tf.zeros([], dtype=affine.dtype), scale=tf.ones([], dtype=affine.dtype)) batch_shape, override_event_shape = ( distribution_util.shapes_from_loc_and_scale( affine.shift, affine.scale)) override_batch_shape = distribution_util.pick_vector( distribution.is_scalar_batch(), batch_shape, tf.constant([], dtype=tf.int32)) super(_VectorStudentT, self).__init__(distribution=distribution, bijector=affine, batch_shape=override_batch_shape, event_shape=override_event_shape, validate_args=validate_args, name=name) self._parameters = parameters
def __init__(self, loc=None, scale=None, validate_args=False, allow_nan_stats=True, experimental_use_kahan_sum=False, name='MultivariateNormalLinearOperator'): """Construct Multivariate Normal distribution on `R^k`. The `batch_shape` is the broadcast shape between `loc` and `scale` arguments. The `event_shape` is given by last dimension of the matrix implied by `scale`. The last dimension of `loc` (if provided) must broadcast with this. Recall that `covariance = scale @ scale.T`. Additional leading dimensions (if any) will index batches. Args: loc: Floating-point `Tensor`. If this is set to `None`, `loc` is implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where `b >= 0` and `k` is the event size. scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape `[B1, ..., Bb, k, k]`. validate_args: Python `bool`, default `False`. Whether to validate input with asserts. If `validate_args` is `False`, and the inputs are invalid, correct behavior is not guaranteed. allow_nan_stats: Python `bool`, default `True`. If `False`, raise an exception if a statistic (e.g. mean/mode/etc...) is undefined for any batch member If `True`, batch members with valid parameters leading to undefined statistics will return NaN for this statistic. experimental_use_kahan_sum: Python `bool`. When `True`, we use Kahan summation to aggregate independent underlying log_prob values. For best results, Kahan summation should also be applied when computing the log-determinant of the `LinearOperator` representing the scale matrix. Kahan summation improves against the precision of a naive float32 sum. This can be noticeable in particular for large dimensions in float32. See CPU caveat on `tfp.math.reduce_kahan_sum`. name: The name to give Ops created by the initializer. Raises: ValueError: if `scale` is unspecified. TypeError: if not `scale.dtype.is_floating` """ parameters = dict(locals()) self._experimental_use_kahan_sum = experimental_use_kahan_sum if scale is None: raise ValueError('Missing required `scale` parameter.') if not dtype_util.is_floating(scale.dtype): raise TypeError('`scale` parameter must have floating-point dtype.') with tf.name_scope(name) as name: dtype = dtype_util.common_dtype([loc, scale], dtype_hint=tf.float32) # Since expand_dims doesn't preserve constant-ness, we obtain the # non-dynamic value if possible. loc = tensor_util.convert_nonref_to_tensor( loc, dtype=dtype, name='loc') batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale( loc, scale) self._loc = loc self._scale = scale bijector = scale_matvec_linear_operator.ScaleMatvecLinearOperator( scale, validate_args=validate_args) if loc is not None: bijector = shift_bijector.Shift( shift=loc, validate_args=validate_args)(bijector) super(MultivariateNormalLinearOperator, self).__init__( # TODO(b/137665504): Use batch-adding meta-distribution to set the batch # shape instead of tf.zeros. # We use `Sample` instead of `Independent` because `Independent` # requires concatenating `batch_shape` and `event_shape`, which loses # static `batch_shape` information when `event_shape` is not statically # known. distribution=sample.Sample( normal.Normal( loc=tf.zeros(batch_shape, dtype=dtype), scale=tf.ones([], dtype=dtype)), event_shape, experimental_use_kahan_sum=experimental_use_kahan_sum), bijector=bijector, validate_args=validate_args, name=name) self._parameters = parameters
def __init__(self, loc=None, precision_factor=None, precision=None, validate_args=False, allow_nan_stats=True, name='MultivariateNormalPrecisionFactorLinearOperator'): """Initialize distribution. Precision is the inverse of the covariance matrix, and `precision_factor @ precision_factor.T = precision`. The `batch_shape` of this distribution is the broadcast of `loc.shape[:-1]` and `precision_factor.batch_shape`. The `event_shape` of this distribution is determined by `loc.shape[-1:]`, OR `precision_factor.shape[-1:]`, which must match. Args: loc: Floating-point `Tensor`. If this is set to `None`, `loc` is implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where `b >= 0` and `k` is the event size. precision_factor: Required nonsingular `tf.linalg.LinearOperator` instance with same `dtype` and shape compatible with `loc`. precision: Optional square `tf.linalg.LinearOperator` instance with same `dtype` and shape compatible with `loc` and `precision_factor`. validate_args: Python `bool`, default `False`. Whether to validate input with asserts. If `validate_args` is `False`, and the inputs are invalid, correct behavior is not guaranteed. allow_nan_stats: Python `bool`, default `True`. If `False`, raise an exception if a statistic (e.g. mean/mode/etc...) is undefined for any batch member If `True`, batch members with valid parameters leading to undefined statistics will return NaN for this statistic. name: The name to give Ops created by the initializer. """ parameters = dict(locals()) with tf.name_scope(name) as name: if precision_factor is None: raise ValueError( 'Argument `precision_factor` must be provided. Found `None`') dtype = dtype_util.common_dtype([loc, precision_factor, precision], dtype_hint=tf.float32) loc = tensor_util.convert_nonref_to_tensor(loc, dtype=dtype, name='loc') self._loc = loc self._precision_factor = precision_factor self._precision = precision batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale( loc, precision_factor) # Proof of factors (used throughout code): # Let, # C = covariance, # P = inv(covariance) = precision # P = F @ F.T (so F is the `precision_factor`). # # Then, the log prob term is # x.T @ inv(C) @ x # = x.T @ P @ x # = x.T @ F @ F.T @ x # = || F.T @ x ||**2 # notice it involves F.T, which is why we set adjoint=True in various # places. # # Also, if w ~ Normal(0, I), then we can sample by setting # x = inv(F.T) @ w + loc, # since then # E[(x - loc) @ (x - loc).T] # = E[inv(F.T) @ w @ w.T @ inv(F)] # = inv(F.T) @ inv(F) # = inv(F @ F.T) # = inv(P) # = C. if precision is not None: precision.shape.assert_is_compatible_with(precision_factor.shape) bijector = invert.Invert( scale_matvec_linear_operator.ScaleMatvecLinearOperator( scale=precision_factor, validate_args=validate_args, adjoint=True) ) if loc is not None: shift = shift_bijector.Shift(shift=loc, validate_args=validate_args) bijector = shift(bijector) super(MultivariateNormalPrecisionFactorLinearOperator, self).__init__( distribution=mvn_diag.MultivariateNormalDiag( loc=tf.zeros( ps.concat([batch_shape, event_shape], axis=0), dtype=dtype)), bijector=bijector, validate_args=validate_args, name=name) self._parameters = parameters
def __init__(self, loc=None, scale=None, validate_args=False, allow_nan_stats=True, name='VectorExponentialLinearOperator'): """Construct Vector Exponential distribution supported on a subset of `R^k`. The `batch_shape` is the broadcast shape between `loc` and `scale` arguments. The `event_shape` is given by last dimension of the matrix implied by `scale`. The last dimension of `loc` (if provided) must broadcast with this. Recall that `covariance = scale @ scale.T`. Additional leading dimensions (if any) will index batches. Args: loc: Floating-point `Tensor`. If this is set to `None`, `loc` is implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where `b >= 0` and `k` is the event size. scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape `[B1, ..., Bb, k, k]`. validate_args: Python `bool`, default `False`. Whether to validate input with asserts. If `validate_args` is `False`, and the inputs are invalid, correct behavior is not guaranteed. allow_nan_stats: Python `bool`, default `True`. If `False`, raise an exception if a statistic (e.g. mean/mode/etc...) is undefined for any batch member If `True`, batch members with valid parameters leading to undefined statistics will return NaN for this statistic. name: The name to give Ops created by the initializer. Raises: ValueError: if `scale` is unspecified. TypeError: if not `scale.dtype.is_floating` """ parameters = dict(locals()) if loc is None: loc = 0.0 # Implicit value for backwards compatibility. if scale is None: raise ValueError('Missing required `scale` parameter.') if not dtype_util.is_floating(scale.dtype): raise TypeError( '`scale` parameter must have floating-point dtype.') with tf.name_scope(name) as name: # Since expand_dims doesn't preserve constant-ness, we obtain the # non-dynamic value if possible. loc = loc if loc is None else tf.convert_to_tensor( loc, name='loc', dtype=scale.dtype) batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale( loc, scale) self._loc = loc self._scale = scale super(VectorExponentialLinearOperator, self).__init__( # TODO(b/137665504): Use batch-adding meta-distribution to set the # batch shape instead of tf.ones. # We use `Sample` instead of `Independent` because `Independent` # requires concatenating `batch_shape` and `event_shape`, which loses # static `batch_shape` information when `event_shape` is not # statically known. distribution=sample.Sample( exponential.Exponential(rate=tf.ones(batch_shape, dtype=scale.dtype), allow_nan_stats=allow_nan_stats), event_shape), bijector=shift_bijector.Shift(shift=loc)( scale_matvec_linear_operator.ScaleMatvecLinearOperator( scale=scale, validate_args=validate_args)), validate_args=validate_args, name=name) self._parameters = parameters
def test_static_loc_static_scale_non_matching_event_size_raises(self): loc = tf.zeros([2, 4]) diag = tf.ones([5, 1, 3]) with self.assertRaisesRegexp(ValueError, 'could not be broadcast'): distribution_util.shapes_from_loc_and_scale( loc, tf.linalg.LinearOperatorDiag(diag))
def __init__( self, loc=None, cov_operator=None, validate_args=False, allow_nan_stats=True, name='MultivariateNormalLowRankUpdateLinearOperatorCovariance'): """Construct Multivariate Normal distribution on `R^k`. The `batch_shape` is the broadcast shape between `loc` and `cov_operator` arguments. The `event_shape` is given by last dimension of the matrix implied by `cov_operator`. The last dimension of `loc` (if provided) must broadcast with this. Additional leading dimensions (if any) will index batches. Args: loc: Floating-point `Tensor`. If this is set to `None`, `loc` is implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where `b >= 0` and `k` is the event size. cov_operator: Instance of `LinearOperatorLowRankUpdate` with same `dtype` as `loc` and shape `[B1, ..., Bb, k, k]`. Must have structure `A + UU^T` or `A + UDU^T`, where `A` and `D` (if provided) are self-adjoint and positive definite. validate_args: Python `bool`, default `False`. Whether to validate input with asserts. If `validate_args` is `False`, and the inputs are invalid, correct behavior is not guaranteed. allow_nan_stats: Python `bool`, default `True`. If `False`, raise an exception if a statistic (e.g. mean/mode/etc...) is undefined for any batch member. If `True`, batch members with valid parameters leading to undefined statistics will return NaN for this statistic. name: The name to give Ops created by the initializer. Raises: ValueError: if `cov_operator` is unspecified. ValueError: if `cov_operator` does not specify the self-adjoint positive definite conditions explained above. TypeError: if not `cov_operator.dtype.is_floating` """ parameters = dict(locals()) if cov_operator is None: raise ValueError('Missing required `cov_operator` parameter.') if not dtype_util.is_floating(cov_operator.dtype): raise TypeError( '`cov_operator` parameter must have floating-point dtype.') if not isinstance(cov_operator, tf.linalg.LinearOperatorLowRankUpdate): raise TypeError( '`cov_operator` must be a LinearOperatorLowRankUpdate. ' 'Found {}'.format(type(cov_operator))) if cov_operator.u is not cov_operator.v: raise ValueError( 'The `U` and `V` (typically low rank) matrices of ' '`cov_operator` must be the same, but were not.') # For cov_operator, raise if the user explicitly set these to False, # or if False was inferred by the LinearOperator. The default value is None, # which will not trigger these raises. # pylint: disable=g-bool-id-comparison if cov_operator.is_self_adjoint is False: raise ValueError('`cov_operator` must be self-adjoint.') if cov_operator.is_positive_definite is False: raise ValueError('`cov_operator` must be positive definite.') # pylint: enable=g-bool-id-comparison # For the base_operator, we require the user to explicity set # is_self_adjoint and is_positive_definite. if not cov_operator.base_operator.is_self_adjoint: raise ValueError( 'The `base_operator` of `cov_operator` must be self-adjoint. ' 'You may have to set the `is_self_adjoint` initialization hint.' ) if not cov_operator.base_operator.is_positive_definite: raise ValueError( 'The `base_operator` of `cov_operator` must be positive ' 'definite. You may have to set the `is_positive_definite` ' 'initialization hint.') with tf.name_scope(name) as name: dtype = dtype_util.common_dtype([loc, cov_operator], dtype_hint=tf.float32) if loc is not None: loc = tensor_util.convert_nonref_to_tensor(loc, dtype=dtype, name='loc') # Get dynamic shapes (for self.*shape_tensor methods). # shapes_from_loc_and_scale tries to return TensorShapes, but may return # tensors. So we can only use it for the *shape_tensor methods. # It is useful though, since it does lots of shape checks, and is a # well-tested function. batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale( loc, cov_operator) self._batch_shape_tensor_value = ps.convert_to_shape_tensor( batch_shape, name='batch_shape') self._event_shape_tensor_value = ps.convert_to_shape_tensor( event_shape, name='event_shape') # Get static shapes (for self.*shape methods). self._batch_shape_value = cov_operator.batch_shape if loc is not None: self._batch_shape_value = tf.broadcast_static_shape( self._batch_shape_value, loc.shape[:-1]) self._event_shape_value = cov_operator.shape[-1:] if loc is not None: self._event_shape_value = tf.broadcast_static_shape( self._event_shape_value, loc.shape[-1:]) self._loc = loc self._cov_operator = cov_operator super(MultivariateNormalLowRankUpdateLinearOperatorCovariance, self).__init__(dtype=dtype, reparameterization_type=reparameterization. FULLY_REPARAMETERIZED, validate_args=validate_args, allow_nan_stats=allow_nan_stats, parameters=parameters, name=name) self._parameters = parameters
def __init__(self, df, loc=None, scale_identity_multiplier=None, scale_diag=None, scale_tril=None, scale_perturb_factor=None, scale_perturb_diag=None, validate_args=False, allow_nan_stats=True, name="VectorStudentT"): """Instantiates the vector Student's t-distributions on `R^k`. The `batch_shape` is the broadcast between `df.batch_shape` and `Affine.batch_shape` where `Affine` is constructed from `loc` and `scale_*` arguments. The `event_shape` is the event shape of `Affine.event_shape`. Args: df: Floating-point `Tensor`. The degrees of freedom of the distribution(s). `df` must contain only positive values. Must be scalar if `loc`, `scale_*` imply non-scalar batch_shape or must have the same `batch_shape` implied by `loc`, `scale_*`. loc: Floating-point `Tensor`. If this is set to `None`, no `loc` is applied. scale_identity_multiplier: floating point rank 0 `Tensor` representing a scaling done to the identity matrix. When `scale_identity_multiplier = scale_diag=scale_tril = None` then `scale += IdentityMatrix`. Otherwise no scaled-identity-matrix is added to `scale`. scale_diag: Floating-point `Tensor` representing the diagonal matrix. `scale_diag` has shape [N1, N2, ..., k], which represents a k x k diagonal matrix. When `None` no diagonal term is added to `scale`. scale_tril: Floating-point `Tensor` representing the diagonal matrix. `scale_diag` has shape [N1, N2, ..., k, k], which represents a k x k lower triangular matrix. When `None` no `scale_tril` term is added to `scale`. The upper triangular elements above the diagonal are ignored. scale_perturb_factor: Floating-point `Tensor` representing factor matrix with last two dimensions of shape `(k, r)`. When `None`, no rank-r update is added to `scale`. scale_perturb_diag: Floating-point `Tensor` representing the diagonal matrix. `scale_perturb_diag` has shape [N1, N2, ..., r], which represents an r x r Diagonal matrix. When `None` low rank updates will take the form `scale_perturb_factor * scale_perturb_factor.T`. validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. """ parameters = dict(locals()) graph_parents = [df, loc, scale_identity_multiplier, scale_diag, scale_tril, scale_perturb_factor, scale_perturb_diag] with tf.name_scope(name) as name: with tf.name_scope("init", values=graph_parents): # The shape of the _VectorStudentT distribution is governed by the # relationship between df.batch_shape and affine.batch_shape. In # pseudocode the basic procedure is: # if df.batch_shape is scalar: # if affine.batch_shape is not scalar: # # broadcast distribution.sample so # # it has affine.batch_shape. # self.batch_shape = affine.batch_shape # else: # if affine.batch_shape is scalar: # # let affine broadcasting do its thing. # self.batch_shape = df.batch_shape # All of the above magic is actually handled by TransformedDistribution. # Here we really only need to collect the affine.batch_shape and decide # what we're going to pass in to TransformedDistribution's # (override) batch_shape arg. affine = bijectors.Affine( shift=loc, scale_identity_multiplier=scale_identity_multiplier, scale_diag=scale_diag, scale_tril=scale_tril, scale_perturb_factor=scale_perturb_factor, scale_perturb_diag=scale_perturb_diag, validate_args=validate_args) distribution = student_t.StudentT( df=df, loc=tf.zeros([], dtype=affine.dtype), scale=tf.ones([], dtype=affine.dtype)) batch_shape, override_event_shape = ( distribution_util.shapes_from_loc_and_scale( affine.shift, affine.scale)) override_batch_shape = distribution_util.pick_vector( distribution.is_scalar_batch(), batch_shape, tf.constant([], dtype=tf.int32)) super(_VectorStudentT, self).__init__( distribution=distribution, bijector=affine, batch_shape=override_batch_shape, event_shape=override_event_shape, validate_args=validate_args, name=name) self._parameters = parameters
def __init__(self, loc=None, scale_diag=None, scale_identity_multiplier=None, skewness=None, tailweight=None, distribution=None, validate_args=False, allow_nan_stats=True, name="MultivariateNormalLinearOperator"): """Construct VectorSinhArcsinhDiag distribution on `R^k`. The arguments `scale_diag` and `scale_identity_multiplier` combine to define the diagonal `scale` referred to in this class docstring: ```none scale = diag(scale_diag + scale_identity_multiplier * ones(k)) ``` The `batch_shape` is the broadcast shape between `loc` and `scale` arguments. The `event_shape` is given by last dimension of the matrix implied by `scale`. The last dimension of `loc` (if provided) must broadcast with this Additional leading dimensions (if any) will index batches. Args: loc: Floating-point `Tensor`. If this is set to `None`, `loc` is implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where `b >= 0` and `k` is the event size. scale_diag: Non-zero, floating-point `Tensor` representing a diagonal matrix added to `scale`. May have shape `[B1, ..., Bb, k]`, `b >= 0`, and characterizes `b`-batches of `k x k` diagonal matrices added to `scale`. When both `scale_identity_multiplier` and `scale_diag` are `None` then `scale` is the `Identity`. scale_identity_multiplier: Non-zero, floating-point `Tensor` representing a scale-identity-matrix added to `scale`. May have shape `[B1, ..., Bb]`, `b >= 0`, and characterizes `b`-batches of scale `k x k` identity matrices added to `scale`. When both `scale_identity_multiplier` and `scale_diag` are `None` then `scale` is the `Identity`. skewness: Skewness parameter. floating-point `Tensor` with shape broadcastable with `event_shape`. tailweight: Tailweight parameter. floating-point `Tensor` with shape broadcastable with `event_shape`. distribution: `tf.Distribution`-like instance. Distribution from which `k` iid samples are used as input to transformation `F`. Default is `tfd.Normal(loc=0., scale=1.)`. Must be a scalar-batch, scalar-event distribution. Typically `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is a function of non-trainable parameters. WARNING: If you backprop through a VectorSinhArcsinhDiag sample and `distribution` is not `FULLY_REPARAMETERIZED` yet is a function of trainable variables, then the gradient will be incorrect! validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. Raises: ValueError: if at most `scale_identity_multiplier` is specified. """ parameters = dict(locals()) with tf.name_scope( name, values=[ loc, scale_diag, scale_identity_multiplier, skewness, tailweight ]) as name: dtype = dtype_util.common_dtype( [loc, scale_diag, scale_identity_multiplier, skewness, tailweight], tf.float32) loc = loc if loc is None else tf.convert_to_tensor( loc, name="loc", dtype=dtype) tailweight = 1. if tailweight is None else tailweight has_default_skewness = skewness is None skewness = 0. if skewness is None else skewness # Recall, with Z a random variable, # Y := loc + C * F(Z), # F(Z) := Sinh( (Arcsinh(Z) + skewness) * tailweight ) # F_0(Z) := Sinh( Arcsinh(Z) * tailweight ) # C := 2 * scale / F_0(2) # Construct shapes and 'scale' out of the scale_* and loc kwargs. # scale_linop is only an intermediary to: # 1. get shapes from looking at loc and the two scale args. # 2. combine scale_diag with scale_identity_multiplier, which gives us # 'scale', which in turn gives us 'C'. scale_linop = distribution_util.make_diag_scale( loc=loc, scale_diag=scale_diag, scale_identity_multiplier=scale_identity_multiplier, validate_args=False, assert_positive=False, dtype=dtype) batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale( loc, scale_linop) # scale_linop.diag_part() is efficient since it is a diag type linop. scale_diag_part = scale_linop.diag_part() dtype = scale_diag_part.dtype if distribution is None: distribution = normal.Normal( loc=tf.zeros([], dtype=dtype), scale=tf.ones([], dtype=dtype), allow_nan_stats=allow_nan_stats) else: asserts = distribution_util.maybe_check_scalar_distribution( distribution, dtype, validate_args) if asserts: scale_diag_part = control_flow_ops.with_dependencies( asserts, scale_diag_part) # Make the SAS bijector, 'F'. skewness = tf.convert_to_tensor(skewness, dtype=dtype, name="skewness") tailweight = tf.convert_to_tensor( tailweight, dtype=dtype, name="tailweight") f = sinh_arcsinh_bijector.SinhArcsinh( skewness=skewness, tailweight=tailweight) if has_default_skewness: f_noskew = f else: f_noskew = sinh_arcsinh_bijector.SinhArcsinh( skewness=skewness.dtype.as_numpy_dtype(0.), tailweight=tailweight) # Make the Affine bijector, Z --> loc + C * Z. c = 2 * scale_diag_part / f_noskew.forward( tf.convert_to_tensor(2, dtype=dtype)) affine = affine_bijector.Affine( shift=loc, scale_diag=c, validate_args=validate_args) bijector = chain_bijector.Chain([affine, f]) super(VectorSinhArcsinhDiag, self).__init__( distribution=distribution, bijector=bijector, batch_shape=batch_shape, event_shape=event_shape, validate_args=validate_args, name=name) self._parameters = parameters self._loc = loc self._scale = scale_linop self._tailweight = tailweight self._skewness = skewness
def test_static_loc_static_scale_non_matching_event_size_raises(self): loc = tf.constant(np.zeros((2, 4))) scale = tf.linalg.LinearOperatorDiag(np.ones((5, 1, 3))) with self.assertRaisesRegexp(ValueError, "could not be broadcast"): distribution_util.shapes_from_loc_and_scale(loc, scale)