def testValidateArgs(self): with self.cached_session(): with self.assertRaisesOpError("diagonal part must be non-zero"): scale = distribution_util.make_diag_scale(scale_diag=[[0., 1], [1., 1.]], validate_args=True) scale.to_dense().eval()
def testAssertPositive(self): with self.cached_session(): with self.assertRaisesOpError("diagonal part must be positive"): scale = distribution_util.make_diag_scale(scale_diag=[[-1., 1], [1., 1.]], validate_args=True, assert_positive=True) scale.to_dense().eval()
def _testLegalInputs(self, loc=None, shape_hint=None, scale_params=None): for args in _powerset(scale_params.items()): with self.cached_session(): args = dict(args) scale_args = dict({ "loc": loc, "shape_hint": shape_hint }, **args) expected_scale = _make_diag_scale(**scale_args) if expected_scale is None: # Not enough shape information was specified. with self.assertRaisesRegexp(ValueError, ("is specified.")): scale = distribution_util.make_diag_scale(**scale_args) scale.to_dense().eval() else: scale = distribution_util.make_diag_scale(**scale_args) self.assertAllClose(expected_scale, scale.to_dense().eval())
def __init__(self, loc=None, scale_diag=None, scale_identity_multiplier=None, validate_args=False, allow_nan_stats=True, name="MultivariateNormalDiag"): """Construct Multivariate Normal distribution on `R^k`. The `batch_shape` is the broadcast shape between `loc` and `scale` arguments. The `event_shape` is given by last dimension of the matrix implied by `scale`. The last dimension of `loc` (if provided) must broadcast with this. Recall that `covariance = scale @ scale.T`. A (non-batch) `scale` matrix is: ```none scale = diag(scale_diag + scale_identity_multiplier * ones(k)) ``` where: * `scale_diag.shape = [k]`, and, * `scale_identity_multiplier.shape = []`. Additional leading dimensions (if any) will index batches. If both `scale_diag` and `scale_identity_multiplier` are `None`, then `scale` is the Identity matrix. Args: loc: Floating-point `Tensor`. If this is set to `None`, `loc` is implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where `b >= 0` and `k` is the event size. scale_diag: Non-zero, floating-point `Tensor` representing a diagonal matrix added to `scale`. May have shape `[B1, ..., Bb, k]`, `b >= 0`, and characterizes `b`-batches of `k x k` diagonal matrices added to `scale`. When both `scale_identity_multiplier` and `scale_diag` are `None` then `scale` is the `Identity`. scale_identity_multiplier: Non-zero, floating-point `Tensor` representing a scaled-identity-matrix added to `scale`. May have shape `[B1, ..., Bb]`, `b >= 0`, and characterizes `b`-batches of scaled `k x k` identity matrices added to `scale`. When both `scale_identity_multiplier` and `scale_diag` are `None` then `scale` is the `Identity`. validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. Raises: ValueError: if at most `scale_identity_multiplier` is specified. """ parameters = dict(locals()) with ops.name_scope(name) as name: with ops.name_scope("init", values=[ loc, scale_diag, scale_identity_multiplier ]): # No need to validate_args while making diag_scale. The returned # LinearOperatorDiag has an assert_non_singular method that is called by # the Bijector. scale = distribution_util.make_diag_scale( loc=loc, scale_diag=scale_diag, scale_identity_multiplier=scale_identity_multiplier, validate_args=False, assert_positive=False) super(MultivariateNormalDiag, self).__init__(loc=loc, scale=scale, validate_args=validate_args, allow_nan_stats=allow_nan_stats, name=name) self._parameters = parameters
def __init__(self, loc=None, scale_diag=None, scale_identity_multiplier=None, scale_perturb_factor=None, scale_perturb_diag=None, validate_args=False, allow_nan_stats=True, name="MultivariateNormalDiagPlusLowRank"): """Construct Multivariate Normal distribution on `R^k`. The `batch_shape` is the broadcast shape between `loc` and `scale` arguments. The `event_shape` is given by last dimension of the matrix implied by `scale`. The last dimension of `loc` (if provided) must broadcast with this. Recall that `covariance = scale @ scale.T`. A (non-batch) `scale` matrix is: ```none scale = diag(scale_diag + scale_identity_multiplier ones(k)) + scale_perturb_factor @ diag(scale_perturb_diag) @ scale_perturb_factor.T ``` where: * `scale_diag.shape = [k]`, * `scale_identity_multiplier.shape = []`, * `scale_perturb_factor.shape = [k, r]`, typically `k >> r`, and, * `scale_perturb_diag.shape = [r]`. Additional leading dimensions (if any) will index batches. If both `scale_diag` and `scale_identity_multiplier` are `None`, then `scale` is the Identity matrix. Args: loc: Floating-point `Tensor`. If this is set to `None`, `loc` is implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where `b >= 0` and `k` is the event size. scale_diag: Non-zero, floating-point `Tensor` representing a diagonal matrix added to `scale`. May have shape `[B1, ..., Bb, k]`, `b >= 0`, and characterizes `b`-batches of `k x k` diagonal matrices added to `scale`. When both `scale_identity_multiplier` and `scale_diag` are `None` then `scale` is the `Identity`. scale_identity_multiplier: Non-zero, floating-point `Tensor` representing a scaled-identity-matrix added to `scale`. May have shape `[B1, ..., Bb]`, `b >= 0`, and characterizes `b`-batches of scaled `k x k` identity matrices added to `scale`. When both `scale_identity_multiplier` and `scale_diag` are `None` then `scale` is the `Identity`. scale_perturb_factor: Floating-point `Tensor` representing a rank-`r` perturbation added to `scale`. May have shape `[B1, ..., Bb, k, r]`, `b >= 0`, and characterizes `b`-batches of rank-`r` updates to `scale`. When `None`, no rank-`r` update is added to `scale`. scale_perturb_diag: Floating-point `Tensor` representing a diagonal matrix inside the rank-`r` perturbation added to `scale`. May have shape `[B1, ..., Bb, r]`, `b >= 0`, and characterizes `b`-batches of `r x r` diagonal matrices inside the perturbation added to `scale`. When `None`, an identity matrix is used inside the perturbation. Can only be specified if `scale_perturb_factor` is also specified. validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. Raises: ValueError: if at most `scale_identity_multiplier` is specified. """ parameters = dict(locals()) def _convert_to_tensor(x, name): return None if x is None else ops.convert_to_tensor(x, name=name) with ops.name_scope(name) as name: with ops.name_scope("init", values=[ loc, scale_diag, scale_identity_multiplier, scale_perturb_factor, scale_perturb_diag ]): has_low_rank = (scale_perturb_factor is not None or scale_perturb_diag is not None) scale = distribution_util.make_diag_scale( loc=loc, scale_diag=scale_diag, scale_identity_multiplier=scale_identity_multiplier, validate_args=validate_args, assert_positive=has_low_rank) scale_perturb_factor = _convert_to_tensor( scale_perturb_factor, name="scale_perturb_factor") scale_perturb_diag = _convert_to_tensor( scale_perturb_diag, name="scale_perturb_diag") if has_low_rank: scale = linalg.LinearOperatorLowRankUpdate( scale, u=scale_perturb_factor, diag_update=scale_perturb_diag, is_diag_update_positive=scale_perturb_diag is None, is_non_singular= True, # Implied by is_positive_definite=True. is_self_adjoint=True, is_positive_definite=True, is_square=True) super(MultivariateNormalDiagPlusLowRank, self).__init__(loc=loc, scale=scale, validate_args=validate_args, allow_nan_stats=allow_nan_stats, name=name) self._parameters = parameters
def __init__(self, loc=None, scale_diag=None, scale_identity_multiplier=None, skewness=None, tailweight=None, distribution=None, validate_args=False, allow_nan_stats=True, name="MultivariateNormalLinearOperator"): """Construct VectorSinhArcsinhDiag distribution on `R^k`. The arguments `scale_diag` and `scale_identity_multiplier` combine to define the diagonal `scale` referred to in this class docstring: ```none scale = diag(scale_diag + scale_identity_multiplier * ones(k)) ``` The `batch_shape` is the broadcast shape between `loc` and `scale` arguments. The `event_shape` is given by last dimension of the matrix implied by `scale`. The last dimension of `loc` (if provided) must broadcast with this Additional leading dimensions (if any) will index batches. Args: loc: Floating-point `Tensor`. If this is set to `None`, `loc` is implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where `b >= 0` and `k` is the event size. scale_diag: Non-zero, floating-point `Tensor` representing a diagonal matrix added to `scale`. May have shape `[B1, ..., Bb, k]`, `b >= 0`, and characterizes `b`-batches of `k x k` diagonal matrices added to `scale`. When both `scale_identity_multiplier` and `scale_diag` are `None` then `scale` is the `Identity`. scale_identity_multiplier: Non-zero, floating-point `Tensor` representing a scale-identity-matrix added to `scale`. May have shape `[B1, ..., Bb]`, `b >= 0`, and characterizes `b`-batches of scale `k x k` identity matrices added to `scale`. When both `scale_identity_multiplier` and `scale_diag` are `None` then `scale` is the `Identity`. skewness: Skewness parameter. floating-point `Tensor` with shape broadcastable with `event_shape`. tailweight: Tailweight parameter. floating-point `Tensor` with shape broadcastable with `event_shape`. distribution: `tf.Distribution`-like instance. Distribution from which `k` iid samples are used as input to transformation `F`. Default is `tfp.distributions.Normal(loc=0., scale=1.)`. Must be a scalar-batch, scalar-event distribution. Typically `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is a function of non-trainable parameters. WARNING: If you backprop through a VectorSinhArcsinhDiag sample and `distribution` is not `FULLY_REPARAMETERIZED` yet is a function of trainable variables, then the gradient will be incorrect! validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. Raises: ValueError: if at most `scale_identity_multiplier` is specified. """ parameters = dict(locals()) with ops.name_scope(name, values=[ loc, scale_diag, scale_identity_multiplier, skewness, tailweight ]) as name: loc = ops.convert_to_tensor(loc, name="loc") if loc is not None else loc tailweight = 1. if tailweight is None else tailweight has_default_skewness = skewness is None skewness = 0. if skewness is None else skewness # Recall, with Z a random variable, # Y := loc + C * F(Z), # F(Z) := Sinh( (Arcsinh(Z) + skewness) * tailweight ) # F_0(Z) := Sinh( Arcsinh(Z) * tailweight ) # C := 2 * scale / F_0(2) # Construct shapes and 'scale' out of the scale_* and loc kwargs. # scale_linop is only an intermediary to: # 1. get shapes from looking at loc and the two scale args. # 2. combine scale_diag with scale_identity_multiplier, which gives us # 'scale', which in turn gives us 'C'. scale_linop = distribution_util.make_diag_scale( loc=loc, scale_diag=scale_diag, scale_identity_multiplier=scale_identity_multiplier, validate_args=False, assert_positive=False) batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale( loc, scale_linop) # scale_linop.diag_part() is efficient since it is a diag type linop. scale_diag_part = scale_linop.diag_part() dtype = scale_diag_part.dtype if distribution is None: distribution = normal.Normal(loc=array_ops.zeros([], dtype=dtype), scale=array_ops.ones([], dtype=dtype), allow_nan_stats=allow_nan_stats) else: asserts = distribution_util.maybe_check_scalar_distribution( distribution, dtype, validate_args) if asserts: scale_diag_part = control_flow_ops.with_dependencies( asserts, scale_diag_part) # Make the SAS bijector, 'F'. skewness = ops.convert_to_tensor(skewness, dtype=dtype, name="skewness") tailweight = ops.convert_to_tensor(tailweight, dtype=dtype, name="tailweight") f = bijectors.SinhArcsinh(skewness=skewness, tailweight=tailweight) if has_default_skewness: f_noskew = f else: f_noskew = bijectors.SinhArcsinh( skewness=skewness.dtype.as_numpy_dtype(0.), tailweight=tailweight) # Make the Affine bijector, Z --> loc + C * Z. c = 2 * scale_diag_part / f_noskew.forward( ops.convert_to_tensor(2, dtype=dtype)) affine = bijectors.Affine(shift=loc, scale_diag=c, validate_args=validate_args) bijector = bijectors.Chain([affine, f]) super(VectorSinhArcsinhDiag, self).__init__(distribution=distribution, bijector=bijector, batch_shape=batch_shape, event_shape=event_shape, validate_args=validate_args, name=name) self._parameters = parameters self._loc = loc self._scale = scale_linop self._tailweight = tailweight self._skewness = skewness