Example #1
    def test_dynamic_loc_static_scale(self):
        loc = tf1.placeholder_with_default(np.zeros([2, 3]), shape=None)
        diag = tf.ones([5, 2, 3])
        batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
            loc, tf.linalg.LinearOperatorDiag(diag))

        if not tf.executing_eagerly():
            # batch_shape depends on both args, and so is dynamic.  Since loc did not
            # have static shape, we inferred event shape entirely from scale, and this
            # is available statically.
            self.assertIsNone(tf.get_static_value(batch_shape))
            self.assertAllEqual([3], tf.get_static_value(event_shape))

        batch_shape_, event_shape_ = self.evaluate([batch_shape, event_shape])
        self.assertAllEqual([5, 2], batch_shape_)
        self.assertAllEqual([3], event_shape_)
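
For contrast with the dynamic case tested above, here is a minimal companion sketch of the fully static case (assuming the TFP-internal module path `tensorflow_probability.python.internal.distribution_util`): when both `loc` and `scale` have fully static shapes, `shapes_from_loc_and_scale` returns static `TensorShape`s rather than tensors.

import tensorflow as tf
from tensorflow_probability.python.internal import distribution_util

loc = tf.zeros([2, 3])                                    # batch [2], event [3]
scale = tf.linalg.LinearOperatorDiag(tf.ones([5, 2, 3]))  # batch [5, 2]
batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(loc, scale)
print(batch_shape)  # (5, 2): broadcast of loc's batch [2] with scale's batch [5, 2]
print(event_shape)  # (3,)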
Example #2
  def __init__(self,
               loc=None,
               scale=None,
               validate_args=False,
               allow_nan_stats=True,
               name="VectorLaplaceLinearOperator"):
    """Construct Vector Laplace distribution on `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and `scale`
    arguments.

    The `event_shape` is given by the last dimension of the matrix implied by
    `scale`. The last dimension of `loc` (if provided) must broadcast with this.

    Recall that `covariance = 2 * scale @ scale.T`.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape
        `[B1, ..., Bb, k, k]`.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
      allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member. If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      name: The name to give Ops created by the initializer.

    Raises:
      ValueError: if `scale` is unspecified.
      TypeError: if not `scale.dtype.is_floating`
    """
    parameters = dict(locals())
    if scale is None:
      raise ValueError("Missing required `scale` parameter.")
    if not dtype_util.is_floating(scale.dtype):
      raise TypeError("`scale` parameter must have floating-point dtype.")

    with tf.name_scope(name):
      # Since expand_dims doesn't preserve constant-ness, we obtain the
      # non-dynamic value if possible.
      loc = loc if loc is None else tf.convert_to_tensor(
          loc, name="loc", dtype=scale.dtype)
      batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
          loc, scale)

      super(VectorLaplaceLinearOperator, self).__init__(
          distribution=laplace.Laplace(
              loc=tf.zeros([], dtype=scale.dtype),
              scale=tf.ones([], dtype=scale.dtype)),
          bijector=affine_linear_operator_bijector.AffineLinearOperator(
              shift=loc, scale=scale, validate_args=validate_args),
          batch_shape=batch_shape,
          event_shape=event_shape,
          validate_args=validate_args,
          name=name)
      self._parameters = parameters
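
A hypothetical usage sketch for the constructor above (assumes a TFP release that still exports `tfd.VectorLaplaceLinearOperator`; the class was deprecated and removed in later versions):

import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

# loc broadcasts against a batch of two 3x3 diagonal scale operators.
dist = tfd.VectorLaplaceLinearOperator(
    loc=tf.zeros([3]),
    scale=tf.linalg.LinearOperatorDiag(tf.ones([2, 3])))
print(dist.batch_shape)  # (2,)
print(dist.event_shape)  # (3,)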
Example #3
    def __init__(self,
                 loc=None,
                 scale_diag=None,
                 scale_identity_multiplier=None,
                 skewness=None,
                 tailweight=None,
                 distribution=None,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="MultivariateNormalLinearOperator"):
        """Construct VectorSinhArcsinhDiag distribution on `R^k`.

    The arguments `scale_diag` and `scale_identity_multiplier` combine to
    define the diagonal `scale` referred to in this class docstring:

    ```none
    scale = diag(scale_diag + scale_identity_multiplier * ones(k))
    ```

    The `batch_shape` is the broadcast shape between `loc` and `scale`
    arguments.

    The `event_shape` is given by the last dimension of the matrix implied by
    `scale`. The last dimension of `loc` (if provided) must broadcast with this.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      scale_diag: Non-zero, floating-point `Tensor` representing a diagonal
        matrix added to `scale`. May have shape `[B1, ..., Bb, k]`, `b >= 0`,
        and characterizes `b`-batches of `k x k` diagonal matrices added to
        `scale`. When both `scale_identity_multiplier` and `scale_diag` are
        `None` then `scale` is the `Identity`.
      scale_identity_multiplier: Non-zero, floating-point `Tensor` representing
        a scale-identity-matrix added to `scale`. May have shape
        `[B1, ..., Bb]`, `b >= 0`, and characterizes `b`-batches of scale
        `k x k` identity matrices added to `scale`. When both
        `scale_identity_multiplier` and `scale_diag` are `None` then `scale`
        is the `Identity`.
      skewness:  Skewness parameter.  floating-point `Tensor` with shape
        broadcastable with `event_shape`.
      tailweight:  Tailweight parameter.  floating-point `Tensor` with shape
        broadcastable with `event_shape`.
      distribution: `tf.Distribution`-like instance. Distribution from which `k`
        iid samples are used as input to transformation `F`.  Default is
        `tf.distributions.Normal(loc=0., scale=1.)`.
        Must be a scalar-batch, scalar-event distribution.  Typically
        `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is
        a function of non-trainable parameters. WARNING: If you backprop through
        a VectorSinhArcsinhDiag sample and `distribution` is not
        `FULLY_REPARAMETERIZED` yet is a function of trainable variables, then
        the gradient will be incorrect!
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      ValueError: if at most `scale_identity_multiplier` is specified.
    """
        parameters = dict(locals())

        with tf.name_scope(name,
                           values=[
                               loc, scale_diag, scale_identity_multiplier,
                               skewness, tailweight
                           ]) as name:
            loc = tf.convert_to_tensor(loc,
                                       name="loc") if loc is not None else loc
            tailweight = 1. if tailweight is None else tailweight
            has_default_skewness = skewness is None
            skewness = 0. if skewness is None else skewness

            # Recall, with Z a random variable,
            #   Y := loc + C * F(Z),
            #   F(Z) := Sinh( (Arcsinh(Z) + skewness) * tailweight )
            #   F_0(Z) := Sinh( Arcsinh(Z) * tailweight )
            #   C := 2 * scale / F_0(2)

            # Construct shapes and 'scale' out of the scale_* and loc kwargs.
            # scale_linop is only an intermediary to:
            #  1. get shapes from looking at loc and the two scale args.
            #  2. combine scale_diag with scale_identity_multiplier, which gives us
            #     'scale', which in turn gives us 'C'.
            scale_linop = distribution_util.make_diag_scale(
                loc=loc,
                scale_diag=scale_diag,
                scale_identity_multiplier=scale_identity_multiplier,
                validate_args=False,
                assert_positive=False)
            batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
                loc, scale_linop)
            # scale_linop.diag_part() is efficient since it is a diag type linop.
            scale_diag_part = scale_linop.diag_part()
            dtype = scale_diag_part.dtype

            if distribution is None:
                distribution = tf.distributions.Normal(
                    loc=tf.zeros([], dtype=dtype),
                    scale=tf.ones([], dtype=dtype),
                    allow_nan_stats=allow_nan_stats)
            else:
                asserts = distribution_util.maybe_check_scalar_distribution(
                    distribution, dtype, validate_args)
                if asserts:
                    scale_diag_part = control_flow_ops.with_dependencies(
                        asserts, scale_diag_part)

            # Make the SAS bijector, 'F'.
            skewness = tf.convert_to_tensor(skewness,
                                            dtype=dtype,
                                            name="skewness")
            tailweight = tf.convert_to_tensor(tailweight,
                                              dtype=dtype,
                                              name="tailweight")
            f = bijectors.SinhArcsinh(skewness=skewness, tailweight=tailweight)
            if has_default_skewness:
                f_noskew = f
            else:
                f_noskew = bijectors.SinhArcsinh(
                    skewness=skewness.dtype.as_numpy_dtype(0.),
                    tailweight=tailweight)

            # Make the Affine bijector, Z --> loc + C * Z.
            c = 2 * scale_diag_part / f_noskew.forward(
                tf.convert_to_tensor(2, dtype=dtype))
            affine = bijectors.Affine(shift=loc,
                                      scale_diag=c,
                                      validate_args=validate_args)

            bijector = bijectors.Chain([affine, f])

            super(VectorSinhArcsinhDiag,
                  self).__init__(distribution=distribution,
                                 bijector=bijector,
                                 batch_shape=batch_shape,
                                 event_shape=event_shape,
                                 validate_args=validate_args,
                                 name=name)
        self._parameters = parameters
        self._loc = loc
        self._scale = scale_linop
        self._tailweight = tailweight
        self._skewness = skewness
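
The comment block inside the constructor defines the transform `F` and the scale `C`. A small NumPy sketch (names here are illustrative, not library code) makes the default-parameter behavior concrete:

import numpy as np

def f(z, skewness=0.0, tailweight=1.0):
    # F(Z) := Sinh((Arcsinh(Z) + skewness) * tailweight)
    return np.sinh((np.arcsinh(z) + skewness) * tailweight)

# With skewness=0 and tailweight=1, F is the identity, so
# C = 2 * scale / F_0(2) = scale and Y = loc + scale * Z.
z = np.linspace(-2.0, 2.0, 5)
assert np.allclose(f(z), z)
print(f(np.array([2.0]), tailweight=1.5))  # > 2: tailweight > 1 fattens the tails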
Example #4
  def __init__(self,
               loc=None,
               scale=None,
               validate_args=False,
               allow_nan_stats=True,
               name="VectorLaplaceLinearOperator"):
    """Construct Vector Laplace distribution on `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and `scale`
    arguments.

    The `event_shape` is given by the last dimension of the matrix implied by
    `scale`. The last dimension of `loc` (if provided) must broadcast with this.

    Recall that `covariance = 2 * scale @ scale.T`.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape
        `[B1, ..., Bb, k, k]`.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
      allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member. If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      name: The name to give Ops created by the initializer.

    Raises:
      ValueError: if `scale` is unspecified.
      TypeError: if not `scale.dtype.is_floating`
    """
    parameters = dict(locals())
    if scale is None:
      raise ValueError("Missing required `scale` parameter.")
    if not scale.dtype.is_floating:
      raise TypeError("`scale` parameter must have floating-point dtype.")

    with tf.name_scope(name, values=[loc] + scale.graph_parents):
      # Since expand_dims doesn't preserve constant-ness, we obtain the
      # non-dynamic value if possible.
      loc = tf.convert_to_tensor(loc, name="loc") if loc is not None else loc
      batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
          loc, scale)

      super(VectorLaplaceLinearOperator, self).__init__(
          distribution=laplace.Laplace(
              loc=tf.zeros([], dtype=scale.dtype),
              scale=tf.ones([], dtype=scale.dtype)),
          bijector=bijectors.AffineLinearOperator(
              shift=loc, scale=scale, validate_args=validate_args),
          batch_shape=batch_shape,
          event_shape=event_shape,
          validate_args=validate_args,
          name=name)
      self._parameters = parameters
Example #5
    def __init__(self,
                 df,
                 loc=None,
                 scale_identity_multiplier=None,
                 scale_diag=None,
                 scale_tril=None,
                 scale_perturb_factor=None,
                 scale_perturb_diag=None,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="VectorStudentT"):
        """Instantiates the vector Student's t-distributions on `R^k`.

    The `batch_shape` is the broadcast between `df.batch_shape` and
    `Affine.batch_shape` where `Affine` is constructed from `loc` and
    `scale_*` arguments.

    The `event_shape` is `Affine.event_shape`.

    Args:
      df: Floating-point `Tensor`. The degrees of freedom of the
        distribution(s). `df` must contain only positive values. Must be
        scalar if `loc`, `scale_*` imply non-scalar batch_shape or must have the
        same `batch_shape` implied by `loc`, `scale_*`.
      loc: Floating-point `Tensor`. If this is set to `None`, no `loc` is
        applied.
      scale_identity_multiplier: floating point rank 0 `Tensor` representing a
        scaling done to the identity matrix. When `scale_identity_multiplier =
        scale_diag = scale_tril = None` then `scale += IdentityMatrix`. Otherwise
        no scaled-identity-matrix is added to `scale`.
      scale_diag: Floating-point `Tensor` representing the diagonal matrix.
        `scale_diag` has shape [N1, N2, ..., k], which represents a k x k
        diagonal matrix. When `None` no diagonal term is added to `scale`.
      scale_tril: Floating-point `Tensor` representing the lower triangular matrix.
        `scale_tril` has shape [N1, N2, ..., k, k], which represents a k x k
        lower triangular matrix. When `None` no `scale_tril` term is added to
        `scale`. The upper triangular elements above the diagonal are ignored.
      scale_perturb_factor: Floating-point `Tensor` representing factor matrix
        with last two dimensions of shape `(k, r)`. When `None`, no rank-r
        update is added to `scale`.
      scale_perturb_diag: Floating-point `Tensor` representing the diagonal
        matrix. `scale_perturb_diag` has shape [N1, N2, ..., r], which
        represents an r x r diagonal matrix. When `None`, low rank updates will
        take the form `scale_perturb_factor * scale_perturb_factor.T`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    """
        parameters = dict(locals())
        graph_parents = [
            df, loc, scale_identity_multiplier, scale_diag, scale_tril,
            scale_perturb_factor, scale_perturb_diag
        ]
        with tf.name_scope(name) as name:
            with tf.name_scope("init"):
                dtype = dtype_util.common_dtype(graph_parents, tf.float32)
                df = tf.convert_to_tensor(value=df, name="df", dtype=dtype)
                # The shape of the _VectorStudentT distribution is governed by the
                # relationship between df.batch_shape and affine.batch_shape. In
                # pseudocode the basic procedure is:
                #   if df.batch_shape is scalar:
                #     if affine.batch_shape is not scalar:
                #       # broadcast distribution.sample so
                #       # it has affine.batch_shape.
                #     self.batch_shape = affine.batch_shape
                #   else:
                #     if affine.batch_shape is scalar:
                #       # let affine broadcasting do its thing.
                #     self.batch_shape = df.batch_shape
                # All of the above magic is actually handled by TransformedDistribution.
                # Here we really only need to collect the affine.batch_shape and decide
                # what we're going to pass in to TransformedDistribution's
                # (override) batch_shape arg.
                affine = affine_bijector.Affine(
                    shift=loc,
                    scale_identity_multiplier=scale_identity_multiplier,
                    scale_diag=scale_diag,
                    scale_tril=scale_tril,
                    scale_perturb_factor=scale_perturb_factor,
                    scale_perturb_diag=scale_perturb_diag,
                    validate_args=validate_args,
                    dtype=dtype)
                distribution = student_t.StudentT(
                    df=df,
                    loc=tf.zeros([], dtype=affine.dtype),
                    scale=tf.ones([], dtype=affine.dtype))
                batch_shape, override_event_shape = (
                    distribution_util.shapes_from_loc_and_scale(
                        affine.shift, affine.scale))
                override_batch_shape = distribution_util.pick_vector(
                    distribution.is_scalar_batch(), batch_shape,
                    tf.constant([], dtype=tf.int32))
                super(_VectorStudentT,
                      self).__init__(distribution=distribution,
                                     bijector=affine,
                                     batch_shape=override_batch_shape,
                                     event_shape=override_event_shape,
                                     validate_args=validate_args,
                                     name=name)
                self._parameters = parameters
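
The batch-shape override above hinges on `pick_vector` (TFP-internal; module path assumed), which chooses between two shape vectors based on a predicate and resolves statically when the predicate is constant. A minimal sketch:

import tensorflow as tf
from tensorflow_probability.python.internal import distribution_util

batch_shape = tf.constant([5, 2], dtype=tf.int32)
empty = tf.constant([], dtype=tf.int32)
# If the base distribution has a scalar batch shape, override with [5, 2];
# otherwise pass an empty override and let affine broadcasting handle it.
print(distribution_util.pick_vector(tf.constant(True), batch_shape, empty))   # [5 2]
print(distribution_util.pick_vector(tf.constant(False), batch_shape, empty))  # []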
Example #6
  def __init__(self,
               loc=None,
               scale=None,
               validate_args=False,
               allow_nan_stats=True,
               experimental_use_kahan_sum=False,
               name='MultivariateNormalLinearOperator'):
    """Construct Multivariate Normal distribution on `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and `scale`
    arguments.

    The `event_shape` is given by the last dimension of the matrix implied by
    `scale`. The last dimension of `loc` (if provided) must broadcast with this.

    Recall that `covariance = scale @ scale.T`.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape
        `[B1, ..., Bb, k, k]`.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
      allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member. If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      experimental_use_kahan_sum: Python `bool`. When `True`, we use Kahan
        summation to aggregate independent underlying log_prob values. For best
        results, Kahan summation should also be applied when computing the
        log-determinant of the `LinearOperator` representing the scale matrix.
        Kahan summation improves on the precision of a naive float32 sum, which
        can be noticeable in particular for large dimensions in float32.
        See CPU caveat on `tfp.math.reduce_kahan_sum`.
      name: The name to give Ops created by the initializer.

    Raises:
      ValueError: if `scale` is unspecified.
      TypeError: if not `scale.dtype.is_floating`
    """
    parameters = dict(locals())
    self._experimental_use_kahan_sum = experimental_use_kahan_sum
    if scale is None:
      raise ValueError('Missing required `scale` parameter.')
    if not dtype_util.is_floating(scale.dtype):
      raise TypeError('`scale` parameter must have floating-point dtype.')

    with tf.name_scope(name) as name:
      dtype = dtype_util.common_dtype([loc, scale], dtype_hint=tf.float32)
      # Since expand_dims doesn't preserve constant-ness, we obtain the
      # non-dynamic value if possible.
      loc = tensor_util.convert_nonref_to_tensor(
          loc, dtype=dtype, name='loc')
      batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
          loc, scale)
    self._loc = loc
    self._scale = scale

    bijector = scale_matvec_linear_operator.ScaleMatvecLinearOperator(
        scale, validate_args=validate_args)
    if loc is not None:
      bijector = shift_bijector.Shift(
          shift=loc, validate_args=validate_args)(bijector)
    super(MultivariateNormalLinearOperator, self).__init__(
        # TODO(b/137665504): Use batch-adding meta-distribution to set the batch
        # shape instead of tf.zeros.
        # We use `Sample` instead of `Independent` because `Independent`
        # requires concatenating `batch_shape` and `event_shape`, which loses
        # static `batch_shape` information when `event_shape` is not statically
        # known.
        distribution=sample.Sample(
            normal.Normal(
                loc=tf.zeros(batch_shape, dtype=dtype),
                scale=tf.ones([], dtype=dtype)),
            event_shape,
            experimental_use_kahan_sum=experimental_use_kahan_sum),
        bijector=bijector,
        validate_args=validate_args,
        name=name)
    self._parameters = parameters
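
A sketch of the same construction through the public API (assuming current `tfp.distributions` / `tfp.bijectors`): a `Sample` of iid normals pushed through Shift(loc) composed with ScaleMatvec should agree with the built-in class.

import tensorflow as tf
import tensorflow_probability as tfp
tfd, tfb = tfp.distributions, tfp.bijectors

scale = tf.linalg.LinearOperatorLowerTriangular([[1., 0.], [0.5, 2.]])
mvn = tfd.TransformedDistribution(
    distribution=tfd.Sample(tfd.Normal(0., 1.), sample_shape=[2]),
    bijector=tfb.Shift([1., -1.])(tfb.ScaleMatvecLinearOperator(scale)))
ref = tfd.MultivariateNormalLinearOperator(loc=[1., -1.], scale=scale)
x = tf.constant([0.3, 0.7])
print(mvn.log_prob(x).numpy(), ref.log_prob(x).numpy())  # equal up to float error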
Example #7
  def __init__(self,
               loc=None,
               precision_factor=None,
               precision=None,
               validate_args=False,
               allow_nan_stats=True,
               name='MultivariateNormalPrecisionFactorLinearOperator'):
    """Initialize distribution.

    Precision is the inverse of the covariance matrix, and
    `precision_factor @ precision_factor.T = precision`.

    The `batch_shape` of this distribution is the broadcast of
    `loc.shape[:-1]` and `precision_factor.batch_shape`.

    The `event_shape` of this distribution is determined by `loc.shape[-1:]`,
    OR `precision_factor.shape[-1:]`, which must match.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      precision_factor: Required nonsingular `tf.linalg.LinearOperator` instance
        with same `dtype` and shape compatible with `loc`.
      precision: Optional square `tf.linalg.LinearOperator` instance with same
        `dtype` and shape compatible with `loc` and `precision_factor`.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
      allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member. If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      name: The name to give Ops created by the initializer.
    """
    parameters = dict(locals())
    with tf.name_scope(name) as name:
      if precision_factor is None:
        raise ValueError(
            'Argument `precision_factor` must be provided. Found `None`')

      dtype = dtype_util.common_dtype([loc, precision_factor, precision],
                                      dtype_hint=tf.float32)
      loc = tensor_util.convert_nonref_to_tensor(loc, dtype=dtype, name='loc')

      self._loc = loc
      self._precision_factor = precision_factor
      self._precision = precision

      batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
          loc, precision_factor)

      # Proof of factors (used throughout code):
      # Let,
      #   C = covariance,
      #   P = inv(covariance) = precision
      #   P = F @ F.T  (so F is the `precision_factor`).
      #
      # Then, the log prob term is
      #  x.T @ inv(C) @ x
      #  = x.T @ P @ x
      #  = x.T @ F @ F.T @ x
      #  = || F.T @ x ||**2
      # notice it involves F.T, which is why we set adjoint=True in various
      # places.
      #
      # Also, if w ~ Normal(0, I), then we can sample by setting
      #  x = inv(F.T) @ w + loc,
      # since then
      #  E[(x - loc) @ (x - loc).T]
      #  = E[inv(F.T) @ w @ w.T @ inv(F)]
      #  = inv(F.T) @ inv(F)
      #  = inv(F @ F.T)
      #  = inv(P)
      #  = C.

      if precision is not None:
        precision.shape.assert_is_compatible_with(precision_factor.shape)

      bijector = invert.Invert(
          scale_matvec_linear_operator.ScaleMatvecLinearOperator(
              scale=precision_factor,
              validate_args=validate_args,
              adjoint=True)
      )
      if loc is not None:
        shift = shift_bijector.Shift(shift=loc, validate_args=validate_args)
        bijector = shift(bijector)

      super(MultivariateNormalPrecisionFactorLinearOperator, self).__init__(
          distribution=mvn_diag.MultivariateNormalDiag(
              loc=tf.zeros(
                  ps.concat([batch_shape, event_shape], axis=0), dtype=dtype)),
          bijector=bijector,
          validate_args=validate_args,
          name=name)
      self._parameters = parameters
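
The "proof of factors" comment above can be checked numerically. A plain-NumPy sketch (illustrative only) of the two identities it relies on:

import numpy as np

rng = np.random.default_rng(0)
f = np.tril(rng.normal(size=(3, 3))) + 3 * np.eye(3)  # nonsingular factor F
precision = f @ f.T                                   # P = F @ F.T
cov = np.linalg.inv(precision)                        # C = inv(P)

x = rng.normal(size=3)
# x.T @ P @ x == ||F.T @ x||**2  (why adjoint=True appears above)
assert np.isclose(x @ precision @ x, np.sum((f.T @ x) ** 2))
# inv(F.T) @ inv(F) == inv(F @ F.T) == C  (why sampling uses inv(F.T) @ w)
assert np.allclose(np.linalg.inv(f.T) @ np.linalg.inv(f), cov)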
Example #8
    def __init__(self,
                 loc=None,
                 scale=None,
                 validate_args=False,
                 allow_nan_stats=True,
                 name='VectorExponentialLinearOperator'):
        """Construct Vector Exponential distribution supported on a subset of `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and `scale`
    arguments.

    The `event_shape` is given by the last dimension of the matrix implied by
    `scale`. The last dimension of `loc` (if provided) must broadcast with this.

    Recall that `covariance = scale @ scale.T`.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape
        `[B1, ..., Bb, k, k]`.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
      allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member. If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      name: The name to give Ops created by the initializer.

    Raises:
      ValueError: if `scale` is unspecified.
      TypeError: if not `scale.dtype.is_floating`
    """
        parameters = dict(locals())
        if loc is None:
            loc = 0.0  # Implicit value for backwards compatibility.
        if scale is None:
            raise ValueError('Missing required `scale` parameter.')
        if not dtype_util.is_floating(scale.dtype):
            raise TypeError(
                '`scale` parameter must have floating-point dtype.')

        with tf.name_scope(name) as name:
            # Since expand_dims doesn't preserve constant-ness, we obtain the
            # non-dynamic value if possible.
            loc = loc if loc is None else tf.convert_to_tensor(
                loc, name='loc', dtype=scale.dtype)
            batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
                loc, scale)
            self._loc = loc
            self._scale = scale
            super(VectorExponentialLinearOperator, self).__init__(
                # TODO(b/137665504): Use batch-adding meta-distribution to set the
                # batch shape instead of tf.ones.
                # We use `Sample` instead of `Independent` because `Independent`
                # requires concatenating `batch_shape` and `event_shape`, which loses
                # static `batch_shape` information when `event_shape` is not
                # statically known.
                distribution=sample.Sample(
                    exponential.Exponential(rate=tf.ones(batch_shape,
                                                         dtype=scale.dtype),
                                            allow_nan_stats=allow_nan_stats),
                    event_shape),
                bijector=shift_bijector.Shift(shift=loc)(
                    scale_matvec_linear_operator.ScaleMatvecLinearOperator(
                        scale=scale, validate_args=validate_args)),
                validate_args=validate_args,
                name=name)
            self._parameters = parameters
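
A rough public-API sketch of the construction above (assuming current `tfp`; with `loc` omitted the mean is `scale @ ones`, since `Exponential(rate=1)` has unit mean):

import tensorflow as tf
import tensorflow_probability as tfp
tfd, tfb = tfp.distributions, tfp.bijectors

ve = tfd.TransformedDistribution(
    distribution=tfd.Sample(tfd.Exponential(rate=1.), sample_shape=[2]),
    bijector=tfb.ScaleMatvecLinearOperator(tf.linalg.LinearOperatorDiag([1., 2.])))
samples = ve.sample(10000, seed=42)
print(tf.reduce_mean(samples, axis=0))  # approx. [1., 2.] == scale @ ones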
Example #9
    def test_static_loc_static_scale_non_matching_event_size_raises(self):
        loc = tf.zeros([2, 4])
        diag = tf.ones([5, 1, 3])
        with self.assertRaisesRegexp(ValueError, 'could not be broadcast'):
            distribution_util.shapes_from_loc_and_scale(
                loc, tf.linalg.LinearOperatorDiag(diag))
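
Why this raises: `loc` implies event size 4 while `scale` implies event size 3, and [4] vs. [3] cannot broadcast. An illustrative NumPy check (requires NumPy 1.20+ for `broadcast_shapes`):

import numpy as np

try:
    np.broadcast_shapes((2, 4), (5, 1, 3))  # loc shape vs. scale batch+event shape
except ValueError as e:
    print(e)  # shape mismatch: objects cannot be broadcast to a single shape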
Example #10
    def __init__(
            self,
            loc=None,
            cov_operator=None,
            validate_args=False,
            allow_nan_stats=True,
            name='MultivariateNormalLowRankUpdateLinearOperatorCovariance'):
        """Construct Multivariate Normal distribution on `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and
    `cov_operator` arguments.

    The `event_shape` is given by the last dimension of the matrix implied by
    `cov_operator`. The last dimension of `loc` (if provided) must
    broadcast with this.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      cov_operator: Instance of `LinearOperatorLowRankUpdate` with same
        `dtype` as `loc` and shape `[B1, ..., Bb, k, k]`.  Must have structure
        `A + UU^T` or `A + UDU^T`, where `A` and `D` (if provided) are
        self-adjoint and positive definite.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are invalid,
        correct behavior is not guaranteed.
      allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member. If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      name: The name to give Ops created by the initializer.

    Raises:
      ValueError: if `cov_operator` is unspecified.
      ValueError: if `cov_operator` does not specify the self-adjoint
        positive definite conditions explained above.
      TypeError: if not `cov_operator.dtype.is_floating`
    """
        parameters = dict(locals())
        if cov_operator is None:
            raise ValueError('Missing required `cov_operator` parameter.')
        if not dtype_util.is_floating(cov_operator.dtype):
            raise TypeError(
                '`cov_operator` parameter must have floating-point dtype.')
        if not isinstance(cov_operator, tf.linalg.LinearOperatorLowRankUpdate):
            raise TypeError(
                '`cov_operator` must be a LinearOperatorLowRankUpdate. '
                'Found {}'.format(type(cov_operator)))

        if cov_operator.u is not cov_operator.v:
            raise ValueError(
                'The `U` and `V` (typically low rank) matrices of '
                '`cov_operator` must be the same, but were not.')

        # For cov_operator, raise if the user explicitly set these to False,
        # or if False was inferred by the LinearOperator. The default value is None,
        # which will not trigger these raises.
        # pylint: disable=g-bool-id-comparison
        if cov_operator.is_self_adjoint is False:
            raise ValueError('`cov_operator` must be self-adjoint.')
        if cov_operator.is_positive_definite is False:
            raise ValueError('`cov_operator` must be positive definite.')
        # pylint: enable=g-bool-id-comparison

        # For the base_operator, we require the user to explicitly set
        # is_self_adjoint and is_positive_definite.
        if not cov_operator.base_operator.is_self_adjoint:
            raise ValueError(
                'The `base_operator` of `cov_operator` must be self-adjoint. '
                'You may have to set the `is_self_adjoint` initialization hint.'
            )
        if not cov_operator.base_operator.is_positive_definite:
            raise ValueError(
                'The `base_operator` of `cov_operator` must be positive '
                'definite. You may have to set the `is_positive_definite` '
                'initialization hint.')

        with tf.name_scope(name) as name:
            dtype = dtype_util.common_dtype([loc, cov_operator],
                                            dtype_hint=tf.float32)
            if loc is not None:
                loc = tensor_util.convert_nonref_to_tensor(loc,
                                                           dtype=dtype,
                                                           name='loc')

            # Get dynamic shapes (for self.*shape_tensor methods).
            # shapes_from_loc_and_scale tries to return TensorShapes, but may return
            # tensors. So we can only use it for the *shape_tensor methods.
            # It is useful though, since it does lots of shape checks, and is a
            # well-tested function.
            batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
                loc, cov_operator)
            self._batch_shape_tensor_value = ps.convert_to_shape_tensor(
                batch_shape, name='batch_shape')
            self._event_shape_tensor_value = ps.convert_to_shape_tensor(
                event_shape, name='event_shape')

            # Get static shapes (for self.*shape methods).
            self._batch_shape_value = cov_operator.batch_shape
            if loc is not None:
                self._batch_shape_value = tf.broadcast_static_shape(
                    self._batch_shape_value, loc.shape[:-1])
            self._event_shape_value = cov_operator.shape[-1:]
            if loc is not None:
                self._event_shape_value = tf.broadcast_static_shape(
                    self._event_shape_value, loc.shape[-1:])

        self._loc = loc
        self._cov_operator = cov_operator

        super(MultivariateNormalLowRankUpdateLinearOperatorCovariance,
              self).__init__(dtype=dtype,
                             reparameterization_type=reparameterization.
                             FULLY_REPARAMETERIZED,
                             validate_args=validate_args,
                             allow_nan_stats=allow_nan_stats,
                             parameters=parameters,
                             name=name)
        self._parameters = parameters
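
A sketch of building a `cov_operator` that passes the checks above (assumes the current `tf.linalg` API): the structure `A + U U^T`, with the positive-definiteness hints set explicitly on the base operator, and `v` left to default to `u`.

import tensorflow as tf

base = tf.linalg.LinearOperatorDiag(
    tf.ones([3]), is_self_adjoint=True, is_positive_definite=True)
u = tf.random.normal([3, 2])
cov_operator = tf.linalg.LinearOperatorLowRankUpdate(base, u=u)  # v defaults to u
print(cov_operator.is_self_adjoint)       # True, inferred from the hints
print(cov_operator.is_positive_definite)  # True, inferred from the hints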
Example #11
  def __init__(self,
               df,
               loc=None,
               scale_identity_multiplier=None,
               scale_diag=None,
               scale_tril=None,
               scale_perturb_factor=None,
               scale_perturb_diag=None,
               validate_args=False,
               allow_nan_stats=True,
               name="VectorStudentT"):
    """Instantiates the vector Student's t-distributions on `R^k`.

    The `batch_shape` is the broadcast between `df.batch_shape` and
    `Affine.batch_shape` where `Affine` is constructed from `loc` and
    `scale_*` arguments.

    The `event_shape` is `Affine.event_shape`.

    Args:
      df: Floating-point `Tensor`. The degrees of freedom of the
        distribution(s). `df` must contain only positive values. Must be
        scalar if `loc`, `scale_*` imply non-scalar batch_shape or must have the
        same `batch_shape` implied by `loc`, `scale_*`.
      loc: Floating-point `Tensor`. If this is set to `None`, no `loc` is
        applied.
      scale_identity_multiplier: floating point rank 0 `Tensor` representing a
        scaling done to the identity matrix. When `scale_identity_multiplier =
        scale_diag = scale_tril = None` then `scale += IdentityMatrix`. Otherwise
        no scaled-identity-matrix is added to `scale`.
      scale_diag: Floating-point `Tensor` representing the diagonal matrix.
        `scale_diag` has shape [N1, N2, ..., k], which represents a k x k
        diagonal matrix. When `None` no diagonal term is added to `scale`.
      scale_tril: Floating-point `Tensor` representing the lower triangular matrix.
        `scale_tril` has shape [N1, N2, ..., k, k], which represents a k x k
        lower triangular matrix. When `None` no `scale_tril` term is added to
        `scale`. The upper triangular elements above the diagonal are ignored.
      scale_perturb_factor: Floating-point `Tensor` representing factor matrix
        with last two dimensions of shape `(k, r)`. When `None`, no rank-r
        update is added to `scale`.
      scale_perturb_diag: Floating-point `Tensor` representing the diagonal
        matrix. `scale_perturb_diag` has shape [N1, N2, ..., r], which
        represents an r x r diagonal matrix. When `None`, low rank updates will
        take the form `scale_perturb_factor * scale_perturb_factor.T`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    """
    parameters = dict(locals())
    graph_parents = [df, loc, scale_identity_multiplier, scale_diag,
                     scale_tril, scale_perturb_factor, scale_perturb_diag]
    with tf.name_scope(name) as name:
      with tf.name_scope("init", values=graph_parents):
        # The shape of the _VectorStudentT distribution is governed by the
        # relationship between df.batch_shape and affine.batch_shape. In
        # pseudocode the basic procedure is:
        #   if df.batch_shape is scalar:
        #     if affine.batch_shape is not scalar:
        #       # broadcast distribution.sample so
        #       # it has affine.batch_shape.
        #     self.batch_shape = affine.batch_shape
        #   else:
        #     if affine.batch_shape is scalar:
        #       # let affine broadcasting do its thing.
        #     self.batch_shape = df.batch_shape
        # All of the above magic is actually handled by TransformedDistribution.
        # Here we really only need to collect the affine.batch_shape and decide
        # what we're going to pass in to TransformedDistribution's
        # (override) batch_shape arg.
        affine = bijectors.Affine(
            shift=loc,
            scale_identity_multiplier=scale_identity_multiplier,
            scale_diag=scale_diag,
            scale_tril=scale_tril,
            scale_perturb_factor=scale_perturb_factor,
            scale_perturb_diag=scale_perturb_diag,
            validate_args=validate_args)
        distribution = student_t.StudentT(
            df=df,
            loc=tf.zeros([], dtype=affine.dtype),
            scale=tf.ones([], dtype=affine.dtype))
        batch_shape, override_event_shape = (
            distribution_util.shapes_from_loc_and_scale(
                affine.shift, affine.scale))
        override_batch_shape = distribution_util.pick_vector(
            distribution.is_scalar_batch(), batch_shape,
            tf.constant([], dtype=tf.int32))
        super(_VectorStudentT, self).__init__(
            distribution=distribution,
            bijector=affine,
            batch_shape=override_batch_shape,
            event_shape=override_event_shape,
            validate_args=validate_args,
            name=name)
        self._parameters = parameters
Example #12
  def __init__(self,
               loc=None,
               scale_diag=None,
               scale_identity_multiplier=None,
               skewness=None,
               tailweight=None,
               distribution=None,
               validate_args=False,
               allow_nan_stats=True,
               name="MultivariateNormalLinearOperator"):
    """Construct VectorSinhArcsinhDiag distribution on `R^k`.

    The arguments `scale_diag` and `scale_identity_multiplier` combine to
    define the diagonal `scale` referred to in this class docstring:

    ```none
    scale = diag(scale_diag + scale_identity_multiplier * ones(k))
    ```

    The `batch_shape` is the broadcast shape between `loc` and `scale`
    arguments.

    The `event_shape` is given by the last dimension of the matrix implied by
    `scale`. The last dimension of `loc` (if provided) must broadcast with this.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      scale_diag: Non-zero, floating-point `Tensor` representing a diagonal
        matrix added to `scale`. May have shape `[B1, ..., Bb, k]`, `b >= 0`,
        and characterizes `b`-batches of `k x k` diagonal matrices added to
        `scale`. When both `scale_identity_multiplier` and `scale_diag` are
        `None` then `scale` is the `Identity`.
      scale_identity_multiplier: Non-zero, floating-point `Tensor` representing
        a scale-identity-matrix added to `scale`. May have shape
        `[B1, ..., Bb]`, `b >= 0`, and characterizes `b`-batches of scale
        `k x k` identity matrices added to `scale`. When both
        `scale_identity_multiplier` and `scale_diag` are `None` then `scale`
        is the `Identity`.
      skewness:  Skewness parameter.  floating-point `Tensor` with shape
        broadcastable with `event_shape`.
      tailweight:  Tailweight parameter.  floating-point `Tensor` with shape
        broadcastable with `event_shape`.
      distribution: `tf.Distribution`-like instance. Distribution from which `k`
        iid samples are used as input to transformation `F`.  Default is
        `tfd.Normal(loc=0., scale=1.)`.
        Must be a scalar-batch, scalar-event distribution.  Typically
        `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is
        a function of non-trainable parameters. WARNING: If you backprop through
        a VectorSinhArcsinhDiag sample and `distribution` is not
        `FULLY_REPARAMETERIZED` yet is a function of trainable variables, then
        the gradient will be incorrect!
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      ValueError: if at most `scale_identity_multiplier` is specified.
    """
    parameters = dict(locals())

    with tf.name_scope(
        name,
        values=[
            loc, scale_diag, scale_identity_multiplier, skewness, tailweight
        ]) as name:
      dtype = dtype_util.common_dtype(
          [loc, scale_diag, scale_identity_multiplier, skewness, tailweight],
          tf.float32)
      loc = loc if loc is None else tf.convert_to_tensor(
          loc, name="loc", dtype=dtype)
      tailweight = 1. if tailweight is None else tailweight
      has_default_skewness = skewness is None
      skewness = 0. if skewness is None else skewness

      # Recall, with Z a random variable,
      #   Y := loc + C * F(Z),
      #   F(Z) := Sinh( (Arcsinh(Z) + skewness) * tailweight )
      #   F_0(Z) := Sinh( Arcsinh(Z) * tailweight )
      #   C := 2 * scale / F_0(2)

      # Construct shapes and 'scale' out of the scale_* and loc kwargs.
      # scale_linop is only an intermediary to:
      #  1. get shapes from looking at loc and the two scale args.
      #  2. combine scale_diag with scale_identity_multiplier, which gives us
      #     'scale', which in turn gives us 'C'.
      scale_linop = distribution_util.make_diag_scale(
          loc=loc,
          scale_diag=scale_diag,
          scale_identity_multiplier=scale_identity_multiplier,
          validate_args=False,
          assert_positive=False,
          dtype=dtype)
      batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
          loc, scale_linop)
      # scale_linop.diag_part() is efficient since it is a diag type linop.
      scale_diag_part = scale_linop.diag_part()
      dtype = scale_diag_part.dtype

      if distribution is None:
        distribution = normal.Normal(
            loc=tf.zeros([], dtype=dtype),
            scale=tf.ones([], dtype=dtype),
            allow_nan_stats=allow_nan_stats)
      else:
        asserts = distribution_util.maybe_check_scalar_distribution(
            distribution, dtype, validate_args)
        if asserts:
          scale_diag_part = control_flow_ops.with_dependencies(
              asserts, scale_diag_part)

      # Make the SAS bijector, 'F'.
      skewness = tf.convert_to_tensor(skewness, dtype=dtype, name="skewness")
      tailweight = tf.convert_to_tensor(
          tailweight, dtype=dtype, name="tailweight")
      f = sinh_arcsinh_bijector.SinhArcsinh(
          skewness=skewness, tailweight=tailweight)
      if has_default_skewness:
        f_noskew = f
      else:
        f_noskew = sinh_arcsinh_bijector.SinhArcsinh(
            skewness=skewness.dtype.as_numpy_dtype(0.),
            tailweight=tailweight)

      # Make the Affine bijector, Z --> loc + C * Z.
      c = 2 * scale_diag_part / f_noskew.forward(
          tf.convert_to_tensor(2, dtype=dtype))
      affine = affine_bijector.Affine(
          shift=loc, scale_diag=c, validate_args=validate_args)

      bijector = chain_bijector.Chain([affine, f])

      super(VectorSinhArcsinhDiag, self).__init__(
          distribution=distribution,
          bijector=bijector,
          batch_shape=batch_shape,
          event_shape=event_shape,
          validate_args=validate_args,
          name=name)
    self._parameters = parameters
    self._loc = loc
    self._scale = scale_linop
    self._tailweight = tailweight
    self._skewness = skewness
Example #13
    def test_static_loc_static_scale_non_matching_event_size_raises(self):
        loc = tf.constant(np.zeros((2, 4)))
        scale = tf.linalg.LinearOperatorDiag(np.ones((5, 1, 3)))
        with self.assertRaisesRegexp(ValueError, "could not be broadcast"):
            distribution_util.shapes_from_loc_and_scale(loc, scale)