Example #1
    def testTriL(self):
        with self.cached_session():
            shift = np.array([-1, 0, 1], dtype=np.float32)
            tril = np.array([[[3, 0, 0], [2, -1, 0], [3, 2, 1]],
                             [[2, 0, 0], [3, -2, 0], [4, 3, 2]]],
                            dtype=np.float32)
            scale = linalg.LinearOperatorLowerTriangular(tril,
                                                         is_non_singular=True)
            affine = AffineLinearOperator(shift=shift,
                                          scale=scale,
                                          validate_args=True)

            x = np.array([[[1, 0, -1], [2, 3, 4]], [[4, 1, -7], [6, 9, 8]]],
                         dtype=np.float32)
            # If we made the bijector do x*A+b then this would be simplified to:
            # y = np.matmul(x, tril) + shift.
            y = np.squeeze(np.matmul(tril, np.expand_dims(x, -1)), -1) + shift
            ildj = -np.sum(
                np.log(np.abs(np.diagonal(tril, axis1=-2, axis2=-1))))

            self.assertEqual(affine.name, "affine_linear_operator")
            self.assertAllClose(y, affine.forward(x).eval())
            self.assertAllClose(x, affine.inverse(y).eval())
            self.assertAllClose(
                ildj,
                affine.inverse_log_det_jacobian(y, event_ndims=2).eval())
            self.assertAllClose(
                -affine.inverse_log_det_jacobian(y, event_ndims=2).eval(),
                affine.forward_log_det_jacobian(x, event_ndims=2).eval())
  def testTriL(self):
    with self.test_session():
      shift = np.array([-1, 0, 1], dtype=np.float32)
      tril = np.array([[[1, 0, 0],
                        [2, -1, 0],
                        [3, 2, 1]],
                       [[2, 0, 0],
                        [3, -2, 0],
                        [4, 3, 2]]],
                      dtype=np.float32)
      scale = linalg.LinearOperatorLowerTriangular(tril, is_non_singular=True)
      affine = AffineLinearOperator(
          shift=shift, scale=scale, validate_args=True)

      x = np.array([[[1, 0, -1],
                     [2, 3, 4]],
                    [[4, 1, -7],
                     [6, 9, 8]]],
                   dtype=np.float32)
      # If we made the bijector do x*A+b then this would be simplified to:
      # y = np.matmul(x, tril) + shift.
      y = np.squeeze(np.matmul(tril, np.expand_dims(x, -1)), -1) + shift
      ildj = -np.sum(np.log(np.abs(np.diagonal(
          tril, axis1=-2, axis2=-1))),
                     axis=-1)

      self.assertEqual(affine.name, "affine_linear_operator")
      self.assertAllClose(y, affine.forward(x).eval())
      self.assertAllClose(x, affine.inverse(y).eval())
      self.assertAllClose(ildj, affine.inverse_log_det_jacobian(y).eval())
      self.assertAllClose(-affine.inverse_log_det_jacobian(y).eval(),
                          affine.forward_log_det_jacobian(x).eval())
    def testIdentity(self):
        with self.test_session():
            affine = AffineLinearOperator(validate_args=True)
            x = np.array([[1, 0, -1], [2, 3, 4]], dtype=np.float32)
            y = x
            ildj = 0.

            self.assertEqual(affine.name, "affine_linear_operator")
            self.assertAllClose(y, affine.forward(x).eval())
            self.assertAllClose(x, affine.inverse(y).eval())
            self.assertAllClose(ildj,
                                affine.inverse_log_det_jacobian(y).eval())
            self.assertAllClose(-affine.inverse_log_det_jacobian(y).eval(),
                                affine.forward_log_det_jacobian(x).eval())
  def testIdentity(self):
    with self.test_session():
      affine = AffineLinearOperator(
          validate_args=True)
      x = np.array([[1, 0, -1], [2, 3, 4]], dtype=np.float32)
      y = x
      ildj = 0.

      self.assertEqual(affine.name, "affine_linear_operator")
      self.assertAllClose(y, affine.forward(x).eval())
      self.assertAllClose(x, affine.inverse(y).eval())
      self.assertAllClose(ildj, affine.inverse_log_det_jacobian(y).eval())
      self.assertAllClose(-affine.inverse_log_det_jacobian(y).eval(),
                          affine.forward_log_det_jacobian(x).eval())
    def testDiag(self):
        with self.test_session():
            shift = np.array([-1, 0, 1], dtype=np.float32)
            diag = np.array([[1, 2, 3], [2, 5, 6]], dtype=np.float32)
            scale = linalg.LinearOperatorDiag(diag, is_non_singular=True)
            affine = AffineLinearOperator(shift=shift,
                                          scale=scale,
                                          validate_args=True)

            x = np.array([[1, 0, -1], [2, 3, 4]], dtype=np.float32)
            y = diag * x + shift
            ildj = -np.sum(np.log(np.abs(diag)), axis=-1)

            self.assertEqual(affine.name, "affine_linear_operator")
            self.assertAllClose(y, affine.forward(x).eval())
            self.assertAllClose(x, affine.inverse(y).eval())
            self.assertAllClose(ildj,
                                affine.inverse_log_det_jacobian(y).eval())
            self.assertAllClose(-affine.inverse_log_det_jacobian(y).eval(),
                                affine.forward_log_det_jacobian(x).eval())
  def testDiag(self):
    with self.test_session():
      shift = np.array([-1, 0, 1], dtype=np.float32)
      diag = np.array([[1, 2, 3],
                       [2, 5, 6]], dtype=np.float32)
      scale = linalg.LinearOperatorDiag(diag, is_non_singular=True)
      affine = AffineLinearOperator(
          shift=shift, scale=scale, validate_args=True)

      x = np.array([[1, 0, -1], [2, 3, 4]], dtype=np.float32)
      y = diag * x + shift
      ildj = -np.sum(np.log(np.abs(diag)), axis=-1)

      self.assertEqual(affine.name, "affine_linear_operator")
      self.assertAllClose(y, affine.forward(x).eval())
      self.assertAllClose(x, affine.inverse(y).eval())
      self.assertAllClose(ildj, affine.inverse_log_det_jacobian(y).eval())
      self.assertAllClose(-affine.inverse_log_det_jacobian(y).eval(),
                          affine.forward_log_det_jacobian(x).eval())
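The tests above all hinge on the convention noted in their comments: `AffineLinearOperator` applies the scale matrix on the left, `y = scale @ x + shift`, rather than `x @ scale + shift`. The following minimal NumPy sketch (not part of the original tests; the values merely mirror the single-matrix case) spells out that convention and the corresponding inverse log-det-Jacobian for a lower-triangular scale.

```python
import numpy as np

shift = np.array([-1, 0, 1], dtype=np.float32)
tril = np.array([[1, 0, 0],
                 [2, -1, 0],
                 [3, 2, 1]], dtype=np.float32)
x = np.array([1, 0, -1], dtype=np.float32)

y = tril @ x + shift                            # scale @ x + shift, not x @ scale
x_back = np.linalg.solve(tril, y - shift)       # inverse transform recovers x
ildj = -np.sum(np.log(np.abs(np.diag(tril))))   # -log|det(scale)| via the diagonal

print(y, x_back, ildj)
```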
Example #7
    def __init__(self,
                 mix_loc,
                 temperature,
                 distribution,
                 loc=None,
                 scale=None,
                 quadrature_size=8,
                 quadrature_fn=quadrature_scheme_softmaxnormal_quantiles,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="VectorDiffeomixture"):
        """Constructs the VectorDiffeomixture on `R^d`.

    The vector diffeomixture (VDM) approximates the compound distribution

    ```none
    p(x) = int p(x | z) p(z) dz,
    where z is in the K-simplex, and
    p(x | z) := p(x | loc=sum_k z[k] loc[k], scale=sum_k z[k] scale[k])
    ```

    Args:
      mix_loc: `float`-like `Tensor` with shape `[b1, ..., bB, K-1]`.
        In terms of samples, larger `mix_loc[..., k]` ==>
        `Z` is more likely to put more weight on its `kth` component.
      temperature: `float`-like `Tensor`. Broadcastable with `mix_loc`.
        In terms of samples, smaller `temperature` means one component is more
        likely to dominate.  I.e., smaller `temperature` makes the VDM look more
        like a standard mixture of `K` components.
      distribution: `tf.Distribution`-like instance. Distribution from which `d`
        iid samples are used as input to the selected affine transformation.
        Must be a scalar-batch, scalar-event distribution.  Typically
        `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is
        a function of non-trainable parameters. WARNING: If you backprop through
        a VectorDiffeomixture sample and the `distribution` is not
        `FULLY_REPARAMETERIZED` yet is a function of trainable variables, then
        the gradient will be incorrect!
      loc: Length-`K` list of `float`-type `Tensor`s. The `k`-th element
        represents the `shift` used for the `k`-th affine transformation.  If
        the `k`-th item is `None`, `loc` is implicitly `0`.  When specified,
        must have shape `[B1, ..., Bb, d]` where `b >= 0` and `d` is the event
        size.
      scale: Length-`K` list of `LinearOperator`s. Each should be
        positive-definite and operate on a `d`-dimensional vector space. The
        `k`-th element represents the `scale` used for the `k`-th affine
        transformation. `LinearOperator`s must have shape `[B1, ..., Bb, d, d]`,
        `b >= 0`, i.e., characterizes `b`-batches of `d x d` matrices.
      quadrature_size: Python `int` scalar representing number of
        quadrature points.  Larger `quadrature_size` means `q_N(x)` better
        approximates `p(x)`.
      quadrature_fn: Python callable taking `normal_loc`, `normal_scale`,
        `quadrature_size`, `validate_args` and returning `tuple(grid, probs)`
        representing the SoftmaxNormal grid and corresponding normalized weight.
        Default value: `quadrature_scheme_softmaxnormal_quantiles`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      ValueError: if `not scale or len(scale) < 2`.
      ValueError: if `len(loc) != len(scale)`
      ValueError: if `quadrature_grid_and_probs is not None` and
        `len(quadrature_grid_and_probs[0]) != len(quadrature_grid_and_probs[1])`
      ValueError: if `validate_args` and any not scale.is_positive_definite.
      TypeError: if any scale.dtype != scale[0].dtype.
      TypeError: if any loc.dtype != scale[0].dtype.
      NotImplementedError: if `len(scale) != 2`.
      ValueError: if `not distribution.is_scalar_batch`.
      ValueError: if `not distribution.is_scalar_event`.
    """
        parameters = dict(locals())
        with ops.name_scope(name, values=[mix_loc, temperature]) as name:
            if not scale or len(scale) < 2:
                raise ValueError(
                    "Must specify list (or list-like object) of scale "
                    "LinearOperators, one for each component with "
                    "num_component >= 2.")

            if loc is None:
                loc = [None] * len(scale)

            if len(loc) != len(scale):
                raise ValueError("loc/scale must be same-length lists "
                                 "(or same-length list-like objects).")

            dtype = scale[0].dtype.base_dtype

            loc = [
                ops.convert_to_tensor(
                    loc_, dtype=dtype, name="loc{}".format(k))
                if loc_ is not None else None for k, loc_ in enumerate(loc)
            ]

            for k, scale_ in enumerate(scale):
                if validate_args and not scale_.is_positive_definite:
                    raise ValueError(
                        "scale[{}].is_positive_definite = {} != True".format(
                            k, scale_.is_positive_definite))
                if scale_.dtype.base_dtype != dtype:
                    raise TypeError(
                        "dtype mismatch; scale[{}].base_dtype=\"{}\" != \"{}\""
                        .format(k, scale_.dtype.base_dtype.name, dtype.name))

            self._endpoint_affine = [
                AffineLinearOperator(shift=loc_,
                                     scale=scale_,
                                     validate_args=validate_args,
                                     name="endpoint_affine_{}".format(k))
                for k, (loc_, scale_) in enumerate(zip(loc, scale))
            ]

            # TODO(jvdillon): Remove once we support k-mixtures.
            # We make this assertion here because otherwise `grid` would need to be a
            # vector not a scalar.
            if len(scale) != 2:
                raise NotImplementedError(
                    "Currently only bimixtures are supported; "
                    "len(scale)={} is not 2.".format(len(scale)))

            mix_loc = ops.convert_to_tensor(mix_loc,
                                            dtype=dtype,
                                            name="mix_loc")
            temperature = ops.convert_to_tensor(temperature,
                                                dtype=dtype,
                                                name="temperature")
            self._grid, probs = tuple(
                quadrature_fn(mix_loc / temperature, 1. / temperature,
                              quadrature_size, validate_args))

            # Note: by creating the logits as `log(prob)` we ensure that
            # `self.mixture_distribution.logits` is equivalent to
            # `math_ops.log(self.mixture_distribution.probs)`.
            self._mixture_distribution = categorical_lib.Categorical(
                logits=math_ops.log(probs),
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats)

            asserts = distribution_util.maybe_check_scalar_distribution(
                distribution, dtype, validate_args)
            if asserts:
                self._grid = control_flow_ops.with_dependencies(
                    asserts, self._grid)
            self._distribution = distribution

            self._interpolated_affine = [
                AffineLinearOperator(shift=loc_,
                                     scale=scale_,
                                     validate_args=validate_args,
                                     name="interpolated_affine_{}".format(k))
                for k, (loc_, scale_) in enumerate(
                    zip(interpolate_loc(self._grid, loc),
                        interpolate_scale(self._grid, scale)))
            ]

            [
                self._batch_shape_,
                self._batch_shape_tensor_,
                self._event_shape_,
                self._event_shape_tensor_,
            ] = determine_batch_event_shapes(self._grid, self._endpoint_affine)

            super(VectorDiffeomixture, self).__init__(
                dtype=dtype,
                # We hard-code `FULLY_REPARAMETERIZED` because when
                # `validate_args=True` we verify that indeed
                # `distribution.reparameterization_type == FULLY_REPARAMETERIZED`. A
                # distribution which is a function of only non-trainable parameters
                # also implies we can use `FULLY_REPARAMETERIZED`. However, we cannot
                # easily test for that possibility thus we use `validate_args=False`
                # as a "back-door" to allow users a way to use non
                # `FULLY_REPARAMETERIZED` distribution. In such cases IT IS THE USERS
                # RESPONSIBILITY to verify that the base distribution is a function of
                # non-trainable parameters.
                reparameterization_type=distribution_lib.FULLY_REPARAMETERIZED,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats,
                parameters=parameters,
                graph_parents=(
                    distribution._graph_parents  # pylint: disable=protected-access
                    + [loc_ for loc_ in loc if loc_ is not None] +
                    [p for scale_ in scale for p in scale_.graph_parents]),
                name=name)
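The docstring above describes how the constructor's arguments fit together (K affine components, a SoftmaxNormal selector driven by `mix_loc` and `temperature`, a scalar base `distribution`). Below is a minimal usage sketch for that constructor; the `tf.contrib.distributions` and `tf.linalg` import paths and the specific numbers are assumptions about a contrib-era TensorFlow 1.x build, not taken from this source.

```python
import numpy as np
import tensorflow as tf

tfd = tf.contrib.distributions  # assumption: contrib-era TF 1.x

dims = 3
vdm = tfd.VectorDiffeomixture(
    mix_loc=[[0.], [1.]],        # batch of two mixtures, K - 1 = 1
    temperature=[1.],
    distribution=tfd.Normal(loc=0., scale=1.),  # scalar-batch, scalar-event
    loc=[
        None,                     # component 0: shift implicitly zero
        np.float32([2.] * dims),  # component 1: shift every coordinate by 2
    ],
    scale=[
        tf.linalg.LinearOperatorScaledIdentity(
            num_rows=dims, multiplier=np.float32(1.1),
            is_positive_definite=True),
        tf.linalg.LinearOperatorDiag(
            np.float32([1., 2., 3.]), is_positive_definite=True),
    ],
    validate_args=True)
```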
    def __init__(self,
                 mix_loc,
                 mix_scale,
                 distribution,
                 loc=None,
                 scale=None,
                 quadrature_grid_and_probs=None,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="VectorDiffeomixture"):
        """Constructs the VectorDiffeomixture on `R**k`.

    Args:
      mix_loc: `float`-like `Tensor`. Represents the `location` parameter of the
        SoftmaxNormal used for selecting one of the `K` affine transformations.
      mix_scale: `float`-like `Tensor`. Represents the `scale` parameter of the
        SoftmaxNormal used for selecting one of the `K` affine transformations.
      distribution: `tf.Distribution`-like instance. Distribution from which `d`
        iid samples are used as input to the selected affine transformation.
        Must be a scalar-batch, scalar-event distribution.  Typically
        `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is
        a function of non-trainable parameters. WARNING: If you backprop through
        a VectorDiffeomixture sample and the `distribution` is not
        `FULLY_REPARAMETERIZED` yet is a function of trainable variables, then
        the gradient will be incorrect!
      loc: Length-`K` list of `float`-type `Tensor`s. The `k`-th element
        represents the `shift` used for the `k`-th affine transformation.  If
        the `k`-th item is `None`, `loc` is implicitly `0`.  When specified,
        must have shape `[B1, ..., Bb, d]` where `b >= 0` and `d` is the event
        size.
      scale: Length-`K` list of `LinearOperator`s. Each should be
        positive-definite and operate on a `d`-dimensional vector space. The
        `k`-th element represents the `scale` used for the `k`-th affine
        transformation. `LinearOperator`s must have shape `[B1, ..., Bb, d, d]`,
        `b >= 0`, i.e., characterizes `b`-batches of `d x d` matrices.
      quadrature_grid_and_probs: Python pair of `list`-like objects representing
        the sample points and the corresponding (possibly normalized) weight.
        When `None`, defaults to: `np.polynomial.hermite.hermgauss(deg=8)`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      ValueError: if `not scale or len(scale) < 2`.
      ValueError: if `len(loc) != len(scale)`
      ValueError: if `quadrature_grid_and_probs is not None` and
        `len(quadrature_grid_and_probs[0]) != len(quadrature_grid_and_probs[1])`
      ValueError: if `validate_args` and any not scale.is_positive_definite.
      TypeError: if any scale.dtype != scale[0].dtype.
      TypeError: if any loc.dtype != scale[0].dtype.
      NotImplementedError: if `len(scale) != 2`.
      ValueError: if `not distribution.is_scalar_batch`.
      ValueError: if `not distribution.is_scalar_event`.
    """
        parameters = locals()
        with ops.name_scope(name, values=[mix_loc, mix_scale]):
            if not scale or len(scale) < 2:
                raise ValueError(
                    "Must specify list (or list-like object) of scale "
                    "LinearOperators, one for each component with "
                    "num_component >= 2.")

            if loc is None:
                loc = [None] * len(scale)

            if len(loc) != len(scale):
                raise ValueError("loc/scale must be same-length lists "
                                 "(or same-length list-like objects).")

            dtype = scale[0].dtype.base_dtype

            loc = [
                ops.convert_to_tensor(
                    loc_, dtype=dtype, name="loc{}".format(k))
                if loc_ is not None else None for k, loc_ in enumerate(loc)
            ]

            for k, scale_ in enumerate(scale):
                if validate_args and not scale_.is_positive_definite:
                    raise ValueError(
                        "scale[{}].is_positive_definite = {} != True".format(
                            k, scale_.is_positive_definite))
                if scale_.dtype.base_dtype != dtype:
                    raise TypeError(
                        "dtype mismatch; scale[{}].base_dtype=\"{}\" != \"{}\""
                        .format(k, scale_.dtype.base_dtype.name, dtype.name))

            self._endpoint_affine = [
                AffineLinearOperator(shift=loc_,
                                     scale=scale_,
                                     event_ndims=1,
                                     validate_args=validate_args,
                                     name="endpoint_affine_{}".format(k))
                for k, (loc_, scale_) in enumerate(zip(loc, scale))
            ]

            # TODO(jvdillon): Remove once we support k-mixtures.
            # We make this assertion here because otherwise `grid` would need to be a
            # vector not a scalar.
            if len(scale) != 2:
                raise NotImplementedError(
                    "Currently only bimixtures are supported; "
                    "len(scale)={} is not 2.".format(len(scale)))

            if quadrature_grid_and_probs is None:
                grid, probs = np.polynomial.hermite.hermgauss(deg=8)
            else:
                grid, probs = tuple(quadrature_grid_and_probs)
                if len(grid) != len(probs):
                    raise ValueError(
                        "`quadrature_grid_and_probs` must be a `tuple` of "
                        "same-length list-like objects")
            grid = grid.astype(dtype.as_numpy_dtype)
            probs = probs.astype(dtype.as_numpy_dtype)
            probs /= np.linalg.norm(probs, ord=1)
            self._quadrature_grid = grid
            self._quadrature_probs = probs

            # Note: by creating the logits as `log(prob)` we ensure that
            # `self.mixture_distribution.logits` is equivalent to
            # `math_ops.log(self.mixture_distribution.probs)`.
            self._mixture_distribution = categorical_lib.Categorical(
                logits=np.log(probs),
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats)

            mix_loc = maybe_check_mix_param(mix_loc, "mix_loc", dtype,
                                            validate_args)
            mix_scale = maybe_check_mix_param(mix_scale, "mix_scale", dtype,
                                              validate_args)

            asserts = distribution_util.maybe_check_scalar_distribution(
                distribution, dtype, validate_args)
            if asserts:
                mix_loc = control_flow_ops.with_dependencies(asserts, mix_loc)
            self._distribution = distribution

            # shape: [B, deg]
            self._interpolate_weight = math_ops.sigmoid(mix_loc + np.sqrt(2.) *
                                                        mix_scale * grid)

            self._interpolated_affine = [
                AffineLinearOperator(shift=loc_,
                                     scale=scale_,
                                     event_ndims=1,
                                     validate_args=validate_args,
                                     name="interpolated_affine_{}".format(k))
                for k, (loc_, scale_) in enumerate(
                    zip(
                        interpolate_loc(len(self._quadrature_grid),
                                        self._interpolate_weight, loc),
                        interpolate_scale(len(self._quadrature_grid),
                                          self._interpolate_weight, scale)))
            ]

            self._batch_shape_, self._event_shape_ = determine_batch_event_shapes(
                mix_loc, mix_scale, self._endpoint_affine)

            super(VectorDiffeomixture, self).__init__(
                dtype=dtype,
                # We hard-code `FULLY_REPARAMETERIZED` because when
                # `validate_args=True` we verify that indeed
                # `distribution.reparameterization_type == FULLY_REPARAMETERIZED`. A
                # distribution which is a function of only non-trainable parameters
                # also implies we can use `FULLY_REPARAMETERIZED`. However, we cannot
                # easily test for that possibility thus we use `validate_args=False`
                # as a "back-door" to allow users a way to use non
                # `FULLY_REPARAMETERIZED` distribution. In such cases IT IS THE USERS
                # RESPONSIBILITY to verify that the base distribution is a function of
                # non-trainable parameters.
                reparameterization_type=distribution_lib.FULLY_REPARAMETERIZED,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats,
                parameters=parameters,
                graph_parents=(
                    [mix_loc, mix_scale] + distribution._graph_parents  # pylint: disable=protected-access
                    + [loc_ for loc_ in loc if loc_ is not None] +
                    [p for scale_ in scale for p in scale_.graph_parents]),
                name=name)
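The older constructor above defaults its quadrature to `np.polynomial.hermite.hermgauss(deg=8)`, normalizes the weights to sum to one, and then forms a sigmoid interpolation weight on the grid. The following standalone NumPy sketch (illustrative scalars `mix_loc` and `mix_scale` are not from the source) reproduces just that arithmetic.

```python
import numpy as np

# Default Gauss-Hermite sample points and weights, as in the constructor.
grid, probs = np.polynomial.hermite.hermgauss(deg=8)
probs = probs / np.linalg.norm(probs, ord=1)  # normalize weights to a probability vector

# shape: [deg]; mirrors sigmoid(mix_loc + sqrt(2) * mix_scale * grid)
mix_loc, mix_scale = 0.5, 1.0
interpolate_weight = 1. / (1. + np.exp(-(mix_loc + np.sqrt(2.) * mix_scale * grid)))

print(probs.sum())         # ~1.0
print(interpolate_weight)  # values in (0, 1), one per quadrature point
```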