Esempio n. 1
0
def interpolate_scale(grid, scale):
  """Builds one combined scale operator per quadrature grid point.

  For every index `q` along the last axis of `grid`, each entry `scale[k]` is
  passed through `linop_scale` together with `grid[..., k, q]`, and the
  results are merged with `linop_add_lib.add_operators`.

  Args:
    grid: Tensor-like object indexed as `grid[..., k, q]`. Its static shape
      must have rank at least 1 and a known last dimension.
    scale: Sequence of exactly two scale operators.

  Returns:
    A Python list with one entry per index of the last axis of `grid`; each
    entry is the first operator returned by `add_operators`.

  Raises:
    NotImplementedError: If `scale` does not hold exactly two entries.
    ValueError: If the last dimension of `grid` is not statically known.
  """
  num_scales = len(scale)
  if num_scales != 2:
    raise NotImplementedError("Currently only bimixtures are supported; "
                              "len(scale)={} is not 2.".format(num_scales))

  last_dim = grid.shape.with_rank_at_least(1)[-1]
  deg = tensor_shape.dimension_value(last_dim)
  if deg is None:
    raise ValueError("Num quadrature grid points must be known prior "
                     "to graph execution.")

  with ops.name_scope("interpolate_scale", values=[grid]):
    interpolated = []
    for q in range(deg):
      # Weight every endpoint scale by its grid entry for this point.
      weighted = [linop_scale(grid[..., k, q], s)
                  for k, s in enumerate(scale)]
      interpolated.append(linop_add_lib.add_operators(weighted)[0])
    return interpolated
def interpolate_scale(grid, scale):
  """Combines two scale operators at each quadrature grid point.

  The last axis of `grid` enumerates grid points. For point `q`, the pair
  `(grid[..., k, q], scale[k])` is handed to `linop_scale` for each `k`, and
  the resulting operators are summed via `linop_add_lib.add_operators`.

  Args:
    grid: Tensor-like object indexed as `grid[..., k, q]`; its static shape
      needs rank >= 1 and a known size along the last axis.
    scale: Sequence holding exactly two scale operators.

  Returns:
    List of operators, one per grid point, each taken as element `[0]` of the
    `add_operators` result.

  Raises:
    NotImplementedError: If `len(scale)` is not 2.
    ValueError: If the number of grid points is not statically known.
  """
  n_components = len(scale)
  if n_components != 2:
    raise NotImplementedError("Currently only bimixtures are supported; "
                              "len(scale)={} is not 2.".format(n_components))

  static_shape = grid.shape.with_rank_at_least(1)
  deg = tensor_shape.dimension_value(static_shape[-1])
  if deg is None:
    raise ValueError("Num quadrature grid points must be known prior "
                     "to graph execution.")

  with ops.name_scope("interpolate_scale", values=[grid]):
    per_point = []
    for q in range(deg):
      terms = []
      for k, s in enumerate(scale):
        terms.append(linop_scale(grid[..., k, q], s))
      # `add_operators` returns a list; the first element is the one used.
      per_point.append(linop_add_lib.add_operators(terms)[0])
    return per_point
Esempio n. 3
0
    def _mean_of_covariance_given_quadrature_component(self, diag_only):
        """Accumulates the probability-weighted `S S^T` terms, `E[Cov(Z|V)]`.

        Every entry of `self.interpolated_affine` contributes the "square" of
        its scale, weighted by the matching slice of
        `self.mixture_distribution.probs`. The accumulated terms are then
        multiplied by `self.distribution.variance()`.

        Args:
          diag_only: If truthy, only the diagonal is computed and returned.

        Returns:
          With `diag_only`, the diagonal as produced by the `add` helper (or
          the scaled-identity term times ones of the event shape when that is
          the only contribution). Otherwise the dense sum of the accumulated
          operators, or `None` when nothing was accumulated.
        """
        mix_probs = self.mixture_distribution.probs

        # Contributions are bucketed by structure (scaled-identity, diagonal,
        # full) and only merged at the very end.
        identity_term = None
        diag_term = None
        full_term = None

        for idx, affine in enumerate(self.interpolated_affine):
            # Read `scale` exactly once in case the property has side effects.
            op = affine.scale

            if op is None or isinstance(
                    op, linop_identity_lib.LinearOperatorIdentity):
                identity_term = add(identity_term,
                                    mix_probs[..., idx, array_ops.newaxis])
                continue

            if isinstance(op,
                          linop_identity_lib.LinearOperatorScaledIdentity):
                weighted = (mix_probs[..., idx, array_ops.newaxis] *
                            math_ops.square(op.multiplier))
                identity_term = add(identity_term, weighted)
                continue

            if isinstance(op, linop_diag_lib.LinearOperatorDiag):
                weighted = (mix_probs[..., idx, array_ops.newaxis] *
                            math_ops.square(op.diag_part()))
                diag_term = add(diag_term, weighted)
                continue

            # Any other operator: form the dense `S S^T` product.
            weighted = (
                mix_probs[..., idx, array_ops.newaxis, array_ops.newaxis] *
                op.matmul(op.to_dense(), adjoint_arg=True))
            if diag_only:
                weighted = array_ops.matrix_diag_part(weighted)
            full_term = add(full_term, weighted)

        # The base distribution may have non-unit variance. Since
        # X ~ iid Law(X_0), `Cov(SX+m) = S Cov(X) S.T = S S.T Diag(Var(X_0))`,
        # so scaling by `Var(X)` (rather than `Cov(X)`) suffices.
        base_variance = self.distribution.variance()
        if identity_term is not None:
            identity_term *= base_variance
        if diag_term is not None:
            diag_term *= base_variance[..., array_ops.newaxis]
        if full_term is not None:
            full_term *= base_variance[..., array_ops.newaxis]

        if diag_only:
            # Only the diagonal was requested; no operators are built.
            diag_sum = add(diag_term, full_term)
            if diag_sum is not None or identity_term is None:
                return add(diag_sum, identity_term)
            ones = array_ops.ones(self.event_shape_tensor(),
                                  dtype=self.dtype)
            return identity_term[..., array_ops.newaxis] * ones

        # `None` means positive-definiteness of the result is unknown.
        endpoints_pd = all(
            endpoint.scale.is_positive_definite
            for endpoint in self.endpoint_affine)
        is_positive_definite = True if endpoints_pd else None

        summands = []
        if diag_term is not None:
            diag_op = linop_diag_lib.LinearOperatorDiag(
                diag=diag_term, is_positive_definite=is_positive_definite)
            summands.append(diag_op)
        if full_term is not None:
            full_op = linop_full_lib.LinearOperatorFullMatrix(
                matrix=full_term, is_positive_definite=is_positive_definite)
            summands.append(full_op)
        if identity_term is not None:
            identity_op = linop_identity_lib.LinearOperatorScaledIdentity(
                num_rows=self.event_shape_tensor()[0],
                multiplier=identity_term,
                is_positive_definite=is_positive_definite)
            summands.append(identity_op)

        if not summands:
            return None
        return linop_add_lib.add_operators(summands)[0].to_dense()
  def _mean_of_covariance_given_quadrature_component(self, diag_only):
    """Accumulates the probability-weighted `S S^T` terms, `E[Cov(Z|V)]`.

    Every entry of `self.interpolated_affine` contributes the "square" of its
    scale, weighted by the matching slice of
    `self.mixture_distribution.probs`. The accumulated terms are then
    multiplied by `self.distribution.variance()`.

    Args:
      diag_only: If truthy, only the diagonal is computed and returned.

    Returns:
      With `diag_only`, the diagonal as produced by the `add` helper (or the
      scaled-identity term times ones of the event shape when that is the only
      contribution). Otherwise the dense sum of the accumulated operators, or
      `None` when nothing was accumulated.
    """
    mix_probs = self.mixture_distribution.probs

    # Contributions are bucketed by structure (scaled-identity, diagonal,
    # full) and only merged at the very end.
    identity_term = None
    diag_term = None
    full_term = None

    for idx, affine in enumerate(self.interpolated_affine):
      # Read `scale` exactly once in case the property has side effects.
      op = affine.scale

      if (op is None
          or isinstance(op, linop_identity_lib.LinearOperatorIdentity)):
        identity_term = add(identity_term,
                            mix_probs[..., idx, array_ops.newaxis])
        continue

      if isinstance(op, linop_identity_lib.LinearOperatorScaledIdentity):
        weighted = (mix_probs[..., idx, array_ops.newaxis] *
                    math_ops.square(op.multiplier))
        identity_term = add(identity_term, weighted)
        continue

      if isinstance(op, linop_diag_lib.LinearOperatorDiag):
        weighted = (mix_probs[..., idx, array_ops.newaxis] *
                    math_ops.square(op.diag_part()))
        diag_term = add(diag_term, weighted)
        continue

      # Any other operator: form the dense `S S^T` product.
      weighted = (mix_probs[..., idx, array_ops.newaxis, array_ops.newaxis] *
                  op.matmul(op.to_dense(), adjoint_arg=True))
      if diag_only:
        weighted = array_ops.matrix_diag_part(weighted)
      full_term = add(full_term, weighted)

    # The base distribution may have non-unit variance. Since
    # X ~ iid Law(X_0), `Cov(SX+m) = S Cov(X) S.T = S S.T Diag(Var(X_0))`,
    # so scaling by `Var(X)` (rather than `Cov(X)`) suffices.
    base_variance = self.distribution.variance()
    if identity_term is not None:
      identity_term *= base_variance
    if diag_term is not None:
      diag_term *= base_variance[..., array_ops.newaxis]
    if full_term is not None:
      full_term *= base_variance[..., array_ops.newaxis]

    if diag_only:
      # Only the diagonal was requested; no operators are built.
      diag_sum = add(diag_term, full_term)
      if diag_sum is not None or identity_term is None:
        return add(diag_sum, identity_term)
      ones = array_ops.ones(self.event_shape_tensor(), dtype=self.dtype)
      return identity_term[..., array_ops.newaxis] * ones

    # `None` means positive-definiteness of the result is unknown.
    endpoints_pd = all(endpoint.scale.is_positive_definite
                       for endpoint in self.endpoint_affine)
    is_positive_definite = True if endpoints_pd else None

    summands = []
    if diag_term is not None:
      diag_op = linop_diag_lib.LinearOperatorDiag(
          diag=diag_term,
          is_positive_definite=is_positive_definite)
      summands.append(diag_op)
    if full_term is not None:
      full_op = linop_full_lib.LinearOperatorFullMatrix(
          matrix=full_term,
          is_positive_definite=is_positive_definite)
      summands.append(full_op)
    if identity_term is not None:
      identity_op = linop_identity_lib.LinearOperatorScaledIdentity(
          num_rows=self.event_shape_tensor()[0],
          multiplier=identity_term,
          is_positive_definite=is_positive_definite)
      summands.append(identity_op)

    if not summands:
      return None
    return linop_add_lib.add_operators(summands)[0].to_dense()
Esempio n. 5
0
def _inverse_block_lower_triangular(block_lower_triangular_operator):
    """Builds the inverse of a `LinearOperatorBlockLowerTriangular`.

    The inverse is assembled recursively from the block identity

    ```none
    |A 0|'  =  |    A'  0|
    |B C|      |-C'BA' C'|
    ```

    in which `'` marks an inverse, `A` is n-by-n, `B` is m-by-n and `C` is
    m-by-m. Multiplying both sides together confirms it:

    ```none
    |A 0||    A'  0|
    |B C||-C'BA' C'|

      = |       AA'   0|
        |BA'-CC'BA' CC'|

      = |I 0|
        |0 I|
    ```

    Args:
      block_lower_triangular_operator: Instance of
        `LinearOperatorBlockLowerTriangular`.

    Returns:
      block_lower_triangular_operator_inverse: Instance of
        `LinearOperatorBlockLowerTriangular`, the inverse of
        `block_lower_triangular_operator`. The `is_non_singular`,
        `is_self_adjoint` and `is_positive_definite` hints are copied from the
        input and `is_square` is set to `True`.
    """
    source = block_lower_triangular_operator
    block_tril = (
        linear_operator_block_lower_triangular
        .LinearOperatorBlockLowerTriangular)
    num_block_rows = len(source.operators)

    if num_block_rows == 1:
        # Base case: a single block is inverted directly.
        only_block_inverse = source.operators[0][0].inverse()
        return block_tril(
            [[only_block_inverse]],
            is_non_singular=source.is_non_singular,
            is_self_adjoint=source.is_self_adjoint,
            is_positive_definite=source.is_positive_definite,
            is_square=True)

    # `A'` from the docstring: wrap every row except the last in a new
    # block-lower-triangular operator and invert that.
    leading_inverse = block_tril(source.operators[:-1]).inverse()

    last_row = source.operators[-1]
    # `C'` from the docstring: inverse of the bottom-right block.
    corner_inverse = last_row[-1].inverse()

    supported_types = tuple(linear_operator_addition.SUPPORTED_OPERATORS)

    # Bottom row of the inverse, `[-C'BA', C']`, where `B` is the last row
    # without its final block. It is built one column partition of `A'` at a
    # time.
    new_last_row = []
    for col in range(num_block_rows - 1):
        # Summands of the `col`-th block of `BA'`; only rows `row >= col` of
        # `A'` are indexed.
        terms = []
        for row in range(col, num_block_rows - 1):
            product = last_row[row].matmul(leading_inverse.operators[row][col])
            if not isinstance(product, supported_types):
                # Types `add_operators` does not list are densified first.
                product = linear_operator_full_matrix.LinearOperatorFullMatrix(
                    product.to_dense())
            terms.append(product)

        reduced = linear_operator_addition.add_operators(terms)
        assert len(reduced) == 1

        # `col`-th block of `-C'BA'`: left-multiply by `C'`, then negate by
        # multiplying with a `-1`-scaled identity.
        scaled = corner_inverse.matmul(reduced[0])
        negation = linear_operator_identity.LinearOperatorScaledIdentity(
            num_rows=corner_inverse.domain_dimension_tensor(),
            multiplier=math_ops.cast(-1, dtype=scaled.dtype))
        new_last_row.append(negation.matmul(scaled))

    # `C'` closes the bottom row of the inverse.
    new_last_row.append(corner_inverse)

    return block_tril(
        leading_inverse.operators + [new_last_row],
        is_non_singular=source.is_non_singular,
        is_self_adjoint=source.is_self_adjoint,
        is_positive_definite=source.is_positive_definite,
        is_square=True)
Esempio n. 6
0
def mvn_conjugate_linear_update(prior_scale,
                                linear_transformation,
                                likelihood_scale,
                                observation,
                                prior_mean=None,
                                name=None):
    """Computes the conjugate normal posterior of a Bayesian linear regression.

    The assumed model is:

    ```
    latent ~ MVN(loc=prior_mean, scale=prior_scale)
    observation ~ MVN(loc=linear_transformation.matvec(latent),
                      scale=likelihood_scale)
    ```

    In Bayesian linear regression the `latent` plays the role of the weights
    and `linear_transformation` is the design matrix.

    The multivariate normal posterior `p(latent | observation)` is computed
    with `LinearOperator`s, so that computations can be performed efficiently
    when the matrices involved have special structure.

    Args:
      prior_scale: Instance of `tf.linalg.LinearOperator` of shape
        `[..., num_features, num_features]`, specifying a scale matrix (any
        matrix `L` such that `LL' = Q` where `Q` is the covariance) for the
        prior on regression weights. May optionally be a float `Tensor`.
      linear_transformation: Instance of `tf.linalg.LinearOperator` of shape
        `[..., num_outputs, num_features]`, specifying a transformation of
        the latent values. May optionally be a float `Tensor`.
      likelihood_scale: Instance of `tf.linalg.LinearOperator` of shape
        `[..., num_outputs, num_outputs]`, specifying a scale matrix (any
        matrix `L` such that `LL' = Q` where `Q` is the covariance) for the
        likelihood of observed targets. May optionally be a float `Tensor`.
      observation: Float `Tensor` of shape `[..., num_outputs]`, specifying
        the observed values or regression targets.
      prior_mean: Optional float `Tensor` of shape `[..., num_features]`,
        specifying the prior mean. If `None`, the prior mean is assumed to be
        zero and some computation is avoided.
        Default value: `None`.
      name: Optional Python `str` name given to ops created by this function.
        Default value: 'mvn_conjugate_linear_update'.

    Returns:
      posterior_mean: Float `Tensor` of shape `[..., num_features]`, giving
        the mean of the multivariate normal posterior on the latent value.
      posterior_prec: Instance of `tf.linalg.LinearOperator` of shape
        `[..., num_features, num_features]`, giving the posterior precision
        (inverse covariance) matrix.

    #### Mathematical details

    Let the prior precision be denoted by
    `prior_prec = prior_scale.matmul(prior_scale, adjoint_arg=True).inverse()`
    and the likelihood precision by `likelihood_prec = likelihood_scale.matmul(
    likelihood_scale, adjoint_arg=True).inverse()`. Then the posterior
    `p(latent | observation)` is multivariate normal with precision

    ```python
    posterior_prec = (
      linear_transformation.matmul(
        likelihood_prec.matmul(linear_transformation), adjoint=True) +
      prior_prec)
    ```

    and mean

    ```python
    posterior_mean = posterior_prec.solvevec(
      linear_transformation.matvec(
        likelihood_prec.matvec(observation), adjoint=True) +
      prior_prec.matvec(prior_mean))
    ```

    """
    with tf.name_scope(name or 'mvn_conjugate_linear_update'):

        def _as_linop(value):
            # Anything exposing `solve` is treated as a LinearOperator already;
            # everything else is wrapped as a dense full-matrix operator.
            if hasattr(value, 'solve'):
                return value
            return tf.linalg.LinearOperatorFullMatrix(value)

        prior_scale = _as_linop(prior_scale)
        likelihood_scale = _as_linop(likelihood_scale)
        linear_transformation = _as_linop(linear_transformation)

        observation = tf.convert_to_tensor(observation, name='observation')
        if prior_mean is not None:
            prior_mean = tf.convert_to_tensor(prior_mean, name='prior_mean')

        # Prior precision: the inverse scale times itself, with the left
        # factor adjointed.
        inv_prior_scale = prior_scale.inverse()
        prior_prec = inv_prior_scale.matmul(inv_prior_scale, adjoint=True)

        # Evidence precision `X.T @ Q^-1 @ X`, where
        #   Q = likelihood covariance (`likelihood_scale @ likelihood_scale.T`)
        #   X = linear transformation.
        # Solving against the scale first yields a factor of that product.
        whitened_transform = likelihood_scale.solve(linear_transformation)
        evidence_prec = whitened_transform.matmul(
            whitened_transform, adjoint=True)

        # First try a structure-preserving sum of prior and evidence terms.
        try:
            # Unregistered linops make `add_operators` raise a TypeError.
            reduced = linear_operator_addition.add_operators(
                [prior_prec, evidence_prec])
            if len(reduced) > 1:
                raise TypeError(
                    'LinearOperator addition failed to reduce terms.')
            posterior_prec = reduced[0]
        except TypeError:
            # Fallback: add the dense matrices directly.
            dense_sum = prior_prec.to_dense() + evidence_prec.to_dense()
            posterior_prec = tf.linalg.LinearOperatorFullMatrix(dense_sum)

        # Hint to LinearOperator that precision matrices are always PSD.
        # pylint: disable=protected-access
        posterior_prec._is_positive_definite = True
        posterior_prec._is_self_adjoint = True
        posterior_prec._is_square = True
        # pylint: enable=protected-access

        # The posterior mean combines the observation term with the (optional)
        # prior-mean term and then solves against the posterior precision.
        whitened_observation = likelihood_scale.solvevec(observation)
        prior_plus_observed_value = whitened_transform.matvec(
            whitened_observation, adjoint=True)
        if prior_mean is not None:
            prior_plus_observed_value += prior_prec.matvec(prior_mean)
        posterior_mean = posterior_prec.solvevec(prior_plus_observed_value)

        return posterior_mean, posterior_prec