def interpolate_scale(grid, scale):
  """Builds one interpolated scale operator per quadrature grid point.

  For every index `q` along the last axis of `grid`, each `scale[k]` is
  combined with the weight `grid[..., k, q]` through `linop_scale`, and the
  resulting operators are summed with `linop_add_lib.add_operators`.

  Args:
    grid: Object exposing `shape.with_rank_at_least(1)` and supporting
      `grid[..., k, q]` indexing. The size of its last dimension must be
      statically known.
    scale: Length-2 sequence of scales to interpolate between.

  Returns:
    Python `list` with one entry per grid point; entry `q` is the first
    operator returned by `add_operators` for that grid point.

  Raises:
    NotImplementedError: If `len(scale) != 2`.
    ValueError: If the size of the last dimension of `grid` is not statically
      known.
  """
  num_components = len(scale)
  if num_components != 2:
    message = ("Currently only bimixtures are supported; "
               "len(scale)={} is not 2.")
    raise NotImplementedError(message.format(num_components))

  grid_shape = grid.shape.with_rank_at_least(1)
  deg = tensor_shape.dimension_value(grid_shape[-1])
  if deg is None:
    raise ValueError("Num quadrature grid points must be known prior "
                     "to graph execution.")

  with ops.name_scope("interpolate_scale", values=[grid]):
    interpolated = []
    for q in range(deg):
      # Weight every endpoint scale by its grid value at point `q`.
      weighted = [linop_scale(grid[..., k, q], s) for k, s in enumerate(scale)]
      interpolated.append(linop_add_lib.add_operators(weighted)[0])
    return interpolated
def interpolate_scale(grid, scale):
  """Builds one interpolated scale operator per quadrature grid point.

  For every index `q` along the last axis of `grid`, each `scale[k]` is
  combined with the weight `grid[..., k, q]` through `linop_scale`, and the
  resulting operators are summed with `linop_add_lib.add_operators`.

  Args:
    grid: Object exposing `shape.with_rank_at_least(1)` and supporting
      `grid[..., k, q]` indexing. The size of its last dimension must be
      statically known.
    scale: Length-2 sequence of scales to interpolate between.

  Returns:
    Python `list` with one entry per grid point; entry `q` is the first
    operator returned by `add_operators` for that grid point.

  Raises:
    NotImplementedError: If `len(scale) != 2`.
    ValueError: If the size of the last dimension of `grid` is not statically
      known.
  """
  num_components = len(scale)
  if num_components != 2:
    message = ("Currently only bimixtures are supported; "
               "len(scale)={} is not 2.")
    raise NotImplementedError(message.format(num_components))

  grid_shape = grid.shape.with_rank_at_least(1)
  deg = tensor_shape.dimension_value(grid_shape[-1])
  if deg is None:
    raise ValueError("Num quadrature grid points must be known prior "
                     "to graph execution.")

  with ops.name_scope("interpolate_scale", values=[grid]):
    interpolated = []
    for q in range(deg):
      # Weight every endpoint scale by its grid value at point `q`.
      weighted = [linop_scale(grid[..., k, q], s) for k, s in enumerate(scale)]
      interpolated.append(linop_add_lib.add_operators(weighted)[0])
    return interpolated
def _mean_of_covariance_given_quadrature_component(self, diag_only):
  """Computes `E[Cov(Z|V)]`, optionally restricted to its diagonal.

  Loops over `self.interpolated_affine`; component `k` contributes its scale
  `S_k` as `S_k S_k^T`, weighted by `self.mixture_distribution.probs[..., k]`.
  Contributions are accumulated in three buckets according to the structure
  of the scale (scaled identity, diagonal, full), and each non-empty bucket is
  finally multiplied by `self.distribution.variance()`.

  Args:
    diag_only: If truthy, only diagonal entries are accumulated and returned;
      otherwise the buckets are combined through `LinearOperator`s and
      converted to a dense matrix.

  Returns:
    With `diag_only`: the combination (via `add`) of the diagonal buckets and
    the scaled-identity bucket. When only the scaled-identity bucket is
    populated it is multiplied by a ones tensor of shape
    `self.event_shape_tensor()`.
    Without `diag_only`: `add_operators(...)[0].to_dense()` over the non-empty
    buckets, or `None` if every bucket is empty.
  """
  probs = self.mixture_distribution.probs

  # Running sums for E[Cov(Z|V)], one per structural category: scaled-identity,
  # diagonal, and full. They are only merged at the very end.
  identity_sum = None
  diag_sum = None
  full_sum = None

  for k, affine in enumerate(self.interpolated_affine):
    scale = affine.scale  # Read once, in case the property has side effects.

    if scale is None or isinstance(
        scale, linop_identity_lib.LinearOperatorIdentity):
      # Unit scale: the contribution is just the mixture weight.
      identity_sum = add(identity_sum, probs[..., k, array_ops.newaxis])
      continue

    if isinstance(scale, linop_identity_lib.LinearOperatorScaledIdentity):
      weight = probs[..., k, array_ops.newaxis]
      identity_sum = add(identity_sum,
                         weight * math_ops.square(scale.multiplier))
      continue

    if isinstance(scale, linop_diag_lib.LinearOperatorDiag):
      weight = probs[..., k, array_ops.newaxis]
      diag_sum = add(diag_sum, weight * math_ops.square(scale.diag_part()))
      continue

    # Any other operator: form the weighted `S S^T` explicitly.
    weight = probs[..., k, array_ops.newaxis, array_ops.newaxis]
    contribution = weight * scale.matmul(scale.to_dense(), adjoint_arg=True)
    if diag_only:
      contribution = array_ops.matrix_diag_part(contribution)
    full_sum = add(full_sum, contribution)

  # Account for a base distribution whose variance is not one. Since
  # X ~ iid Law(X_0),
  #   `Cov(SX+m) = S Cov(X) S.T = S S.T Diag(Var(X_0))`.
  # Scaling by `Var(X)` (rather than `Cov(X)`) suffices because X is `d` iid
  # samples from a scalar-event distribution.
  variance = self.distribution.variance()
  if scale_is_set(identity_sum) if False else identity_sum is not None:
    identity_sum *= variance
  if diag_sum is not None:
    diag_sum *= variance[..., array_ops.newaxis]
  if full_sum is not None:
    full_sum *= variance[..., array_ops.newaxis]

  if diag_only:
    # Only the diagonal is wanted, so no operators need to be built.
    partial = add(diag_sum, full_sum)
    if partial is not None or identity_sum is None:
      return add(partial, identity_sum)
    # Only the scaled-identity bucket is populated: expand it to event shape.
    ones = array_ops.ones(self.event_shape_tensor(), dtype=self.dtype)
    return identity_sum[..., array_ops.newaxis] * ones

  # `None` indicates we don't know if the result is positive-definite.
  is_positive_definite = all(
      affine.scale.is_positive_definite
      for affine in self.endpoint_affine) or None

  summands = []
  if diag_sum is not None:
    summands.append(
        linop_diag_lib.LinearOperatorDiag(
            diag=diag_sum, is_positive_definite=is_positive_definite))
  if full_sum is not None:
    summands.append(
        linop_full_lib.LinearOperatorFullMatrix(
            matrix=full_sum, is_positive_definite=is_positive_definite))
  if identity_sum is not None:
    summands.append(
        linop_identity_lib.LinearOperatorScaledIdentity(
            num_rows=self.event_shape_tensor()[0],
            multiplier=identity_sum,
            is_positive_definite=is_positive_definite))

  if not summands:
    return None
  return linop_add_lib.add_operators(summands)[0].to_dense()
def _mean_of_covariance_given_quadrature_component(self, diag_only):
  """Computes `E[Cov(Z|V)]`, optionally restricted to its diagonal.

  Loops over `self.interpolated_affine`; component `k` contributes its scale
  `S_k` as `S_k S_k^T`, weighted by `self.mixture_distribution.probs[..., k]`.
  Contributions are accumulated in three buckets according to the structure
  of the scale (scaled identity, diagonal, full), and each non-empty bucket is
  finally multiplied by `self.distribution.variance()`.

  Args:
    diag_only: If truthy, only diagonal entries are accumulated and returned;
      otherwise the buckets are combined through `LinearOperator`s and
      converted to a dense matrix.

  Returns:
    With `diag_only`: the combination (via `add`) of the diagonal buckets and
    the scaled-identity bucket. When only the scaled-identity bucket is
    populated it is multiplied by a ones tensor of shape
    `self.event_shape_tensor()`.
    Without `diag_only`: `add_operators(...)[0].to_dense()` over the non-empty
    buckets, or `None` if every bucket is empty.
  """
  probs = self.mixture_distribution.probs

  # Running sums for E[Cov(Z|V)], one per structural category: scaled-identity,
  # diagonal, and full. They are only merged at the very end.
  identity_sum = None
  diag_sum = None
  full_sum = None

  for k, affine in enumerate(self.interpolated_affine):
    scale = affine.scale  # Read once, in case the property has side effects.

    if scale is None or isinstance(
        scale, linop_identity_lib.LinearOperatorIdentity):
      # Unit scale: the contribution is just the mixture weight.
      identity_sum = add(identity_sum, probs[..., k, array_ops.newaxis])
      continue

    if isinstance(scale, linop_identity_lib.LinearOperatorScaledIdentity):
      weight = probs[..., k, array_ops.newaxis]
      identity_sum = add(identity_sum,
                         weight * math_ops.square(scale.multiplier))
      continue

    if isinstance(scale, linop_diag_lib.LinearOperatorDiag):
      weight = probs[..., k, array_ops.newaxis]
      diag_sum = add(diag_sum, weight * math_ops.square(scale.diag_part()))
      continue

    # Any other operator: form the weighted `S S^T` explicitly.
    weight = probs[..., k, array_ops.newaxis, array_ops.newaxis]
    contribution = weight * scale.matmul(scale.to_dense(), adjoint_arg=True)
    if diag_only:
      contribution = array_ops.matrix_diag_part(contribution)
    full_sum = add(full_sum, contribution)

  # Account for a base distribution whose variance is not one. Since
  # X ~ iid Law(X_0),
  #   `Cov(SX+m) = S Cov(X) S.T = S S.T Diag(Var(X_0))`.
  # Scaling by `Var(X)` (rather than `Cov(X)`) suffices because X is `d` iid
  # samples from a scalar-event distribution.
  variance = self.distribution.variance()
  if identity_sum is not None:
    identity_sum *= variance
  if diag_sum is not None:
    diag_sum *= variance[..., array_ops.newaxis]
  if full_sum is not None:
    full_sum *= variance[..., array_ops.newaxis]

  if diag_only:
    # Only the diagonal is wanted, so no operators need to be built.
    partial = add(diag_sum, full_sum)
    if partial is not None or identity_sum is None:
      return add(partial, identity_sum)
    # Only the scaled-identity bucket is populated: expand it to event shape.
    ones = array_ops.ones(self.event_shape_tensor(), dtype=self.dtype)
    return identity_sum[..., array_ops.newaxis] * ones

  # `None` indicates we don't know if the result is positive-definite.
  is_positive_definite = all(
      affine.scale.is_positive_definite
      for affine in self.endpoint_affine) or None

  summands = []
  if diag_sum is not None:
    summands.append(
        linop_diag_lib.LinearOperatorDiag(
            diag=diag_sum, is_positive_definite=is_positive_definite))
  if full_sum is not None:
    summands.append(
        linop_full_lib.LinearOperatorFullMatrix(
            matrix=full_sum, is_positive_definite=is_positive_definite))
  if identity_sum is not None:
    summands.append(
        linop_identity_lib.LinearOperatorScaledIdentity(
            num_rows=self.event_shape_tensor()[0],
            multiplier=identity_sum,
            is_positive_definite=is_positive_definite))

  if not summands:
    return None
  return linop_add_lib.add_operators(summands)[0].to_dense()
def _inverse_block_lower_triangular(block_lower_triangular_operator):
  """Inverts a `LinearOperatorBlockLowerTriangular` block row by block row.

  The inverse is assembled recursively from the identity

  ```none
  |A 0|'  =  |  A'    0 |
  |B C|      |-C'BA'  C'|
  ```

  in which `A` is n-by-n, `B` is m-by-n, `C` is m-by-m and `'` denotes the
  inverse. Multiplying out confirms it:

  ```none
  |A 0||  A'    0 |
  |B C||-C'BA'  C'|

     = |    AA'      0 |
       |BA'-CC'BA'  CC'|

     = |I 0|
       |0 I|
  ```

  Args:
    block_lower_triangular_operator: Instance of
      `LinearOperatorBlockLowerTriangular`.

  Returns:
    block_lower_triangular_operator_inverse: Instance of
      `LinearOperatorBlockLowerTriangular`, the inverse of
      `block_lower_triangular_operator`.
  """
  block_lower_triangular_cls = (
      linear_operator_block_lower_triangular.LinearOperatorBlockLowerTriangular)
  rows = block_lower_triangular_operator.operators

  # Hints carried over from the operator being inverted.
  hints = dict(
      is_non_singular=block_lower_triangular_operator.is_non_singular,
      is_self_adjoint=block_lower_triangular_operator.is_self_adjoint,
      is_positive_definite=(
          block_lower_triangular_operator.is_positive_definite),
      is_square=True)

  num_block_rows = len(rows)
  if num_block_rows == 1:
    # Base case: a single block is inverted directly.
    return block_lower_triangular_cls([[rows[0][0].inverse()]], **hints)

  # `A'` of the docstring: recursively invert the operator made of every block
  # row except the last one.
  upper_left_inverse = block_lower_triangular_cls(rows[:-1]).inverse()

  last_row = rows[-1]
  corner_inverse = last_row[-1].inverse()  # `C'` of the docstring.

  supported_types = tuple(linear_operator_addition.SUPPORTED_OPERATORS)

  def as_summable(operator):
    # Operators of a type not listed in `SUPPORTED_OPERATORS` are converted to
    # dense full-matrix operators before being handed to `add_operators`.
    if isinstance(operator, supported_types):
      return operator
    return linear_operator_full_matrix.LinearOperatorFullMatrix(
        operator.to_dense())

  # Assemble the last block row of the inverse, `[-C'BA', C']`, where `B` is
  # the last row of the input without its final block. `-C'BA'` is produced
  # one column partition of `A'` at a time.
  inverse_last_row = []
  for col in range(num_block_rows - 1):
    # Block `col` of `BA'`: only rows `col` and below of that column of `A'`
    # are present (it is block lower triangular).
    products = [
        as_summable(
            last_row[row].matmul(upper_left_inverse.operators[row][col]))
        for row in range(col, num_block_rows - 1)
    ]
    summed = linear_operator_addition.add_operators(products)
    assert len(summed) == 1

    # Block `col` of `-C'BA'`: left-multiply by `C'`, then negate.
    product = corner_inverse.matmul(summed[0])
    negation = linear_operator_identity.LinearOperatorScaledIdentity(
        num_rows=corner_inverse.domain_dimension_tensor(),
        multiplier=math_ops.cast(-1, dtype=product.dtype))
    inverse_last_row.append(negation.matmul(product))

  # `C'` closes the last block row of the inverse.
  inverse_last_row.append(corner_inverse)

  return block_lower_triangular_cls(
      upper_left_inverse.operators + [inverse_last_row], **hints)
def mvn_conjugate_linear_update(prior_scale,
                                linear_transformation,
                                likelihood_scale,
                                observation,
                                prior_mean=None,
                                name=None):
  """Computes a conjugate normal posterior for a Bayesian linear regression.

  We assume the following model:

  ```
  latent ~ MVN(loc=prior_mean, scale=prior_scale)
  observation ~ MVN(loc=linear_transformation.matvec(latent),
                    scale=likelihood_scale)
  ```

  For Bayesian linear regression, the `latent` represents the weights, and the
  provided `linear_transformation` is the design matrix.

  This method computes the multivariate normal
  posterior `p(latent | observation)`, using `LinearOperator`s to perform
  computations efficiently when the matrices involved have special structure.

  Args:
    prior_scale: Instance of `tf.linalg.LinearOperator` of shape
      `[..., num_features, num_features]`, specifying a scale matrix (any
      matrix `L` such that `LL' = Q` where `Q` is the covariance) for the
      prior on regression weights. May optionally be a float `Tensor`.
    linear_transformation: Instance of `tf.linalg.LinearOperator` of shape
      `[..., num_outputs, num_features]`, specifying a transformation of the
      latent values. May optionally be a float `Tensor`.
    likelihood_scale: Instance of `tf.linalg.LinearOperator` of shape
      `[..., num_outputs, num_outputs]` specifying a scale matrix (any
      matrix `L` such that `LL' = Q` where `Q` is the covariance) for the
      likelihood of observed targets. May optionally be a float `Tensor`.
    observation: Float `Tensor` of shape `[..., num_outputs]`, specifying the
      observed values or regression targets.
    prior_mean: Optional float `Tensor` of shape `[..., num_features]`,
      specifying the prior mean. If `None`, the prior mean is assumed to be
      zero and some computation is avoided.
      Default value: `None`.
    name: Optional Python `str` name given to ops created by this function.
      Default value: 'mvn_conjugate_linear_update'.

  Returns:
    posterior_mean: Float `Tensor` of shape `[..., num_features]`,
      giving the mean of the multivariate normal posterior on the latent value.
    posterior_prec: Instance of `tf.linalg.LinearOperator` of shape
      `[..., num_features, num_features]`, giving the
      posterior precision (inverse covariance) matrix.

  #### Mathematical details

  Let the prior precision be denoted by
  `prior_prec = prior_scale.matmul(prior_scale, adjoint_arg=True).inverse()`
  and the likelihood precision by `likelihood_prec = likelihood_scale.matmul(
  likelihood_scale, adjoint_arg=True).inverse()`. Then the posterior
  `p(latent | observation)` is multivariate normal with precision

  ```python
  posterior_prec = (
    linear_transformation.matmul(
      likelihood_prec.matmul(linear_transformation), adjoint=True) +
     prior_prec)
  ```

  and mean

  ```python
  posterior_mean = posterior_prec.solvevec(
    linear_transformation.matvec(
      likelihood_prec.matvec(observation), adjoint=True) +
    prior_prec.matvec(prior_mean))
  ```

  """
  with tf.name_scope(name or 'mvn_conjugate_linear_update'):

    def ensure_is_linop(x):
      # Anything without a `solve` attribute is treated as a dense matrix and
      # wrapped in a full-matrix operator.
      if hasattr(x, 'solve'):
        return x
      return tf.linalg.LinearOperatorFullMatrix(x)

    prior_scale = ensure_is_linop(prior_scale)
    likelihood_scale = ensure_is_linop(likelihood_scale)
    linear_transformation = ensure_is_linop(linear_transformation)

    observation = tf.convert_to_tensor(observation, name='observation')
    if prior_mean is not None:
      prior_mean = tf.convert_to_tensor(prior_mean, name='prior_mean')

    # `prior_prec = (L L')^-1 = (L^-1)' (L^-1)` with `L = prior_scale`.
    prior_prec_chol = prior_scale.inverse()
    prior_prec = prior_prec_chol.matmul(prior_prec_chol, adjoint=True)

    # Compute `evidence_prec = X.T @ Q^-1 @ X`, with
    #  Q = likelihood covariance (`likelihood_scale @ likelihood_scale.T`)
    #  X = linear transformation.
    scaled_transform = likelihood_scale.solve(linear_transformation)
    evidence_prec = scaled_transform.matmul(scaled_transform, adjoint=True)

    # Attempt to add prior + evidence efficiently by exploiting structure.
    try:
      sum_terms = linear_operator_addition.add_operators(
          [prior_prec, evidence_prec])
    except TypeError:  # Unregistered linops raise a TypeError.
      sum_terms = None

    if sum_terms is None or len(sum_terms) > 1:
      # Structured addition was unavailable or failed to reduce the two terms
      # to one operator, so do things the hard way with dense matrices.
      posterior_prec = tf.linalg.LinearOperatorFullMatrix(
          prior_prec.to_dense() + evidence_prec.to_dense())
    else:
      posterior_prec = sum_terms[0]

    # Hint to LinearOperator that precision matrices are always PSD.
    # pylint: disable=protected-access
    posterior_prec._is_positive_definite = True
    posterior_prec._is_self_adjoint = True
    posterior_prec._is_square = True
    # pylint: enable=protected-access

    # The posterior mean is a weighted combination of the prior mean and the
    # observed value, scaled by the posterior covariance. The first term is
    # `X.T @ Q^-1 @ observation`, computed from the already-scaled transform.
    prior_plus_observed_value = scaled_transform.matvec(
        likelihood_scale.solvevec(observation), adjoint=True)
    if prior_mean is not None:
      prior_plus_observed_value += prior_prec.matvec(prior_mean)
    posterior_mean = posterior_prec.solvevec(prior_plus_observed_value)

    return posterior_mean, posterior_prec