def variance(self, name="variance"): """Variance of the Wishart distribution. This function should not be confused with the covariance of the Wishart. The covariance matrix would have shape `q x q` where, `q = dimension * (dimension+1) / 2` and having elements corresponding to some mapping from a lower-triangular matrix to a vector-space. This function returns the diagonal of the Covariance matrix but shaped as a `dimension x dimension` matrix. Args: name: The name of this op. Returns: variance: `Tensor` of dtype `self.dtype`. """ with ops.name_scope(self.name): with ops.name_scope(name, values=list(self.inputs.values())): x = math_ops.sqrt(self.df) * self.scale_operator_pd.to_dense() d = array_ops.expand_dims(array_ops.batch_matrix_diag_part(x), -1) v = math_ops.square(x) + math_ops.batch_matmul(d, d, adj_y=True) if self.cholesky_input_output_matrices: return linalg_ops.batch_cholesky(v) else: return v
def _variance(self):
  x = math_ops.sqrt(self.df) * self.scale_operator_pd.to_dense()
  d = array_ops.expand_dims(array_ops.batch_matrix_diag_part(x), -1)
  v = math_ops.square(x) + math_ops.batch_matmul(d, d, adj_y=True)
  if self.cholesky_input_output_matrices:
    return linalg_ops.batch_cholesky(v)
  return v
def std(self, name="std"):
  """Standard deviation of the Wishart distribution."""
  with ops.name_scope(self.name):
    with ops.name_scope(name, values=list(self.inputs.values())):
      if self.cholesky_input_output_matrices:
        raise ValueError(
            "Computing std. dev. when cholesky_input_output_matrices=True "
            "does not make sense.")
      return linalg_ops.batch_cholesky(self.variance())
def std(self, name="std"): """Standard deviation of the Wishart distribution.""" with ops.name_scope(self.name): with ops.name_scope(name, values=list(self.inputs.values())): if self.cholesky_input_output_matrices: raise ValueError( "Computing std. dev. when is cholesky_input_output_matrices=True " "does not make sense." ) return linalg_ops.batch_cholesky(self.variance())
def __init__(self, matrix, verify_pd=True, name="OperatorPDFull"):
  """Initialize an OperatorPDFull.

  Args:
    matrix: Shape `[N1,...,Nb, k, k]` tensor with `b >= 0`, `k >= 1`. The
      last two dimensions should be `k x k` symmetric positive definite
      matrices.
    verify_pd: Whether to check that `matrix` is symmetric positive definite.
      If `verify_pd` is `False`, correct behavior is not guaranteed.
    name: A name to prepend to all ops created by this class.
  """
  with ops.name_scope(name):
    with ops.name_scope("init", values=[matrix]):
      matrix = ops.convert_to_tensor(matrix)
      # Check symmetric here. Positivity will be verified by checking the
      # diagonal of the Cholesky factor inside the parent class. The Cholesky
      # factorization .batch_cholesky() does not always fail for non-PSD
      # matrices, so don't rely on that.
      if verify_pd:
        matrix = _check_symmetric(matrix)
      chol = linalg_ops.batch_cholesky(matrix)
      super(OperatorPDFull, self).__init__(chol, verify_pd=verify_pd)
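# A standalone NumPy sketch (not the TF ops above) of why the constructor
# checks symmetry itself: a Cholesky routine reads only the lower triangle,
# so it can "succeed" on a non-symmetric input instead of failing. The helper
# `check_symmetric` below is a hypothetical analogue of `_check_symmetric`.
import numpy as np

def check_symmetric(m, atol=1e-6):
  """Fail fast on m != m^T; Cholesky alone would not catch this."""
  if not np.allclose(m, np.swapaxes(m, -1, -2), atol=atol):
    raise ValueError("Input matrix is not symmetric.")
  return m

m = np.array([[2.0, 99.0],    # upper triangle differs from the lower one
              [0.5, 1.0]])
chol = np.linalg.cholesky(m)  # no error: only the lower triangle is read
assert np.allclose(chol @ chol.T, np.tril(m) + np.tril(m, -1).T)
# check_symmetric(m) would raise, catching the bad input up front.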
def _chol_capacitance(self, batch_mode):
  """Cholesky factorization of the capacitance term."""
  # Cholesky factor for (D^{-1} + V^T M^{-1} V), which is sometimes
  # known as the "capacitance" matrix.

  # self._operator will use batch mode automatically if needed; we cannot
  # force that here.
  # M^{-1} V
  minv_v = self._operator.solve(self._v)
  # V^T M^{-1} V
  if batch_mode:
    vt_minv_v = math_ops.batch_matmul(self._v, minv_v, adj_x=True)
  else:
    vt_minv_v = math_ops.matmul(self._v, minv_v, transpose_a=True)

  # D^{-1} + V^T M^{-1} V
  capacitance = self._diag_inv_operator.add_to_tensor(vt_minv_v)
  # Cholesky[D^{-1} + V^T M^{-1} V]
  if batch_mode:
    return linalg_ops.batch_cholesky(capacitance)
  else:
    return linalg_ops.cholesky(capacitance)
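# A standalone NumPy sketch (not the operator classes above) of the Woodbury
# identity that this capacitance factor supports:
#   (M + V D V^T)^{-1} = M^{-1} - M^{-1} V C^{-1} V^T M^{-1},
# where C = D^{-1} + V^T M^{-1} V is the capacitance matrix factored above.
# All names below are local to the sketch.
import numpy as np

rng = np.random.RandomState(0)
k, r = 4, 2
m = 2.0 * np.eye(k)                        # M: k x k positive definite
v = rng.randn(k, r)                        # V: k x r, low-rank update
d = np.diag([0.5, 1.5])                    # D: r x r positive diagonal

minv_v = np.linalg.solve(m, v)             # M^{-1} V
capacitance = np.linalg.inv(d) + v.T @ minv_v
chol_c = np.linalg.cholesky(capacitance)   # the factor computed above
assert np.allclose(chol_c @ chol_c.T, capacitance)

woodbury = np.linalg.inv(m) - minv_v @ np.linalg.inv(capacitance) @ minv_v.T
assert np.allclose(woodbury, np.linalg.inv(m + v @ d @ v.T))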
def __init__(self, mu, sigma=None, sigma_chol=None, name=None):
  """Multivariate Normal distributions on `R^k`.

  User must provide means `mu`, which are tensors of rank `N+1` (`N >= 0`)
  with the last dimension having length `k`.

  User must provide exactly one of `sigma` (the covariance matrices) or
  `sigma_chol` (the Cholesky decompositions of the covariance matrices).
  `sigma` or `sigma_chol` must be of rank `N+2`. The last two dimensions
  must both have length `k`. The first `N` dimensions correspond to batch
  indices.

  If `sigma_chol` is not provided, the batch Cholesky factorization of
  `sigma` is calculated for you.

  The shapes of `mu` and `sigma` must match for the first `N` dimensions.

  Regardless of which parameter is provided, the covariance matrices must
  all be **positive definite** (an error is raised if one of them is not).

  Args:
    mu: (N+1)-D. `float` or `double` tensor, the means of the distributions.
    sigma: (N+2)-D. (optional) `float` or `double` tensor, the covariances
      of the distribution(s). The first `N+1` dimensions must match those of
      `mu`. Must be batch-positive-definite.
    sigma_chol: (N+2)-D. (optional) `float` or `double` tensor, a
      lower-triangular factorization of `sigma`
      (`sigma = sigma_chol . sigma_chol^*`). The first `N+1` dimensions must
      match those of `mu`. The tensor itself need not be batch lower
      triangular: we ignore the upper triangular part. However, the batch
      diagonals must be positive (i.e., `sigma_chol` must be
      batch-positive-definite).
    name: The name to give Ops created by the initializer.

  Raises:
    ValueError: if neither or both of `sigma` and `sigma_chol` are provided.
    TypeError: if `mu` and `sigma` (resp. `sigma_chol`) have different
      dtypes.
  """
  if (sigma is None) == (sigma_chol is None):
    raise ValueError("Exactly one of sigma and sigma_chol must be provided")

  with ops.op_scope([mu, sigma, sigma_chol], name, "MultivariateNormal"):
    sigma_or_half = sigma_chol if sigma is None else sigma

    mu = ops.convert_to_tensor(mu)
    sigma_or_half = ops.convert_to_tensor(sigma_or_half)

    contrib_tensor_util.assert_same_float_dtype((mu, sigma_or_half))

    with ops.control_dependencies(
        [_assert_compatible_shapes(mu, sigma_or_half)]):
      mu = array_ops.identity(mu, name="mu")

      # Store the dimensionality of the MVNs.
      self._k = array_ops.gather(array_ops.shape(mu), array_ops.rank(mu) - 1)

      if sigma_chol is not None:
        # Ensure we only keep the lower triangular part.
        sigma_chol = array_ops.batch_matrix_band_part(
            sigma_chol, num_lower=-1, num_upper=0)
        sigma_det = _determinant_from_sigma_chol(sigma_chol)
        with ops.control_dependencies(
            [_assert_batch_positive_definite(sigma_chol)]):
          self._sigma = math_ops.batch_matmul(
              sigma_chol, sigma_chol, adj_y=True, name="sigma")
          self._sigma_chol = array_ops.identity(sigma_chol, "sigma_chol")
          self._sigma_det = array_ops.identity(sigma_det, "sigma_det")
          self._mu = array_ops.identity(mu, "mu")
      else:  # sigma is not None
        sigma_chol = linalg_ops.batch_cholesky(sigma)
        sigma_det = _determinant_from_sigma_chol(sigma_chol)
        # batch_cholesky checks for positive definiteness, so we can rely on
        # it here.
        with ops.control_dependencies([sigma_chol]):
          self._sigma = array_ops.identity(sigma, "sigma")
          self._sigma_chol = array_ops.identity(sigma_chol, "sigma_chol")
          self._sigma_det = array_ops.identity(sigma_det, "sigma_det")
          self._mu = array_ops.identity(mu, "mu")
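# A standalone NumPy sketch of the two identities this constructor relies on:
# `sigma = chol @ chol^T` after the upper triangle is discarded, and
# `det(sigma) = prod(diag(chol))**2` (the cheap determinant that a helper
# like `_determinant_from_sigma_chol` can exploit). Names are local to the
# sketch.
import numpy as np

sigma = np.array([[4.0, 2.0],
                  [2.0, 3.0]])
chol = np.linalg.cholesky(sigma)

# Only the lower triangle of the factor matters.
assert np.allclose(np.tril(chol) @ np.tril(chol).T, sigma)

# Determinant from the factor: O(k) once the factor exists, and more stable
# than computing det(sigma) directly.
assert np.allclose(np.prod(np.diag(chol)) ** 2, np.linalg.det(sigma))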
def log_prob(self, x, name="log_prob"):
  """Log of the probability density/mass function.

  Args:
    x: `float` or `double` `Tensor`.
    name: The name to give this op.

  Returns:
    log_prob: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
      values of type `self.dtype`.
  """
  with ops.name_scope(self.name):
    with ops.name_scope(name, values=[x] + list(self.inputs.values())):
      x = ops.convert_to_tensor(x, name="x")
      contrib_tensor_util.assert_same_float_dtype(
          (self.scale_operator_pd, x))
      if self.cholesky_input_output_matrices:
        x_sqrt = x
      else:
        # Complexity: O(nbk^3)
        x_sqrt = linalg_ops.batch_cholesky(x)

      batch_shape = self.batch_shape()
      event_shape = self.event_shape()
      ndims = array_ops.rank(x_sqrt)
      # sample_ndims = ndims - batch_ndims - event_ndims
      sample_ndims = ndims - array_ops.shape(batch_shape)[0] - 2
      sample_shape = array_ops.slice(
          array_ops.shape(x_sqrt), [0], [sample_ndims])

      # We need to be able to pre-multiply each matrix by its corresponding
      # batch scale matrix. Since a Distribution Tensor supports multiple
      # samples per batch, this means we need to reshape the input matrix
      # `x` so that the first b dimensions are batch dimensions and the last
      # two are of shape [dimension, dimension * number_of_samples]. Doing
      # these gymnastics allows us to do a batch_solve.
      #
      # After we're done with sqrt_solve (the batch operation) we need to
      # undo this reshaping so what we're left with is a Tensor
      # partitionable by sample, batch, and event dimensions.

      # Complexity: O(nbk^2) since transpose must access every element.
      scale_sqrt_inv_x_sqrt = x_sqrt
      perm = array_ops.concat(0, (math_ops.range(sample_ndims, ndims),
                                  math_ops.range(0, sample_ndims)))
      scale_sqrt_inv_x_sqrt = array_ops.transpose(
          scale_sqrt_inv_x_sqrt, perm)
      shape = array_ops.concat(
          0, (batch_shape,
              (math_ops.cast(self.dimension, dtype=dtypes.int32), -1)))
      scale_sqrt_inv_x_sqrt = array_ops.reshape(scale_sqrt_inv_x_sqrt, shape)

      # Complexity: O(nbM*k) where M is the complexity of the operator
      # solving a vector system. E.g., for OperatorPDDiag, each solve is
      # O(k), so this complexity is O(nbk^2). For OperatorPDCholesky, each
      # solve is O(k^2) so this step has complexity O(nbk^3).
      scale_sqrt_inv_x_sqrt = self.scale_operator_pd.sqrt_solve(
          scale_sqrt_inv_x_sqrt)

      # Undo the reshaping that made the tensor batch-op ready.
      # Complexity: O(nbk^2)
      shape = array_ops.concat(0, (batch_shape, event_shape, sample_shape))
      scale_sqrt_inv_x_sqrt = array_ops.reshape(scale_sqrt_inv_x_sqrt, shape)
      perm = array_ops.concat(
          0, (math_ops.range(ndims - sample_ndims, ndims),
              math_ops.range(0, ndims - sample_ndims)))
      scale_sqrt_inv_x_sqrt = array_ops.transpose(
          scale_sqrt_inv_x_sqrt, perm)

      # Write V = SS', X = LL'. Then:
      # tr[inv(V) X] = tr[inv(S)' inv(S) L L']
      #              = tr[inv(S) L L' inv(S)']
      #              = tr[(inv(S) L) (inv(S) L)']
      #              = sum_{ik} (inv(S) L)_{ik}^2
      # The second equality follows from the cyclic permutation property.
      # Complexity: O(nbk^2)
      trace_scale_inv_x = math_ops.reduce_sum(
          math_ops.square(scale_sqrt_inv_x_sqrt),
          reduction_indices=[-2, -1])

      # Complexity: O(nbk)
      half_log_det_x = math_ops.reduce_sum(
          math_ops.log(array_ops.batch_matrix_diag_part(x_sqrt)),
          reduction_indices=[-1])

      # Complexity: O(nbk^2)
      log_prob = ((self.df - self.dimension - 1.) * half_log_det_x -
                  0.5 * trace_scale_inv_x -
                  self.log_normalizing_constant())

      # Set shape hints: merge what we know from the input with what we know
      # from the parameters of this distribution.
      if x.get_shape().ndims is not None:
        log_prob.set_shape(x.get_shape()[:-2])
      if (log_prob.get_shape().ndims is not None and
          self.get_batch_shape().ndims is not None and
          self.get_batch_shape().ndims > 0):
        log_prob.get_shape()[-self.get_batch_shape().ndims:].merge_with(
            self.get_batch_shape())

      return log_prob
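# A standalone NumPy sketch (not the TF ops above) verifying the trace
# identity used for `trace_scale_inv_x`: with V = S S' and X = L L',
#   tr[V^{-1} X] = sum_{ik} (S^{-1} L)_{ik}^2.
# Names are local to the sketch.
import numpy as np

rng = np.random.RandomState(0)
k = 3
s = np.tril(rng.randn(k, k)) + k * np.eye(k)  # S: lower triangular, full rank
l = np.tril(rng.randn(k, k)) + k * np.eye(k)  # L: lower triangular, full rank
v = s @ s.T                                   # scale matrix V = S S'
x = l @ l.T                                   # observation X = L L'

sinv_l = np.linalg.solve(s, l)                # the sqrt_solve step
assert np.allclose(np.trace(np.linalg.solve(v, x)),
                   np.sum(np.square(sinv_l)))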
def _std(self):
  if self.cholesky_input_output_matrices:
    raise ValueError(
        "Computing std. dev. when cholesky_input_output_matrices=True "
        "does not make sense.")
  return linalg_ops.batch_cholesky(self.variance())
def _log_prob(self, x):
  if self.cholesky_input_output_matrices:
    x_sqrt = x
  else:
    # Complexity: O(nbk^3)
    x_sqrt = linalg_ops.batch_cholesky(x)

  batch_shape = self.batch_shape()
  event_shape = self.event_shape()
  ndims = array_ops.rank(x_sqrt)
  # sample_ndims = ndims - batch_ndims - event_ndims
  sample_ndims = ndims - array_ops.shape(batch_shape)[0] - 2
  sample_shape = array_ops.slice(
      array_ops.shape(x_sqrt), [0], [sample_ndims])

  # We need to be able to pre-multiply each matrix by its corresponding
  # batch scale matrix. Since a Distribution Tensor supports multiple
  # samples per batch, this means we need to reshape the input matrix `x`
  # so that the first b dimensions are batch dimensions and the last two
  # are of shape [dimension, dimension * number_of_samples]. Doing these
  # gymnastics allows us to do a batch_solve.
  #
  # After we're done with sqrt_solve (the batch operation) we need to undo
  # this reshaping so what we're left with is a Tensor partitionable by
  # sample, batch, and event dimensions.

  # Complexity: O(nbk^2) since transpose must access every element.
  scale_sqrt_inv_x_sqrt = x_sqrt
  perm = array_ops.concat(0, (math_ops.range(sample_ndims, ndims),
                              math_ops.range(0, sample_ndims)))
  scale_sqrt_inv_x_sqrt = array_ops.transpose(scale_sqrt_inv_x_sqrt, perm)
  shape = array_ops.concat(
      0, (batch_shape,
          (math_ops.cast(self.dimension, dtype=dtypes.int32), -1)))
  scale_sqrt_inv_x_sqrt = array_ops.reshape(scale_sqrt_inv_x_sqrt, shape)

  # Complexity: O(nbM*k) where M is the complexity of the operator solving
  # a vector system. E.g., for OperatorPDDiag, each solve is O(k), so this
  # complexity is O(nbk^2). For OperatorPDCholesky, each solve is O(k^2) so
  # this step has complexity O(nbk^3).
  scale_sqrt_inv_x_sqrt = self.scale_operator_pd.sqrt_solve(
      scale_sqrt_inv_x_sqrt)

  # Undo the reshaping that made the tensor batch-op ready.
  # Complexity: O(nbk^2)
  shape = array_ops.concat(0, (batch_shape, event_shape, sample_shape))
  scale_sqrt_inv_x_sqrt = array_ops.reshape(scale_sqrt_inv_x_sqrt, shape)
  perm = array_ops.concat(0, (math_ops.range(ndims - sample_ndims, ndims),
                              math_ops.range(0, ndims - sample_ndims)))
  scale_sqrt_inv_x_sqrt = array_ops.transpose(scale_sqrt_inv_x_sqrt, perm)

  # Write V = SS', X = LL'. Then:
  # tr[inv(V) X] = tr[inv(S)' inv(S) L L']
  #              = tr[inv(S) L L' inv(S)']
  #              = tr[(inv(S) L) (inv(S) L)']
  #              = sum_{ik} (inv(S) L)_{ik}^2
  # The second equality follows from the cyclic permutation property.
  # Complexity: O(nbk^2)
  trace_scale_inv_x = math_ops.reduce_sum(
      math_ops.square(scale_sqrt_inv_x_sqrt),
      reduction_indices=[-2, -1])

  # Complexity: O(nbk)
  half_log_det_x = math_ops.reduce_sum(
      math_ops.log(array_ops.batch_matrix_diag_part(x_sqrt)),
      reduction_indices=[-1])

  # Complexity: O(nbk^2)
  log_prob = ((self.df - self.dimension - 1.) * half_log_det_x -
              0.5 * trace_scale_inv_x -
              self.log_normalizing_constant())

  # Set shape hints: merge what we know from the input with what we know
  # from the parameters of this distribution.
  if x.get_shape().ndims is not None:
    log_prob.set_shape(x.get_shape()[:-2])
  if (log_prob.get_shape().ndims is not None and
      self.get_batch_shape().ndims is not None and
      self.get_batch_shape().ndims > 0):
    log_prob.get_shape()[-self.get_batch_shape().ndims:].merge_with(
        self.get_batch_shape())

  return log_prob