def testRectangular(self):
  with self.session(use_gpu=True):
    mat = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
    mat_diag = array_ops.matrix_diag_part(mat)
    self.assertAllEqual(mat_diag.eval(), np.array([1.0, 5.0]))
    mat = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
    mat_diag = array_ops.matrix_diag_part(mat)
    self.assertAllEqual(mat_diag.eval(), np.array([1.0, 4.0]))
def _variance(self):
  if distribution_util.is_diagonal_scale(self.scale):
    return math_ops.square(self.scale.diag_part())
  elif (isinstance(self.scale, linalg.LinearOperatorLowRankUpdate) and
        self.scale.is_self_adjoint):
    return array_ops.matrix_diag_part(
        self.scale.matmul(self.scale.to_dense()))
  else:
    return array_ops.matrix_diag_part(
        self.scale.matmul(self.scale.to_dense(), adjoint_arg=True))
def _stddev(self):
  if distribution_util.is_diagonal_scale(self.scale):
    return np.sqrt(2) * math_ops.abs(self.scale.diag_part())
  elif (isinstance(self.scale, linalg.LinearOperatorUDVHUpdate) and
        self.scale.is_self_adjoint):
    return np.sqrt(2) * math_ops.sqrt(
        array_ops.matrix_diag_part(self.scale.matmul(self.scale.to_dense())))
  else:
    return np.sqrt(2) * math_ops.sqrt(
        array_ops.matrix_diag_part(
            self.scale.matmul(self.scale.to_dense(), adjoint_arg=True)))
def _stddev(self):
  if (isinstance(self.scale, linalg.LinearOperatorIdentity) or
      isinstance(self.scale, linalg.LinearOperatorScaledIdentity) or
      isinstance(self.scale, linalg.LinearOperatorDiag)):
    return math_ops.abs(self.scale.diag_part())
  elif (isinstance(self.scale, linalg.LinearOperatorUDVHUpdate) and
        self.scale.is_self_adjoint):
    return math_ops.sqrt(
        array_ops.matrix_diag_part(self.scale.apply(self.scale.to_dense())))
  else:
    # TODO(b/35040238): Remove transpose once LinOp supports `transpose`.
    return math_ops.sqrt(
        array_ops.matrix_diag_part(
            self.scale.apply(array_ops.matrix_transpose(self.scale.to_dense()))))
def testSample(self):
  with self.test_session():
    scale = make_pd(1., 2)
    df = 4

    chol_w = distributions.WishartCholesky(
        df, chol(scale), cholesky_input_output_matrices=False)

    x = chol_w.sample(1, seed=42).eval()
    chol_x = [chol(x[0])]

    full_w = distributions.WishartFull(
        df, scale, cholesky_input_output_matrices=False)
    self.assertAllClose(x, full_w.sample(1, seed=42).eval())

    chol_w_chol = distributions.WishartCholesky(
        df, chol(scale), cholesky_input_output_matrices=True)
    self.assertAllClose(chol_x, chol_w_chol.sample(1, seed=42).eval())
    eigen_values = array_ops.matrix_diag_part(chol_w_chol.sample(1000, seed=42))
    np.testing.assert_array_less(0., eigen_values.eval())

    full_w_chol = distributions.WishartFull(
        df, scale, cholesky_input_output_matrices=True)
    self.assertAllClose(chol_x, full_w_chol.sample(1, seed=42).eval())
    eigen_values = array_ops.matrix_diag_part(full_w_chol.sample(1000, seed=42))
    np.testing.assert_array_less(0., eigen_values.eval())

    # Check first and second moments.
    df = 4.
    chol_w = distributions.WishartCholesky(
        df=df,
        scale=chol(make_pd(1., 3)),
        cholesky_input_output_matrices=False)
    x = chol_w.sample(10000, seed=42)
    self.assertAllEqual((10000, 3, 3), x.get_shape())

    moment1_estimate = math_ops.reduce_mean(x, reduction_indices=[0]).eval()
    self.assertAllClose(chol_w.mean().eval(), moment1_estimate, rtol=0.05)

    # The variance estimate uses the squares rather than outer-products
    # because Wishart.Variance is the diagonal of the Wishart covariance
    # matrix.
    variance_estimate = (
        math_ops.reduce_mean(math_ops.square(x), reduction_indices=[0]) -
        math_ops.square(moment1_estimate)).eval()
    self.assertAllClose(chol_w.variance().eval(), variance_estimate, rtol=0.05)
def _maybe_attach_assertion(x):
  if not validate_args:
    return x
  if assert_positive:
    return control_flow_ops.with_dependencies([
        check_ops.assert_positive(
            array_ops.matrix_diag_part(x),
            message="diagonal part must be positive"),
    ], x)
  return control_flow_ops.with_dependencies([
      check_ops.assert_none_equal(
          array_ops.matrix_diag_part(x),
          array_ops.zeros([], x.dtype),
          message="diagonal part must be non-zero"),
  ], x)
def testSquare(self):
  with self.session(use_gpu=True):
    v = np.array([1.0, 2.0, 3.0])
    mat = np.diag(v)
    mat_diag = array_ops.matrix_diag_part(mat)
    self.assertEqual((3,), mat_diag.get_shape())
    self.assertAllEqual(mat_diag.eval(), v)
def entropy_matched_cauchy_scale(covariance):
  """Approximates a similar Cauchy distribution given a covariance matrix.

  Since Cauchy distributions do not have moments, entropy matching provides one
  way to set a Cauchy's scale parameter in a way that provides a similar
  distribution. The effect is dividing the standard deviation of an independent
  Gaussian by a constant very near 3.

  To set the scale of the Cauchy distribution, we first select the diagonals of
  `covariance`. Since this ignores cross terms, it overestimates the entropy of
  the Gaussian. For each of these variances, we solve for the Cauchy scale
  parameter which gives the same entropy as the Gaussian with that variance.
  This means setting the (univariate) Gaussian entropy
      0.5 * ln(2 * variance * pi * e)
  equal to the Cauchy entropy
      ln(4 * pi * scale)
  Solving, we get scale = sqrt(variance * (e / (8 pi))).

  Args:
    covariance: A [batch size x N x N] batch of covariance matrices to produce
        Cauchy scales for.
  Returns:
    A [batch size x N] set of Cauchy scale parameters for each part of the
    batch and each dimension of the input Gaussians.
  """
  return math_ops.sqrt(math.e / (8. * math.pi) *
                       array_ops.matrix_diag_part(covariance))
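# A quick standalone check of the entropy-matching identity in the docstring
# above (plain NumPy, added here for illustration; not part of the original
# TensorFlow code): for any positive variance, the scale
# sqrt(variance * e / (8 pi)) should make the Gaussian and Cauchy entropies
# agree, and the matched scale divides the standard deviation by roughly 3.
import numpy as np

variance = 2.7
scale = np.sqrt(variance * np.e / (8. * np.pi))

gaussian_entropy = 0.5 * np.log(2. * np.pi * np.e * variance)
cauchy_entropy = np.log(4. * np.pi * scale)

# Both entropies agree, confirming scale = sqrt(variance * e / (8 pi)).
assert np.isclose(gaussian_entropy, cauchy_entropy)
print(np.sqrt(variance) / scale)  # ~3.04, the "constant very near 3"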
def _to_dense(self):
  normalized_axis = self.reflection_axis / linalg.norm(
      self.reflection_axis, axis=-1, keepdims=True)
  mat = normalized_axis[..., array_ops.newaxis]
  matrix = -2 * math_ops.matmul(mat, mat, adjoint_b=True)
  return array_ops.matrix_set_diag(
      matrix, 1. + array_ops.matrix_diag_part(matrix))
def testCovarianceFromSampling(self):
  alpha = np.array([[1., 2, 3],
                    [2.5, 4, 0.01]], dtype=np.float32)
  with self.test_session() as sess:
    dist = dirichlet_lib.Dirichlet(alpha)  # batch_shape=[2], event_shape=[3]
    x = dist.sample(int(250e3), seed=1)
    sample_mean = math_ops.reduce_mean(x, 0)
    x_centered = x - sample_mean[None, ...]
    sample_cov = math_ops.reduce_mean(
        math_ops.matmul(x_centered[..., None], x_centered[..., None, :]), 0)
    sample_var = array_ops.matrix_diag_part(sample_cov)
    sample_stddev = math_ops.sqrt(sample_var)
    [
        sample_mean_,
        sample_cov_,
        sample_var_,
        sample_stddev_,
        analytic_mean,
        analytic_cov,
        analytic_var,
        analytic_stddev,
    ] = sess.run([
        sample_mean,
        sample_cov,
        sample_var,
        sample_stddev,
        dist.mean(),
        dist.covariance(),
        dist.variance(),
        dist.stddev(),
    ])
    self.assertAllClose(sample_mean_, analytic_mean, atol=0., rtol=0.04)
    self.assertAllClose(sample_cov_, analytic_cov, atol=0., rtol=0.06)
    self.assertAllClose(sample_var_, analytic_var, atol=0., rtol=0.03)
    self.assertAllClose(sample_stddev_, analytic_stddev, atol=0., rtol=0.02)
def logdet(matrix, name=None):
  """Computes log of the determinant of a hermitian positive definite matrix.

  ```python
  # Compute the determinant of a matrix while reducing the chance of over-
  # or underflow:
  A = ...  # shape 10 x 10
  det = tf.exp(tf.logdet(A))  # scalar
  ```

  Args:
    matrix: A `Tensor`. Must be `float16`, `float32`, `float64`, `complex64`,
      or `complex128` with shape `[..., M, M]`.
    name: A name to give this `Op`. Defaults to `logdet`.

  Returns:
    The natural log of the determinant of `matrix`.

  @compatibility(numpy)
  Equivalent to numpy.linalg.slogdet, although no sign is returned since only
  hermitian positive definite matrices are supported.
  @end_compatibility
  """
  # This uses the property that the log det(A) = 2*sum(log(real(diag(C))))
  # where C is the cholesky decomposition of A.
  with ops.name_scope(name, 'logdet', [matrix]):
    chol = gen_linalg_ops.cholesky(matrix)
    return 2.0 * math_ops.reduce_sum(
        math_ops.log(math_ops.real(array_ops.matrix_diag_part(chol))),
        reduction_indices=[-1])
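# A minimal NumPy sketch (added for illustration; not part of the TensorFlow
# source) of the Cholesky identity the implementation above relies on:
# log det(A) = 2 * sum(log(diag(chol(A)))) for symmetric positive definite A,
# cross-checked against numpy.linalg.slogdet.
import numpy as np

rng = np.random.default_rng(0)
B = rng.standard_normal((10, 10))
A = B @ B.T + 10. * np.eye(10)  # symmetric positive definite

chol = np.linalg.cholesky(A)
logdet_via_chol = 2. * np.sum(np.log(np.diag(chol)))

sign, logdet_ref = np.linalg.slogdet(A)
assert sign == 1.0
assert np.isclose(logdet_via_chol, logdet_ref)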
def _forward_log_det_jacobian(self, x):
  # We formulate the Jacobian with respect to the flattened matrices
  # `vec(x)` and `vec(y)`. Suppose for notational convenience that
  # the first `n` entries of `vec(x)` are the diagonal of `x`, and
  # the remaining `n**2-n` entries are the off-diagonals in
  # arbitrary order. Then the Jacobian is a block-diagonal matrix,
  # with the Jacobian of the diagonal bijector in the first block,
  # and the identity Jacobian for the remaining entries (since this
  # bijector acts as the identity on non-diagonal entries):
  #
  # J_vec(x) (vec(y)) =
  # -------------------------------
  # | J_diag(x) (diag(y))      0  | n entries
  # |                             |
  # |        0                 I  | n**2-n entries
  # -------------------------------
  #   n                     n**2-n
  #
  # Since the log-det of the second (identity) block is zero, the
  # overall log-det-jacobian is just the log-det of first block,
  # from the diagonal bijector.
  #
  # Note that for elementwise operations (exp, softplus, etc) the
  # first block of the Jacobian will itself be a diagonal matrix,
  # but our implementation does not require this to be true.
  return self._diag_bijector.forward_log_det_jacobian(
      array_ops.matrix_diag_part(x), event_ndims=1)
def _variance(self):
  x = math_ops.sqrt(self.df) * self.scale_operator_pd.to_dense()
  d = array_ops.expand_dims(array_ops.matrix_diag_part(x), -1)
  v = math_ops.square(x) + math_ops.matmul(d, d, adjoint_b=True)
  if self.cholesky_input_output_matrices:
    return linalg_ops.cholesky(v)
  return v
def matrix_diag_transform(matrix, transform=None, name=None):
  """Transform diagonal of [batch-]matrix, leave rest of matrix unchanged.

  Create a trainable covariance defined by a Cholesky factor:

  ```python
  # Transform network layer into 2 x 2 array.
  matrix_values = tf.contrib.layers.fully_connected(activations, 4)
  matrix = tf.reshape(matrix_values, (batch_size, 2, 2))

  # Make the diagonal positive. If the upper triangle was zero, this would be a
  # valid Cholesky factor.
  chol = matrix_diag_transform(matrix, transform=tf.nn.softplus)

  # OperatorPDCholesky ignores the upper triangle.
  operator = OperatorPDCholesky(chol)
  ```

  Example of heteroskedastic 2-D linear regression.

  ```python
  # Get a trainable Cholesky factor.
  matrix_values = tf.contrib.layers.fully_connected(activations, 4)
  matrix = tf.reshape(matrix_values, (batch_size, 2, 2))
  chol = matrix_diag_transform(matrix, transform=tf.nn.softplus)

  # Get a trainable mean.
  mu = tf.contrib.layers.fully_connected(activations, 2)

  # This is a fully trainable multivariate normal!
  dist = tf.contrib.distributions.MVNCholesky(mu, chol)

  # Standard log loss. Minimizing this will "train" mu and chol, and then dist
  # will be a distribution predicting labels as multivariate Gaussians.
  loss = -1 * tf.reduce_mean(dist.log_prob(labels))
  ```

  Args:
    matrix: Rank `R` `Tensor`, `R >= 2`, where the last two dimensions are
      equal.
    transform: Element-wise function mapping `Tensors` to `Tensors`. To be
      applied to the diagonal of `matrix`. If `None`, `matrix` is returned
      unchanged. Defaults to `None`.
    name: A name to give created ops. Defaults to "matrix_diag_transform".

  Returns:
    A `Tensor` with same shape and `dtype` as `matrix`.
  """
  with ops.name_scope(name, "matrix_diag_transform", [matrix]):
    matrix = ops.convert_to_tensor(matrix, name="matrix")
    if transform is None:
      return matrix
    # Replace the diag with transformed diag.
    diag = array_ops.matrix_diag_part(matrix)
    transformed_diag = transform(diag)
    transformed_mat = array_ops.matrix_set_diag(matrix, transformed_diag)

  return transformed_mat
def sign_magnitude_positive_definite(
    raw, off_diagonal_scale=0., overall_scale=0.):
  """Constructs a positive definite matrix from an unconstrained input matrix.

  We want to keep the whole matrix on a log scale, but also allow off-diagonal
  elements to be negative, so the sign of off-diagonal elements is modeled
  separately from their magnitude (using the lower and upper triangles
  respectively). Specifically:

  for i < j, we have:
    output_cholesky[i, j] = raw[j, i] / (abs(raw[j, i]) + 1) *
        exp((off_diagonal_scale + overall_scale + raw[i, j]) / 2)

  output_cholesky[i, i] = exp((raw[i, i] + overall_scale) / 2)

  output = output_cholesky^T * output_cholesky

  where raw, off_diagonal_scale, and overall_scale are un-constrained
  real-valued variables. The resulting values are stable around zero due to the
  exponential (and the softsign keeps the function smooth).

  Args:
    raw: A [..., M, M] Tensor.
    off_diagonal_scale: A scalar or [...] shaped Tensor controlling the
        relative scale of off-diagonal values in the output matrix.
    overall_scale: A scalar or [...] shaped Tensor controlling the overall
        scale of the output matrix.
  Returns:
    The `output` matrix described above, a [..., M, M] positive definite
    matrix.
  """
  raw = ops.convert_to_tensor(raw)
  diagonal = array_ops.matrix_diag_part(raw)

  def _right_pad_with_ones(tensor, target_rank):
    # Allow broadcasting even if overall_scale and off_diagonal_scale have
    # batch dimensions
    tensor = ops.convert_to_tensor(tensor, dtype=raw.dtype.base_dtype)
    return array_ops.reshape(
        tensor,
        array_ops.concat([
            array_ops.shape(tensor),
            array_ops.ones([target_rank - array_ops.rank(tensor)],
                           dtype=target_rank.dtype)
        ], axis=0))

  # We divide the log values by 2 to compensate for the squaring that happens
  # when transforming Cholesky factors into positive definite matrices.
  sign_magnitude = (gen_math_ops.exp(
      (raw + _right_pad_with_ones(off_diagonal_scale, array_ops.rank(raw)) +
       _right_pad_with_ones(overall_scale, array_ops.rank(raw))) / 2.) *
                    nn.softsign(array_ops.matrix_transpose(raw)))
  sign_magnitude.set_shape(raw.get_shape())
  cholesky_factor = array_ops.matrix_set_diag(
      input=array_ops.matrix_band_part(sign_magnitude, 0, -1),
      diagonal=gen_math_ops.exp(
          (diagonal + _right_pad_with_ones(overall_scale,
                                           array_ops.rank(diagonal))) / 2.))
  return math_ops.matmul(cholesky_factor, cholesky_factor, transpose_a=True)
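# A standalone NumPy sketch (added for illustration; not part of the original
# source) of the construction the docstring above describes: the upper
# triangle of `raw` gives off-diagonal magnitudes (log scale), the strictly
# lower triangle gives their signs via softsign, and the diagonal gives the
# Cholesky diagonal (log scale). The check confirms the result is positive
# definite for a random unconstrained input.
import numpy as np

rng = np.random.default_rng(0)
m = 4
raw = rng.standard_normal((m, m))
off_diagonal_scale, overall_scale = 0.3, -0.5

chol = np.zeros((m, m))
for i in range(m):
  chol[i, i] = np.exp((raw[i, i] + overall_scale) / 2.)
  for j in range(i + 1, m):
    chol[i, j] = (raw[j, i] / (abs(raw[j, i]) + 1.) *
                  np.exp((off_diagonal_scale + overall_scale + raw[i, j]) / 2.))

output = chol.T @ chol
assert np.all(np.linalg.eigvalsh(output) > 0)  # positive definite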
def _sqrt_log_det(self):
  # The matrix determinant lemma states:
  # det(M + VDV^T) = det(D^{-1} + V^T M^{-1} V) * det(D) * det(M)
  #                = det(C) * det(D) * det(M)
  #
  # Here we compute the Cholesky factor of "C", then pass the result on.
  diag_chol_c = array_ops.matrix_diag_part(
      self._chol_capacitance(batch_mode=False))
  return self._sqrt_log_det_core(diag_chol_c)
def _batch_log_det(self):
  """Log determinant of every batch member."""
  # Note that array_ops.diag_part does not seem more efficient for non-batch,
  # and would give a bad result for a batch matrix, so always use
  # matrix_diag_part.
  diag = array_ops.matrix_diag_part(self._chol)
  det = 2.0 * math_ops.reduce_sum(math_ops.log(diag), reduction_indices=[-1])
  det.set_shape(self.get_shape()[:-2])
  return det
def _log_abs_determinant(self):
  logging.warn(
      "Using (possibly slow) default implementation of determinant."
      " Requires conversion to a dense matrix and O(N^3) operations.")
  if self._can_use_cholesky():
    diag = array_ops.matrix_diag_part(linalg_ops.cholesky(self.to_dense()))
    return 2 * math_ops.reduce_sum(math_ops.log(diag), axis=[-1])
  _, log_abs_det = linalg.slogdet(self.to_dense())
  return log_abs_det
def _GradWithInverseL(l, l_inverse, grad):
  middle = math_ops.matmul(l, grad, adjoint_a=True)
  middle = array_ops.matrix_set_diag(middle,
                                     0.5 * array_ops.matrix_diag_part(middle))
  middle = array_ops.matrix_band_part(middle, -1, 0)
  grad_a = math_ops.matmul(
      math_ops.matmul(l_inverse, middle, adjoint_a=True), l_inverse)
  grad_a += math_ops.conj(array_ops.matrix_transpose(grad_a))
  return grad_a * 0.5
def _log_abs_determinant(self):
  logging.warn(
      "Using (possibly slow) default implementation of determinant."
      " Requires conversion to a dense matrix and O(N^3) operations.")
  if self._can_use_cholesky():
    diag = array_ops.matrix_diag_part(self._get_cached_chol())
    return 2 * math_ops.reduce_sum(math_ops.log(diag), reduction_indices=[-1])
  abs_det = math_ops.abs(self.determinant())
  return math_ops.log(abs_det)
def testRectangularBatch(self):
  with self.session(use_gpu=True):
    v_batch = np.array([[1.0, 2.0], [4.0, 5.0]])
    mat_batch = np.array([[[1.0, 0.0, 0.0], [0.0, 2.0, 0.0]],
                          [[4.0, 0.0, 0.0], [0.0, 5.0, 0.0]]])
    self.assertEqual(mat_batch.shape, (2, 2, 3))
    mat_batch_diag = array_ops.matrix_diag_part(mat_batch)
    self.assertEqual((2, 2), mat_batch_diag.get_shape())
    self.assertAllEqual(mat_batch_diag.eval(), v_batch)
def __init__(self,
             df,
             scale,
             cholesky_input_output_matrices=False,
             validate_args=False,
             allow_nan_stats=True,
             name="WishartCholesky"):
  """Construct Wishart distributions.

  Args:
    df: `float` or `double` `Tensor`. Degrees of freedom, must be greater than
      or equal to dimension of the scale matrix.
    scale: `float` or `double` `Tensor`. The Cholesky factorization of the
      symmetric positive definite scale matrix of the distribution.
    cholesky_input_output_matrices: Python `bool`. Any function whose input or
      output is a matrix assumes the input is Cholesky and returns a Cholesky
      factored matrix. For example, `log_prob` takes a Cholesky input and
      `sample_n` returns a Cholesky when
      `cholesky_input_output_matrices=True`.
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or more
      of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.
  """
  parameters = dict(locals())
  with ops.name_scope(name, values=[scale]) as name:
    with ops.name_scope("init", values=[scale]):
      scale = ops.convert_to_tensor(scale)
      if validate_args:
        scale = control_flow_ops.with_dependencies([
            check_ops.assert_positive(
                array_ops.matrix_diag_part(scale),
                message="scale must be positive definite"),
            check_ops.assert_equal(
                array_ops.shape(scale)[-1],
                array_ops.shape(scale)[-2],
                message="scale must be square"),
        ] if validate_args else [], scale)

    super(WishartCholesky, self).__init__(
        df=df,
        scale_operator=linalg.LinearOperatorLowerTriangular(
            tril=scale,
            is_non_singular=True,
            is_positive_definite=True,
            is_square=True),
        cholesky_input_output_matrices=cholesky_input_output_matrices,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats,
        name=name)
  self._parameters = parameters
def mvn_tril_log_prob(loc, scale_tril, x):
  """Computes the MVN log pdf under tril scale. Doesn't handle batches."""
  x0 = x - loc
  z = linalg_ops.matrix_triangular_solve(
      scale_tril, x0[..., array_ops.newaxis])[..., 0]
  log_det_cov = 2. * math_ops.reduce_sum(
      math_ops.log(array_ops.matrix_diag_part(scale_tril)), axis=-1)
  d = math_ops.cast(array_ops.shape(scale_tril)[-1], log_det_cov.dtype)
  return -0.5 * (math_ops.reduce_sum(math_ops.square(z), axis=-1) +
                 d * np.log(2. * np.pi) + log_det_cov)
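# A small NumPy/SciPy cross-check (added here for illustration; not part of
# the original test code) of the same formula: with covariance L @ L.T, the
# log pdf computed from the triangular solve and the Cholesky diagonal matches
# scipy.stats.multivariate_normal.
import numpy as np
from scipy.linalg import solve_triangular
from scipy.stats import multivariate_normal

rng = np.random.default_rng(0)
d = 3
loc = rng.standard_normal(d)
scale_tril = np.tril(rng.standard_normal((d, d)))
scale_tril[np.diag_indices(d)] = np.abs(scale_tril[np.diag_indices(d)]) + 0.5
x = rng.standard_normal(d)

z = solve_triangular(scale_tril, x - loc, lower=True)
log_det_cov = 2. * np.sum(np.log(np.diag(scale_tril)))
log_prob = -0.5 * (z @ z + d * np.log(2. * np.pi) + log_det_cov)

ref = multivariate_normal(mean=loc, cov=scale_tril @ scale_tril.T).logpdf(x)
assert np.isclose(log_prob, ref)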
def _log_abs_determinant(self):
  if self._is_spd:
    diag = array_ops.matrix_diag_part(self._chol)
    return 2 * math_ops.reduce_sum(math_ops.log(diag), reduction_indices=[-1])
  if self.dtype.is_complex:
    abs_det = math_ops.complex_abs(self.determinant())
  else:
    abs_det = math_ops.abs(self.determinant())
  return math_ops.log(abs_det)
def _testSquareBatch(self, dtype):
  with self.cached_session(use_gpu=True):
    v_batch = np.array([[1.0, 0.0, 3.0], [4.0, 5.0, 6.0]]).astype(dtype)
    mat_batch = np.array([[[1.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 3.0]],
                          [[4.0, 0.0, 0.0], [0.0, 5.0, 0.0],
                           [0.0, 0.0, 6.0]]]).astype(dtype)
    self.assertEqual(mat_batch.shape, (2, 3, 3))
    mat_batch_diag = array_ops.matrix_diag_part(mat_batch)
    self.assertEqual((2, 3), mat_batch_diag.get_shape())
    self.assertAllEqual(mat_batch_diag.eval(), v_batch)
def testGrad(self):
  shapes = ((3, 3), (2, 3), (3, 2), (5, 3, 3))
  with self.session(use_gpu=True):
    for shape in shapes:
      x = constant_op.constant(np.random.rand(*shape), dtype=np.float32)
      y = array_ops.matrix_diag_part(x)
      error = gradient_checker.compute_gradient_error(
          x, x.get_shape().as_list(), y, y.get_shape().as_list())
      self.assertLess(error, 1e-4)
def __init__(self,
             tril,
             is_non_singular=None,
             is_self_adjoint=None,
             is_positive_definite=None,
             is_square=None,
             name="LinearOperatorLowerTriangular"):
  r"""Initialize a `LinearOperatorLowerTriangular`.

  Args:
    tril: Shape `[B1,...,Bb, N, N]` with `b >= 0`, `N >= 0`.
      The lower triangular part of `tril` defines this operator. The strictly
      upper triangle is ignored. Allowed dtypes: `float16`, `float32`,
      `float64`.
    is_non_singular: Expect that this operator is non-singular.
      This operator is non-singular if and only if its diagonal elements are
      all non-zero.
    is_self_adjoint: Expect that this operator is equal to its hermitian
      transpose. This operator is self-adjoint only if it is diagonal with
      real-valued diagonal entries. In this case it is advised to use
      `LinearOperatorDiag`.
    is_positive_definite: Expect that this operator is positive definite,
      meaning the quadratic form `x^H A x` has positive real part for all
      nonzero `x`. Note that we do not require the operator to be
      self-adjoint to be positive-definite. See:
      https://en.wikipedia.org/wiki/Positive-definite_matrix\
          #Extension_for_non_symmetric_matrices
    is_square: Expect that this operator acts like square [batch] matrices.
    name: A name for this `LinearOperator`.

  Raises:
    TypeError: If `diag.dtype` is not an allowed type.
    ValueError: If `is_square` is `False`.
  """
  if is_square is False:
    raise ValueError(
        "Only square lower triangular operators supported at this time.")
  is_square = True

  with ops.name_scope(name, values=[tril]):
    self._tril = ops.convert_to_tensor(tril, name="tril")
    self._check_tril(self._tril)
    self._tril = array_ops.matrix_band_part(tril, -1, 0)
    self._diag = array_ops.matrix_diag_part(self._tril)

    super(LinearOperatorLowerTriangular, self).__init__(
        dtype=self._tril.dtype,
        graph_parents=[self._tril],
        is_non_singular=is_non_singular,
        is_self_adjoint=is_self_adjoint,
        is_positive_definite=is_positive_definite,
        is_square=is_square,
        name=name)
def _preprocess_tril(self, identity_multiplier, diag, tril, event_ndims):
  """Helper to preprocess a lower triangular matrix."""
  tril = array_ops.matrix_band_part(tril, -1, 0)  # Zero out TriU.
  if identity_multiplier is None and diag is None:
    return self._process_matrix(tril, min_rank=2, event_ndims=event_ndims)
  new_diag = array_ops.matrix_diag_part(tril)
  if identity_multiplier is not None:
    new_diag += identity_multiplier
  if diag is not None:
    new_diag += diag
  tril = array_ops.matrix_set_diag(tril, new_diag)
  return self._process_matrix(tril, min_rank=2, event_ndims=event_ndims)
def __init__(self,
             tril,
             is_non_singular=None,
             is_self_adjoint=None,
             is_positive_definite=None,
             name="LinearOperatorTriL"):
  """Initialize a `LinearOperatorTriL`.

  Args:
    tril: Shape `[B1,...,Bb, N, N]` with `b >= 0`, `N >= 0`.
      The lower triangular part of `tril` defines this operator. The strictly
      upper triangle is ignored. Allowed dtypes: `float32`, `float64`.
    is_non_singular: Expect that this operator is non-singular.
      This operator is non-singular if and only if its diagonal elements are
      all non-zero.
    is_self_adjoint: Expect that this operator is equal to its hermitian
      transpose. This operator is self-adjoint only if it is diagonal with
      real-valued diagonal entries. In this case it is advised to use
      `LinearOperatorDiag`.
    is_positive_definite: Expect that this operator is positive definite,
      meaning the real part of all eigenvalues is positive. We do not require
      the operator to be self-adjoint to be positive-definite. See:
      https://en.wikipedia.org/wiki/Positive-definite_matrix
          #Extension_for_non_symmetric_matrices
    name: A name for this `LinearOperator`.

  Raises:
    TypeError: If `diag.dtype` is not an allowed type.
  """
  # TODO(langmore) Add complex types once matrix_triangular_solve works for
  # them.
  allowed_dtypes = [dtypes.float32, dtypes.float64]

  with ops.name_scope(name, values=[tril]):
    self._tril = array_ops.matrix_band_part(tril, -1, 0)
    self._diag = array_ops.matrix_diag_part(self._tril)
    dtype = self._tril.dtype
    if dtype not in allowed_dtypes:
      raise TypeError(
          "Argument tril must have dtype in %s. Found: %s"
          % (allowed_dtypes, dtype))

    super(LinearOperatorTriL, self).__init__(
        dtype=self._tril.dtype,
        graph_parents=[self._tril],
        is_non_singular=is_non_singular,
        is_self_adjoint=is_self_adjoint,
        is_positive_definite=is_positive_definite,
        name=name)
def _MatrixSetDiagGrad(op, grad):
  diag_shape = op.inputs[1].get_shape()
  diag_shape = diag_shape.merge_with(op.inputs[0].get_shape()[:-1])
  diag_shape = diag_shape.merge_with(grad.get_shape()[:-1])
  if diag_shape.is_fully_defined():
    diag_shape = diag_shape.as_list()
  else:
    diag_shape = array_ops.shape(grad)
    diag_shape = array_ops.slice(diag_shape, [0], [array_ops.rank(grad) - 1])
  grad_input = array_ops.matrix_set_diag(
      grad, array_ops.zeros(diag_shape, dtype=grad.dtype))
  grad_diag = array_ops.matrix_diag_part(grad)
  return (grad_input, grad_diag)
def tridiagonal_solve(diagonals,
                      rhs,
                      diagonals_format='compact',
                      transpose_rhs=False,
                      conjugate_rhs=False,
                      name=None,
                      partial_pivoting=True):
  r"""Solves tridiagonal systems of equations.

  The input can be supplied in various formats: `matrix`, `sequence` and
  `compact`, specified by the `diagonals_format` arg.

  In `matrix` format, `diagonals` must be a tensor of shape `[..., M, M]`, with
  two inner-most dimensions representing the square tridiagonal matrices.
  Elements outside of the three diagonals will be ignored.

  In `sequence` format, `diagonals` are supplied as a tuple or list of three
  tensors of shapes `[..., N]`, `[..., M]`, `[..., N]` representing
  superdiagonals, diagonals, and subdiagonals, respectively. `N` can be either
  `M-1` or `M`; in the latter case, the last element of superdiagonal and the
  first element of subdiagonal will be ignored.

  In `compact` format the three diagonals are brought together into one tensor
  of shape `[..., 3, M]`, with last two dimensions containing superdiagonals,
  diagonals, and subdiagonals, in order. Similarly to `sequence` format,
  elements `diagonals[..., 0, M-1]` and `diagonals[..., 2, 0]` are ignored.

  The `compact` format is recommended as the one with best performance. In case
  you need to cast a tensor into a compact format manually, use `tf.gather_nd`.
  An example for a tensor of shape [m, m]:

  ```python
  rhs = tf.constant([...])
  matrix = tf.constant([[...]])
  m = matrix.shape[0]
  dummy_idx = [0, 0]  # An arbitrary element to use as a dummy
  indices = [[[i, i + 1] for i in range(m - 1)] + [dummy_idx],  # Superdiagonal
             [[i, i] for i in range(m)],                        # Diagonal
             [dummy_idx] + [[i + 1, i] for i in range(m - 1)]]  # Subdiagonal
  diagonals = tf.gather_nd(matrix, indices)
  x = tf.linalg.tridiagonal_solve(diagonals, rhs)
  ```

  Regardless of the `diagonals_format`, `rhs` is a tensor of shape `[..., M]`
  or `[..., M, K]`. The latter allows simultaneously solving K systems with the
  same left-hand sides and K different right-hand sides. If `transpose_rhs` is
  set to `True` the expected shape is `[..., M]` or `[..., K, M]`.

  The batch dimensions, denoted as `...`, must be the same in `diagonals` and
  `rhs`.

  The output is a tensor of the same shape as `rhs`: either `[..., M]` or
  `[..., M, K]`.

  The op isn't guaranteed to raise an error if the input matrix is not
  invertible. `tf.debugging.check_numerics` can be applied to the output to
  detect invertibility problems.

  **Note**: with large batch sizes, the computation on the GPU may be slow, if
  either `partial_pivoting=True` or there are multiple right-hand sides
  (`K > 1`). If this issue arises, consider if it's possible to disable
  pivoting and have `K = 1`, or, alternatively, consider using CPU.

  On CPU, solution is computed via Gaussian elimination with or without partial
  pivoting, depending on the `partial_pivoting` parameter. On GPU, Nvidia's
  cuSPARSE library is used:
  https://docs.nvidia.com/cuda/cusparse/index.html#gtsv

  Args:
    diagonals: A `Tensor` or tuple of `Tensor`s describing left-hand sides. The
      shape depends on `diagonals_format`, see description above. Must be
      `float32`, `float64`, `complex64`, or `complex128`.
    rhs: A `Tensor` of shape [..., M] or [..., M, K] and with the same dtype as
      `diagonals`. Note that if the shape of `rhs` and/or `diags` isn't known
      statically, `rhs` will be treated as a matrix rather than a vector.
    diagonals_format: one of `matrix`, `sequence`, or `compact`. Default is
      `compact`.
    transpose_rhs: If `True`, `rhs` is transposed before solving (has no effect
      if the shape of rhs is [..., M]).
    conjugate_rhs: If `True`, `rhs` is conjugated before solving.
    name: A name to give this `Op` (optional).
    partial_pivoting: whether to perform partial pivoting. `True` by default.
      Partial pivoting makes the procedure more stable, but slower. Partial
      pivoting is unnecessary in some cases, including diagonally dominant and
      symmetric positive definite matrices (see e.g. theorem 9.12 in [1]).

  Returns:
    A `Tensor` of shape [..., M] or [..., M, K] containing the solutions.

  Raises:
    ValueError: An unsupported type is provided as input, or when the input
      tensors have incorrect shapes.
    UnimplementedError: Whenever `partial_pivoting` is true and the backend is
      XLA.

  [1] Nicholas J. Higham (2002). Accuracy and Stability of Numerical
      Algorithms: Second Edition. SIAM. p. 175. ISBN 978-0-89871-802-7.
  """
  if diagonals_format == 'compact':
    return _tridiagonal_solve_compact_format(diagonals, rhs, transpose_rhs,
                                             conjugate_rhs, partial_pivoting,
                                             name)

  if diagonals_format == 'sequence':
    if not isinstance(diagonals, (tuple, list)) or len(diagonals) != 3:
      raise ValueError('Expected diagonals to be a sequence of length 3.')

    superdiag, maindiag, subdiag = diagonals
    if (not subdiag.shape[:-1].is_compatible_with(maindiag.shape[:-1]) or
        not superdiag.shape[:-1].is_compatible_with(maindiag.shape[:-1])):
      raise ValueError(
          'Tensors representing the three diagonals must have the same shape,'
          'except for the last dimension, got {}, {}, {}'.format(
              subdiag.shape, maindiag.shape, superdiag.shape))

    m = tensor_shape.dimension_value(maindiag.shape[-1])

    def pad_if_necessary(t, name, last_dim_padding):
      n = tensor_shape.dimension_value(t.shape[-1])
      if not n or n == m:
        return t
      if n == m - 1:
        paddings = ([[0, 0] for _ in range(len(t.shape) - 1)] +
                    [last_dim_padding])
        return array_ops.pad(t, paddings)
      raise ValueError('Expected {} to have length {} or {}, got {}.'.format(
          name, m, m - 1, n))

    subdiag = pad_if_necessary(subdiag, 'subdiagonal', [1, 0])
    superdiag = pad_if_necessary(superdiag, 'superdiagonal', [0, 1])

    diagonals = array_ops.stack((superdiag, maindiag, subdiag), axis=-2)
    return _tridiagonal_solve_compact_format(diagonals, rhs, transpose_rhs,
                                             conjugate_rhs, partial_pivoting,
                                             name)

  if diagonals_format == 'matrix':
    m1 = tensor_shape.dimension_value(diagonals.shape[-1])
    m2 = tensor_shape.dimension_value(diagonals.shape[-2])
    if m1 and m2 and m1 != m2:
      raise ValueError(
          'Expected last two dimensions of diagonals to be same, got {} and {}'
          .format(m1, m2))
    m = m1 or m2
    diagonals = array_ops.matrix_diag_part(
        diagonals, k=(-1, 1), padding_value=0., align='LEFT_RIGHT')
    return _tridiagonal_solve_compact_format(diagonals, rhs, transpose_rhs,
                                             conjugate_rhs, partial_pivoting,
                                             name)

  raise ValueError(
      'Unrecognized diagonals_format: {}'.format(diagonals_format))
def loop_fn(i):
  return array_ops.matrix_diag_part(array_ops.gather(x, i))
def testInvalidShapeAtEval(self):
  with self.session(use_gpu=True):
    v = array_ops.placeholder(dtype=dtypes_lib.float32)
    with self.assertRaisesOpError("input must be at least 2-dim"):
      array_ops.matrix_diag_part(v).eval(feed_dict={v: 0.0})
def relu(x, alpha=0.0, max_value=None, threshold=0.0, mode="diag"):
  """Rectified Linear Unit.

  Assumed Density Filtering (ADF) version of the Keras `relu` activation.

  Parameters
  ----------
  x : list or tuple
      Input tensors (means and covariances).
  alpha: float, optional
      Slope of negative section. Default is ``0.0``.
      Currently no value other than the default is supported for ADF.
  max_value: float, optional
      Saturation threshold. Default is `None`.
      Currently no value other than the default is supported for ADF.
  threshold: float, optional
      Threshold value for thresholded activation. Default is ``0.0``.
      Currently no value other than the default is supported for ADF.
  mode: {"diag", "diagonal", "lowrank", "half", "full"}
      Covariance computation mode. Default is "diag".

  Returns
  -------
  list
      List of transformed means and covariances, according to the ReLU
      activation: ``max(x, 0)``.
  """
  if not alpha == 0.0:
    raise NotImplementedError(
        "The relu activation function with alpha other than 0.0 has "
        "not been implemented for ADF layers yet."
    )
  if max_value is not None:
    raise NotImplementedError(
        "The relu activation function with max_value other than `None` "
        "has not been implemented for ADF layers yet."
    )
  if not threshold == 0.0:
    raise NotImplementedError(
        "The relu activation function with threshold other than 0.0 has "
        "not been implemented for ADF layers yet."
    )
  # Expect exactly two inputs: means and covariances.
  if not isinstance(x, (list, tuple)) or len(x) != 2:
    raise ValueError(
        "The relu activation function expects a list of "
        "exactly two input tensors, but got: %s" % x
    )
  means, covariances = x
  means_shape = means.get_shape().as_list()
  means_rank = len(means_shape)
  cov_shape = covariances.get_shape().as_list()
  cov_rank = len(cov_shape)
  EPS = K.cast(K.epsilon(), covariances.dtype)
  # Treat inputs according to rank and mode.
  if means_rank == 1:
    # If rank(mean)=1, treat as single vector, no reshapes necessary.
    pass
  elif means_rank == 2:
    # If rank(mean)=2, treat as batch of vectors, no reshapes necessary.
    pass
  else:
    # If rank(mean)=2+n, treat as batch of rank=n tensors + channels.
    means = K.reshape(means, [-1] + [K.prod(means_shape[1:])])
    if mode == "diag":
      covariances = K.reshape(covariances, [-1] + [K.prod(cov_shape[1:])])
    elif mode == "half":
      covariances = K.reshape(
          covariances, [-1] + [cov_shape[1]] + [K.prod(cov_shape[2:])]
      )
    elif mode == "full":
      covariances = K.reshape(
          covariances,
          [-1]
          + [K.prod(cov_shape[1 : (cov_rank - 1) // 2 + 1])]
          + [K.prod(cov_shape[(cov_rank - 1) // 2 + 1 :])],
      )
  if mode == "diag":
    covariances = covariances + EPS
    std = K.sqrt(covariances)
    div = means / std
    gd_div = _gauss_density(div)
    gc_div = _gauss_cumulative(div)
    new_means = K.maximum(
        means,
        K.maximum(K.zeros_like(means), means * gc_div + std * gd_div),
    )
    new_covariances = (
        K.square(means) * gc_div
        + covariances * gc_div
        + means * std * gd_div
        - K.square(new_means)
    )
    new_covariances = K.maximum(
        K.zeros_like(new_covariances), new_covariances
    )
  elif mode == "half":
    variances = K.sum(K.square(covariances), axis=1) + EPS
    std = K.sqrt(variances)
    div = means / std
    gd_div = _gauss_density(div)
    gc_div = _gauss_cumulative(div)
    new_means = K.maximum(
        means,
        K.maximum(K.zeros_like(means), means * gc_div + std * gd_div),
    )
    gc_div = K.expand_dims(gc_div, 1)
    new_covariances = covariances * gc_div
  elif mode == "full":
    variances = array_ops.matrix_diag_part(covariances) + EPS
    std = K.sqrt(variances)
    div = means / std
    gd_div = _gauss_density(div)
    gc_div = _gauss_cumulative(div)
    new_means = K.maximum(
        means,
        K.maximum(K.zeros_like(means), means * gc_div + std * gd_div),
    )
    gc_div = K.expand_dims(gc_div, 1)
    new_covariances = covariances * gc_div
    new_covariances = K.permute_dimensions(new_covariances, [0, 2, 1])
    new_covariances = new_covariances * gc_div
    new_covariances = K.permute_dimensions(new_covariances, [0, 2, 1])
  # Undo reshapes if necessary.
  new_means = K.reshape(new_means, [-1] + means_shape[1:])
  new_covariances = K.reshape(new_covariances, [-1] + cov_shape[1:])
  return [new_means, new_covariances]
def _forward(self, x):
  diag = self._diag_bijector.forward(array_ops.matrix_diag_part(x))
  return array_ops.matrix_set_diag(x, diag)
def _add_to_tensor(self, mat):
  mat_diag = array_ops.matrix_diag_part(mat)
  new_diag = math_ops.square(self._diag) + mat_diag
  return array_ops.matrix_set_diag(mat, new_diag)
def _get_diag(self):
  """Gets the diagonal part of `tril` kwarg."""
  return array_ops.matrix_diag_part(self._tril)
def _mean_of_covariance_given_quadrature_component(self, diag_only):
  p = self.mixture_distribution.probs

  # To compute E[Cov(Z|V)], we'll add matrices within three categories:
  # scaled-identity, diagonal, and full. Then we'll combine these at the end.
  scale_identity_multiplier = None
  diag = None
  full = None

  for k, aff in enumerate(self.interpolated_affine):
    s = aff.scale  # Just in case aff.scale has side-effects, we'll call once.
    if (s is None or
        isinstance(s, linop_identity_lib.LinearOperatorIdentity)):
      scale_identity_multiplier = add(scale_identity_multiplier,
                                      p[..., k, array_ops.newaxis])
    elif isinstance(s, linop_identity_lib.LinearOperatorScaledIdentity):
      scale_identity_multiplier = add(
          scale_identity_multiplier,
          (p[..., k, array_ops.newaxis] * math_ops.square(s.multiplier)))
    elif isinstance(s, linop_diag_lib.LinearOperatorDiag):
      diag = add(diag, (p[..., k, array_ops.newaxis] *
                        math_ops.square(s.diag_part())))
    else:
      x = (p[..., k, array_ops.newaxis, array_ops.newaxis] *
           s.matmul(s.to_dense(), adjoint_arg=True))
      if diag_only:
        x = array_ops.matrix_diag_part(x)
      full = add(full, x)

  # We must now account for the fact that the base distribution might have a
  # non-unity variance. Recall that, since X ~ iid Law(X_0),
  #   `Cov(SX+m) = S Cov(X) S.T = S S.T Diag(Var(X_0))`.
  # We can scale by `Var(X)` (vs `Cov(X)`) since X corresponds to `d` iid
  # samples from a scalar-event distribution.
  v = self.distribution.variance()
  if scale_identity_multiplier is not None:
    scale_identity_multiplier *= v
  if diag is not None:
    diag *= v[..., array_ops.newaxis]
  if full is not None:
    full *= v[..., array_ops.newaxis]

  if diag_only:
    # Apparently we don't need the full matrix, just the diagonal.
    r = add(diag, full)
    if r is None and scale_identity_multiplier is not None:
      ones = array_ops.ones(self.event_shape_tensor(), dtype=self.dtype)
      return scale_identity_multiplier[..., array_ops.newaxis] * ones
    return add(r, scale_identity_multiplier)

  # `None` indicates we don't know if the result is positive-definite.
  is_positive_definite = (True if all(aff.scale.is_positive_definite
                                      for aff in self.endpoint_affine)
                          else None)

  to_add = []
  if diag is not None:
    to_add.append(linop_diag_lib.LinearOperatorDiag(
        diag=diag,
        is_positive_definite=is_positive_definite))
  if full is not None:
    to_add.append(linop_full_lib.LinearOperatorFullMatrix(
        matrix=full,
        is_positive_definite=is_positive_definite))
  if scale_identity_multiplier is not None:
    to_add.append(linop_identity_lib.LinearOperatorScaledIdentity(
        num_rows=self.event_shape_tensor()[0],
        multiplier=scale_identity_multiplier,
        is_positive_definite=is_positive_definite))

  return (linop_add_lib.add_operators(to_add)[0].to_dense()
          if to_add else None)
def _add_to_tensor(self, mat):
  # Add to a tensor in O(k) time!
  mat_diag = array_ops.matrix_diag_part(mat)
  new_diag = self._scale + mat_diag
  return array_ops.matrix_set_diag(mat, new_diag)
def _diag_part(self):
  """Generic and often inefficient implementation. Override often."""
  return array_ops.matrix_diag_part(self.to_dense())
def make_tril_scale(loc=None,
                    scale_tril=None,
                    scale_diag=None,
                    scale_identity_multiplier=None,
                    shape_hint=None,
                    validate_args=False,
                    assert_positive=False,
                    name=None):
  """Creates a LinOp representing a lower triangular matrix.

  Args:
    loc: Floating-point `Tensor`. This is used for inferring shape in the case
      where only `scale_identity_multiplier` is set.
    scale_tril: Floating-point `Tensor` representing a lower triangular
      matrix. `scale_tril` has shape [N1, N2, ... k, k], which represents a
      k x k lower triangular matrix. When `None` no `scale_tril` term is added
      to the LinOp. The upper triangular elements above the diagonal are
      ignored.
    scale_diag: Floating-point `Tensor` representing a diagonal matrix.
      `scale_diag` has shape [N1, N2, ... k], which represents a k x k
      diagonal matrix. When `None` no diagonal term is added to the LinOp.
    scale_identity_multiplier: floating point rank 0 `Tensor` representing a
      scaling done to the identity matrix. When
      `scale_identity_multiplier = scale_diag = scale_tril = None` then
      `scale += IdentityMatrix`. Otherwise no scaled-identity-matrix is added
      to `scale`.
    shape_hint: scalar integer `Tensor` representing a hint at the dimension
      of the identity matrix when only `scale_identity_multiplier` is set.
    validate_args: Python `bool` indicating whether arguments should be
      checked for correctness.
    assert_positive: Python `bool` indicating whether LinOp should be checked
      for being positive definite.
    name: Python `str` name given to ops managed by this object.

  Returns:
    `LinearOperator` representing a lower triangular matrix.

  Raises:
    ValueError: If only `scale_identity_multiplier` is set and `loc` and
      `shape_hint` are both None.
  """

  def _maybe_attach_assertion(x):
    if not validate_args:
      return x
    if assert_positive:
      return control_flow_ops.with_dependencies([
          check_ops.assert_positive(
              array_ops.matrix_diag_part(x),
              message="diagonal part must be positive"),
      ], x)
    return control_flow_ops.with_dependencies([
        check_ops.assert_none_equal(
            array_ops.matrix_diag_part(x),
            array_ops.zeros([], x.dtype),
            message="diagonal part must be non-zero"),
    ], x)

  with ops.name_scope(name, "make_tril_scale",
                      values=[loc, scale_diag, scale_identity_multiplier]):
    loc = _convert_to_tensor(loc, name="loc")
    scale_tril = _convert_to_tensor(scale_tril, name="scale_tril")
    scale_diag = _convert_to_tensor(scale_diag, name="scale_diag")
    scale_identity_multiplier = _convert_to_tensor(
        scale_identity_multiplier, name="scale_identity_multiplier")

    if scale_tril is not None:
      scale_tril = array_ops.matrix_band_part(scale_tril, -1, 0)  # Zero out TriU.
      tril_diag = array_ops.matrix_diag_part(scale_tril)
      if scale_diag is not None:
        tril_diag += scale_diag
      if scale_identity_multiplier is not None:
        tril_diag += scale_identity_multiplier[..., array_ops.newaxis]
      scale_tril = array_ops.matrix_set_diag(scale_tril, tril_diag)
      return linalg.LinearOperatorLowerTriangular(
          tril=_maybe_attach_assertion(scale_tril),
          is_non_singular=True,
          is_self_adjoint=False,
          is_positive_definite=assert_positive)

    return make_diag_scale(
        loc=loc,
        scale_diag=scale_diag,
        scale_identity_multiplier=scale_identity_multiplier,
        shape_hint=shape_hint,
        validate_args=validate_args,
        assert_positive=assert_positive,
        name=name)
def _batch_sqrt_log_det(self):
  # Here we compute the Cholesky factor of "C", then pass the result on.
  abs_diag_chol_c = math_ops.abs(
      array_ops.matrix_diag_part(self._chol_capacitance(batch_mode=True)))
  return self._sqrt_log_det_core(abs_diag_chol_c)
def _log_prob(self, x):
  if self.cholesky_input_output_matrices:
    x_sqrt = x
  else:
    # Complexity: O(nbk^3)
    x_sqrt = linalg_ops.cholesky(x)

  batch_shape = self.batch_shape_tensor()
  event_shape = self.event_shape_tensor()
  ndims = array_ops.rank(x_sqrt)
  # sample_ndims = ndims - batch_ndims - event_ndims
  sample_ndims = ndims - array_ops.shape(batch_shape)[0] - 2
  sample_shape = array_ops.strided_slice(
      array_ops.shape(x_sqrt), [0], [sample_ndims])

  # We need to be able to pre-multiply each matrix by its corresponding
  # batch scale matrix. Since a Distribution Tensor supports multiple
  # samples per batch, this means we need to reshape the input matrix `x`
  # so that the first b dimensions are batch dimensions and the last two
  # are of shape [dimension, dimensions*number_of_samples]. Doing these
  # gymnastics allows us to do a batch_solve.
  #
  # After we're done with sqrt_solve (the batch operation) we need to undo
  # this reshaping so what we're left with is a Tensor partitionable by
  # sample, batch, event dimensions.

  # Complexity: O(nbk**2) since transpose must access every element.
  scale_sqrt_inv_x_sqrt = x_sqrt
  perm = array_ops.concat([
      math_ops.range(sample_ndims, ndims),
      math_ops.range(0, sample_ndims)
  ], 0)
  scale_sqrt_inv_x_sqrt = array_ops.transpose(scale_sqrt_inv_x_sqrt, perm)
  shape = array_ops.concat(
      (batch_shape,
       (math_ops.cast(self.dimension, dtype=dtypes.int32), -1)), 0)
  scale_sqrt_inv_x_sqrt = array_ops.reshape(scale_sqrt_inv_x_sqrt, shape)

  # Complexity: O(nbM*k) where M is the complexity of the operator solving
  # a vector system. E.g., for LinearOperatorDiag, each solve is O(k), so
  # this complexity is O(nbk**2). For LinearOperatorLowerTriangular,
  # each solve is O(k**2) so this step has complexity O(nbk^3).
  scale_sqrt_inv_x_sqrt = self.scale_operator.solve(scale_sqrt_inv_x_sqrt)

  # Undo make batch-op ready.
  # Complexity: O(nbk**2)
  shape = array_ops.concat([batch_shape, event_shape, sample_shape], 0)
  scale_sqrt_inv_x_sqrt = array_ops.reshape(scale_sqrt_inv_x_sqrt, shape)
  perm = array_ops.concat([
      math_ops.range(ndims - sample_ndims, ndims),
      math_ops.range(0, ndims - sample_ndims)
  ], 0)
  scale_sqrt_inv_x_sqrt = array_ops.transpose(scale_sqrt_inv_x_sqrt, perm)

  # Write V = SS', X = LL'. Then:
  # tr[inv(V) X] = tr[inv(S)' inv(S) L L']
  #              = tr[inv(S) L L' inv(S)']
  #              = tr[(inv(S) L) (inv(S) L)']
  #              = sum_{ik} (inv(S) L)_{ik}**2
  # The second equality follows from the cyclic permutation property.
  # Complexity: O(nbk**2)
  trace_scale_inv_x = math_ops.reduce_sum(
      math_ops.square(scale_sqrt_inv_x_sqrt), axis=[-2, -1])

  # Complexity: O(nbk)
  half_log_det_x = math_ops.reduce_sum(
      math_ops.log(array_ops.matrix_diag_part(x_sqrt)), axis=[-1])

  # Complexity: O(nbk**2)
  log_prob = ((self.df - self.dimension - 1.) * half_log_det_x -
              0.5 * trace_scale_inv_x -
              self.log_normalization())

  # Set shape hints.
  # Try to merge what we know from the input then what we know from the
  # parameters of this distribution.
  if x.get_shape().ndims is not None:
    log_prob.set_shape(x.get_shape()[:-2])
  if (log_prob.get_shape().ndims is not None and
      self.batch_shape.ndims is not None and
      self.batch_shape.ndims > 0):
    log_prob.get_shape()[-self.batch_shape.ndims:].merge_with(
        self.batch_shape)

  return log_prob
def _MatrixDiagGrad(_, grad):
  return array_ops.matrix_diag_part(grad)
def loop_fn(i):
  input = array_ops.gather(x, i)  # pylint: disable=redefined-builtin
  return array_ops.matrix_diag_part(
      input, k=(-2, 0), padding_value=3, align="RIGHT_LEFT")
def _MatrixDiagV3Grad(op, grad):
  return array_ops.matrix_diag_part(
      grad, k=op.inputs[1], align=op.get_attr("align")), None, None, None, None
def _forward_log_det_jacobian(self, x):
  # Let Y be a symmetric, positive definite matrix and write:
  #   Y = X X.T
  # where X is lower-triangular.
  #
  # Observe that,
  #   dY[i,j]/dX[a,b]
  #   = d/dX[a,b] { X[i,:] X[j,:] }
  #   = sum_{d=1}^p { I[i=a] I[d=b] X[j,d] + I[j=a] I[d=b] X[i,d] }
  #
  # To compute the Jacobian dX/dY we must represent X,Y as vectors. Since Y is
  # symmetric and X is lower-triangular, we need vectors of dimension:
  #   d = p (p + 1) / 2
  # where X, Y are p x p matrices, p > 0. We use a row-major mapping, i.e.,
  #   k = { i (i + 1) / 2 + j   i>=j
  #       { undef               i<j
  # and assume zero-based indexes. When k is undef, the element is dropped.
  # Example:
  #           j      k
  #        0 1 2 3  /
  #    0 [ 0 . . . ]
  # i  1 [ 1 2 . . ]
  #    2 [ 3 4 5 . ]
  #    3 [ 6 7 8 9 ]
  # Write vec[.] to indicate transforming a matrix to vector via k(i,j). (With
  # slight abuse: k(i,j)=undef means the element is dropped.)
  #
  # We now show d vec[Y] / d vec[X] is lower triangular. Assuming both are
  # defined, observe that k(i,j) < k(a,b) iff (1) i<a or (2) i=a and j<b.
  # In both cases dvec[Y]/dvec[X]@[k(i,j),k(a,b)] = 0 since:
  # (1) j<=i<a thus i,j!=a.
  # (2) i=a>j  thus i,j!=a.
  #
  # Since the Jacobian is lower-triangular, we need only compute the product
  # of diagonal elements:
  #   d vec[Y] / d vec[X] @[k(i,j), k(i,j)]
  #   = X[j,j] + I[i=j] X[i,j]
  #   = 2 X[j,j].
  # Since there is a 2 X[j,j] term for every lower-triangular element of X we
  # conclude:
  #   |Jac(d vec[Y]/d vec[X])| = 2^p prod_{j=0}^{p-1} X[j,j]^{p-j}.
  diag = array_ops.matrix_diag_part(x)

  # We now ensure diag is columnar. Eg, if `diag = [1, 2, 3]` then the output
  # is `[[1], [2], [3]]` and if `diag = [[1, 2, 3], [4, 5, 6]]` then the
  # output is unchanged.
  diag = self._make_columnar(diag)

  if self.validate_args:
    is_matrix = check_ops.assert_rank_at_least(
        x, 2, message="Input must be a (batch of) matrix.")
    shape = array_ops.shape(x)
    is_square = check_ops.assert_equal(
        shape[-2], shape[-1],
        message="Input must be a (batch of) square matrix.")
    # Assuming lower-triangular means we only need check diag>0.
    is_positive_definite = check_ops.assert_positive(
        diag, message="Input must be positive definite.")
    x = control_flow_ops.with_dependencies(
        [is_matrix, is_square, is_positive_definite], x)

  # Create a vector equal to: [p, p-1, ..., 2, 1].
  if x.get_shape().ndims is None or x.get_shape()[-1].value is None:
    p_int = array_ops.shape(x)[-1]
    p_float = math_ops.cast(p_int, dtype=x.dtype)
  else:
    p_int = x.get_shape()[-1].value
    p_float = np.array(p_int, dtype=x.dtype.as_numpy_dtype)
  exponents = math_ops.linspace(p_float, 1., p_int)

  sum_weighted_log_diag = array_ops.squeeze(
      math_ops.matmul(math_ops.log(diag),
                      exponents[..., array_ops.newaxis]),
      axis=-1)
  fldj = p_float * np.log(2.) + sum_weighted_log_diag

  return fldj
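# A standalone NumPy sketch (added for illustration; not part of the original
# bijector code) that checks the closed-form determinant derived in the
# comments above, |Jac| = 2^p * prod_j X[j,j]^(p-j), against a
# finite-difference Jacobian of the map X -> X X.T on row-major
# lower-triangular vectorizations.
import numpy as np

p = 3
rng = np.random.default_rng(0)
X = np.tril(rng.standard_normal((p, p)))
X[np.diag_indices(p)] = np.abs(X[np.diag_indices(p)]) + 0.5  # positive diag

idx = np.tril_indices(p)  # row-major lower-triangular vectorization k(i, j)

def forward(v):
  L = np.zeros((p, p))
  L[idx] = v
  return (L @ L.T)[idx]

v0 = X[idx]
eps = 1e-6
jac = np.empty((v0.size, v0.size))
for col in range(v0.size):
  dv = np.zeros_like(v0)
  dv[col] = eps
  jac[:, col] = (forward(v0 + dv) - forward(v0 - dv)) / (2. * eps)

numeric_fldj = np.log(abs(np.linalg.det(jac)))
closed_form_fldj = p * np.log(2.) + sum(
    (p - j) * np.log(X[j, j]) for j in range(p))
assert np.isclose(numeric_fldj, closed_form_fldj)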
def _inverse_log_det_jacobian(self, y):
  return self._diag_bijector.inverse_log_det_jacobian(
      array_ops.matrix_diag_part(y), event_ndims=1)
def _MatrixDiagV2Grad(op, grad):
  return array_ops.matrix_diag_part(
      grad, k=op.inputs[1]), None, None, None, None
def do_filter(self, estimated_state, estimated_state_covariance,
              predicted_observation, predicted_observation_covariance,
              observation, observation_model, observation_noise):
  """Convenience function for scoring predictions.

  Scores a prediction against an observation, and computes the updated
  posterior over states.

  Shapes given below for arguments are for single-model Kalman filtering
  (e.g. KalmanFilter). For ensembles, prior_state and prior_state_var are
  same-length tuples of values corresponding to each model.

  Args:
    estimated_state: A prior mean over states [batch size x state dimension]
    estimated_state_covariance: Covariance of state prior [batch size x D x
        D], with D depending on the Kalman filter implementation (typically
        the state dimension).
    predicted_observation: A prediction for the observed value, such as that
        returned by observed_from_state. A [batch size x num features] Tensor.
    predicted_observation_covariance: A covariance matrix corresponding to
        `predicted_observation`, a [batch size x num features x num features]
        Tensor.
    observation: The observed value corresponding to the predictions given
        [batch size x observation dimension]
    observation_model: The [batch size x observation dimension x model state
        dimension] Tensor indicating how a particular state is mapped to
        (pre-noise) observations for each part of the batch.
    observation_noise: A [batch size x observation dimension x observation
        dimension] Tensor or [observation dimension x observation dimension]
        Tensor with covariance matrices to use for each part of the batch (a
        two-dimensional input will be broadcast).
  Returns:
    posterior_state, posterior_state_var: Posterior mean and covariance,
        updated versions of prior_state and prior_state_var.
    log_prediction_prob: Log probability of the observations under the priors,
        suitable for optimization (should be maximized).
  """
  symmetrized_observation_covariance = 0.5 * (
      predicted_observation_covariance +
      array_ops.matrix_transpose(predicted_observation_covariance))
  instability_message = (
      "This may occur due to numerically unstable filtering when there is "
      "a large difference in posterior variances, or when inferences are "
      "near-deterministic. Consider tuning the "
      "'filtering_maximum_posterior_variance_ratio' or "
      "'filtering_minimum_posterior_variance' parameters in your "
      "StateSpaceModelConfiguration, or tuning the transition matrix.")
  symmetrized_observation_covariance = numerics.verify_tensor_all_finite(
      symmetrized_observation_covariance,
      "Predicted observation covariance was not finite. {}".format(
          instability_message))
  diag = array_ops.matrix_diag_part(symmetrized_observation_covariance)
  min_diag = math_ops.reduce_min(diag)
  non_negative_assert = control_flow_ops.Assert(
      min_diag >= 0.,
      [("The predicted observation covariance "
        "has a negative diagonal entry. {}").format(instability_message),
       min_diag])
  with ops.control_dependencies([non_negative_assert]):
    observation_covariance_cholesky = linalg_ops.cholesky(
        symmetrized_observation_covariance)
  log_prediction_prob = distributions.MultivariateNormalTriL(
      predicted_observation, observation_covariance_cholesky).log_prob(
          observation)
  (posterior_state,
   posterior_state_var) = self.posterior_from_prior_state(
       prior_state=estimated_state,
       prior_state_var=estimated_state_covariance,
       observation=observation,
       observation_model=observation_model,
       predicted_observations=(predicted_observation,
                               predicted_observation_covariance),
       observation_noise=observation_noise)
  return (posterior_state, posterior_state_var, log_prediction_prob)
def _log_abs_determinant(self):
  if self._is_spd:
    diag = array_ops.matrix_diag_part(self._chol)
    return 2 * math_ops.reduce_sum(math_ops.log(diag), reduction_indices=[-1])
  abs_det = math_ops.abs(self.determinant())
  return math_ops.log(abs_det)
def run_test_sample_consistent_mean_covariance(self,
                                               sess_run_fn,
                                               dist,
                                               num_samples=int(1e5),
                                               seed=24,
                                               rtol=1e-2,
                                               atol=0.,
                                               cov_rtol=None,
                                               cov_atol=None):
  """Tests that sample/mean/covariance are consistent with each other.

  "Consistency" means that `sample`, `mean`, `covariance`, etc all correspond
  to the same distribution.

  Args:
    sess_run_fn: Python `callable` taking `list`-like of `Tensor`s and
      returning a list of results after running one "step" of TensorFlow
      computation, typically set to `sess.run`.
    dist: Distribution instance or object which implements `sample`,
      `log_prob`, `event_shape_tensor` and `batch_shape_tensor`.
    num_samples: Python `int` scalar indicating the number of Monte-Carlo
      samples to draw from `dist`.
    seed: Python `int` indicating the seed to use when sampling from `dist`.
      In general it is not recommended to use `None` during a test as this
      increases the likelihood of spurious test failure.
    rtol: Python `float`-type indicating the admissible relative error between
      analytical and sample statistics.
    atol: Python `float`-type indicating the admissible absolute error between
      analytical and sample statistics.
    cov_rtol: Python `float`-type indicating the admissible relative error
      between analytical and sample covariance. Default: rtol.
    cov_atol: Python `float`-type indicating the admissible absolute error
      between analytical and sample covariance. Default: atol.
  """
  x = dist.sample(num_samples, seed=seed)
  sample_mean = math_ops.reduce_mean(x, axis=0)
  sample_covariance = math_ops.reduce_mean(
      _vec_outer_square(x - sample_mean), axis=0)
  sample_variance = array_ops.matrix_diag_part(sample_covariance)
  sample_stddev = math_ops.sqrt(sample_variance)

  [
      sample_mean_,
      sample_covariance_,
      sample_variance_,
      sample_stddev_,
      mean_,
      covariance_,
      variance_,
      stddev_
  ] = sess_run_fn([
      sample_mean,
      sample_covariance,
      sample_variance,
      sample_stddev,
      dist.mean(),
      dist.covariance(),
      dist.variance(),
      dist.stddev(),
  ])

  self.assertAllClose(mean_, sample_mean_, rtol=rtol, atol=atol)
  self.assertAllClose(covariance_, sample_covariance_,
                      rtol=cov_rtol or rtol,
                      atol=cov_atol or atol)
  self.assertAllClose(variance_, sample_variance_, rtol=rtol, atol=atol)
  self.assertAllClose(stddev_, sample_stddev_, rtol=rtol, atol=atol)
def _inverse(self, y):
  diag = self._diag_bijector.inverse(array_ops.matrix_diag_part(y))
  return array_ops.matrix_set_diag(y, diag)
def tridiagonal_matmul(diagonals, rhs, diagonals_format='compact', name=None):
  r"""Multiplies tridiagonal matrix by matrix.

  `diagonals` is representation of 3-diagonal NxN matrix, which depends on
  `diagonals_format`.

  In `matrix` format, `diagonals` must be a tensor of shape `[..., M, M]`, with
  two inner-most dimensions representing the square tridiagonal matrices.
  Elements outside of the three diagonals will be ignored.

  In `sequence` format, `diagonals` is list or tuple of three tensors:
  `[superdiag, maindiag, subdiag]`, each having shape [..., M]. Last element of
  `superdiag` and first element of `subdiag` are ignored.

  In `compact` format the three diagonals are brought together into one tensor
  of shape `[..., 3, M]`, with last two dimensions containing superdiagonals,
  diagonals, and subdiagonals, in order. Similarly to `sequence` format,
  elements `diagonals[..., 0, M-1]` and `diagonals[..., 2, 0]` are ignored.

  The `sequence` format is recommended as the one with the best performance.

  `rhs` is matrix to the right of multiplication. It has shape `[..., M, N]`.

  Example:

  ```python
  superdiag = tf.constant([-1, -1, 0], dtype=tf.float64)
  maindiag = tf.constant([2, 2, 2], dtype=tf.float64)
  subdiag = tf.constant([0, -1, -1], dtype=tf.float64)
  diagonals = [superdiag, maindiag, subdiag]
  rhs = tf.constant([[1, 1], [1, 1], [1, 1]], dtype=tf.float64)
  x = tf.linalg.tridiagonal_matmul(diagonals, rhs, diagonals_format='sequence')
  ```

  Args:
    diagonals: A `Tensor` or tuple of `Tensor`s describing left-hand sides. The
      shape depends on `diagonals_format`, see description above. Must be
      `float32`, `float64`, `complex64`, or `complex128`.
    rhs: A `Tensor` of shape [..., M, N] and with the same dtype as
      `diagonals`.
    diagonals_format: one of `matrix`, `sequence`, or `compact`. Default is
      `compact`.
    name: A name to give this `Op` (optional).

  Returns:
    A `Tensor` of shape [..., M, N] containing the result of multiplication.

  Raises:
    ValueError: An unsupported type is provided as input, or when the input
      tensors have incorrect shapes.
  """
  if diagonals_format == 'compact':
    superdiag = diagonals[..., 0, :]
    maindiag = diagonals[..., 1, :]
    subdiag = diagonals[..., 2, :]
  elif diagonals_format == 'sequence':
    superdiag, maindiag, subdiag = diagonals
  elif diagonals_format == 'matrix':
    m1 = tensor_shape.dimension_value(diagonals.shape[-1])
    m2 = tensor_shape.dimension_value(diagonals.shape[-2])
    if m1 and m2 and m1 != m2:
      raise ValueError(
          'Expected last two dimensions of diagonals to be same, got {} and {}'
          .format(m1, m2))
    diags = array_ops.matrix_diag_part(
        diagonals, k=(-1, 1), padding_value=0., align='LEFT_RIGHT')
    superdiag = diags[..., 0, :]
    maindiag = diags[..., 1, :]
    subdiag = diags[..., 2, :]
  else:
    raise ValueError('Unrecognized diagonals_format: %s' % diagonals_format)

  # C++ backend requires matrices.
  # Converting 1-dimensional vectors to matrices with 1 row.
  superdiag = array_ops.expand_dims(superdiag, -2)
  maindiag = array_ops.expand_dims(maindiag, -2)
  subdiag = array_ops.expand_dims(subdiag, -2)

  return linalg_ops.tridiagonal_mat_mul(superdiag, maindiag, subdiag, rhs, name)
def testInvalidShape(self):
  with self.assertRaisesRegexp(ValueError, "must be at least rank 2"):
    array_ops.matrix_diag_part(0)
def _add_to_tensor(self, x):
  x_diag = array_ops.matrix_diag_part(x)
  new_diag = self._diag + x_diag
  return array_ops.matrix_set_diag(x, new_diag)