def _makeTridiagonalMatrix(self, superdiag, maindiag, subdiag):
  super_pad = [[0, 0], [0, 1], [1, 0]]
  sub_pad = [[0, 0], [1, 0], [0, 1]]

  super_part = array_ops.pad(array_ops.matrix_diag(superdiag), super_pad)
  main_part = array_ops.matrix_diag(maindiag)
  sub_part = array_ops.pad(array_ops.matrix_diag(subdiag), sub_pad)
  return super_part + main_part + sub_part
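# A minimal standalone sketch (assuming TF2 eager mode; not part of the
# original test) of the padding trick used above. matrix_diag places each
# vector on the main diagonal of its own matrix; padding one extra row/column
# then shifts the super- and sub-diagonal parts off-center by one before the
# three parts are summed. The helper above assumes a leading batch dimension
# (hence the extra [0, 0] in its pad specs); this sketch drops it.
import tensorflow as tf

super_part = tf.pad(tf.linalg.diag([8., 9.]), [[0, 1], [1, 0]])
main_part = tf.linalg.diag([4., 5., 6.])
sub_part = tf.pad(tf.linalg.diag([1., 2.]), [[1, 0], [0, 1]])
print((super_part + main_part + sub_part).numpy())
# [[4. 8. 0.]
#  [1. 5. 9.]
#  [0. 2. 6.]]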
def test_broadcast_matmul_and_solve(self):
  # These cannot be done in the automated (base test class) tests since they
  # test shapes that tf.matmul cannot handle.
  # In particular, tf.matmul does not broadcast.
  with self.test_session() as sess:
    x = random_ops.random_normal(shape=(2, 2, 3, 4))

    # This LinearOperatorDiag will be broadcast to (2, 2, 3, 3) during solve
    # and matmul with 'x' as the argument.
    diag = random_ops.random_uniform(shape=(2, 1, 3))
    operator = linalg.LinearOperatorDiag(diag, is_self_adjoint=True)
    self.assertAllEqual((2, 1, 3, 3), operator.shape)

    # Create a batch matrix with the broadcast shape of operator.
    diag_broadcast = array_ops.concat((diag, diag), 1)
    mat = array_ops.matrix_diag(diag_broadcast)
    self.assertAllEqual((2, 2, 3, 3), mat.get_shape())  # being pedantic.

    operator_matmul = operator.matmul(x)
    mat_matmul = math_ops.matmul(mat, x)
    self.assertAllEqual(operator_matmul.get_shape(), mat_matmul.get_shape())
    self.assertAllClose(*sess.run([operator_matmul, mat_matmul]))

    operator_solve = operator.solve(x)
    mat_solve = linalg_ops.matrix_solve(mat, x)
    self.assertAllEqual(operator_solve.get_shape(), mat_solve.get_shape())
    self.assertAllClose(*sess.run([operator_solve, mat_solve]))
def testSampleWithBroadcastScale(self):
  # mu corresponds to a 2-batch of 3-variate vector exponentials
  mu = np.zeros([2, 3])

  # diag corresponds to no batches of 3-variate vector exponentials
  diag = np.ones([3])

  with self.test_session():
    dist = ds.VectorExponentialDiag(mu, diag, validate_args=True)

    mean = dist.mean()
    self.assertAllEqual([2, 3], mean.get_shape())
    self.assertAllClose(mu + diag, mean.eval())

    n = int(1e4)
    samps = dist.sample(n, seed=0).eval()
    samps_centered = samps - samps.mean(axis=0)
    cov_mat = array_ops.matrix_diag(diag).eval()**2
    sample_cov = np.matmul(samps_centered.transpose([1, 2, 0]),
                           samps_centered.transpose([1, 0, 2])) / n

    self.assertAllClose(mu + diag, samps.mean(axis=0), atol=0.10, rtol=0.05)
    self.assertAllClose([cov_mat, cov_mat], sample_cov, atol=0.10, rtol=0.05)
def testVector(self):
  with self.session(use_gpu=True):
    v = np.array([1.0, 2.0, 3.0])
    mat = np.diag(v)
    v_diag = array_ops.matrix_diag(v)
    self.assertEqual((3, 3), v_diag.get_shape())
    self.assertAllEqual(v_diag.eval(), mat)
def Test(self):
  np.random.seed(1)
  n = shape_[-1]
  batch_shape = shape_[:-2]
  np_dtype = dtype_.as_numpy_dtype
  a = np.random.uniform(
      low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
  if dtype_.is_complex:
    a += 1j * np.random.uniform(
        low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
  a += np.conj(a.T)
  a = np.tile(a, batch_shape + (1, 1))
  if dtype_ in (dtypes_lib.float32, dtypes_lib.complex64):
    atol = 1e-4
  else:
    atol = 1e-12
  np_e, np_v = np.linalg.eigh(a)
  with self.test_session():
    if compute_v_:
      tf_e, tf_v = linalg_ops.self_adjoint_eig(constant_op.constant(a))

      # Check that V*diag(E)*V^T is close to A.
      a_ev = math_ops.matmul(
          math_ops.matmul(tf_v, array_ops.matrix_diag(tf_e)),
          tf_v,
          adjoint_b=True)
      self.assertAllClose(a_ev.eval(), a, atol=atol)

      # Compare to numpy.linalg.eigh.
      CompareEigenDecompositions(self, np_e, np_v, tf_e.eval(), tf_v.eval(),
                                 atol)
    else:
      tf_e = linalg_ops.self_adjoint_eigvals(constant_op.constant(a))
      self.assertAllClose(
          np.sort(np_e, -1), np.sort(tf_e.eval(), -1), atol=atol)
def _covariance(self):
  if (isinstance(self.scale, linalg.LinearOperatorIdentity) or
      isinstance(self.scale, linalg.LinearOperatorScaledIdentity) or
      isinstance(self.scale, linalg.LinearOperatorDiag)):
    return array_ops.matrix_diag(math_ops.square(self.scale.diag_part()))
  else:
    # TODO(b/35040238): Remove transpose once LinOp supports `transpose`.
    return self.scale.apply(array_ops.matrix_transpose(self.scale.to_dense()))
def _testBatchVector(self, dtype):
  with self.cached_session(use_gpu=True):
    v_batch = np.array([[1.0, 0.0, 3.0], [4.0, 5.0, 6.0]]).astype(dtype)
    mat_batch = np.array([[[1.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 3.0]],
                          [[4.0, 0.0, 0.0], [0.0, 5.0, 0.0],
                           [0.0, 0.0, 6.0]]]).astype(dtype)
    v_batch_diag = array_ops.matrix_diag(v_batch)
    self.assertEqual((2, 3, 3), v_batch_diag.get_shape())
    self.assertAllEqual(v_batch_diag.eval(), mat_batch)
def eye(
    num_rows,
    num_columns=None,
    batch_shape=None,
    dtype=dtypes.float32,
    name=None):
  """Construct an identity matrix, or a batch of matrices.

  ```python
  # Construct one identity matrix.
  tf.eye(2)
  ==> [[1., 0.],
       [0., 1.]]

  # Construct a batch of 3 identity matrices, each 2 x 2.
  # batch_identity[i, :, :] is a 2 x 2 identity matrix, i = 0, 1, 2.
  batch_identity = tf.eye(2, batch_shape=[3])

  # Construct one 2 x 3 "identity" matrix
  tf.eye(2, num_columns=3)
  ==> [[ 1.,  0.,  0.],
       [ 0.,  1.,  0.]]
  ```

  Args:
    num_rows: Non-negative `int32` scalar `Tensor` giving the number of rows
      in each batch matrix.
    num_columns: Optional non-negative `int32` scalar `Tensor` giving the
      number of columns in each batch matrix. Defaults to `num_rows`.
    batch_shape: `int32` `Tensor`. If provided, returned `Tensor` will have
      leading batch dimensions of this shape.
    dtype: The type of an element in the resulting `Tensor`.
    name: A name for this `Op`. Defaults to "eye".

  Returns:
    A `Tensor` of shape `batch_shape + [num_rows, num_columns]`.
  """
  with ops.name_scope(
      name, default_name="eye", values=[num_rows, num_columns, batch_shape]):
    batch_shape = [] if batch_shape is None else batch_shape
    batch_shape = ops.convert_to_tensor(
        batch_shape, name="shape", dtype=dtypes.int32)

    if num_columns is None:
      diag_size = num_rows
    else:
      diag_size = math_ops.minimum(num_rows, num_columns)
    diag_shape = array_ops.concat_v2((batch_shape, [diag_size]), 0)
    diag_ones = array_ops.ones(diag_shape, dtype=dtype)

    if num_columns is None:
      return array_ops.matrix_diag(diag_ones)
    else:
      shape = array_ops.concat_v2((batch_shape, [num_rows, num_columns]), 0)
      zero_matrix = array_ops.zeros(shape, dtype=dtype)
      return array_ops.matrix_set_diag(zero_matrix, diag_ones)
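# Hedged usage sketch (assuming TF2 eager mode; not part of the original
# source): the rectangular branch above is equivalent to writing a vector of
# ones onto the main diagonal of a zero matrix, which matches NumPy's np.eye
# for rectangular shapes.
import numpy as np
import tensorflow as tf

rect = tf.linalg.set_diag(tf.zeros([2, 3]), tf.ones([2]))
np.testing.assert_allclose(rect.numpy(), np.eye(2, 3))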
def testMultivariateNormalDiagWithSoftplusStDev(self):
  mu = [-1.0, 1.0]
  diag = [-1.0, -2.0]
  with self.test_session():
    dist = distributions.MultivariateNormalDiagWithSoftplusStDev(mu, diag)
    samps = dist.sample(1000, seed=0).eval()
    cov_mat = array_ops.matrix_diag(nn_ops.softplus(diag)).eval()**2
    self.assertAllClose(mu, samps.mean(axis=0), atol=0.1)
    self.assertAllClose(cov_mat, np.cov(samps.T), atol=0.1)
def testSample(self):
  mu = [-1., 1]
  diag = [1., -2]
  with self.cached_session():
    dist = ds.MultivariateNormalDiag(mu, diag, validate_args=True)
    samps = dist.sample(int(1e3), seed=0).eval()
    cov_mat = array_ops.matrix_diag(diag).eval()**2
    self.assertAllClose(mu, samps.mean(axis=0), atol=0., rtol=0.05)
    self.assertAllClose(cov_mat, np.cov(samps.T), atol=0.05, rtol=0.05)
def _covariance(self):
  # Let
  #   W = (w1, ..., wk), with wj ~ iid Exponential(0, 1).
  # Then this distribution is
  #   X = loc + LW,
  # and since Cov(wi, wj) = 1 if i = j, and 0 otherwise,
  #   Cov(X) = L Cov(W) L^T = L L^T.
  if distribution_util.is_diagonal_scale(self.scale):
    return array_ops.matrix_diag(math_ops.square(self.scale.diag_part()))
  else:
    return self.scale.matmul(self.scale.to_dense(), adjoint_arg=True)
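# Small numeric check (illustrative only) of the identity the fast branch
# relies on: for a diagonal scale L = diag(d), L L^T = diag(d**2), so
# matrix_diag(square(diag_part)) and the dense matmul branch agree.
import numpy as np

d = np.array([1.5, -0.5, 2.0])
L = np.diag(d)
np.testing.assert_allclose(L @ L.T, np.diag(d**2))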
def testGrad(self):
  shapes = ((3,), (7, 4))
  with self.session(use_gpu=True):
    for shape in shapes:
      x = constant_op.constant(np.random.rand(*shape), np.float32)
      y = array_ops.matrix_diag(x)
      error = gradient_checker.compute_gradient_error(
          x, x.get_shape().as_list(), y, y.get_shape().as_list())
      self.assertLess(error, 1e-4)
def _build_operator_and_mat(self, batch_shape, k, dtype=np.float64):
  # Build a scaled identity matrix with the right shape and dtype.
  # Build an operator that should act the same way.
  batch_shape = list(batch_shape)
  diag_shape = batch_shape + [k]
  matrix_shape = batch_shape + [k, k]

  diag = array_ops.ones(diag_shape, dtype=dtype)
  scale = constant_op.constant(2.0, dtype=dtype)
  scaled_identity_matrix = scale * array_ops.matrix_diag(diag)

  operator = operator_pd_identity.OperatorPDIdentity(
      matrix_shape, dtype, scale=scale)

  return operator, scaled_identity_matrix.eval()
def testSample(self):
  mu = [-1., 1]
  diag = [1., -2]
  with self.test_session():
    dist = ds.VectorLaplaceDiag(mu, diag, validate_args=True)
    samps = dist.sample(int(1e4), seed=0).eval()
    cov_mat = 2. * array_ops.matrix_diag(diag).eval()**2
    self.assertAllClose(mu, samps.mean(axis=0), atol=0., rtol=0.05)
    self.assertAllClose(cov_mat, np.cov(samps.T), atol=0.05, rtol=0.05)
def testSample(self):
  mu = [-2., 1]
  diag = [1., -2]
  with self.cached_session():
    dist = ds.VectorExponentialDiag(mu, diag, validate_args=True)
    samps = dist.sample(int(1e4), seed=0).eval()
    cov_mat = array_ops.matrix_diag(diag).eval()**2
    self.assertAllClose([-2 + 1, 1. - 2],
                        samps.mean(axis=0),
                        atol=0.,
                        rtol=0.05)
    self.assertAllClose(cov_mat, np.cov(samps.T), atol=0.05, rtol=0.05)
def _SelfAdjointEigV2Grad(op, grad_e, grad_v):
  """Gradient for SelfAdjointEigV2."""
  e = op.outputs[0]
  compute_v = op.get_attr("compute_v")
  # a = op.inputs[0], which satisfies
  # a[...,:,:] * v[...,:,i] = e[...,i] * v[...,:,i]
  with ops.control_dependencies([grad_e, grad_v]):
    if compute_v:
      v = op.outputs[1]
      # Construct the matrix f(i,j) = (i != j ? 1 / (e_i - e_j) : 0).
      # Notice that because of the term involving f, the gradient becomes
      # infinite (or NaN in practice) when eigenvalues are not unique.
      # Mathematically this should not be surprising, since for (k-fold)
      # degenerate eigenvalues, the corresponding eigenvectors are only defined
      # up to arbitrary rotation in a (k-dimensional) subspace.
      f = array_ops.matrix_set_diag(
          math_ops.reciprocal(
              array_ops.expand_dims(e, -2) - array_ops.expand_dims(e, -1)),
          array_ops.zeros_like(e))
      grad_a = math_ops.matmul(
          v,
          math_ops.matmul(
              array_ops.matrix_diag(grad_e) +
              f * math_ops.matmul(v, grad_v, adjoint_a=True),
              v,
              adjoint_b=True))
    else:
      _, v = linalg_ops.self_adjoint_eig(op.inputs[0])
      grad_a = math_ops.matmul(
          v, math_ops.matmul(array_ops.matrix_diag(grad_e), v, adjoint_b=True))
    # The forward op only depends on the lower triangular part of a, so here
    # we symmetrize and take the lower triangle.
    grad_a = array_ops.matrix_band_part(
        grad_a + math_ops.conj(array_ops.matrix_transpose(grad_a)), -1, 0)
    grad_a = array_ops.matrix_set_diag(
        grad_a, 0.5 * array_ops.matrix_diag_part(grad_a))
    return grad_a
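# Finite-difference sanity check (illustrative only) of the eigenvalue part of
# the gradient above: for symmetric A with distinct eigenvalues, first-order
# perturbation theory gives de_i = v_i^T dA v_i, which is exactly the
# V diag(grad_e) V^T term (before symmetrization onto the lower triangle).
import numpy as np

rng = np.random.RandomState(0)
a = rng.randn(4, 4)
a = a + a.T
da = 1e-6 * rng.randn(4, 4)
da = da + da.T
e, v = np.linalg.eigh(a)
e_perturbed = np.linalg.eigvalsh(a + da)
predicted = np.array([v[:, i] @ da @ v[:, i] for i in range(4)])
np.testing.assert_allclose(e_perturbed - e, predicted, atol=1e-9)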
def _covariance(self):
  # Let
  #   W = (w1, ..., wk), with wj ~ iid Laplace(0, 1).
  # Then this distribution is
  #   X = loc + LW,
  # and since E[X] = loc,
  #   Cov(X) = E[LW W^T L^T] = L E[W W^T] L^T.
  # Since E[wi wj] = 0 if i != j, and 2 if i == j, we have
  #   Cov(X) = 2 LL^T
  if distribution_util.is_diagonal_scale(self.scale):
    return 2. * array_ops.matrix_diag(math_ops.square(self.scale.diag_part()))
  else:
    return 2. * self.scale.matmul(self.scale.to_dense(), adjoint_arg=True)
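# Quick sampling check (illustrative only) of the fact used above: a standard
# Laplace(0, 1) variable has variance 2, hence Cov(X) = 2 L L^T.
import numpy as np

w = np.random.RandomState(0).laplace(loc=0., scale=1., size=200000)
assert abs(w.var() - 2.) < 0.05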
def testBatchVector(self):
  with self.test_session(use_gpu=self._use_gpu):
    v_batch = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
    mat_batch = np.array([[[1.0, 0.0, 0.0], [0.0, 2.0, 0.0], [0.0, 0.0, 3.0]],
                          [[4.0, 0.0, 0.0], [0.0, 5.0, 0.0],
                           [0.0, 0.0, 6.0]]])
    v_batch_diag = array_ops.matrix_diag(v_batch)
    self.assertEqual((2, 3, 3), v_batch_diag.get_shape())
    self.assertAllEqual(v_batch_diag.eval(), mat_batch)
def _operator_and_matrix(self, build_info, dtype, use_placeholder):
  shape = list(build_info.shape)
  diag = linear_operator_test_util.random_sign_uniform(
      shape[:-1], minval=1., maxval=2., dtype=dtype)

  lin_op_diag = diag

  if use_placeholder:
    lin_op_diag = array_ops.placeholder_with_default(diag, shape=None)

  operator = linalg.LinearOperatorDiag(lin_op_diag)

  matrix = array_ops.matrix_diag(diag)

  return operator, matrix
def eye(num_rows,
        num_columns=None,
        batch_shape=None,
        dtype=dtypes.float32,
        name=None):
  """Construct an identity matrix, or a batch of matrices.

  See `linalg_ops.eye`.
  """
  with ops.name_scope(
      name, default_name='eye', values=[num_rows, num_columns, batch_shape]):
    is_square = num_columns is None
    batch_shape = [] if batch_shape is None else batch_shape
    num_columns = num_rows if num_columns is None else num_columns

    # We cannot statically infer what the diagonal size should be:
    if (isinstance(num_rows, ops.Tensor) or
        isinstance(num_columns, ops.Tensor)):
      diag_size = math_ops.minimum(num_rows, num_columns)
    else:
      # We can statically infer the diagonal size, and whether it is square.
      if not isinstance(num_rows, compat.integral_types) or not isinstance(
          num_columns, compat.integral_types):
        raise TypeError(
            'num_rows and num_columns must be positive integer values.')
      is_square = num_rows == num_columns
      diag_size = np.minimum(num_rows, num_columns)

    # We cannot statically infer the shape of the tensor.
    if isinstance(batch_shape, ops.Tensor) or isinstance(
        diag_size, ops.Tensor):
      batch_shape = ops.convert_to_tensor(
          batch_shape, name='shape', dtype=dtypes.int32)
      diag_shape = array_ops.concat((batch_shape, [diag_size]), axis=0)
      if not is_square:
        shape = array_ops.concat((batch_shape, [num_rows, num_columns]),
                                 axis=0)
    # We can statically infer everything.
    else:
      batch_shape = list(batch_shape)
      diag_shape = batch_shape + [diag_size]
      if not is_square:
        shape = batch_shape + [num_rows, num_columns]

    diag_ones = array_ops.ones(diag_shape, dtype=dtype)
    if is_square:
      return array_ops.matrix_diag(diag_ones)
    else:
      zero_matrix = array_ops.zeros(shape, dtype=dtype)
      return array_ops.matrix_set_diag(zero_matrix, diag_ones)
def CheckApproximation(self, a, u, s, v, full_matrices_, tol):
  # Tests that a ~= u*diag(s)*transpose(v).
  batch_shape = a.shape[:-2]
  m = a.shape[-2]
  n = a.shape[-1]
  diag_s = math_ops.cast(array_ops.matrix_diag(s), dtype=dtype_)
  if full_matrices_:
    if m > n:
      zeros = array_ops.zeros(batch_shape + (m - n, n), dtype=dtype_)
      diag_s = array_ops.concat([diag_s, zeros], a.ndim - 2)
    elif n > m:
      zeros = array_ops.zeros(batch_shape + (m, n - m), dtype=dtype_)
      diag_s = array_ops.concat([diag_s, zeros], a.ndim - 1)
  a_recon = math_ops.matmul(u, diag_s)
  a_recon = math_ops.matmul(a_recon, v, adjoint_b=True)
  self.assertAllClose(a_recon.eval(), a, rtol=tol, atol=tol)
def _updated_mat(self, mat, v, diag):
  # Get dense matrix defined by its square root, which is an update of `mat`:
  #   A = (mat + v D v^T) (mat + v D v^T)^T
  # D is the diagonal matrix with `diag` on the diagonal.

  # If diag is None, then it defaults to the identity matrix, so DV^T = V^T
  if diag is None:
    diag_vt = array_ops.matrix_transpose(v)
  else:
    diag_mat = array_ops.matrix_diag(diag)
    diag_vt = math_ops.matmul(diag_mat, v, adjoint_b=True)

  v_diag_vt = math_ops.matmul(v, diag_vt)
  sqrt = mat + v_diag_vt
  a = math_ops.matmul(sqrt, sqrt, adjoint_b=True)
  return a.eval()
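# Tiny NumPy check (illustrative only) of the construction above: the result
# is the square of the updated square root, so it is symmetric positive
# semi-definite by construction.
import numpy as np

mat = np.eye(3)
v = np.array([[1.], [0.], [2.]])
d = np.array([0.5])
sqrt = mat + v @ np.diag(d) @ v.T
a = sqrt @ sqrt.T
np.testing.assert_allclose(a, a.T)
assert np.all(np.linalg.eigvalsh(a) >= -1e-12)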
def _operator_and_mat_and_feed_dict(self, shape, dtype, use_placeholder):
  diag = linear_operator_test_util.random_sign_uniform(
      shape[:-1], minval=1., maxval=2., dtype=dtype)
  if use_placeholder:
    diag_ph = array_ops.placeholder(dtype=dtype)
    # Evaluate the diag here because (i) you cannot feed a tensor, and (ii)
    # diag is random and we want the same value used for both mat and
    # feed_dict.
    diag = diag.eval()
    operator = linalg.LinearOperatorDiag(diag_ph)
    feed_dict = {diag_ph: diag}
  else:
    operator = linalg.LinearOperatorDiag(diag)
    feed_dict = None

  mat = array_ops.matrix_diag(diag)
  return operator, mat, feed_dict
def _operator_and_mat_and_feed_dict(self, build_info, dtype, use_placeholder):
  shape = list(build_info.shape)
  expected_blocks = (
      build_info.__dict__["blocks"] if "blocks" in build_info.__dict__
      else [shape])
  diag_matrices = [
      linear_operator_test_util.random_uniform(
          shape=block_shape[:-1], minval=1., maxval=20., dtype=dtype)
      for block_shape in expected_blocks
  ]

  if use_placeholder:
    diag_matrices_ph = [
        array_ops.placeholder(dtype=dtype) for _ in expected_blocks
    ]
    diag_matrices = self.evaluate(diag_matrices)
    # Evaluate here because (i) you cannot feed a tensor, and (ii)
    # values are random and we want the same value used for both mat and
    # feed_dict.
    operator = block_diag.LinearOperatorBlockDiag(
        [linalg.LinearOperatorDiag(m_ph) for m_ph in diag_matrices_ph])
    feed_dict = {m_ph: m for (m_ph, m) in zip(
        diag_matrices_ph, diag_matrices)}
  else:
    operator = block_diag.LinearOperatorBlockDiag(
        [linalg.LinearOperatorDiag(m) for m in diag_matrices])
    feed_dict = None
    # Should be auto-set.
    self.assertTrue(operator.is_square)

  # Broadcast the shapes.
  expected_shape = list(build_info.shape)

  matrices = linear_operator_util.broadcast_matrix_batch_dims(
      [array_ops.matrix_diag(diag_block) for diag_block in diag_matrices])

  block_diag_dense = _block_diag_dense(expected_shape, matrices)

  if not use_placeholder:
    block_diag_dense.set_shape(
        expected_shape[:-2] + [expected_shape[-1], expected_shape[-1]])

  return operator, block_diag_dense, feed_dict
def variable_covariance_matrix(
    size, name, dtype, initial_diagonal_values=None,
    initial_overall_scale_log=0.):
  """Construct a Variable-parameterized positive definite matrix.

  Useful for parameterizing covariance matrices.

  Args:
    size: The size of the main diagonal, the returned matrix having shape
      [size, size].
    name: The name to use when defining variables and ops.
    dtype: The floating point data type to use.
    initial_diagonal_values: A Tensor with shape [size] with initial values
      for the diagonal values of the returned matrix. Must be positive.
    initial_overall_scale_log: Initial value of the bias term for every
      element of the matrix in log space.
  Returns:
    A Variable-parameterized covariance matrix with shape [size, size].
  """
  raw_values = variable_scope.get_variable(
      name + "_pre_transform",
      dtype=dtype,
      shape=[size, size],
      initializer=init_ops.zeros_initializer())
  if initial_diagonal_values is not None:
    raw_values += array_ops.matrix_diag(math_ops.log(initial_diagonal_values))
  return array_ops.identity(
      sign_magnitude_positive_definite(
          raw=raw_values,
          off_diagonal_scale=variable_scope.get_variable(
              name + "_off_diagonal_scale",
              dtype=dtype,
              initializer=constant_op.constant(-5., dtype=dtype)),
          overall_scale=ops.convert_to_tensor(
              initial_overall_scale_log, dtype=dtype) +
          variable_scope.get_variable(
              name + "_overall_scale",
              dtype=dtype,
              shape=[],
              initializer=init_ops.zeros_initializer())),
      name=name)
def eye(num_rows,
        num_columns=None,
        batch_shape=None,
        dtype=dtypes.float32,
        name=None):
  """Construct an identity matrix, or a batch of matrices.

  See `linalg_ops.eye`.
  """
  with ops.name_scope(
      name, default_name='eye', values=[num_rows, num_columns, batch_shape]):
    is_square = num_columns is None
    batch_shape = [] if batch_shape is None else batch_shape
    num_columns = num_rows if num_columns is None else num_columns
    if isinstance(num_rows, ops.Tensor) or isinstance(
        num_columns, ops.Tensor) or isinstance(batch_shape, ops.Tensor):
      batch_shape = ops.convert_to_tensor(
          batch_shape, name='shape', dtype=dtypes.int32)
      diag_size = math_ops.minimum(num_rows, num_columns)
      diag_shape = array_ops.concat((batch_shape, [diag_size]), 0)
      if not is_square:
        shape = array_ops.concat((batch_shape, [num_rows, num_columns]), 0)
    else:
      if not isinstance(num_rows, compat.integral_types) or not isinstance(
          num_columns, compat.integral_types):
        raise TypeError(
            'num_rows and num_columns must be positive integer values.')
      batch_shape = [dim for dim in batch_shape]
      is_square = num_rows == num_columns
      diag_shape = batch_shape + [np.minimum(num_rows, num_columns)]
      if not is_square:
        shape = batch_shape + [num_rows, num_columns]

    diag_ones = array_ops.ones(diag_shape, dtype=dtype)
    if is_square:
      return array_ops.matrix_diag(diag_ones)
    else:
      zero_matrix = array_ops.zeros(shape, dtype=dtype)
      return array_ops.matrix_set_diag(zero_matrix, diag_ones)
def operator_and_matrix(
    self, build_info, dtype, use_placeholder,
    ensure_self_adjoint_and_pd=False):
  shape = list(build_info.shape)
  diag = linear_operator_test_util.random_sign_uniform(
      shape[:-1], minval=1., maxval=2., dtype=dtype)

  if ensure_self_adjoint_and_pd:
    # Abs on complex64 will result in a float32, so we cast back up.
    diag = math_ops.cast(math_ops.abs(diag), dtype=dtype)

  lin_op_diag = diag

  if use_placeholder:
    lin_op_diag = array_ops.placeholder_with_default(diag, shape=None)

  operator = linalg.LinearOperatorDiag(
      lin_op_diag,
      is_self_adjoint=True if ensure_self_adjoint_and_pd else None,
      is_positive_definite=True if ensure_self_adjoint_and_pd else None)

  matrix = array_ops.matrix_diag(diag)

  return operator, matrix
def _SvdGrad(op, grad_s, grad_u, grad_v):
  """Gradient for the singular value decomposition."""

  # The derivation for the compute_uv=False case, and most of
  # the derivation for the full_matrices=True case, are in
  # Giles' paper (see reference at top of file). A derivation for
  # the full_matrices=False case is available at
  # https://j-towns.github.io/papers/svd-derivative.pdf
  a = op.inputs[0]
  a_shape = a.get_shape().with_rank_at_least(2)

  if op.get_attr("compute_uv"):
    # TODO(rmlarsen): Make this work with complex types.
    if a.dtype.is_complex:
      raise NotImplementedError(
          "SVD gradient is not implemented for complex types and "
          "compute_uv=True.")
    grad_u_shape = grad_u.get_shape().with_rank_at_least(2)
    grad_v_shape = grad_v.get_shape().with_rank_at_least(2)
    m = a_shape[-2].merge_with(grad_u_shape[-2])
    n = a_shape[-1].merge_with(grad_v_shape[-2])
    batch_shape = a_shape[:-2].merge_with(grad_u_shape[:-2]).merge_with(
        grad_v_shape[:-2])
    a_shape = batch_shape.concatenate([m, n])

  m = a_shape[-2].value
  n = a_shape[-1].value
  # TODO(rmlarsen): Make this work with placeholders.
  if m is None or n is None:
    raise NotImplementedError(
        "SVD gradient has not been implemented for input with unknown "
        "inner matrix shape.")

  if not op.get_attr("compute_uv"):
    s, u, v = linalg_ops.svd(a, compute_uv=True, full_matrices=True)
  else:
    s = op.outputs[0]
    u = op.outputs[1]
    v = op.outputs[2]

  use_adjoint = False
  if m > n:
    # Compute the gradient for A^H = V * S^T * U^H, and (implicitly) take the
    # Hermitian transpose of the gradient at the end.
    use_adjoint = True
    m, n = n, m
    u, v = v, u
    grad_u, grad_v = grad_v, grad_u

  with ops.control_dependencies([grad_s, grad_u, grad_v]):
    grad_s_mat = array_ops.matrix_diag(grad_s)

    if not op.get_attr("compute_uv"):
      if use_adjoint:
        grad_a = math_ops.matmul(
            v[..., :, :m], math_ops.matmul(u, grad_s_mat), adjoint_b=True)
      else:
        grad_a = math_ops.matmul(
            u, math_ops.matmul(grad_s_mat, v[..., :, :m], adjoint_b=True))
      grad_a.set_shape(a_shape)
      return grad_a

    if op.get_attr("full_matrices") and abs(m - n) > 1:
      raise NotImplementedError(
          "svd gradient is not implemented for abs(m - n) > 1 "
          "when full_matrices is True")

    s_mat = array_ops.matrix_diag(s)
    s2 = math_ops.square(s)

    # NOTICE: Because of the term involving f, the gradient becomes
    # infinite (or NaN in practice) when singular values are not unique.
    # Mathematically this should not be surprising, since for (k-fold)
    # degenerate singular values, the corresponding singular vectors are
    # only defined up to a (k-dimensional) subspace. In practice, this can
    # lead to numerical instability when singular values are close but not
    # exactly equal.
    f = array_ops.matrix_set_diag(
        math_ops.reciprocal(
            array_ops.expand_dims(s2, -2) - array_ops.expand_dims(s2, -1)),
        array_ops.zeros_like(s))
    s_inv_mat = array_ops.matrix_diag(math_ops.reciprocal(s))

    v1 = v[..., :, :m]
    grad_v1 = grad_v[..., :, :m]

    u_gu = math_ops.matmul(u, grad_u, adjoint_a=True)
    v_gv = math_ops.matmul(v1, grad_v1, adjoint_a=True)

    f_u = f * u_gu
    f_v = f * v_gv

    term1_nouv = (
        grad_s_mat + math_ops.matmul(f_u + _linalg.adjoint(f_u), s_mat) +
        math_ops.matmul(s_mat, f_v + _linalg.adjoint(f_v)))

    term1 = math_ops.matmul(u, math_ops.matmul(term1_nouv, v1, adjoint_b=True))

    if m == n:
      grad_a_before_transpose = term1
    else:
      gv1t = array_ops.matrix_transpose(grad_v1)
      gv1t_v1 = math_ops.matmul(gv1t, v1)
      term2_nous = gv1t - math_ops.matmul(gv1t_v1, v1, adjoint_b=True)

      if op.get_attr("full_matrices"):
        v2 = v[..., :, m:n]
        grad_v2 = grad_v[..., :, m:n]

        v1t_gv2 = math_ops.matmul(v1, grad_v2, adjoint_a=True)
        term2_nous -= math_ops.matmul(v1t_gv2, v2, adjoint_b=True)

      u_s_inv = math_ops.matmul(u, s_inv_mat)
      term2 = math_ops.matmul(u_s_inv, term2_nous)

      grad_a_before_transpose = term1 + term2

    if use_adjoint:
      grad_a = array_ops.matrix_transpose(grad_a_before_transpose)
    else:
      grad_a = grad_a_before_transpose

    grad_a.set_shape(a_shape)
    return grad_a
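# Finite-difference sanity check (illustrative only) of the compute_uv=False
# rule above: first-order perturbation theory gives ds_i = u_i^T dA v_i, i.e.
# grad_A = U diag(grad_s) V^H, which is the matmul chain built from grad_s_mat.
import numpy as np

rng = np.random.RandomState(0)
a = rng.randn(3, 3)
u, s, vt = np.linalg.svd(a)
da = 1e-6 * rng.randn(3, 3)
s_perturbed = np.linalg.svd(a + da, compute_uv=False)
predicted = np.array([u[:, i] @ da @ vt[i, :] for i in range(3)])
np.testing.assert_allclose(s_perturbed - s, predicted, atol=1e-9)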
def operator_and_matrix(self, shape_info, dtype, use_placeholder,
                        ensure_self_adjoint_and_pd=False):
  # Recall A = L + UDV^H
  shape = list(shape_info.shape)
  diag_shape = shape[:-1]
  k = shape[-2] // 2 + 1
  u_perturbation_shape = shape[:-1] + [k]
  diag_update_shape = shape[:-2] + [k]

  # base_operator L will be a symmetric positive definite diagonal linear
  # operator, with condition number as high as 1e4.
  base_diag = self._gen_positive_diag(dtype, diag_shape)
  lin_op_base_diag = base_diag

  # U
  u = linear_operator_test_util.random_normal_correlated_columns(
      u_perturbation_shape, dtype=dtype)
  lin_op_u = u

  # V
  v = linear_operator_test_util.random_normal_correlated_columns(
      u_perturbation_shape, dtype=dtype)
  lin_op_v = v

  # D
  if self._is_diag_update_positive or ensure_self_adjoint_and_pd:
    diag_update = self._gen_positive_diag(dtype, diag_update_shape)
  else:
    diag_update = linear_operator_test_util.random_normal(
        diag_update_shape, stddev=1e-4, dtype=dtype)
  lin_op_diag_update = diag_update

  if use_placeholder:
    lin_op_base_diag = array_ops.placeholder_with_default(
        base_diag, shape=None)
    lin_op_u = array_ops.placeholder_with_default(u, shape=None)
    lin_op_v = array_ops.placeholder_with_default(v, shape=None)
    lin_op_diag_update = array_ops.placeholder_with_default(
        diag_update, shape=None)

  base_operator = linalg.LinearOperatorDiag(
      lin_op_base_diag,
      is_positive_definite=True,
      is_self_adjoint=True)

  operator = linalg.LinearOperatorLowRankUpdate(
      base_operator,
      lin_op_u,
      v=lin_op_v if self._use_v else None,
      diag_update=lin_op_diag_update if self._use_diag_update else None,
      is_diag_update_positive=self._is_diag_update_positive)

  # The matrix representing L
  base_diag_mat = array_ops.matrix_diag(base_diag)

  # The matrix representing D
  diag_update_mat = array_ops.matrix_diag(diag_update)

  # Set up mat as some variant of A = L + UDV^H
  if self._use_v and self._use_diag_update:
    # In this case, we have L + UDV^H and it isn't symmetric.
    expect_use_cholesky = False
    matrix = base_diag_mat + math_ops.matmul(
        u, math_ops.matmul(diag_update_mat, v, adjoint_b=True))
  elif self._use_v:
    # In this case, we have L + UV^H and it isn't symmetric.
    expect_use_cholesky = False
    matrix = base_diag_mat + math_ops.matmul(u, v, adjoint_b=True)
  elif self._use_diag_update:
    # In this case, we have L + UDU^H, which is PD if D > 0, since L > 0.
    expect_use_cholesky = self._is_diag_update_positive
    matrix = base_diag_mat + math_ops.matmul(
        u, math_ops.matmul(diag_update_mat, u, adjoint_b=True))
  else:
    # In this case, we have L + UU^H, which is PD since L > 0.
    expect_use_cholesky = True
    matrix = base_diag_mat + math_ops.matmul(u, u, adjoint_b=True)

  if expect_use_cholesky:
    self.assertTrue(operator._use_cholesky)
  else:
    self.assertFalse(operator._use_cholesky)

  return operator, matrix
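# Illustrative NumPy sketch (not from the original test) of the structure
# A = L + U D V^H exercised above. The Woodbury identity is what makes this
# kind of low-rank-updated operator cheap to invert:
#   (L + U D V^H)^{-1}
#     = L^{-1} - L^{-1} U (D^{-1} + V^H L^{-1} U)^{-1} V^H L^{-1}
import numpy as np

rng = np.random.RandomState(0)
L = np.diag(rng.uniform(1., 2., size=4))
U = rng.randn(4, 2)
V = rng.randn(4, 2)
D = np.diag(rng.uniform(0.5, 1.5, size=2))
A = L + U @ D @ V.T
L_inv = np.diag(1. / np.diag(L))
capacitance = np.linalg.inv(np.linalg.inv(D) + V.T @ L_inv @ U)
A_inv = L_inv - L_inv @ U @ capacitance @ V.T @ L_inv
np.testing.assert_allclose(A_inv, np.linalg.inv(A), atol=1e-8)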
def _MatrixDiagPartGrad(op, grad):
  matrix_shape = op.inputs[0].get_shape()[-2:]
  if matrix_shape.is_fully_defined() and matrix_shape[0] == matrix_shape[1]:
    return array_ops.matrix_diag(grad)
  else:
    return array_ops.matrix_set_diag(
        array_ops.zeros_like(op.inputs[0]), grad)
def _to_dense(self):
  return array_ops.matrix_diag(self._diag)
def _operator_and_mat_and_feed_dict(self, shape, dtype, use_placeholder):
  # Recall A = L + UDV^H
  shape = list(shape)
  diag_shape = shape[:-1]
  k = shape[-2] // 2 + 1
  u_perturbation_shape = shape[:-1] + [k]
  diag_update_shape = shape[:-2] + [k]

  # base_operator L will be a symmetric positive definite diagonal linear
  # operator, with condition number as high as 1e4.
  base_diag = linear_operator_test_util.random_uniform(
      diag_shape, minval=1e-4, maxval=1., dtype=dtype)
  base_diag_ph = array_ops.placeholder(dtype=dtype)

  # U
  u = linear_operator_test_util.random_normal_correlated_columns(
      u_perturbation_shape, dtype=dtype)
  u_ph = array_ops.placeholder(dtype=dtype)

  # V
  v = linear_operator_test_util.random_normal_correlated_columns(
      u_perturbation_shape, dtype=dtype)
  v_ph = array_ops.placeholder(dtype=dtype)

  # D
  if self._is_diag_update_positive:
    diag_update = linear_operator_test_util.random_uniform(
        diag_update_shape, minval=1e-4, maxval=1., dtype=dtype)
  else:
    diag_update = linear_operator_test_util.random_normal(
        diag_update_shape, stddev=1e-4, dtype=dtype)
  diag_update_ph = array_ops.placeholder(dtype=dtype)

  if use_placeholder:
    # Evaluate here because (i) you cannot feed a tensor, and (ii)
    # values are random and we want the same value used for both mat and
    # feed_dict.
    base_diag = base_diag.eval()
    u = u.eval()
    v = v.eval()
    diag_update = diag_update.eval()

    # In all cases, set base_operator to be positive definite.
    base_operator = linalg.LinearOperatorDiag(
        base_diag_ph, is_positive_definite=True)

    operator = linalg.LinearOperatorUDVHUpdate(
        base_operator,
        u=u_ph,
        v=v_ph if self._use_v else None,
        diag_update=diag_update_ph if self._use_diag_update else None,
        is_diag_update_positive=self._is_diag_update_positive)
    feed_dict = {
        base_diag_ph: base_diag,
        u_ph: u,
        v_ph: v,
        diag_update_ph: diag_update}
  else:
    base_operator = linalg.LinearOperatorDiag(
        base_diag, is_positive_definite=True)
    operator = linalg.LinearOperatorUDVHUpdate(
        base_operator,
        u,
        v=v if self._use_v else None,
        diag_update=diag_update if self._use_diag_update else None,
        is_diag_update_positive=self._is_diag_update_positive)
    feed_dict = None

  # The matrix representing L
  base_diag_mat = array_ops.matrix_diag(base_diag)

  # The matrix representing D
  diag_update_mat = array_ops.matrix_diag(diag_update)

  # Set up mat as some variant of A = L + UDV^H
  if self._use_v and self._use_diag_update:
    # In this case, we have L + UDV^H and it isn't symmetric.
    expect_use_cholesky = False
    mat = base_diag_mat + math_ops.matmul(
        u, math_ops.matmul(diag_update_mat, v, adjoint_b=True))
  elif self._use_v:
    # In this case, we have L + UV^H and it isn't symmetric.
    expect_use_cholesky = False
    mat = base_diag_mat + math_ops.matmul(u, v, adjoint_b=True)
  elif self._use_diag_update:
    # In this case, we have L + UDU^H, which is PD if D > 0, since L > 0.
    expect_use_cholesky = self._is_diag_update_positive
    mat = base_diag_mat + math_ops.matmul(
        u, math_ops.matmul(diag_update_mat, u, adjoint_b=True))
  else:
    # In this case, we have L + UU^H, which is PD since L > 0.
    expect_use_cholesky = True
    mat = base_diag_mat + math_ops.matmul(u, u, adjoint_b=True)

  if expect_use_cholesky:
    self.assertTrue(operator._use_cholesky)
  else:
    self.assertFalse(operator._use_cholesky)

  return operator, mat, feed_dict
def testInvalidShapeAtEval(self):
  with self.test_session(use_gpu=True):
    v = array_ops.placeholder(dtype=dtypes_lib.float32)
    with self.assertRaisesOpError("input must be at least 1-dim"):
      array_ops.matrix_diag(v).eval(feed_dict={v: 0.0})
def _MatrixDiagPartGrad(_, grad):
  return array_ops.matrix_diag(grad)
def _covariance(self):
  if distribution_util.is_diagonal_scale(self.scale):
    return array_ops.matrix_diag(math_ops.square(self.scale.diag_part()))
  else:
    return self.scale.matmul(self.scale.to_dense(), adjoint_arg=True)
def _SvdGrad(op, grad_s, grad_u, grad_v):
  """Gradient for Svd based on Giles' algorithm. Reference at top of file."""

  if op.get_attr("compute_uv") and not op.get_attr("full_matrices"):
    raise NotImplementedError(
        "SVD gradient is not implemented for compute_uv=True and "
        "full_matrices=False.")
  a = op.inputs[0]
  a_shape = a.get_shape().with_rank_at_least(2)

  if op.get_attr("compute_uv"):
    # TODO(rmlarsen): Make this work with complex types.
    if a.dtype.is_complex:
      raise NotImplementedError(
          "SVD gradient is not implemented for complex types and "
          "compute_uv=True.")
    grad_u_shape = grad_u.get_shape().with_rank_at_least(2)
    grad_v_shape = grad_v.get_shape().with_rank_at_least(2)
    m = a_shape[-2].merge_with(grad_u_shape[-2])
    n = a_shape[-1].merge_with(grad_v_shape[-2])
    batch_shape = a_shape[:-2].merge_with(grad_u_shape[:-2]).merge_with(
        grad_v_shape[:-2])
    a_shape = batch_shape.concatenate([m, n])

  m = a_shape[-2].value
  n = a_shape[-1].value
  # TODO(rmlarsen): Make this work with placeholders.
  if m is None or n is None:
    raise NotImplementedError(
        "SVD gradient has not been implemented for input with unknown "
        "inner matrix shape.")

  if not op.get_attr("full_matrices") or not op.get_attr("compute_uv"):
    s, u, v = linalg_ops.svd(a, compute_uv=True, full_matrices=True)
  else:
    s = op.outputs[0]
    u = op.outputs[1]
    v = op.outputs[2]

  use_adjoint = False
  if m > n:
    # Compute the gradient for A^H = V * S^T * U^H, and (implicitly) take the
    # Hermitian transpose of the gradient at the end.
    use_adjoint = True
    m, n = n, m
    u, v = v, u
    grad_u, grad_v = grad_v, grad_u

  with ops.control_dependencies([grad_s, grad_u, grad_v]):
    grad_s_mat = array_ops.matrix_diag(grad_s)
    if not op.get_attr("compute_uv"):
      if use_adjoint:
        grad_a = math_ops.matmul(
            v[..., :, :m], math_ops.matmul(u, grad_s_mat), adjoint_b=True)
      else:
        grad_a = math_ops.matmul(
            u, math_ops.matmul(grad_s_mat, v[..., :, :m], adjoint_b=True))
      grad_a.set_shape(a_shape)
      return grad_a

    # TODO(rmlarsen): Define a gradient that is numerically stable for
    # abs(m-n) > 1. Currently this does not work because there are effectively
    # multiple singular values with value zero. I am not sure if this is a true
    # instability or if it simply throws off the finite difference gradient
    # checker.
    if abs(m - n) > 1:
      raise NotImplementedError(
          "svd gradient is not implemented for abs(m - n) > 1")

    s_mat = array_ops.matrix_diag(s)
    s2 = math_ops.square(s)

    # NOTICE: Because of the term involving f, the gradient becomes
    # infinite (or NaN in practice) when singular values are not unique.
    # Mathematically this should not be surprising, since for (k-fold)
    # degenerate singular values, the corresponding singular vectors are
    # only defined up to a (k-dimensional) subspace. In practice, this can
    # lead to numerical instability when singular values are close but not
    # exactly equal.
    f = array_ops.matrix_set_diag(
        math_ops.reciprocal(
            array_ops.expand_dims(s2, -2) - array_ops.expand_dims(s2, -1)),
        array_ops.zeros_like(s))
    s_inv_mat = array_ops.matrix_diag(math_ops.reciprocal(s))
    u_gu = math_ops.matmul(u, grad_u, adjoint_a=True)
    v_gv = math_ops.matmul(v, grad_v, adjoint_a=True)

    if m == n:
      f_u = f * u_gu
      f_v = f * v_gv
    else:
      dv2 = array_ops.matrix_transpose(v_gv[..., m:n, :m]) - v_gv[..., :m, m:n]
      f_u = f * u_gu
      f_v = f * v_gv[..., :m, :m]

    grad_a_nouv = (
        grad_s_mat + math_ops.matmul(f_u + _linalg.adjoint(f_u), s_mat) +
        math_ops.matmul(s_mat, f_v + _linalg.adjoint(f_v)))

    if m != n:
      grad_a_nouv = array_ops.concat(
          [grad_a_nouv, math_ops.matmul(s_inv_mat, dv2)], -1)

    if use_adjoint:
      # Use (U X V^H)^H = V (U X)^H.
      grad_a = math_ops.matmul(
          v, math_ops.matmul(u, grad_a_nouv), adjoint_b=True)
    else:
      grad_a = math_ops.matmul(
          u, math_ops.matmul(grad_a_nouv, v, adjoint_b=True))

    grad_a.set_shape(a_shape)
    return grad_a
def _to_dense(self):
  diag = array_ops.ones(self.vector_shape(), dtype=self.dtype)
  dense = array_ops.matrix_diag(diag)
  dense.set_shape(self.get_shape())
  return dense
def testRectangularBatch(self):
  # LINT.IfChange
  if compat.forward_compatible(2019, 8, 31):
    # LINT.ThenChange(//tensorflow/python/ops/array_ops.py)
    with self.cached_session(use_gpu=True):
      # Stores expected num_rows and num_cols (when the other is given).
      # expected[(d_lower, d_upper)] = (expected_num_rows, expected_num_cols)
      test_list = list()

      # Square cases:
      expected = {
          (-1, -1): (5, 4),
          (-4, -3): (5, 2),
          (-2, 1): (5, 5),
          (2, 4): (3, 5),
      }
      test_list.append((expected, square_cases()))

      # More cases:
      expected = {(-3, -1): (5, 4), (-1, 1): (4, 4), (2, 4): (4, 6)}
      test_list.append((expected, self._moreCases()))

      # Tall cases
      expected = {
          (0, 0): (3, 3),
          (-4, -3): (5, 2),
          (-2, -1): (4, 3),
          (-2, 1): (3, 3),
          (1, 2): (2, 3)
      }
      test_list.append((expected, tall_cases()))

      # Fat cases
      expected = {
          (2, 2): (2, 4),
          (-2, 0): (3, 3),
          (-1, 1): (3, 3),
          (0, 3): (3, 3)
      }
      test_list.append((expected, fat_cases()))

      for padding_value in [0, 555, -11]:
        # Giving both num_rows and num_cols
        for _, tests in [tall_cases(), fat_cases()]:
          for diags, (vecs, solution) in tests.items():
            v_diags = array_ops.matrix_diag(
                vecs,
                k=diags,
                num_rows=solution.shape[-2],
                num_cols=solution.shape[-1],
                padding_value=padding_value)
            mask = solution == 0
            solution = solution + padding_value * mask
            self.assertEqual(v_diags.get_shape(), solution.shape)
            self.assertAllEqual(v_diags.eval(), solution)

        # Giving just num_rows.
        for expected, (_, tests) in test_list:
          for diags, (_, new_num_cols) in expected.items():
            vecs, solution = tests[diags]
            solution = solution.take(indices=range(new_num_cols), axis=-1)
            v_diags = array_ops.matrix_diag(
                vecs,
                k=diags,
                num_rows=solution.shape[-2],
                padding_value=padding_value)
            mask = solution == 0
            solution = solution + padding_value * mask
            self.assertEqual(v_diags.get_shape(), solution.shape)
            self.assertAllEqual(v_diags.eval(), solution)

        # Giving just num_cols.
        for expected, (_, tests) in test_list:
          for diags, (new_num_rows, _) in expected.items():
            vecs, solution = tests[diags]
            solution = solution.take(indices=range(new_num_rows), axis=-2)
            v_diags = array_ops.matrix_diag(
                vecs,
                k=diags,
                num_cols=solution.shape[-1],
                padding_value=padding_value)
            mask = solution == 0
            solution = solution + padding_value * mask
            self.assertEqual(v_diags.get_shape(), solution.shape)
            self.assertAllEqual(v_diags.eval(), solution)
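# Concrete illustration (assuming TF2 eager mode and a TF version where
# tf.linalg.diag accepts these keyword arguments; not part of the original
# test) of the num_rows/num_cols/padding_value semantics exercised above:
# entries outside the requested diagonal band are filled with padding_value.
import tensorflow as tf

out = tf.linalg.diag([1., 2., 3.], k=0, num_rows=3, num_cols=4,
                     padding_value=9.)
print(out.numpy())
# [[1. 9. 9. 9.]
#  [9. 2. 9. 9.]
#  [9. 9. 3. 9.]]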
def loop_fn(i):
  return array_ops.matrix_diag(array_ops.gather(x, i))
def loop_fn(i):
  diagonal = array_ops.gather(x, i)
  return array_ops.matrix_diag(
      diagonal, k=(0, 1), num_rows=4, num_cols=5, align="RIGHT_LEFT")
def _diag_to_matrix(self, diag):
  return array_ops.matrix_diag(diag**2).eval()
def testInvalidShape(self):
  with self.assertRaisesRegexp(ValueError, "must be at least rank 1"):
    array_ops.matrix_diag(0)
def _diag(v, k):
  return np_utils.cond(
      math_ops.equal(array_ops.size(v), 0),
      lambda: array_ops.zeros([abs(k), abs(k)], dtype=v.dtype),
      lambda: array_ops.matrix_diag(v, k=k))
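# Hedged sketch of the edge case handled above: an empty input vector still
# yields an abs(k) x abs(k) zero matrix, matching np.diag's behavior for
# offset diagonals.
import numpy as np

np.testing.assert_allclose(np.diag(np.array([]), k=1), np.zeros((1, 1)))
np.testing.assert_allclose(
    np.diag(np.array([5., 6.]), k=-1),
    [[0., 0., 0.], [5., 0., 0.], [0., 6., 0.]])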
def eye(num_rows,
        num_columns=None,
        batch_shape=None,
        dtype=dtypes.float32,
        name=None):
  """Construct an identity matrix, or a batch of matrices.

  ```python
  # Construct one identity matrix.
  tf.eye(2)
  ==> [[1., 0.],
       [0., 1.]]

  # Construct a batch of 3 identity matrices, each 2 x 2.
  # batch_identity[i, :, :] is a 2 x 2 identity matrix, i = 0, 1, 2.
  batch_identity = tf.eye(2, batch_shape=[3])

  # Construct one 2 x 3 "identity" matrix
  tf.eye(2, num_columns=3)
  ==> [[ 1.,  0.,  0.],
       [ 0.,  1.,  0.]]
  ```

  Args:
    num_rows: Non-negative `int32` scalar `Tensor` giving the number of rows
      in each batch matrix.
    num_columns: Optional non-negative `int32` scalar `Tensor` giving the
      number of columns in each batch matrix. Defaults to `num_rows`.
    batch_shape: A list or tuple of Python integers or a 1-D `int32` `Tensor`.
      If provided, the returned `Tensor` will have leading batch dimensions of
      this shape.
    dtype: The type of an element in the resulting `Tensor`.
    name: A name for this `Op`. Defaults to "eye".

  Returns:
    A `Tensor` of shape `batch_shape + [num_rows, num_columns]`.
  """
  with ops.name_scope(
      name, default_name='eye', values=[num_rows, num_columns, batch_shape]):
    is_square = num_columns is None
    batch_shape = [] if batch_shape is None else batch_shape
    num_columns = num_rows if num_columns is None else num_columns
    if isinstance(num_rows, ops.Tensor) or isinstance(
        num_columns, ops.Tensor) or isinstance(batch_shape, ops.Tensor):
      batch_shape = ops.convert_to_tensor(
          batch_shape, name='shape', dtype=dtypes.int32)
      diag_size = math_ops.minimum(num_rows, num_columns)
      diag_shape = array_ops.concat((batch_shape, [diag_size]), 0)
      if not is_square:
        shape = array_ops.concat((batch_shape, [num_rows, num_columns]), 0)
    else:
      if not isinstance(num_rows, compat.integral_types) or not isinstance(
          num_columns, compat.integral_types):
        raise TypeError(
            'num_rows and num_columns must be positive integer values.')
      batch_shape = [dim for dim in batch_shape]
      is_square = num_rows == num_columns
      diag_shape = batch_shape + [np.minimum(num_rows, num_columns)]
      if not is_square:
        shape = batch_shape + [num_rows, num_columns]

    diag_ones = array_ops.ones(diag_shape, dtype=dtype)
    if is_square:
      return array_ops.matrix_diag(diag_ones)
    else:
      zero_matrix = array_ops.zeros(shape, dtype=dtype)
      return array_ops.matrix_set_diag(zero_matrix, diag_ones)
def tridiag(below=None, diag=None, above=None, name=None):
  """Creates a matrix with values set above, below, and on the diagonal.

  Example:

  ```python
  tridiag(below=[1., 2., 3.],
          diag=[4., 5., 6., 7.],
          above=[8., 9., 10.])
  # ==> array([[  4.,   8.,   0.,   0.],
  #            [  1.,   5.,   9.,   0.],
  #            [  0.,   2.,   6.,  10.],
  #            [  0.,   0.,   3.,   7.]], dtype=float32)
  ```

  Warning: This Op is intended for convenience, not efficiency.

  Args:
    below: `Tensor` of shape `[B1, ..., Bb, d-1]` corresponding to the below
      diagonal part. `None` is logically equivalent to `below = 0`.
    diag: `Tensor` of shape `[B1, ..., Bb, d]` corresponding to the diagonal
      part. `None` is logically equivalent to `diag = 0`.
    above: `Tensor` of shape `[B1, ..., Bb, d-1]` corresponding to the above
      diagonal part. `None` is logically equivalent to `above = 0`.
    name: Python `str`. The name to give this op.

  Returns:
    tridiag: `Tensor` with values set above, below and on the diagonal.

  Raises:
    ValueError: if all inputs are `None`.
  """

  def _pad(x):
    """Prepends and appends a zero to every vector in a batch of vectors."""
    shape = array_ops.concat([array_ops.shape(x)[:-1], [1]], axis=0)
    z = array_ops.zeros(shape, dtype=x.dtype)
    return array_ops.concat([z, x, z], axis=-1)

  def _add(*x):
    """Adds list of Tensors, ignoring `None`."""
    s = None
    for y in x:
      if y is None:
        continue
      elif s is None:
        s = y
      else:
        s += y
    if s is None:
      raise ValueError(
          "Must specify at least one of `below`, `diag`, `above`.")
    return s

  with ops.name_scope(name, "tridiag", [below, diag, above]):
    if below is not None:
      below = ops.convert_to_tensor(below, name="below")
      below = array_ops.matrix_diag(_pad(below))[..., :-1, 1:]
    if diag is not None:
      diag = ops.convert_to_tensor(diag, name="diag")
      diag = array_ops.matrix_diag(diag)
    if above is not None:
      above = ops.convert_to_tensor(above, name="above")
      above = array_ops.matrix_diag(_pad(above))[..., 1:, :-1]
    # TODO(jvdillon): Consider using scatter_nd instead of creating three full
    # matrices.
    return _add(below, diag, above)
def loop_fn(i):
  diagonal = array_ops.gather(x, i)
  if compat.forward_compatible(2019, 10, 31):
    return array_ops.matrix_diag(diagonal, k=(0, 1), num_rows=4, num_cols=5)
  return array_ops.matrix_diag(diagonal)
def _to_dense(self):
  return array_ops.matrix_diag(math_ops.square(self._diag))
def _sqrt_to_dense(self):
  diag = array_ops.ones(self.vector_shape(), dtype=self.dtype)
  dense = array_ops.matrix_diag(diag)
  dense.set_shape(self.get_shape())
  return math_ops.sqrt(self._scale) * dense
def transition_power_noise_accumulator(self, num_steps,
                                       noise_addition_coefficient=1):
  r"""Sum the transitioned covariance matrix over a number of steps.

  Assumes that state_transition_noise_covariance is a matrix with a single
  non-zero value in the upper left.

  Args:
    num_steps: A [...] shape integer Tensor with numbers of steps to compute
      power sums for.
    noise_addition_coefficient: A multiplier for the state transition noise
      covariance (used in ResolutionCycleModel to compute multiples of full
      period sums).
  Returns:
    The computed power sum, with shape [..., state dimension, state
    dimension] containing:

      [\sum_{p=0}^{num_steps - 1} (
         state_transition^p
         * state_transition_noise_covariance
         * (state_transition^p)^T)]_{i, j} = {
        -contribution_{j + 1}                   if j == i - 1
        contribution_{j + 1} + contribution_{j} if j == i
        -contribution_{j}                       if j == i + 1
        0                                       otherwise
      }

      contribution_k = noise_scalar
        * ((num_steps + self._periodicity - 1 - (k % self._periodicity))
           // self._periodicity)

    Where contribution_k is the sum of noise_scalar additions to component k
    of the periodicity.
  """
  noise_addition_scalar = array_ops.squeeze(
      self.state_transition_noise_covariance, axis=[-1, -2])
  period_range_reshaped = array_ops.reshape(
      math_ops.range(self._periodicity, dtype=num_steps.dtype),
      array_ops.concat(
          [
              array_ops.ones([array_ops.rank(num_steps)], dtype=dtypes.int32),
              [self._periodicity]
          ],
          axis=0))
  reversed_remaining_steps = (
      (period_range_reshaped - (num_steps[..., None] - 1))
      % self._periodicity)
  period_additions_reversed = (
      ops.convert_to_tensor(
          noise_addition_coefficient, self.dtype)[..., None] *
      noise_addition_scalar *
      math_ops.cast(
          (num_steps[..., None] + reversed_remaining_steps) //
          self._periodicity,
          dtype=self.dtype))
  period_additions_diag = array_ops.matrix_diag(period_additions_reversed)
  upper_band = array_ops.concat(
      [
          array_ops.zeros_like(period_additions_diag[..., :-1, 0:1]),
          -period_additions_diag[..., :-1, 0:-2]
      ],
      axis=-1)
  lower_band = array_ops.concat(
      [
          array_ops.zeros_like(period_additions_diag[..., 0:1, :-1]),
          -period_additions_diag[..., 0:-2, :-1]
      ],
      axis=-2)
  period_additions_rotated = array_ops.concat(
      [
          period_additions_reversed[..., -1:],
          period_additions_reversed[..., :-2]
      ],
      axis=-1)
  diagonal = array_ops.matrix_diag(period_additions_reversed[..., :-1] +
                                   period_additions_rotated)
  return diagonal + lower_band + upper_band