def _SparseReorderGrad(op, unused_output_indices_grad, output_values_grad): """Gradients for the SparseReorder op. Args: op: the SparseReorder op unused_output_indices_grad: the incoming gradients of the output indices output_values_grad: the incoming gradients of the output values Returns: Gradient for each of the 3 input tensors: (input_indices, input_values, input_shape) The gradients for input_indices and input_shape is None. """ input_indices = op.inputs[0] input_shape = op.inputs[2] num_entries = array_ops.shape(input_indices)[0] entry_indices = math_ops.range(num_entries) sp_unordered = sparse_tensor.SparseTensor( input_indices, entry_indices, input_shape) sp_ordered = sparse_ops.sparse_reorder(sp_unordered) inverted_permutation = array_ops.invert_permutation(sp_ordered.values) return (None, array_ops.gather(output_values_grad, inverted_permutation), None)
def testInvertPermutation(self): for dtype in [dtypes.int32, dtypes.int64]: with self.test_session(use_gpu=True): x = constant_op.constant([3, 4, 0, 2, 1], dtype=dtype) y = array_ops.invert_permutation(x) self.assertAllEqual(y.get_shape(), [5]) self.assertAllEqual(y.eval(), [2, 4, 3, 0, 1])
def _ConjugateTransposeGrad(op, grad): """Returns conj(unshuffle(grad)).""" p = op.inputs[1] return [ array_ops.transpose( grad, array_ops.invert_permutation(p), conjugate=True), None ]
def reshape_inv(y): # Expand the extra dims hanging off the end, "b_extra_sh". # Note we use y_sh[:-1] + [b_main_sh[-1]] rather than b_main_sh, because y # Could have different batch dims than a and b, because of broadcasting. y_extra_shape = array_ops.concat( (array_ops.shape(y)[:-1], [b_main_sh[-1]], b_extra_sh), 0) y_extra_on_end = array_ops.reshape(y, y_extra_shape) return array_ops.transpose( y_extra_on_end, perm=array_ops.invert_permutation(perm))
def _ProdGrad(op, grad): """Gradient for Prod.""" # The gradient can be expressed by dividing the product by each entry of the # input tensor, but this approach can't deal with zeros in the input. # Here, we avoid this problem by composing the output as a product of two # cumprod operations. input_shape = array_ops.shape(op.inputs[0]) # Reshape reduction indices for the case where the parameter is a scalar reduction_indices = array_ops.reshape(op.inputs[1], [-1]) # Expand grad to full input shape output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]) tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims) grad = array_ops.reshape(grad, output_shape_kept_dims) grad = array_ops.tile(grad, tile_scaling) # Pack all reduced dimensions into a single one, so we can perform the # cumprod ops. If the reduction dims list is empty, it defaults to float32, # so we need to cast here. We put all the shape-related ops on CPU to avoid # copying back and forth, and since listdiff is CPU only. with ops.device("/cpu:0"): rank = array_ops.rank(op.inputs[0]) reduction_indices = (reduction_indices + rank) % rank reduced = math_ops.cast(reduction_indices, dtypes.int32) idx = math_ops.range(0, rank) other, _ = array_ops.setdiff1d(idx, reduced) perm = array_ops.concat([reduced, other], 0) reduced_num = math_ops.reduce_prod(array_ops.gather(input_shape, reduced)) other_num = math_ops.reduce_prod(array_ops.gather(input_shape, other)) permuted = array_ops.transpose(op.inputs[0], perm) permuted_shape = array_ops.shape(permuted) reshaped = array_ops.reshape(permuted, (reduced_num, other_num)) # Calculate product, leaving out the current entry left = math_ops.cumprod(reshaped, axis=0, exclusive=True) right = math_ops.cumprod(reshaped, axis=0, exclusive=True, reverse=True) # For complex inputs, the gradient is in the conjugate direction. y = array_ops.reshape(math_ops.conj(left) * math_ops.conj(right), permuted_shape) # Invert the transpose and reshape operations. # Make sure to set the statically known shape information through a reshape. out = grad * array_ops.transpose(y, array_ops.invert_permutation(perm)) return array_ops.reshape(out, input_shape), None
def _ProdGrad(op, grad): """Gradient for Prod.""" # The gradient can be expressed by dividing the product by each entry of the # input tensor, but this approach can't deal with zeros in the input. # Here, we avoid this problem by composing the output as a product of two # cumprod operations. input_shape = array_ops.shape(op.inputs[0]) # Expand grad to full input shape output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]) tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims) grad = array_ops.reshape(grad, output_shape_kept_dims) grad = array_ops.tile(grad, tile_scaling) # Pack all reduced dimensions into a single one, so we can perform the # cumprod ops. If the reduction dims list is empty, it defaults to float32, # so we need to cast here. reduced = math_ops.cast(op.inputs[1], dtypes.int32) idx = math_ops.range(0, array_ops.rank(op.inputs[0])) other, _ = array_ops.listdiff(idx, reduced) perm = array_ops.concat(0, [reduced, other]) reduced_num = math_ops.reduce_prod(array_ops.gather(input_shape, reduced)) other_num = math_ops.reduce_prod(array_ops.gather(input_shape, other)) permuted = array_ops.transpose(op.inputs[0], perm) permuted_shape = array_ops.shape(permuted) reshaped = array_ops.reshape(permuted, (reduced_num, other_num)) # Calculate product, leaving out the current entry left = math_ops.cumprod(reshaped, axis=0, exclusive=True) right = math_ops.cumprod(reshaped, axis=0, exclusive=True, reverse=True) y = array_ops.reshape(left * right, permuted_shape) # Invert the transpose and reshape operations. # Make sure to set the statically known shape information through a reshape. out = grad * array_ops.transpose(y, array_ops.invert_permutation(perm)) return array_ops.reshape(out, input_shape), None
def _TransposeGrad(op, grad): """Returns unshuffle(grad).""" p = op.inputs[1] return [array_ops.transpose(grad, array_ops.invert_permutation(p)), None]
def testInvertPermutationTwiceIsNoop(self): self._assertOpOutputMatchesExpected( lambda x: array_ops.invert_permutation(array_ops.invert_permutation(x)), np.array([1, 2, 0], np.int32), expected=np.array([1, 2, 0], dtype=np.int32))
def lu_reconstruct(lower_upper, perm, validate_args=False, name=None): """The reconstruct one or more matrices from their LU decomposition(s). Args: lower_upper: `lu` as returned by `tf.linalg.lu`, i.e., if `matmul(P, matmul(L, U)) = X` then `lower_upper = L + U - eye`. perm: `p` as returned by `tf.linag.lu`, i.e., if `matmul(P, matmul(L, U)) = X` then `perm = argmax(P)`. validate_args: Python `bool` indicating whether arguments should be checked for correctness. Default value: `False` (i.e., don't validate arguments). name: Python `str` name given to ops managed by this object. Default value: `None` (i.e., 'lu_reconstruct'). Returns: x: The original input to `tf.linalg.lu`, i.e., `x` as in, `lu_reconstruct(*tf.linalg.lu(x))`. #### Examples ```python import numpy as np import tensorflow as tf import tensorflow_probability as tfp x = [[[3., 4], [1, 2]], [[7., 8], [3, 4]]] x_reconstructed = tf.linalg.lu_reconstruct(*tf.linalg.lu(x)) tf.assert_near(x, x_reconstructed) # ==> True ``` """ with ops.name_scope(name or 'lu_reconstruct'): lower_upper = ops.convert_to_tensor(lower_upper, dtype_hint=dtypes.float32, name='lower_upper') perm = ops.convert_to_tensor(perm, dtype_hint=dtypes.int32, name='perm') assertions = lu_reconstruct_assertions(lower_upper, perm, validate_args) if assertions: with ops.control_dependencies(assertions): lower_upper = array_ops.identity(lower_upper) perm = array_ops.identity(perm) shape = array_ops.shape(lower_upper) lower = set_diag(band_part(lower_upper, num_lower=-1, num_upper=0), array_ops.ones(shape[:-1], dtype=lower_upper.dtype)) upper = band_part(lower_upper, num_lower=0, num_upper=-1) x = math_ops.matmul(lower, upper) if (lower_upper.shape is None or lower_upper.shape.rank is None or lower_upper.shape.rank != 2): # We either don't know the batch rank or there are >0 batch dims. batch_size = math_ops.reduce_prod(shape[:-2]) d = shape[-1] x = array_ops.reshape(x, [batch_size, d, d]) perm = array_ops.reshape(perm, [batch_size, d]) perm = map_fn.map_fn(array_ops.invert_permutation, perm) batch_indices = array_ops.broadcast_to( math_ops.range(batch_size)[:, array_ops.newaxis], [batch_size, d]) x = array_ops.gather_nd( x, array_ops.stack([batch_indices, perm], axis=-1)) x = array_ops.reshape(x, shape) else: x = array_ops.gather(x, array_ops.invert_permutation(perm)) x.set_shape(lower_upper.shape) return x
def invert_twice(x): return array_ops.invert_permutation(array_ops.invert_permutation(x))
def _inverse(self, y): return array_ops.gather(y, array_ops.invert_permutation(self.permutation), axis=-1)
def _inverse(self, y): return array_ops.gather( y, array_ops.invert_permutation(self.permutation), axis=-1)