def _ReciprocalGradGrad(op, grad):
  b = op.inputs[1]
  # op.output[0]: y = -b * conj(a)^2
  with ops.control_dependencies([grad]):
    ca = math_ops.conj(op.inputs[0])
    cg = math_ops.conj(grad)
    return cg * -2.0 * b * ca, gen_math_ops.reciprocal_grad(ca, grad)
def _SigmoidGradGrad(op, grad):
  with ops.control_dependencies([grad.op]):
    a = math_ops.conj(op.inputs[0])
    b = math_ops.conj(op.inputs[1])
    gb = grad * b
    # pylint: disable=protected-access
    return gb - 2.0 * gb * a, gen_math_ops._sigmoid_grad(a, grad)
def _ReciprocalGradGrad(op, grad):
  b = op.inputs[1]
  # op.output[0]: y = -b * conj(a)^2
  with ops.control_dependencies([grad.op]):
    ca = math_ops.conj(op.inputs[0])
    cg = math_ops.conj(grad)
    # pylint: disable=protected-access
    return cg * -2.0 * b * ca, gen_math_ops._reciprocal_grad(ca, grad)
def _RsqrtGradGrad(op, grad):
  """Returns backprop gradient for f(a, b) = -0.5 * b * conj(a)^3."""
  a = op.inputs[0]  # a = x^{-1/2}
  b = op.inputs[1]  # backprop gradient for a
  with ops.control_dependencies([grad]):
    ca = math_ops.conj(a)
    cg = math_ops.conj(grad)
    grad_a = -1.5 * cg * b * math_ops.square(ca)
    grad_b = gen_math_ops.rsqrt_grad(ca, grad)
    return grad_a, grad_b
def _DivGrad(op, grad):
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  # pylint: disable=protected-access
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  x = math_ops.conj(x)
  y = math_ops.conj(y)
  return (array_ops.reshape(math_ops.reduce_sum(grad / y, rx), sx),
          array_ops.reshape(
              math_ops.reduce_sum(grad * (-x / math_ops.square(y)), ry), sy))
def _MulGrad(op, grad):
  """The gradient of scalar multiplication."""
  x = op.inputs[0]
  y = op.inputs[1]
  assert x.dtype.base_dtype == y.dtype.base_dtype, (x.dtype, " vs. ", y.dtype)
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  x = math_ops.conj(x)
  y = math_ops.conj(y)
  return (array_ops.reshape(math_ops.reduce_sum(grad * y, rx), sx),
          array_ops.reshape(math_ops.reduce_sum(x * grad, ry), sy))
def _DivGrad(op, grad):
  """The gradient for the Div operator."""
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
  x = math_ops.conj(x)
  y = math_ops.conj(y)
  return (array_ops.reshape(
      math_ops.reduce_sum(math_ops.div(grad, y), rx), sx),
          array_ops.reshape(
              math_ops.reduce_sum(
                  grad * math_ops.div(math_ops.div(-x, y), y), ry), sy))
def _MulGrad(op, grad):
  x = op.inputs[0]
  y = op.inputs[1]
  assert x.dtype.base_dtype == y.dtype.base_dtype, (x.dtype, " vs. ", y.dtype)
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  if x.dtype.base_dtype == dtypes.complex64:
    return (array_ops.reshape(
        math_ops.reduce_sum(grad * math_ops.conj(y), rx), sx),
            array_ops.reshape(
                math_ops.reduce_sum(math_ops.conj(x) * grad, ry), sy))
  else:
    return (array_ops.reshape(math_ops.reduce_sum(grad * y, rx), sx),
            array_ops.reshape(math_ops.reduce_sum(x * grad, ry), sy))
def _FloorModGrad(op, grad):
  """Returns grad * (1, -floor(x/y))."""
  x = math_ops.conj(op.inputs[0])
  y = math_ops.conj(op.inputs[1])
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
  floor_xy = math_ops.floor_div(x, y)
  gx = array_ops.reshape(math_ops.reduce_sum(grad, rx), sx)
  gy = array_ops.reshape(
      math_ops.reduce_sum(grad * math_ops.negative(floor_xy), ry), sy)
  return gx, gy
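# Illustrative NumPy finite-difference check (not part of the TensorFlow
# source) of the formula above: writing x mod y = x - y * floor(x / y) and
# treating floor(x / y) as locally constant gives d/dx = 1 and
# d/dy = -floor(x / y), away from the discontinuities.
import numpy as np

x, y, eps = 7.3, 2.0, 1e-6
dfdy = (np.mod(x, y + eps) - np.mod(x, y - eps)) / (2 * eps)
np.testing.assert_allclose(dfdy, -np.floor(x / y), rtol=1e-4)  # -3.0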
def _DivGrad(op, grad):
  """The gradient for the Div operator."""
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  # pylint: disable=protected-access
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  # pylint: enable=protected-access
  x = math_ops.conj(x)
  y = math_ops.conj(y)
  return (array_ops.reshape(
      math_ops.reduce_sum(math_ops.div(grad, y), rx), sx),
          array_ops.reshape(
              math_ops.reduce_sum(
                  grad * math_ops.div(-x, math_ops.square(y)), ry), sy))
def _DivNoNanGrad(op, grad):
  """DivNoNan op gradient."""
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
  x = math_ops.conj(x)
  y = math_ops.conj(y)
  return (array_ops.reshape(
      math_ops.reduce_sum(math_ops.div_no_nan(grad, y), rx), sx),
          array_ops.reshape(
              math_ops.reduce_sum(
                  grad * math_ops.div_no_nan(math_ops.div_no_nan(-x, y), y),
                  ry), sy))
def _PolygammaGrad(op, grad):
  """Returns gradient of psi(n, x) with respect to n and x."""
  # TODO(tillahoffmann): Add derivative with respect to n
  n = op.inputs[0]
  x = op.inputs[1]
  # Broadcast gradients
  sn = array_ops.shape(n)
  sx = array_ops.shape(x)
  unused_rn, rx = gen_array_ops._broadcast_gradient_args(sn, sx)
  # Evaluate gradient
  with ops.control_dependencies([grad.op]):
    n = math_ops.conj(n)
    x = math_ops.conj(x)
    partial_x = math_ops.polygamma(n + 1, x)
    return (None,
            array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx))
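# Quick SciPy-based sanity check (illustrative only, assumes SciPy is
# available) of the identity used for partial_x above:
# d/dx psi(n, x) = psi(n + 1, x), i.e. differentiating a polygamma function
# raises its order by one.
import numpy as np
from scipy.special import polygamma

n, x, eps = 2, 1.7, 1e-6
numeric = (polygamma(n, x + eps) - polygamma(n, x - eps)) / (2 * eps)
np.testing.assert_allclose(numeric, polygamma(n + 1, x), rtol=1e-5)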
def _SquareGrad(op, grad):
  x = op.inputs[0]
  # Added control dependencies to prevent 2*x from being computed too early.
  with ops.control_dependencies([grad.op]):
    if x.dtype.is_complex:
      x = math_ops.conj(x)
    return grad * (2.0 * x)
def _ExpGrad(op, grad):
  """Returns grad * exp(x)."""
  y = op.outputs[0]  # y = e^x
  with ops.control_dependencies([grad.op]):
    if y.dtype.is_complex:
      y = math_ops.conj(y)
    return grad * y
def _TanhGrad(op, grad):
  """Returns grad * (1 - tanh(x) * tanh(x))."""
  y = op.outputs[0]  # y = tanh(x)
  with ops.control_dependencies([grad.op]):
    if y.dtype.is_complex:
      y = math_ops.conj(y)
    return grad * (1 - math_ops.square(y))
def _SigmoidGrad(op, grad):
  """Returns grad * sigmoid(x) * (1 - sigmoid(x))."""
  y = op.outputs[0]  # y = sigmoid(x)
  with ops.control_dependencies([grad.op]):
    if y.dtype.is_complex:
      y = math_ops.conj(y)
    return grad * (y * (1 - y))
def _CosGrad(op, grad):
  """Returns grad * -sin(x)."""
  x = op.inputs[0]
  with ops.control_dependencies([grad.op]):
    if x.dtype.is_complex:
      x = math_ops.conj(x)
    return -grad * math_ops.sin(x)
def _ErfGrad(op, grad):
  """Returns grad * 2/sqrt(pi) * exp(-x**2)."""
  x = op.inputs[0]
  two_over_root_pi = constant_op.constant(2 / np.sqrt(np.pi), dtype=grad.dtype)
  with ops.control_dependencies([grad]):
    x = math_ops.conj(x)
    return grad * two_over_root_pi * math_ops.exp(-math_ops.square(x))
def _SquareGrad(op, grad):
  x = op.inputs[0]
  # Added control dependencies to prevent 2*x from being computed too early.
  with ops.control_dependencies([grad]):
    x = math_ops.conj(x)
    y = constant_op.constant(2.0, dtype=x.dtype)
    return math_ops.multiply(grad, math_ops.multiply(x, y))
def _SigmoidGrad(op, grad):
  """Returns grad * sigmoid(x) * (1 - sigmoid(x))."""
  y = op.outputs[0]  # y = sigmoid(x)
  with ops.control_dependencies([grad.op]):
    y = math_ops.conj(y)
    # pylint: disable=protected-access
    return gen_math_ops._sigmoid_grad(y, grad)
def _Expm1Grad(op, grad):
  """Returns grad * exp(x)."""
  x = op.inputs[0]
  with ops.control_dependencies([grad]):
    x = math_ops.conj(x)
    y = math_ops.exp(x)
    return grad * y
def _ZetaGrad(op, grad):
  """Returns gradient of zeta(x, q) with respect to x and q."""
  # TODO(tillahoffmann): Add derivative with respect to x
  x = op.inputs[0]
  q = op.inputs[1]
  # Broadcast gradients
  sx = array_ops.shape(x)
  sq = array_ops.shape(q)
  unused_rx, rq = gen_array_ops._broadcast_gradient_args(sx, sq)
  # Evaluate gradient
  with ops.control_dependencies([grad.op]):
    x = math_ops.conj(x)
    q = math_ops.conj(q)
    partial_q = -x * math_ops.zeta(x + 1, q)
    return (None,
            array_ops.reshape(math_ops.reduce_sum(partial_q * grad, rq), sq))
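# Analogous SciPy-based finite-difference check (illustrative only, assumes
# SciPy is available) of the partial_q formula above: for the Hurwitz zeta
# function, d/dq zeta(x, q) = -x * zeta(x + 1, q).
import numpy as np
from scipy.special import zeta

x, q, eps = 3.0, 1.5, 1e-6
numeric = (zeta(x, q + eps) - zeta(x, q - eps)) / (2 * eps)
np.testing.assert_allclose(numeric, -x * zeta(x + 1, q), rtol=1e-5)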
def _TanhGrad(op, grad):
  """Returns grad * (1 - tanh(x) * tanh(x))."""
  y = op.outputs[0]  # y = tanh(x)
  with ops.control_dependencies([grad.op]):
    y = math_ops.conj(y)
    # pylint: disable=protected-access
    return gen_math_ops._tanh_grad(y, grad)
def test_defining_spd_operator_by_taking_real_part(self):
  with self.cached_session() as sess:
    # S is real and positive.
    s = linear_operator_test_util.random_uniform(
        shape=(10, 2, 3, 4), dtype=dtypes.float32, minval=1., maxval=2.)

    # Let S = S1 + S2, the Hermitian and anti-hermitian parts.
    # S1 = 0.5 * (S + S^H), S2 = 0.5 * (S - S^H),
    # where ^H is the Hermitian transpose of the function:
    #    f(n0, n1, n2)^H := ComplexConjugate[f(N0-n0, N1-n1, N2-n2)].
    # We want to isolate S1, since
    #   S1 is Hermitian by construction
    #   S1 is real since S is
    #   S1 is positive since it is the sum of two positive kernels

    # IDFT[S] = IDFT[S1] + IDFT[S2]
    #         =      H1  +      H2
    # where H1 is real since it is Hermitian,
    # and H2 is imaginary since it is anti-Hermitian.
    ifft_s = fft_ops.ifft3d(math_ops.cast(s, dtypes.complex64))

    # Throw away H2, keep H1.
    real_ifft_s = math_ops.real(ifft_s)

    # This is the perfect spectrum!
    # spectrum = DFT[H1]
    #          = S1,
    fft_real_ifft_s = fft_ops.fft3d(
        math_ops.cast(real_ifft_s, dtypes.complex64))

    # S1 is Hermitian ==> operator is real.
    # S1 is real ==> operator is self-adjoint.
    # S1 is positive ==> operator is positive-definite.
    operator = linalg.LinearOperatorCirculant3D(fft_real_ifft_s)

    # Allow for complex output so we can check operator has zero imag part.
    self.assertEqual(operator.dtype, dtypes.complex64)

    matrix, matrix_t = sess.run([
        operator.to_dense(),
        array_ops.matrix_transpose(operator.to_dense())
    ])
    operator.assert_positive_definite().run()  # Should not fail.
    np.testing.assert_allclose(0, np.imag(matrix), atol=1e-6)
    self.assertAllClose(matrix, matrix_t)

    # Just to test the theory, get S2 as well.
    # This should create an imaginary operator.
    # S2 is anti-Hermitian ==> operator is imaginary.
    # S2 is real ==> operator is self-adjoint.
    imag_ifft_s = math_ops.imag(ifft_s)
    fft_imag_ifft_s = fft_ops.fft3d(
        1j * math_ops.cast(imag_ifft_s, dtypes.complex64))
    operator_imag = linalg.LinearOperatorCirculant3D(fft_imag_ifft_s)

    matrix, matrix_h = sess.run([
        operator_imag.to_dense(),
        array_ops.matrix_transpose(math_ops.conj(operator_imag.to_dense()))
    ])
    self.assertAllClose(matrix, matrix_h)
    np.testing.assert_allclose(0, np.real(matrix), atol=1e-7)
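# Standalone NumPy sketch (illustrative, not part of the test above) of the
# decomposition the test relies on: for a real spectrum S, the real part of
# IDFT[S] is the IDFT of the Hermitian part S1, the imaginary part comes from
# the anti-Hermitian part S2, and FFT recovers S1 and S2 with S1 + S2 == S.
import numpy as np

n = 8
s = np.random.uniform(1., 2., size=n)        # real, positive spectrum
h = np.fft.ifft(s)
s1 = np.fft.fft(h.real)                      # spectrum of H1 (Hermitian part)
s2 = np.fft.fft(1j * h.imag)                 # spectrum of H2 (anti-Hermitian part)
np.testing.assert_allclose(s1 + s2, s, atol=1e-12)
# S1 is Hermitian in the sense used above: S1[k] == conj(S1[(N - k) mod N]).
np.testing.assert_allclose(s1, np.conj(s1[-np.arange(n) % n]), atol=1e-12)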
def __init__(self,
             num_rows,
             multiplier,
             is_non_singular=None,
             is_self_adjoint=None,
             is_positive_definite=None,
             assert_proper_shapes=False,
             name="LinearOperatorScaledIdentity"):
  """Initialize a `LinearOperatorScaledIdentity`.

  The `LinearOperatorScaledIdentity` is initialized with `num_rows`, which
  determines the size of each identity matrix, and a `multiplier`, which
  defines `dtype`, batch shape, and scale of each matrix.

  This operator is able to broadcast the leading (batch) dimensions.

  Args:
    num_rows: Scalar non-negative integer `Tensor`. Number of rows in the
      corresponding identity matrix.
    multiplier: `Tensor` of shape `[B1,...,Bb]`, or `[]` (a scalar).
    is_non_singular: Expect that this operator is non-singular.
    is_self_adjoint: Expect that this operator is equal to its hermitian
      transpose.
    is_positive_definite: Expect that this operator is positive definite.
    assert_proper_shapes: Python `bool`. If `False`, only perform static
      checks that initialization and method arguments have proper shape.
      If `True`, and static checks are inconclusive, add asserts to the graph.
    name: A name for this `LinearOperator`

  Raises:
    ValueError: If `num_rows` is determined statically to be non-scalar, or
      negative.
  """
  self._assert_proper_shapes = assert_proper_shapes

  with ops.name_scope(name, values=[multiplier, num_rows]):
    self._multiplier = ops.convert_to_tensor(multiplier, name="multiplier")

    super(LinearOperatorScaledIdentity, self).__init__(
        dtype=self._multiplier.dtype,
        is_non_singular=is_non_singular,
        is_self_adjoint=is_self_adjoint,
        is_positive_definite=is_positive_definite,
        name=name)

    # Shape [B1,...Bb, 1, 1]
    self._multiplier_matrix = array_ops.expand_dims(
        array_ops.expand_dims(self.multiplier, -1), -1)
    self._multiplier_matrix_conj = math_ops.conj(self._multiplier_matrix)
    self._abs_multiplier = math_ops.abs(self.multiplier)

    self._num_rows = linear_operator_util.shape_tensor(
        num_rows, name="num_rows")
    self._num_rows_static = tensor_util.constant_value(self._num_rows)
    self._check_num_rows_possibly_add_asserts()
    self._num_rows_cast_to_dtype = math_ops.cast(self._num_rows, self.dtype)
    self._num_rows_cast_to_real_dtype = math_ops.cast(
        self._num_rows, self.dtype.real_dtype)
def _TanGrad(op, grad):
  """Returns grad * sec^2(x), i.e. grad * 1/cos^2(x)."""
  x = op.inputs[0]
  with ops.control_dependencies([grad]):
    x = math_ops.conj(x)
    secx = math_ops.reciprocal(math_ops.cos(x))
    secx2 = math_ops.square(secx)
    return grad * secx2
def _InvGrad(op, grad):
  """Returns -grad * (1 / x^2)."""
  y = op.outputs[0]  # y = 1 / x
  # Added control dependencies to prevent -x^2 from being computed too early.
  with ops.control_dependencies([grad.op]):
    if y.dtype.is_complex:
      y = math_ops.conj(y)
    return grad * (-math_ops.square(y))
def runFiniteDifferences(self,
                         shapes,
                         dtypes=(dtypes_lib.float32, dtypes_lib.float64,
                                 dtypes_lib.complex64, dtypes_lib.complex128),
                         scalarTest=False):
  with self.test_session(use_gpu=True):
    for shape in shapes:
      for batch in False, True:
        for dtype in dtypes:
          if not scalarTest:
            data = np.random.randn(shape[0], shape[1])
            if dtype.is_complex:
              data = data.astype(np.complex64)
              data += 1j * np.random.randn(shape[0], shape[1])
            x = constant_op.constant(data, dtype)
            tensor = math_ops.matmul(
                x, math_ops.conj(array_ops.transpose(x))) / shape[0]
          else:
            # This is designed to be a faster test for larger matrices.
            data = np.random.randn()
            if dtype.is_complex:
              data = np.complex64(data)
              data += 1j * np.random.randn()
            x = constant_op.constant(data, dtype)
            R = constant_op.constant(
                np.random.randn(shape[0], shape[1]), dtype)
            e = math_ops.multiply(R, x)
            tensor = math_ops.matmul(
                e, math_ops.conj(array_ops.transpose(e))) / shape[0]

          # Inner-most matrices in tensor are positive definite.
          if batch:
            tensor = array_ops.tile(
                array_ops.expand_dims(tensor, 0), [4, 1, 1])
          y = linalg_ops.cholesky(tensor)
          if scalarTest:
            y = math_ops.reduce_mean(y)
          error = gradient_checker.compute_gradient_error(
              x, x._shape_as_list(), y, y._shape_as_list())
          tf_logging.info("error = %f", error)
          if dtype == dtypes_lib.float64:
            self.assertLess(error, 1e-5)
          elif dtype == dtypes_lib.complex128:
            self.assertLess(error, 5e-5)
          else:
            self.assertLess(error, 5e-3)
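# Small NumPy sketch (illustrative only, not part of the test above) of why
# the construction used there yields valid Cholesky inputs: for any full-rank
# X, the matrix X @ conj(X)^T is Hermitian and positive definite, so its
# Cholesky factorization exists.
import numpy as np

x = np.random.randn(4, 4) + 1j * np.random.randn(4, 4)
a = x @ x.conj().T / 4.
np.testing.assert_allclose(a, a.conj().T)   # Hermitian
assert np.all(np.linalg.eigvalsh(a) > 0)    # positive definite (a.s. for random x)
np.linalg.cholesky(a)                       # succeeds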
def _compareConj(self, cplx, use_gpu):
  np_ans = np.conj(cplx)
  with test_util.device(use_gpu=use_gpu):
    inx = ops.convert_to_tensor(cplx)
    tf_conj = math_ops.conj(inx)
    tf_ans = self.evaluate(tf_conj)
    self.assertAllEqual(np_ans, tf_ans)
    self.assertShapeEqual(np_ans, tf_conj)
def _ProdGrad(op, grad):
  """Gradient for Prod."""
  # The gradient can be expressed by dividing the product by each entry of the
  # input tensor, but this approach can't deal with zeros in the input.
  # Here, we avoid this problem by composing the output as a product of two
  # cumprod operations.

  input_shape = array_ops.shape(op.inputs[0])
  # Reshape reduction indices for the case where the parameter is a scalar
  reduction_indices = array_ops.reshape(op.inputs[1], [-1])

  # Expand grad to full input shape
  output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
  tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims)
  grad = array_ops.reshape(grad, output_shape_kept_dims)
  grad = array_ops.tile(grad, tile_scaling)

  # Pack all reduced dimensions into a single one, so we can perform the
  # cumprod ops. If the reduction dims list is empty, it defaults to float32,
  # so we need to cast here. We put all the shape-related ops on CPU to avoid
  # copying back and forth, and since listdiff is CPU only.
  with ops.device("/cpu:0"):
    rank = array_ops.rank(op.inputs[0])
    reduction_indices = (reduction_indices + rank) % rank
    reduced = math_ops.cast(reduction_indices, dtypes.int32)
    idx = math_ops.range(0, rank)
    other, _ = array_ops.setdiff1d(idx, reduced)
    perm = array_ops.concat([reduced, other], 0)
    reduced_num = math_ops.reduce_prod(array_ops.gather(input_shape, reduced))
    other_num = math_ops.reduce_prod(array_ops.gather(input_shape, other))
  permuted = array_ops.transpose(op.inputs[0], perm)
  permuted_shape = array_ops.shape(permuted)
  reshaped = array_ops.reshape(permuted, (reduced_num, other_num))

  # Calculate product, leaving out the current entry
  left = math_ops.cumprod(reshaped, axis=0, exclusive=True)
  right = math_ops.cumprod(reshaped, axis=0, exclusive=True, reverse=True)
  # For complex inputs, the gradient is in the conjugate direction.
  y = array_ops.reshape(
      math_ops.conj(left) * math_ops.conj(right), permuted_shape)

  # Invert the transpose and reshape operations.
  # Make sure to set the statically known shape information through a reshape.
  out = grad * array_ops.transpose(y, array_ops.invert_permutation(perm))
  return array_ops.reshape(out, input_shape), None
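# Minimal NumPy sketch (illustrative, not part of the gradient code above) of
# the exclusive left/right cumprod trick: their elementwise product equals the
# product of all *other* entries, i.e. d(prod(x))/dx_i, and it stays finite
# when x contains zeros, unlike the naive prod(x) / x formulation.
import numpy as np

x = np.array([2., 0., 3., 4.])
left = np.concatenate(([1.], np.cumprod(x[:-1])))               # exclusive cumprod
right = np.concatenate((np.cumprod(x[::-1][:-1])[::-1], [1.]))  # exclusive reverse cumprod
np.testing.assert_allclose(left * right, [0., 24., 0., 0.])     # product of the other entries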
def _DigammaGrad(op, grad):
  """Compute gradient of the digamma function with respect to its argument."""
  x = op.inputs[0]
  with ops.control_dependencies([grad]):
    x = math_ops.conj(x)
    return grad * math_ops.polygamma(array_ops.constant(1, dtype=x.dtype), x)
def _eigvals(self):
  eigvals = self.operator.eigvals()
  if not self.operator.is_self_adjoint:
    eigvals = math_ops.conj(eigvals)
  return eigvals
def _CosGrad(op, grad):
  """Returns grad * -sin(x)."""
  x = op.inputs[0]
  with ops.control_dependencies([grad.op]):
    x = math_ops.conj(x)
    return -grad * math_ops.sin(x)
def _TanhGradGrad(op, grad):
  with ops.control_dependencies([grad]):
    a = math_ops.conj(op.inputs[0])
    b = math_ops.conj(op.inputs[1])
    # pylint: disable=protected-access
    return grad * -2.0 * b * a, gen_math_ops._tanh_grad(a, grad)
def testConjReal(self):
  for dtype in (dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.float16,
                dtypes_lib.float32, dtypes_lib.float64):
    x = array_ops.placeholder(dtype)
    y = math_ops.conj(x)
    self.assertEqual(x, y)
def _diag_part(self):
  normalized_axis = self.reflection_axis / linalg.norm(
      self.reflection_axis, axis=-1, keepdims=True)
  return 1. - 2 * normalized_axis * math_ops.conj(normalized_axis)
def _SqrtGradGrad(op, grad):
  a = op.inputs[0]
  y = op.outputs[0]  # y = 0.5 * b / conj(a)
  with ops.control_dependencies([grad]):
    ga = grad / a
    return -math_ops.conj(ga) * y, 0.5 * ga
def _ExpGrad(op, grad):
  """Returns grad * exp(x)."""
  y = op.outputs[0]  # y = e^x
  with ops.control_dependencies([grad]):
    y = math_ops.conj(y)
    return grad * y
def auto_correlation(x,
                     axis=-1,
                     max_lags=None,
                     center=True,
                     normalize=True,
                     name="auto_correlation"):
  """Auto correlation along one axis.

  Given a `1-D` wide sense stationary (WSS) sequence `X`, the auto correlation
  `RXX` may be defined as (with `E` expectation and `Conj` complex conjugate)

  ```
  RXX[m] := E{ W[m] Conj(W[0]) } = E{ W[0] Conj(W[-m]) },
  W[n]   := (X[n] - MU) / S,
  MU     := E{ X[0] },
  S**2   := E{ (X[0] - MU) Conj(X[0] - MU) }.
  ```

  This function takes the viewpoint that `x` is (along one axis) a finite
  sub-sequence of a realization of (WSS) `X`, and then uses `x` to produce an
  estimate of `RXX[m]` as follows:

  After extending `x` from length `L` to `inf` by zero padding, the auto
  correlation estimate `rxx[m]` is computed for `m = 0, 1, ..., max_lags` as

  ```
  rxx[m] := (L - m)**-1 sum_n w[n + m] Conj(w[n]),
  w[n]   := (x[n] - mu) / s,
  mu     := L**-1 sum_n x[n],
  s**2   := L**-1 sum_n (x[n] - mu) Conj(x[n] - mu)
  ```

  The error in this estimate is proportional to `1 / sqrt(len(x) - m)`, so
  users often set `max_lags` small enough so that the entire output is
  meaningful.

  Note that since `mu` is an imperfect estimate of `E{ X[0] }`, and we divide
  by `len(x) - m` rather than `len(x) - m - 1`, our estimate of auto
  correlation contains a slight bias, which goes to zero as
  `len(x) - m --> infinity`.

  Args:
    x: `float32` or `complex64` `Tensor`.
    axis: Python `int`. The axis number along which to compute correlation.
      Other dimensions index different batch members.
    max_lags: Positive `int` tensor. The maximum value of `m` to consider (in
      equation above). If `max_lags >= x.shape[axis]`, we effectively re-set
      `max_lags` to `x.shape[axis] - 1`.
    center: Python `bool`. If `False`, do not subtract the mean estimate `mu`
      from `x[n]` when forming `w[n]`.
    normalize: Python `bool`. If `False`, do not divide by the variance
      estimate `s**2` when forming `w[n]`.
    name: `String` name to prepend to created ops.

  Returns:
    `rxx`: `Tensor` of same `dtype` as `x`. `rxx.shape[i] = x.shape[i]` for
      `i != axis`, and `rxx.shape[axis] = max_lags + 1`.

  Raises:
    TypeError: If `x` is not a supported type.
  """
  # Implementation details:
  # Extend length N / 2 1-D array x to length N by zero padding onto the end.
  # Then, set
  #   F[x]_k := sum_n x_n exp{-i 2 pi k n / N }.
  # It is not hard to see that
  #   F[x]_k Conj(F[x]_k) = F[R]_k, where
  #   R_m := sum_n x_n Conj(x_{(n - m) mod N}).
  # One can also check that R_m / (N / 2 - m) is an unbiased estimate of
  # RXX[m].

  # Since F[x] is the DFT of x, this leads us to a zero-padding and FFT/IFFT
  # based version of estimating RXX.
  # Note that this is a special case of the Wiener-Khinchin Theorem.
  with ops.name_scope(name, values=[x]):
    x = ops.convert_to_tensor(x, name="x")

    # Rotate dimensions of x in order to put axis at the rightmost dim.
    # FFT op requires this.
    rank = util.prefer_static_rank(x)
    if axis < 0:
      axis = rank + axis
    shift = rank - 1 - axis
    # Suppose x.shape[axis] = T, so there are T "time" steps.
    #   ==> x_rotated.shape = B + [T],
    # where B is x_rotated's batch shape.
    x_rotated = util.rotate_transpose(x, shift)

    if center:
      x_rotated -= math_ops.reduce_mean(x_rotated, axis=-1, keepdims=True)

    # x_len = N / 2 from above explanation. The length of x along axis.
    # Get a value for x_len that works in all cases.
    x_len = util.prefer_static_shape(x_rotated)[-1]

    # TODO(langmore) Investigate whether this zero padding helps or hurts. At
    # the moment is necessary so that all FFT implementations work.
    # Zero pad to the next power of 2 greater than 2 * x_len, which equals
    # 2**(ceil(Log_2(2 * x_len))). Note: Log_2(X) = Log_e(X) / Log_e(2).
    x_len_float64 = math_ops.cast(x_len, np.float64)
    target_length = math_ops.pow(
        np.float64(2.),
        math_ops.ceil(math_ops.log(x_len_float64 * 2) / np.log(2.)))
    pad_length = math_ops.cast(target_length - x_len_float64, np.int32)

    # We should have:
    # x_rotated_pad.shape = x_rotated.shape[:-1] + [T + pad_length]
    #                     = B + [T + pad_length]
    x_rotated_pad = util.pad(x_rotated, axis=-1, back=True, count=pad_length)

    dtype = x.dtype
    if not dtype.is_complex:
      if not dtype.is_floating:
        raise TypeError("Argument x must have either float or complex dtype"
                        " found: {}".format(dtype))
      x_rotated_pad = math_ops.complex(x_rotated_pad,
                                       dtype.real_dtype.as_numpy_dtype(0.))

    # Autocorrelation is IFFT of power-spectral density (up to some scaling).
    fft_x_rotated_pad = spectral_ops.fft(x_rotated_pad)
    spectral_density = fft_x_rotated_pad * math_ops.conj(fft_x_rotated_pad)
    # shifted_product is R[m] from above detailed explanation.
    # It is the inner product sum_n X[n] * Conj(X[n - m]).
    shifted_product = spectral_ops.ifft(spectral_density)

    # Cast back to real-valued if x was real to begin with.
    shifted_product = math_ops.cast(shifted_product, dtype)

    # Figure out if we can deduce the final static shape, and set max_lags.
    # Use x_rotated as a reference, because it has the time dimension in the
    # far right, and was created before we performed all sorts of crazy shape
    # manipulations.
    know_static_shape = True
    if not x_rotated.shape.is_fully_defined():
      know_static_shape = False
    if max_lags is None:
      max_lags = x_len - 1
    else:
      max_lags = ops.convert_to_tensor(max_lags, name="max_lags")
      max_lags_ = tensor_util.constant_value(max_lags)
      if max_lags_ is None or not know_static_shape:
        know_static_shape = False
        max_lags = math_ops.minimum(x_len - 1, max_lags)
      else:
        max_lags = min(x_len - 1, max_lags_)

    # Chop off the padding.
    # We allow users to provide a huge max_lags, but cut it off here.
    # shifted_product_chopped.shape = x_rotated.shape[:-1] + [max_lags]
    shifted_product_chopped = shifted_product[..., :max_lags + 1]

    # If possible, set shape.
    if know_static_shape:
      chopped_shape = x_rotated.shape.as_list()
      chopped_shape[-1] = min(x_len, max_lags + 1)
      shifted_product_chopped.set_shape(chopped_shape)

    # Recall R[m] is a sum of N / 2 - m nonzero terms x[n] Conj(x[n - m]). The
    # other terms were zeros arising only due to zero padding.
    # `denominator = (N / 2 - m)` (defined below) is the proper term to
    # divide by to make this an unbiased estimate of the expectation
    # E[X[n] Conj(X[n - m])].
    x_len = math_ops.cast(x_len, dtype.real_dtype)
    max_lags = math_ops.cast(max_lags, dtype.real_dtype)
    denominator = x_len - math_ops.range(0., max_lags + 1.)
    denominator = math_ops.cast(denominator, dtype)
    shifted_product_rotated = shifted_product_chopped / denominator

    if normalize:
      shifted_product_rotated /= shifted_product_rotated[..., :1]

    # Transpose dimensions back to those of x.
    return util.rotate_transpose(shifted_product_rotated, -shift)
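# Standalone NumPy check (illustrative only, not part of the function above)
# of the FFT identity it relies on: after zero padding x from length L to
# 2 * L, IFFT(FFT(x) * Conj(FFT(x)))[m] equals R[m] = sum_n x[n] * Conj(x[n - m]),
# so dividing by (L - m) gives the rxx[m] estimate described in the docstring.
import numpy as np

x = np.random.randn(8)
L = len(x)
x_pad = np.concatenate([x, np.zeros(L)])             # zero pad to length 2 * L
f = np.fft.fft(x_pad)
shifted_product = np.fft.ifft(f * np.conj(f)).real   # R[m] (x is real here)
direct = np.array([np.dot(x[m:], x[:L - m]) for m in range(L)])
np.testing.assert_allclose(shifted_product[:L], direct, atol=1e-10)
rxx = shifted_product[:L] / (L - np.arange(L))       # unbiased estimate of RXX[m]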
def _AcoshGrad(op, grad):
  """Returns grad * 1/sinh(y)."""
  y = op.outputs[0]
  with ops.control_dependencies([grad]):
    y = math_ops.conj(y)
    return grad / math_ops.sinh(y)
def _TanhGrad(op, grad):
  """Returns grad * (1 - tanh(x) * tanh(x))."""
  y = op.outputs[0]  # y = tanh(x)
  with ops.control_dependencies([grad]):
    y = math_ops.conj(y)
    return gen_math_ops.tanh_grad(y, grad)
def matmul(a,
           b,
           transpose_a=False,
           transpose_b=False,
           adjoint_a=False,
           adjoint_b=False,
           name=None):
  """Perform a sparse matrix matmul between `a` and `b`.

  Performs a contraction between `a` and `b` along the two innermost
  dimensions. If both `a` and `b` are instances of `SparseMatrix`, returns a
  new instance of `SparseMatrix` (same type as `a`). If one is not an instance
  of `SparseMatrix`, returns a dense `Tensor`:

  ```
  c = opA(a) . opB(b)
  ```
  where `opA` (resp. `opB`) is the transpose or hermitian transpose depending
  on the values of `transpose_a` (resp. `transpose_b`) and `adjoint_a`
  (resp. `adjoint_b`).

  Args:
    a: `Tensor` or `SparseMatrix`, having rank `2` or `3`.
    b: `Tensor` or `SparseMatrix`, having rank `2` or `3`.
    transpose_a: Python `bool`.
    transpose_b: Python `bool`.
    adjoint_a: Python `bool`.
    adjoint_b: Python `bool`.
    name: Optional name to use when creating ops.

  Returns:
    A `SparseMatrix` if both `a` and `b` are instances of `SparseMatrix`,
    otherwise a dense `Tensor`.
  """
  if not isinstance(a, SparseMatrix) and not isinstance(b, SparseMatrix):
    return math_ops.matmul(
        a,
        b,
        transpose_a=transpose_a,
        transpose_b=transpose_b,
        adjoint_a=adjoint_a,
        adjoint_b=adjoint_b,
        name=name)

  # pylint: disable=protected-access
  a_matrix = a._matrix if isinstance(a, SparseMatrix) else a
  b_matrix = b._matrix if isinstance(b, SparseMatrix) else b
  with ops.name_scope(name, "SparseMatrixMatMul", [a_matrix, b_matrix]):
    if isinstance(a, SparseMatrix) and isinstance(b, SparseMatrix):
      if not (isinstance(a, type(b)) or isinstance(b, type(a))):
        raise TypeError("SparseMatrix types don't inherit from each other: "
                        "%s and %s" % (type(a), type(b)))
      c = sm_ops.sparse_matrix_sparse_mat_mul(
          a_matrix,
          b_matrix,
          transpose_a=transpose_a,
          transpose_b=transpose_b,
          adjoint_a=adjoint_a,
          adjoint_b=adjoint_b,
          type=a.dtype)

      # In eager mode, shape inference functions are not called, and the
      # output shape is not set. We have to infer the output shape here.
      # TODO(penporn): Set this from the C++ kernel instead.
      c_handle = matmul_shape_inference(a_matrix, b_matrix, c, transpose_a,
                                        transpose_b, adjoint_a, adjoint_b)
      return a._from_matrix(c, handle_data=c_handle)

    elif isinstance(a, SparseMatrix):
      return sm_ops.sparse_matrix_mat_mul(
          a_matrix,
          b,
          transpose_a=transpose_a,
          transpose_b=transpose_b,
          adjoint_a=adjoint_a,
          adjoint_b=adjoint_b)
    else:
      # opA(A) . opB(B) = t(nopB(B) . nopA(A))
      if not adjoint_a and not adjoint_b:
        return sm_ops.sparse_matrix_mat_mul(
            b_matrix,
            a,
            transpose_a=not transpose_b,
            transpose_b=not transpose_a,
            transpose_output=True)
      elif not transpose_a and not transpose_b:
        return sm_ops.sparse_matrix_mat_mul(
            b_matrix,
            a,
            adjoint_a=not adjoint_b,
            adjoint_b=not adjoint_a,
            transpose_output=True,
            conjugate_output=True)
      else:
        return sm_ops.sparse_matrix_mat_mul(
            b_matrix,
            math_ops.conj(a),
            transpose_output=True,
            conjugate_output=adjoint_b)
def _CoshGrad(op, grad):
  """Returns grad * sinh(x)."""
  x = op.inputs[0]
  with ops.control_dependencies([grad]):
    x = math_ops.conj(x)
    return grad * math_ops.sinh(x)
def _ConjGrad(_, grad):
  """Returns the complex conjugate of grad."""
  return math_ops.conj(grad)
def testConjString(self):
  x = array_ops.placeholder(dtypes_lib.string)
  with self.assertRaisesRegexp(TypeError,
                               r"Expected numeric or variant tensor"):
    math_ops.conj(x)
def __init__(self,
             num_rows,
             multiplier,
             is_non_singular=None,
             is_self_adjoint=None,
             is_positive_definite=None,
             is_square=True,
             assert_proper_shapes=False,
             name="LinearOperatorScaledIdentity"):
  r"""Initialize a `LinearOperatorScaledIdentity`.

  The `LinearOperatorScaledIdentity` is initialized with `num_rows`, which
  determines the size of each identity matrix, and a `multiplier`, which
  defines `dtype`, batch shape, and scale of each matrix.

  This operator is able to broadcast the leading (batch) dimensions.

  Args:
    num_rows: Scalar non-negative integer `Tensor`. Number of rows in the
      corresponding identity matrix.
    multiplier: `Tensor` of shape `[B1,...,Bb]`, or `[]` (a scalar).
    is_non_singular: Expect that this operator is non-singular.
    is_self_adjoint: Expect that this operator is equal to its hermitian
      transpose.
    is_positive_definite: Expect that this operator is positive definite,
      meaning the quadratic form `x^H A x` has positive real part for all
      nonzero `x`. Note that we do not require the operator to be
      self-adjoint to be positive-definite. See:
      https://en.wikipedia.org/wiki/Positive-definite_matrix#Extension_for_non-symmetric_matrices
    is_square: Expect that this operator acts like square [batch] matrices.
    assert_proper_shapes: Python `bool`. If `False`, only perform static
      checks that initialization and method arguments have proper shape.
      If `True`, and static checks are inconclusive, add asserts to the graph.
    name: A name for this `LinearOperator`

  Raises:
    ValueError: If `num_rows` is determined statically to be non-scalar, or
      negative.
  """
  self._assert_proper_shapes = assert_proper_shapes

  with ops.name_scope(name, values=[multiplier, num_rows]):
    self._multiplier = ops.convert_to_tensor(multiplier, name="multiplier")

    # Check and auto-set hints.
    if not self._multiplier.dtype.is_complex:
      if is_self_adjoint is False:  # pylint: disable=g-bool-id-comparison
        raise ValueError("A real diagonal operator is always self adjoint.")
      else:
        is_self_adjoint = True

    if not is_square:
      raise ValueError("A ScaledIdentity operator is always square.")

    super(LinearOperatorScaledIdentity, self).__init__(
        dtype=self._multiplier.dtype,
        is_non_singular=is_non_singular,
        is_self_adjoint=is_self_adjoint,
        is_positive_definite=is_positive_definite,
        is_square=is_square,
        name=name)

    # Shape [B1,...Bb, 1, 1]
    self._multiplier_matrix = array_ops.expand_dims(
        array_ops.expand_dims(self.multiplier, -1), -1)
    self._multiplier_matrix_conj = math_ops.conj(self._multiplier_matrix)
    self._abs_multiplier = math_ops.abs(self.multiplier)

    self._num_rows = linear_operator_util.shape_tensor(
        num_rows, name="num_rows")
    self._num_rows_static = tensor_util.constant_value(self._num_rows)
    self._check_num_rows_possibly_add_asserts()
    self._num_rows_cast_to_dtype = math_ops.cast(self._num_rows, self.dtype)
    self._num_rows_cast_to_real_dtype = math_ops.cast(self._num_rows,
                                                      self.dtype.real_dtype)
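# Minimal NumPy sketch (illustrative only, not part of the class above) of
# what this operator represents: multiplier * I maps x to multiplier * x, and
# its adjoint scales by conj(multiplier), which is why the conjugated
# multiplier matrix is precomputed in the constructor.
import numpy as np

multiplier = 2. - 3.j
x = np.random.randn(4, 2) + 1j * np.random.randn(4, 2)
dense = multiplier * np.eye(4)
np.testing.assert_allclose(dense @ x, multiplier * x)
np.testing.assert_allclose(dense.conj().T @ x, np.conj(multiplier) * x)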
def _LgammaGrad(op, grad):
  """Returns grad * digamma(x)."""
  x = op.inputs[0]
  with ops.control_dependencies([grad]):
    x = math_ops.conj(x)
    return grad * math_ops.digamma(x)
def _solve(self, rhs, adjoint=False, adjoint_arg=False):
  diag_term = math_ops.conj(self._diag) if adjoint else self._diag
  rhs = linalg.adjoint(rhs) if adjoint_arg else rhs
  inv_diag_mat = array_ops.expand_dims(1. / diag_term, -1)
  return rhs * inv_diag_mat
def _SquareGrad(op, grad):
  x = op.inputs[0]
  # Added control dependencies to prevent 2*x from being computed too early.
  with ops.control_dependencies([grad]):
    x = math_ops.conj(x)
    return grad * (2.0 * x)
def _matvec(self, x, adjoint=False):
  diag_term = math_ops.conj(self._diag) if adjoint else self._diag
  return diag_term * x
def _SinGrad(op, grad):
  """Returns grad * cos(x)."""
  x = op.inputs[0]
  with ops.control_dependencies([grad.op]):
    x = math_ops.conj(x)
    return grad * math_ops.cos(x)
def _matmul(self, x, adjoint=False, adjoint_arg=False):
  diag_term = math_ops.conj(self._diag) if adjoint else self._diag
  x = linalg.adjoint(x) if adjoint_arg else x
  diag_mat = array_ops.expand_dims(diag_term, -1)
  return diag_mat * x
def _trace(self):
  if self.is_self_adjoint:
    return self.operator.trace()
  return math_ops.conj(self.operator.trace())
def _Grad(op, grad):
  """A gradient function for RFFT with the provided `rank` and `irfft_fn`."""
  fft_length = op.inputs[1]
  input_shape = array_ops.shape(op.inputs[0])
  is_even = math_ops.cast(1 - (fft_length[-1] % 2), dtypes.complex64)

  def _TileForBroadcasting(matrix, t):
    expanded = array_ops.reshape(
        matrix,
        array_ops.concat([
            array_ops.ones([array_ops.rank(t) - 2], dtypes.int32),
            array_ops.shape(matrix)
        ], 0))
    return array_ops.tile(
        expanded, array_ops.concat([array_ops.shape(t)[:-2], [1, 1]], 0))

  def _MaskMatrix(length):
    # TODO(rjryan): Speed up computation of twiddle factors using the
    # following recurrence relation and cache them across invocations of RFFT.
    #
    # t_n = exp(sqrt(-1) * pi * n^2 / line_len)
    # for n = 0, 1,..., line_len-1.
    # For n > 2, use t_n = t_{n-1}^2 / t_{n-2} * t_1^2
    a = array_ops.tile(
        array_ops.expand_dims(math_ops.range(length), 0), (length, 1))
    b = array_ops.transpose(a, [1, 0])
    return math_ops.exp(-2j * np.pi * math_ops.cast(a * b, dtypes.complex64) /
                        math_ops.cast(length, dtypes.complex64))

  def _YMMask(length):
    """A sequence of [1+0j, -1+0j, 1+0j, -1+0j, ...] with length `length`."""
    return math_ops.cast(1 - 2 * (math_ops.range(length) % 2),
                         dtypes.complex64)

  y0 = grad[..., 0:1]
  if rank == 1:
    ym = grad[..., -1:]
    extra_terms = y0 + is_even * ym * _YMMask(input_shape[-1])
  elif rank == 2:
    # Create a mask matrix for y0 and ym.
    base_mask = _MaskMatrix(input_shape[-2])

    # Tile base_mask to match y0 in shape so that we can batch-matmul the
    # inner 2 dimensions.
    tiled_mask = _TileForBroadcasting(base_mask, y0)

    y0_term = math_ops.matmul(tiled_mask, math_ops.conj(y0))
    extra_terms = y0_term

    ym = grad[..., -1:]
    ym_term = math_ops.matmul(tiled_mask, math_ops.conj(ym))

    inner_dim = input_shape[-1]
    ym_term = array_ops.tile(
        ym_term,
        array_ops.concat([
            array_ops.ones([array_ops.rank(grad) - 1], dtypes.int32),
            [inner_dim]
        ], 0)) * _YMMask(inner_dim)

    extra_terms += is_even * ym_term

  # The gradient of RFFT is the IRFFT of the incoming gradient times a scaling
  # factor, plus some additional terms to make up for the components dropped
  # due to Hermitian symmetry.
  input_size = math_ops.to_float(_FFTSizeForGrad(op.inputs[0], rank))
  irfft = irfft_fn(grad, fft_length)
  return 0.5 * (irfft * input_size + math_ops.real(extra_terms)), None
def _determinant(self):
  if self.is_self_adjoint:
    return self.operator.determinant()
  return math_ops.conj(self.operator.determinant())
def norm(tensor,
         ord='euclidean',
         axis=None,
         keepdims=None,
         name=None,
         keep_dims=None):
  r"""Computes the norm of vectors, matrices, and tensors.

  This function can compute several different vector norms (the 1-norm, the
  Euclidean or 2-norm, the inf-norm, and in general the p-norm for p > 0) and
  matrix norms (Frobenius, 1-norm, 2-norm and inf-norm).

  Args:
    tensor: `Tensor` of types `float32`, `float64`, `complex64`, `complex128`
    ord: Order of the norm. Supported values are 'fro', 'euclidean', `1`, `2`,
      `np.inf` and any positive real number yielding the corresponding p-norm.
      Default is 'euclidean' which is equivalent to Frobenius norm if `tensor`
      is a matrix and equivalent to 2-norm for vectors. Some restrictions
      apply:
        a) The Frobenius norm `fro` is not defined for vectors,
        b) If axis is a 2-tuple (matrix norm), only 'euclidean', 'fro', `1`,
           `2`, `np.inf` are supported.
      See the description of `axis` on how to compute norms for a batch of
      vectors or matrices stored in a tensor.
    axis: If `axis` is `None` (the default), the input is considered a vector
      and a single vector norm is computed over the entire set of values in
      the tensor, i.e. `norm(tensor, ord=ord)` is equivalent to
      `norm(reshape(tensor, [-1]), ord=ord)`.
      If `axis` is a Python integer, the input is considered a batch of
      vectors, and `axis` determines the axis in `tensor` over which to
      compute vector norms.
      If `axis` is a 2-tuple of Python integers it is considered a batch of
      matrices and `axis` determines the axes in `tensor` over which to
      compute a matrix norm.
      Negative indices are supported. Example: If you are passing a tensor
      that can be either a matrix or a batch of matrices at runtime, pass
      `axis=[-2,-1]` instead of `axis=None` to make sure that matrix norms
      are computed.
    keepdims: If True, the axis indicated in `axis` are kept with size 1.
      Otherwise, the dimensions in `axis` are removed from the output shape.
    name: The name of the op.
    keep_dims: Deprecated alias for `keepdims`.

  Returns:
    output: A `Tensor` of the same type as tensor, containing the vector or
      matrix norms. If `keepdims` is True then the rank of output is equal to
      the rank of `tensor`. Otherwise, if `axis` is none the output is a
      scalar, if `axis` is an integer, the rank of `output` is one less than
      the rank of `tensor`, if `axis` is a 2-tuple the rank of `output` is two
      less than the rank of `tensor`.

  Raises:
    ValueError: If `ord` or `axis` is invalid.

  @compatibility(numpy)
  Mostly equivalent to numpy.linalg.norm.
  Not supported: ord <= 0, 2-norm for matrices, nuclear norm.
  Other differences:
    a) If axis is `None`, treats the flattened `tensor` as a vector
       regardless of rank.
    b) Explicitly supports 'euclidean' norm as the default, including for
       higher order tensors.
  @end_compatibility
  """
  keepdims = deprecation.deprecated_argument_lookup('keepdims', keepdims,
                                                    'keep_dims', keep_dims)
  if keepdims is None:
    keepdims = False

  is_matrix_norm = ((isinstance(axis, tuple) or isinstance(axis, list)) and
                    len(axis) == 2)
  if is_matrix_norm:
    axis = tuple(axis)
    if (not isinstance(axis[0], int) or not isinstance(axis[1], int) or
        axis[0] == axis[1]):
      raise ValueError(
          "'axis' must be None, an integer, or a tuple of 2 unique integers")
    supported_matrix_norms = ['euclidean', 'fro', 1, 2, np.inf]
    if ord not in supported_matrix_norms:
      raise ValueError("'ord' must be a supported matrix norm in %s, got %s" %
                       (supported_matrix_norms, ord))
  else:
    if not (isinstance(axis, int) or axis is None):
      raise ValueError(
          "'axis' must be None, an integer, or a tuple of 2 unique integers")

    supported_vector_norms = ['euclidean', 1, 2, np.inf]
    if (not np.isreal(ord) or ord <= 0) and ord not in supported_vector_norms:
      raise ValueError("'ord' must be a supported vector norm, got %s" % ord)
    if axis is not None:
      axis = (axis,)

  with ops.name_scope(name, 'norm', [tensor]):
    tensor = ops.convert_to_tensor(tensor)

    if ord in ['fro', 'euclidean', 2, 2.0]:
      if is_matrix_norm and ord in [2, 2.0]:
        rank = array_ops.rank(tensor)
        positive_axis = map_fn.map_fn(
            lambda i: control_flow_ops.cond(
                i >= 0, lambda: i, lambda: i + rank),
            ops.convert_to_tensor(axis))
        axes = math_ops.range(rank)
        perm_before = array_ops.concat(
            [array_ops.setdiff1d(axes, positive_axis)[0], positive_axis],
            axis=0)
        perm_after = map_fn.map_fn(
            lambda i: math_ops.cast(
                array_ops.squeeze(
                    array_ops.where(math_ops.equal(perm_before, i))),
                dtype=dtypes.int32), axes)
        permed = array_ops.transpose(tensor, perm=perm_before)
        matrix_2_norm = array_ops.expand_dims(
            math_ops.reduce_max(
                math_ops.abs(gen_linalg_ops.svd(permed, compute_uv=False)[0]),
                axis=-1,
                keepdims=True),
            axis=-1)
        result = array_ops.transpose(matrix_2_norm, perm=perm_after)
      else:
        result = math_ops.sqrt(
            math_ops.reduce_sum(
                tensor * math_ops.conj(tensor), axis, keepdims=True))
    else:
      result = math_ops.abs(tensor)
      if ord == 1:
        sum_axis = None if axis is None else axis[0]
        result = math_ops.reduce_sum(result, sum_axis, keepdims=True)
        if is_matrix_norm:
          result = math_ops.reduce_max(result, axis[-1], keepdims=True)
      elif ord == np.inf:
        if is_matrix_norm:
          result = math_ops.reduce_sum(result, axis[1], keepdims=True)
        max_axis = None if axis is None else axis[0]
        result = math_ops.reduce_max(result, max_axis, keepdims=True)
      else:
        # General p-norms (positive p only)
        result = math_ops.pow(
            math_ops.reduce_sum(
                math_ops.pow(result, ord), axis, keepdims=True), 1.0 / ord)
    if not keepdims:
      result = array_ops.squeeze(result, axis)
    return result
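# Small NumPy sketch (illustrative only, not part of the function above) of
# the Euclidean/Frobenius branch it uses: for complex inputs,
# sum(x * conj(x)) is real and non-negative, so
# sqrt(reduce_sum(tensor * conj(tensor))) matches the usual 2-norm.
import numpy as np

x = np.array([3. + 4.j, 1. - 2.j])
frob = np.sqrt(np.sum(x * np.conj(x)).real)
np.testing.assert_allclose(frob, np.linalg.norm(x))   # sqrt(30) ~ 5.477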
def conj(self):
  return self._from_matrix(
      math_ops.conj(self._matrix), self.eager_handle_data)
def _diag_part(self):
  reflection_axis = ops.convert_to_tensor_v2_with_dispatch(
      self.reflection_axis)
  normalized_axis = reflection_axis / linalg.norm(
      reflection_axis, axis=-1, keepdims=True)
  return 1. - 2 * normalized_axis * math_ops.conj(normalized_axis)
def _SigmoidGradGrad(op, grad):
  with ops.control_dependencies([grad]):
    a = math_ops.conj(op.inputs[0])
    b = math_ops.conj(op.inputs[1])
    gb = grad * b
    return gb - 2.0 * gb * a, gen_math_ops.sigmoid_grad(a, grad)
def _Log1pGrad(op, grad):
  """Returns grad * (1/(1 + x))."""
  x = op.inputs[0]
  with ops.control_dependencies([grad]):
    x = math_ops.conj(x)
    return grad * math_ops.reciprocal(1 + x)