def _SubGrad(op, grad):
  """Gradient for Sub."""
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  return (array_ops.reshape(math_ops.reduce_sum(grad, rx), sx),
          array_ops.reshape(-math_ops.reduce_sum(grad, ry), sy))

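# Hedged illustration (not part of the TensorFlow source): a minimal NumPy
# sketch of the reduce-and-reshape pattern shared by the gradient functions in
# this file. The helpers `_reduction_axes` and `sub_grad_numpy` are
# hypothetical names standing in for gen_array_ops._broadcast_gradient_args
# and _SubGrad, assuming standard NumPy broadcasting rules.
import numpy as np


def _reduction_axes(shape, out_shape):
  """Output axes that must be summed to undo broadcasting of `shape`."""
  padded = (1,) * (len(out_shape) - len(shape)) + tuple(shape)
  return tuple(axis for axis, (s, o) in enumerate(zip(padded, out_shape))
               if s == 1 and o > 1)


def sub_grad_numpy(x, y, grad):
  """Gradient of x - y under broadcasting: sum over broadcast axes, reshape."""
  out_shape = np.broadcast(x, y).shape
  gx = np.sum(grad, axis=_reduction_axes(x.shape, out_shape)).reshape(x.shape)
  gy = np.sum(-grad, axis=_reduction_axes(y.shape, out_shape)).reshape(y.shape)
  return gx, gy


x = np.arange(3.0).reshape(3, 1)     # shape (3, 1)
y = np.arange(4.0)                   # shape (4,)
grad = np.ones((3, 4))               # upstream gradient has the output shape
gx, gy = sub_grad_numpy(x, y, grad)  # gx is all 4s, shape (3, 1); gy is all -3s, shape (4,)
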
def _BetaincGrad(op, grad):
  """Returns gradient of betainc(a, b, x) with respect to x."""
  # TODO(ebrevdo): Perhaps add the derivative w.r.t. a, b
  a, b, x = op.inputs

  # Two cases: x is a scalar and a/b are same-shaped tensors, or vice
  # versa; so it's sufficient to check against shape(a).
  sa = array_ops.shape(a)
  sx = array_ops.shape(x)
  # pylint: disable=protected-access
  _, rx = gen_array_ops._broadcast_gradient_args(sa, sx)
  # pylint: enable=protected-access

  # Perform operations in log space before summing, because terms
  # can grow large.
  log_beta = (
      gen_math_ops.lgamma(a) + gen_math_ops.lgamma(b) -
      gen_math_ops.lgamma(a + b))
  partial_x = math_ops.exp((b - 1) * math_ops.log(1 - x) +
                           (a - 1) * math_ops.log(x) - log_beta)

  # TODO(b/36815900): Mark None return values as NotImplemented
  return (
      None,  # da
      None,  # db
      array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx))

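# Hedged illustration (not from the TensorFlow source): the closed form used
# above, d/dx betainc(a, b, x) = x^(a-1) * (1-x)^(b-1) / B(a, b), evaluated in
# log space and checked against a central finite difference of
# scipy.special.betainc. The sample point and step size are arbitrary.
import numpy as np
from scipy import special

a, b, x = 2.5, 3.0, 0.3
log_beta = special.gammaln(a) + special.gammaln(b) - special.gammaln(a + b)
partial_x = np.exp((b - 1) * np.log(1 - x) + (a - 1) * np.log(x) - log_beta)

eps = 1e-6
numeric = (special.betainc(a, b, x + eps) -
           special.betainc(a, b, x - eps)) / (2 * eps)
assert np.isclose(partial_x, numeric)
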
def _reduce_and_reshape_grad(g, t):
  """Returns the gradient, sum-reduced and reshaped to `t`'s shape."""
  shape = array_ops.shape(t)
  g_shape = array_ops.shape(g)
  # pylint: disable=protected-access
  bcast_dims, _ = gen_array_ops._broadcast_gradient_args(shape, g_shape)
  # pylint: enable=protected-access
  return array_ops.reshape(math_ops.reduce_sum(g, bcast_dims), shape)

def _DivGrad(op, grad):
  """The gradient for the Div operator."""
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)  # pylint: disable=protected-access
  return (array_ops.reshape(math_ops.reduce_sum(grad / y, rx), sx),
          array_ops.reshape(
              math_ops.reduce_sum(grad * (-x / math_ops.square(y)), ry), sy))

def _ComplexGrad(op, grad):
  """Returns the real and imaginary components of 'grad', respectively."""
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  return (array_ops.reshape(math_ops.reduce_sum(math_ops.real(grad), rx), sx),
          array_ops.reshape(math_ops.reduce_sum(math_ops.imag(grad), ry), sy))

def _PowGrad(op, grad):
  """Returns grad * (y*x^(y-1), z*log(x))."""
  x = op.inputs[0]
  y = op.inputs[1]
  z = op.outputs[0]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  gx = array_ops.reshape(
      math_ops.reduce_sum(grad * y * math_ops.pow(x, y - 1), rx), sx)
  gy = array_ops.reshape(
      math_ops.reduce_sum(grad * z * math_ops.log(x), ry), sy)
  return gx, gy

def _MulGrad(op, grad):
  """The gradient of scalar multiplication."""
  x = op.inputs[0]
  y = op.inputs[1]
  assert x.dtype.base_dtype == y.dtype.base_dtype, (x.dtype, " vs. ", y.dtype)
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  x = math_ops.conj(x)
  y = math_ops.conj(y)
  return (array_ops.reshape(math_ops.reduce_sum(grad * y, rx), sx),
          array_ops.reshape(math_ops.reduce_sum(x * grad, ry), sy))

def _MulGrad(op, grad):
  x = op.inputs[0]
  y = op.inputs[1]
  assert x.dtype.base_dtype == y.dtype.base_dtype, (x.dtype, " vs. ", y.dtype)
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  if x.dtype.base_dtype == dtypes.complex64:
    return (array_ops.reshape(math_ops.reduce_sum(grad * math_ops.conj(y), rx),
                              sx),
            array_ops.reshape(math_ops.reduce_sum(math_ops.conj(x) * grad, ry),
                              sy))
  else:
    return (array_ops.reshape(math_ops.reduce_sum(grad * y, rx), sx),
            array_ops.reshape(math_ops.reduce_sum(x * grad, ry), sy))

def _PowGrad(op, grad):
  """Returns grad * (y*x^(y-1), z*log(x))."""
  x = op.inputs[0]
  y = op.inputs[1]
  z = op.outputs[0]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  gx = array_ops.reshape(
      math_ops.reduce_sum(grad * y * math_ops.pow(x, y - 1), rx), sx)
  # Avoid false singularity at x = 0
  log_x = math_ops.select(x > 0, math_ops.log(x), array_ops.zeros_like(x))
  gy = array_ops.reshape(math_ops.reduce_sum(grad * z * log_x, ry), sy)
  return gx, gy

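# Hedged illustration (not from the TensorFlow source): why the y-gradient of
# pow masks log(x) at x = 0. Without the mask, the term z * log(x) evaluates
# to 0 * (-inf) = nan at x = 0; with the mask the contribution is a clean 0.
# NumPy stands in for the TensorFlow ops here.
import numpy as np

x = np.array([0.0, 2.0])
y = np.array([3.0, 3.0])
z = x ** y
grad = np.ones_like(z)

with np.errstate(divide="ignore", invalid="ignore"):
  naive_gy = grad * z * np.log(x)                       # [nan, 8*log(2)]
  log_x = np.where(x > 0, np.log(x), np.zeros_like(x))  # mask the singularity
masked_gy = grad * z * log_x                            # [0.0, 8*log(2)]
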
def _PolygammaGrad(op, grad):
  """Returns gradient of psi(n, x) with respect to n and x."""
  # TODO(tillahoffmann): Add derivative with respect to n
  n = op.inputs[0]
  x = op.inputs[1]
  # Broadcast gradients
  sn = array_ops.shape(n)
  sx = array_ops.shape(x)
  unused_rn, rx = gen_array_ops._broadcast_gradient_args(sn, sx)
  # Evaluate gradient
  with ops.control_dependencies([grad.op]):
    partial_x = math_ops.polygamma(n + 1, x)
    return (None,
            array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx))

def _ZetaGrad(op, grad):
  """Returns gradient of zeta(x, q) with respect to x and q."""
  # TODO(tillahoffmann): Add derivative with respect to x
  x = op.inputs[0]
  q = op.inputs[1]
  # Broadcast gradients
  sx = array_ops.shape(x)
  sq = array_ops.shape(q)
  unused_rx, rq = gen_array_ops._broadcast_gradient_args(sx, sq)
  # Evaluate gradient
  with ops.control_dependencies([grad.op]):
    partial_q = -x * math_ops.zeta(x + 1, q)
    return (None,
            array_ops.reshape(math_ops.reduce_sum(partial_q * grad, rq), sq))

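# Hedged illustration (not from the TensorFlow source): the identity used
# above, d/dq zeta(x, q) = -x * zeta(x + 1, q), checked against a central
# finite difference of scipy.special.zeta (the Hurwitz zeta function). The
# sample point is arbitrary.
import numpy as np
from scipy import special

x, q = 3.5, 2.0
partial_q = -x * special.zeta(x + 1, q)
eps = 1e-6
numeric = (special.zeta(x, q + eps) - special.zeta(x, q - eps)) / (2 * eps)
assert np.isclose(partial_q, numeric)
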
def _IgammaGrad(op, grad):
  """Returns gradient of igamma(a, x) with respect to x."""
  # TODO(ebrevdo): Perhaps add the derivative w.r.t. a
  a = op.inputs[0]
  x = op.inputs[1]
  sa = array_ops.shape(a)
  sx = array_ops.shape(x)
  unused_ra, rx = gen_array_ops._broadcast_gradient_args(sa, sx)
  # Perform operations in log space before summing, because Gamma(a)
  # and Gamma'(a) can grow large.
  partial_x = math_ops.exp(-x + (a - 1) * math_ops.log(x) -
                           math_ops.lgamma(a))
  return (None,
          array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx))

def _SubGrad(op, grad):
  """Gradient for Sub."""
  x = op.inputs[0]
  y = op.inputs[1]
  if (isinstance(grad, ops.Tensor) and
      _ShapesFullySpecifiedAndEqual(x, y, grad)):
    return grad, -grad
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  # pylint: disable=protected-access
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  # pylint: enable=protected-access
  return (array_ops.reshape(math_ops.reduce_sum(grad, rx), sx),
          array_ops.reshape(-math_ops.reduce_sum(grad, ry), sy))

def _DivGrad(op, grad):
  """The gradient for the Div operator."""
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  # pylint: disable=protected-access
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  # pylint: enable=protected-access
  x = math_ops.conj(x)
  y = math_ops.conj(y)
  return (array_ops.reshape(math_ops.reduce_sum(math_ops.div(grad, y), rx),
                            sx),
          array_ops.reshape(math_ops.reduce_sum(
              grad * math_ops.div(-x, math_ops.square(y)), ry), sy))

def _FloorModGrad(op, grad):
  """Returns grad * (1, -floor(x/y))."""
  x = math_ops.conj(op.inputs[0])
  y = math_ops.conj(op.inputs[1])
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  # pylint: disable=protected-access
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  # pylint: enable=protected-access
  floor_xy = math_ops.floor_div(x, y)
  gx = array_ops.reshape(math_ops.reduce_sum(grad, rx), sx)
  gy = array_ops.reshape(
      math_ops.reduce_sum(grad * math_ops.negative(floor_xy), ry), sy)
  return gx, gy

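# Hedged illustration (not from the TensorFlow source): FloorMod satisfies
# x % y = x - floor_div(x, y) * y, so treating floor_div(x, y) as locally
# constant gives d/dx = 1 and d/dy = -floor_div(x, y), which is what the
# function above propagates. NumPy sketch at arbitrary sample points:
import numpy as np

x, y = np.array([7.0, -7.0]), np.array([3.0, 3.0])
floor_xy = np.floor_divide(x, y)
assert np.allclose(np.mod(x, y), x - floor_xy * y)

grad = np.ones_like(x)
gx = grad                 # d(x % y)/dx = 1 away from the discontinuities
gy = grad * (-floor_xy)   # d(x % y)/dy = -floor(x / y)
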
def _SquaredDifferenceGrad(op, grad):
  """Returns the gradient for (x-y)^2."""
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  # pylint: disable=protected-access
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  # pylint: enable=protected-access
  with ops.control_dependencies([grad]):
    # The parens ensure that if grad is IndexedSlices, it'll get multiplied by
    # Tensor (not a number like 2.0) which causes it to convert to Tensor.
    x_grad = math_ops.scalar_mul(2.0, grad) * (x - y)
  return (array_ops.reshape(math_ops.reduce_sum(x_grad, rx), sx),
          -array_ops.reshape(math_ops.reduce_sum(x_grad, ry), sy))

def _MaximumMinimumGrad(op, grad, selector_op):
  """Factor out the code for the gradient of Maximum or Minimum."""
  x = op.inputs[0]
  y = op.inputs[1]
  gdtype = grad.dtype
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  gradshape = array_ops.shape(grad)
  zeros = array_ops.zeros(gradshape, gdtype)
  xmask = selector_op(x, y)
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  xgrad = array_ops.where(xmask, grad, zeros)
  ygrad = array_ops.where(xmask, zeros, grad)
  gx = array_ops.reshape(math_ops.reduce_sum(xgrad, rx), sx)
  gy = array_ops.reshape(math_ops.reduce_sum(ygrad, ry), sy)
  return (gx, gy)

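# Hedged illustration (not from the TensorFlow source): the gradient of an
# elementwise maximum routes each upstream gradient entry to whichever input
# won the comparison and sends zero to the other. NumPy sketch with
# same-shaped inputs (the reduce/reshape step for broadcasting is omitted);
# the >= selector and tie-breaking toward x are assumptions of this sketch.
import numpy as np

x = np.array([1.0, 5.0, 2.0])
y = np.array([4.0, 3.0, 2.0])
grad = np.array([10.0, 20.0, 30.0])

xmask = x >= y                   # selector for Maximum; ties go to x here
gx = np.where(xmask, grad, 0.0)  # [0, 20, 30]
gy = np.where(xmask, 0.0, grad)  # [10, 0, 0]
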
def _IgammaGrad(op, grad):
  """Returns gradient of igamma(a, x) with respect to x."""
  # TODO(ebrevdo): Perhaps add the derivative w.r.t. a
  a = op.inputs[0]
  x = op.inputs[1]
  sa = array_ops.shape(a)
  sx = array_ops.shape(x)
  # pylint: disable=protected-access
  unused_ra, rx = gen_array_ops._broadcast_gradient_args(sa, sx)
  # pylint: enable=protected-access
  # Perform operations in log space before summing, because Gamma(a)
  # and Gamma'(a) can grow large.
  partial_x = math_ops.exp(-x + (a - 1) * math_ops.log(x) -
                           math_ops.lgamma(a))
  # TODO(b/36815900): Mark None return values as NotImplemented
  return (None,
          array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx))

def _PowGrad(op, grad):
  """Returns grad * (y*x^(y-1), z*log(x))."""
  x = op.inputs[0]
  y = op.inputs[1]
  z = op.outputs[0]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  gx = array_ops.reshape(
      math_ops.reduce_sum(grad * y * math_ops.pow(x, y - 1), rx), sx)
  # Avoid false singularity at x = 0
  if x.dtype.is_complex:
    # real(x) < 0 is fine for the complex case
    log_x = math_ops.select(math_ops.not_equal(x, 0), math_ops.log(x),
                            array_ops.zeros_like(x))
  else:
    # There's no sensible real value to return if x < 0, so return 0
    log_x = math_ops.select(x > 0, math_ops.log(x), array_ops.zeros_like(x))
  gy = array_ops.reshape(math_ops.reduce_sum(grad * z * log_x, ry), sy)
  return gx, gy

def _MulGrad(op, grad):
  """The gradient of scalar multiplication."""
  x = op.inputs[0]
  y = op.inputs[1]
  # pylint: disable=protected-access
  if (isinstance(grad, ops.Tensor) and
      _ShapesFullySpecifiedAndEqual(x, y, grad) and
      grad.dtype in (dtypes.int32, dtypes.float32)):
    return gen_math_ops._mul(grad, y), gen_math_ops._mul(grad, x)
  assert x.dtype.base_dtype == y.dtype.base_dtype, (x.dtype, " vs. ", y.dtype)
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  # pylint: enable=protected-access
  x = math_ops.conj(x)
  y = math_ops.conj(y)
  return (array_ops.reshape(math_ops.reduce_sum(grad * y, rx), sx),
          array_ops.reshape(math_ops.reduce_sum(x * grad, ry), sy))

def _ZetaGrad(op, grad):
  """Returns gradient of zeta(x, q) with respect to x and q."""
  # TODO(tillahoffmann): Add derivative with respect to x
  x = op.inputs[0]
  q = op.inputs[1]
  # Broadcast gradients
  sx = array_ops.shape(x)
  sq = array_ops.shape(q)
  unused_rx, rq = gen_array_ops._broadcast_gradient_args(sx, sq)
  # Evaluate gradient
  with ops.control_dependencies([grad]):
    x = math_ops.conj(x)
    q = math_ops.conj(q)
    partial_q = -x * math_ops.zeta(x + 1, q)
    # TODO(b/36815900): Mark None return values as NotImplemented
    return (None,
            array_ops.reshape(math_ops.reduce_sum(partial_q * grad, rq), sq))

def _PolygammaGrad(op, grad):
  """Returns gradient of psi(n, x) with respect to n and x."""
  # TODO(tillahoffmann): Add derivative with respect to n
  n = op.inputs[0]
  x = op.inputs[1]
  # Broadcast gradients
  sn = array_ops.shape(n)
  sx = array_ops.shape(x)
  unused_rn, rx = gen_array_ops._broadcast_gradient_args(sn, sx)
  # Evaluate gradient
  with ops.control_dependencies([grad]):
    n = math_ops.conj(n)
    x = math_ops.conj(x)
    partial_x = math_ops.polygamma(n + 1, x)
    # TODO(b/36815900): Mark None return values as NotImplemented
    return (None,
            array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx))

def _IgammaGrad(op, grad):
  """Returns gradient of igamma(a, x) with respect to x."""
  # TODO(ebrevdo): Perhaps add the derivative w.r.t. a
  a = op.inputs[0]
  x = op.inputs[1]
  sa = array_ops.shape(a)
  sx = array_ops.shape(x)
  # pylint: disable=protected-access
  unused_ra, rx = gen_array_ops._broadcast_gradient_args(sa, sx)
  # pylint: enable=protected-access
  # Perform operations in log space before summing, because Gamma(a)
  # and Gamma'(a) can grow large.
  partial_x = math_ops.exp(-x + (a - 1) * math_ops.log(x) -
                           math_ops.lgamma(a))
  # TODO(b/36815900): Mark None return values as NotImplemented
  return (None,
          array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx))

def _PolygammaGrad(op, grad):
  """Returns gradient of psi(n, x) with respect to n and x."""
  # TODO(tillahoffmann): Add derivative with respect to n
  n = op.inputs[0]
  x = op.inputs[1]
  # Broadcast gradients
  sn = array_ops.shape(n)
  sx = array_ops.shape(x)
  # pylint: disable=protected-access
  unused_rn, rx = gen_array_ops._broadcast_gradient_args(sn, sx)
  # pylint: enable=protected-access
  # Evaluate gradient
  with ops.control_dependencies([grad]):
    n = math_ops.conj(n)
    x = math_ops.conj(x)
    partial_x = math_ops.polygamma(n + 1, x)
    # TODO(b/36815900): Mark None return values as NotImplemented
    return (None,
            array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx))

def _ZetaGrad(op, grad):
  """Returns gradient of zeta(x, q) with respect to x and q."""
  # TODO(tillahoffmann): Add derivative with respect to x
  x = op.inputs[0]
  q = op.inputs[1]
  # Broadcast gradients
  sx = array_ops.shape(x)
  sq = array_ops.shape(q)
  # pylint: disable=protected-access
  unused_rx, rq = gen_array_ops._broadcast_gradient_args(sx, sq)
  # pylint: enable=protected-access
  # Evaluate gradient
  with ops.control_dependencies([grad]):
    x = math_ops.conj(x)
    q = math_ops.conj(q)
    partial_q = -x * math_ops.zeta(x + 1, q)
    # TODO(b/36815900): Mark None return values as NotImplemented
    return (None,
            array_ops.reshape(math_ops.reduce_sum(partial_q * grad, rq), sq))

def _PowGrad(op, grad):
  """Returns grad * (y*x^(y-1), z*log(x))."""
  x = op.inputs[0]
  y = op.inputs[1]
  z = op.outputs[0]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  gx = array_ops.reshape(
      math_ops.reduce_sum(grad * y * math_ops.pow(x, y - 1), rx), sx)
  # Avoid false singularity at x = 0
  if x.dtype.is_complex:
    # real(x) < 0 is fine for the complex case
    log_x = math_ops.select(math_ops.not_equal(x, 0), math_ops.log(x),
                            array_ops.zeros_like(x))
  else:
    # There's no sensible real value to return if x < 0, so return 0
    log_x = math_ops.select(x > 0, math_ops.log(x), array_ops.zeros_like(x))
  gy = array_ops.reshape(math_ops.reduce_sum(grad * z * log_x, ry), sy)
  return gx, gy

def _BetaincGrad(op, grad):
  """Returns gradient of betainc(a, b, x) with respect to x."""
  # TODO(ebrevdo): Perhaps add the derivative w.r.t. a, b
  a, b, x = op.inputs

  # Two cases: x is a scalar and a/b are same-shaped tensors, or vice
  # versa; so it's sufficient to check against shape(a).
  sa = array_ops.shape(a)
  sx = array_ops.shape(x)
  # pylint: disable=protected-access
  _, rx = gen_array_ops._broadcast_gradient_args(sa, sx)
  # pylint: enable=protected-access

  # Perform operations in log space before summing, because terms
  # can grow large.
  log_beta = (gen_math_ops.lgamma(a) + gen_math_ops.lgamma(b) -
              gen_math_ops.lgamma(a + b))
  partial_x = math_ops.exp(
      (b - 1) * math_ops.log(1 - x) + (a - 1) * math_ops.log(x) - log_beta)

  # TODO(b/36815900): Mark None return values as NotImplemented
  return (None,  # da
          None,  # db
          array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx))

def _GetGradientArgs(self, xs, ys):
  with self.test_session() as sess:
    return sess.run(_broadcast_gradient_args(xs, ys))

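# Hedged illustration (not from the TensorFlow source): a hypothetical test
# case using the helper above. The test name is invented, and the expected
# values assume broadcast_gradient_args returns, for each input shape, the
# output axes that must be summed over when [2, 3, 5] broadcasts against [1].
def testBroadcastAgainstScalarLikeShape(self):
  rx, ry = self._GetGradientArgs([2, 3, 5], [1])
  self.assertAllEqual(rx, [])         # x already has the full output shape
  self.assertAllEqual(ry, [0, 1, 2])  # y must be reduced over every axis
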