def testRandomGammaGradMediumValues(self, dtype, rtol, atol):
  self.maybe_skip_test(dtype)
  rtol, atol = self.adjust_tolerance_for_tpu(dtype, rtol, atol)
  with self.session() as sess:
    with self.test_scope():
      x = constant_op.constant(
          np.random.uniform(low=1., high=10.,
                            size=[NUM_SAMPLES]).astype(dtype))
      a = constant_op.constant(
          np.random.uniform(low=1., high=10.,
                            size=[NUM_SAMPLES]).astype(dtype))
      gamma_sample_grad = gen_random_ops.random_gamma_grad(a, x)
      actual_grad = implicit_reparameterization_grad(a, x)
      gamma_sample_grad, actual_grad = sess.run(
          [gamma_sample_grad, actual_grad])
      # The ratio computed in implicit_reparameterization_grad can easily
      # produce NaN or Inf when its numerator and denominator underflow to
      # zero, so drop non-finite reference entries before comparing.
      finite = ~np.logical_or(np.isnan(actual_grad), np.isinf(actual_grad))
      gamma_sample_grad = gamma_sample_grad[finite]
      actual_grad = actual_grad[finite]
      self.assertAllClose(actual_grad, gamma_sample_grad, atol=atol,
                          rtol=rtol)
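
# The reference helper `implicit_reparameterization_grad` is defined elsewhere
# in the test file. A minimal sketch of such a reference implementation is
# given below (hedged: the actual helper may differ). Per Figurnov et al.,
# 2018, the implicit gradient of a sample x ~ Gamma(a, 1) is
#   d(sample)/d(alpha) = -(dF/dalpha) / p(x; alpha),
# where F is the Gamma CDF (`math_ops.igamma`) and p is the Gamma density.
from tensorflow.python.ops import gradients_impl
from tensorflow.python.ops import math_ops


def implicit_reparameterization_grad(a, x):
  # log p(x; a) for a unit-rate Gamma: (a - 1) * log(x) - x - log(Gamma(a)).
  log_prob = math_ops.xlogy(a - 1., x) - math_ops.lgamma(a) - x
  prob = math_ops.exp(log_prob)
  # dF/dalpha via autodiff through the regularized incomplete gamma function;
  # the ratio can produce non-finite values when both terms underflow.
  return -gradients_impl.gradients(math_ops.igamma(a, x), [a])[0] / prob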
def _RandomGammaGrad(op, grad):  # pylint: disable=invalid-name
  """Returns the gradient of a Gamma sample w.r.t. alpha.

  The gradient is computed using implicit differentiation, see
  "Implicit Reparameterization Gradients" (https://arxiv.org/abs/1805.08498).

  Args:
    op: A `RandomGamma` operation. We assume that the inputs to the operation
      are `shape` and `alpha` tensors, and the output is the `sample` tensor.
    grad: The incoming gradient `dloss / dsample` of the same shape as
      `op.outputs[0]`.

  Returns:
    A `Tensor` with derivatives `dloss / dalpha`.
  """
  shape = op.inputs[0]
  alpha = op.inputs[1]
  sample = op.outputs[0]

  with ops.control_dependencies([grad]):
    # Make alpha broadcastable with the samples by prepending unit dimensions.
    num_sample_dimensions = array_ops.shape(shape)[0]
    alpha_broadcastable = add_leading_unit_dimensions(alpha,
                                                      num_sample_dimensions)
    partial_a = gen_random_ops.random_gamma_grad(alpha_broadcastable, sample)

    # The first input is shape; the second input is alpha.
    return (None,
            math_ops.reduce_sum(
                grad * partial_a, axis=math_ops.range(num_sample_dimensions)))
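
# `add_leading_unit_dimensions` is a small helper defined alongside these
# gradients. A plausible sketch follows (hedged: the real helper may differ):
# it reshapes `x` to shape [1] * num_dimensions + shape(x) so that `x`
# broadcasts against a sample whose leading axes are the sample dimensions.
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops


def add_leading_unit_dimensions(x, num_dimensions):
  # E.g. shape [3] -> [1, 1, 3] for num_dimensions=2, which broadcasts
  # against a sample of shape [s0, s1, 3].
  new_shape = array_ops.concat(
      [array_ops.ones([num_dimensions], dtype=dtypes.int32),
       array_ops.shape(x)], axis=0)
  return array_ops.reshape(x, new_shape)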
def _StatelessRandomGammaV2Grad(op, grad):  # pylint: disable=invalid-name
  """Returns the gradient of a Gamma sample w.r.t. alpha.

  The gradient is computed using implicit differentiation
  (Figurnov et al., 2018).

  Args:
    op: A `StatelessRandomGammaV2` operation. We assume that the inputs to
      the operation are `shape`, `seed` and `alpha` tensors, and the output
      is the `sample` tensor.
    grad: The incoming gradient `dloss / dsample` of the same shape as
      `op.outputs[0]`.

  Returns:
    A `Tensor` with derivatives `dloss / dalpha`.

  References:
    Implicit Reparameterization Gradients:
      [Figurnov et al., 2018]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients)
      ([pdf](http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients.pdf))
  """
  shape = op.inputs[0]
  alpha = op.inputs[2]
  sample = op.outputs[0]

  with ops.control_dependencies([grad]):
    # The shape handling differs from the stateful RandomGamma case: here
    # `shape` is the full output shape (sample dimensions followed by alpha's
    # dimensions), so the number of sample dimensions is the difference of
    # the two ranks.
    num_sample_dimensions = array_ops.shape(shape)[0] - array_ops.rank(alpha)
    # Make alpha broadcastable with the samples by prepending unit dimensions.
    alpha_broadcastable = add_leading_unit_dimensions(alpha,
                                                      num_sample_dimensions)
    partial_a = gen_random_ops.random_gamma_grad(alpha_broadcastable, sample)

    # The first two inputs are shape and seed; the third input is alpha.
    return (None, None,
            math_ops.reduce_sum(
                grad * partial_a, axis=math_ops.range(num_sample_dimensions)))
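
# Hypothetical end-to-end usage (a sketch; the public wrapper name and graph
# setup are assumptions, not part of this file): differentiating a loss
# through a stateless Gamma sample routes through _StatelessRandomGammaV2Grad
# above. Note the graph context, since `gradients_impl.gradients` is a
# graph-mode API.
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import ops
from tensorflow.python.ops import gradients_impl
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import stateless_random_ops

with ops.Graph().as_default():
  alpha = constant_op.constant([2.0, 3.0])
  # Output shape [5, 2]: one leading sample dimension of 5, then alpha's
  # shape, so num_sample_dimensions = 2 - 1 = 1 in the gradient above.
  sample = stateless_random_ops.stateless_random_gamma(
      shape=[5, 2], seed=[1, 2], alpha=alpha)
  loss = math_ops.reduce_mean(sample)
  dloss_dalpha = gradients_impl.gradients(loss, [alpha])[0]  # shape [2]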