Example #1
    def testRandomGammaGradMediumValues(self, dtype, rtol, atol):
        self.maybe_skip_test(dtype)
        rtol, atol = self.adjust_tolerance_for_tpu(dtype, rtol, atol)

        with self.session() as sess:
            with self.test_scope():
                x = constant_op.constant(
                    np.random.uniform(low=1., high=10.,
                                      size=[NUM_SAMPLES]).astype(dtype))
                a = constant_op.constant(
                    np.random.uniform(low=1., high=10.,
                                      size=[NUM_SAMPLES]).astype(dtype))
                gamma_sample_grad = gen_random_ops.random_gamma_grad(a, x)
                actual_grad = implicit_reparameterization_grad(a, x)
                gamma_sample_grad, actual_grad = sess.run(
                    [gamma_sample_grad, actual_grad])
                # Drop entries where implicit_reparameterization_grad produced
                # NaN or Inf: its ratio can easily become NaN when both the
                # numerator and the denominator underflow to zero.
                bad = np.logical_or(np.isnan(actual_grad), np.isinf(actual_grad))
                gamma_sample_grad = gamma_sample_grad[~bad]
                actual_grad = actual_grad[~bad]
        self.assertAllClose(actual_grad,
                            gamma_sample_grad,
                            atol=atol,
                            rtol=rtol)
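The test above relies on a reference helper, implicit_reparameterization_grad, and a module-level NUM_SAMPLES constant that are not part of the excerpt. Below is a minimal sketch of such a helper, assuming the implicit-reparameterization identity d(sample)/d(alpha) = -(dF(x; alpha)/d(alpha)) / p(x; alpha) for a unit-rate Gamma; the GradientTape-based implementation and names are illustrative, not the test's actual code:

import tensorflow as tf

def implicit_reparameterization_grad(a, x):
    """Reference d(sample)/d(alpha) = -(dF(x; a)/da) / p(x; a), unit-rate Gamma."""
    a = tf.convert_to_tensor(a)
    x = tf.convert_to_tensor(x)
    with tf.GradientTape() as tape:
        tape.watch(a)
        cdf = tf.math.igamma(a, x)  # regularized lower incomplete gamma = Gamma CDF
    dcdf_da = tape.gradient(cdf, a)  # dF(x; a)/da
    log_prob = tf.math.xlogy(a - 1., x) - tf.math.lgamma(a) - x  # log Gamma(a, 1) pdf
    return -dcdf_da / tf.exp(log_prob)

Both the numerator and the denominator of this ratio can underflow to zero for extreme samples, which is why the test filters out NaN/Inf entries before comparing against random_gamma_grad.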
Example #2
def _RandomGammaGrad(op, grad):  # pylint: disable=invalid-name
  """Returns the gradient of a Gamma sample w.r.t. alpha.

  The gradient is computed using implicit differentiation, see
  "Implicit Reparameterization Gradients" (https://arxiv.org/abs/1805.08498).

  Args:
    op: A `RandomGamma` operation. We assume that the inputs to the operation
      are `shape` and `alpha` tensors, and the output is the `sample` tensor.
    grad: The incoming gradient `dloss / dsample` of the same shape as
      `op.outputs[0]`.

  Returns:
    A `Tensor` with derivatives `dloss / dalpha`
  """
  shape = op.inputs[0]
  alpha = op.inputs[1]
  sample = op.outputs[0]

  with ops.control_dependencies([grad]):
    # Make the parameters alpha broadcastable with samples by prepending
    # unit dimensions.
    num_sample_dimensions = array_ops.shape(shape)[0]
    alpha_broadcastable = add_leading_unit_dimensions(
        alpha, num_sample_dimensions)
    partial_a = gen_random_ops.random_gamma_grad(alpha_broadcastable, sample)

    # The first input is shape; the second input is alpha.
    return (None, math_ops.reduce_sum(
        grad * partial_a, axis=math_ops.range(num_sample_dimensions)))
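This registered gradient is what makes Gamma samples differentiable with respect to alpha through the stateful RandomGamma op. A short usage sketch in TF 2.x eager mode (the values are illustrative; for a unit-rate Gamma, E[sample] = alpha, so each per-alpha gradient of the sample mean should come out close to 1):

import tensorflow as tf

alpha = tf.constant([2.0, 5.0])
with tf.GradientTape() as tape:
    tape.watch(alpha)
    # 10000 draws per alpha; output shape is [10000, 2].
    samples = tf.random.gamma(shape=[10000], alpha=alpha)
    means = tf.reduce_mean(samples, axis=0)
# Backpropagates through random_gamma_grad; expect values close to [1., 1.].
grads = tape.gradient(means, alpha)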
Example #3
def _RandomGammaGrad(op, grad):  # pylint: disable=invalid-name
    """Returns the gradient of a Gamma sample w.r.t. alpha.

    The gradient is computed using implicit differentiation, see
    "Implicit Reparameterization Gradients" (https://arxiv.org/abs/1805.08498).

    Args:
      op: A `RandomGamma` operation. We assume that the inputs to the operation
        are `shape` and `alpha` tensors, and the output is the `sample` tensor.
      grad: The incoming gradient `dloss / dsample` of the same shape as
        `op.outputs[0]`.

    Returns:
      A `Tensor` with derivatives `dloss / dalpha`
    """
    shape = op.inputs[0]
    alpha = op.inputs[1]
    sample = op.outputs[0]

    with ops.control_dependencies([grad]):
        # Make the parameters alpha broadcastable with samples by prepending
        # unit dimensions.
        num_sample_dimensions = array_ops.shape(shape)[0]
        alpha_broadcastable = add_leading_unit_dimensions(
            alpha, num_sample_dimensions)
        partial_a = gen_random_ops.random_gamma_grad(alpha_broadcastable,
                                                     sample)

        # The first input is shape; the second input is alpha.
        return (None,
                math_ops.reduce_sum(
                    grad * partial_a,
                    axis=math_ops.range(num_sample_dimensions)))
Example #4
def _StatelessRandomGammaV2Grad(op, grad):  # pylint: disable=invalid-name
    """Returns the gradient of a Gamma sample w.r.t. alpha.

    The gradient is computed using implicit differentiation
    (Figurnov et al., 2018).

    Args:
      op: A `StatelessRandomGamma` operation. We assume that the inputs to the
        operation are `shape`, `seed` and `alpha` tensors, and the output is the
        `sample` tensor.
      grad: The incoming gradient `dloss / dsample` of the same shape as
        `op.outputs[0]`.

    Returns:
      A `Tensor` with derivatives `dloss / dalpha`.

    References:
      Implicit Reparameterization Gradients:
        [Figurnov et al., 2018]
        (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients)
        ([pdf]
        (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients.pdf))
    """
    shape = op.inputs[0]
    alpha = op.inputs[2]
    sample = op.outputs[0]

    with ops.control_dependencies([grad]):
        # The shape handling differs from the stateful op: here `shape` is the
        # full output shape (sample dimensions followed by alpha's dimensions),
        # so the number of sample dimensions excludes the rank of alpha.
        num_sample_dimensions = array_ops.shape(shape)[0] - array_ops.rank(
            alpha)
        # Make the parameters alpha broadcastable with samples by prepending
        # unit dimensions.
        alpha_broadcastable = add_leading_unit_dimensions(
            alpha, num_sample_dimensions)
        partial_a = gen_random_ops.random_gamma_grad(alpha_broadcastable,
                                                     sample)

        # The first two inputs are shape and seed; the third input is alpha.
        return (None, None,
                math_ops.reduce_sum(
                    grad * partial_a,
                    axis=math_ops.range(num_sample_dimensions)))
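The stateless variant is exercised the same way, with the difference noted above that `shape` must spell out the full output shape, including alpha's dimensions. A short sketch, assuming a TensorFlow build that registers this gradient for the stateless Gamma op (the seed and shapes below are illustrative):

import tensorflow as tf

alpha = tf.constant([2.0, 5.0])
seed = tf.constant([12, 34], dtype=tf.int32)
with tf.GradientTape() as tape:
    tape.watch(alpha)
    # `shape` = sample dimensions followed by alpha's dimensions.
    samples = tf.random.stateless_gamma(shape=[10000, 2], seed=seed, alpha=alpha)
    means = tf.reduce_mean(samples, axis=0)
# Only the alpha input receives a gradient (shape and seed map to None above).
grads = tape.gradient(means, alpha)  # expect values close to [1., 1.]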