Code Example #1
  def testApproximateLossIsAccurate(self, float_dtype):
    # Check that the approximate loss (lossfun() with epsilon=1e-6) reasonably
    # approximates the true loss (lossfun() with epsilon=0.) for a range of
    # values of alpha (skipping alpha=0, where the approximation is poor).
    x = np.arange(-10, 10, 0.1, float_dtype)
    scale = float_dtype(1.7)
    for alpha in [-4, -2, -0.2, -0.01, 0.01, 0.2, 1, 1.99, 2, 2.01, 4]:
      alpha = float_dtype(alpha)
      loss = general.lossfun(x, alpha, scale)
      loss_approx = general.lossfun(x, alpha, scale, approximate=True)
      self.assertAllClose(
          loss, loss_approx, rtol=1e-5, atol=1e-4, msg='alpha=%g' % (alpha))
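
For orientation, here is a rough sketch of running the same comparison outside the test harness. The `from robust_loss import general` import path is an assumption about how the library is laid out, and alpha = 1.5 is just an arbitrary value away from the poorly approximated alpha = 0 case.

import numpy as np
from robust_loss import general  # assumed package layout

x = np.arange(-10, 10, 0.1, np.float32)
alpha = np.float32(1.5)  # any value away from 0, where the approximation is poor
scale = np.float32(1.7)
loss_exact = np.asarray(general.lossfun(x, alpha, scale))
loss_approx = np.asarray(general.lossfun(x, alpha, scale, approximate=True))
print(np.max(np.abs(loss_exact - loss_approx)))  # expected to be small, per the test tolerances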
Code Example #2
    def testGradientMatchesFiniteDifferences(self, float_dtype):
        # Test that the loss and its approximation both return gradients that are
        # close to the numerical gradient from finite differences, with forward
        # differencing. Returning correct gradients is TensorFlow's job, so this is
        # just an aggressive sanity check in case some implementation detail causes
        # gradients to incorrectly go to zero due to quantization or stop_gradients
        # in some op that is used by the loss.
        for approximate in [False, True]:
            num_samples = 100000

            # Normally distributed inputs.
            x = float_dtype(np.random.normal(size=num_samples))

            # Uniformly distributed values in (-16, 3), quantized to the nearest
            # 0.1 and then shifted by 0.05 so that we avoid the special cases at
            # 0 and 2 where the analytical gradient won't match finite differences.
            alpha = float_dtype(
                np.round(np.random.uniform(-16, 3, num_samples) * 10) / 10. +
                0.05)

            # Random uniformly distributed values in [0.5, 1.5].
            scale = float_dtype(np.random.uniform(0.5, 1.5, num_samples))

            # Compute the loss and its derivative with respect to all three inputs.
            x, alpha, scale = [
                tf.convert_to_tensor(z) for z in (x, alpha, scale)
            ]
            with tf.GradientTape(persistent=True) as tape:
                for z in (x, alpha, scale):
                    tape.watch(z)
                loss = general.lossfun(x,
                                       alpha,
                                       scale,
                                       approximate=approximate)
                d_x, d_alpha, d_scale = [
                    tape.gradient(tf.reduce_sum(loss), z)
                    for z in (x, alpha, scale)
                ]

            # Assert that the 95th percentile of errors is <= 1e-2.
            def assert_percentile_close(v1, v2):
                self.assertLessEqual(np.percentile(np.abs(v1 - v2), 95), 1e-2)

            step_size = float_dtype(1e-3)
            n_x = (general.lossfun(x + step_size, alpha, scale) -
                   loss) / step_size
            n_alpha = (general.lossfun(x, alpha + step_size, scale) -
                       loss) / step_size
            n_scale = (general.lossfun(x, alpha, scale + step_size) -
                       loss) / step_size
            assert_percentile_close(n_x, d_x)
            assert_percentile_close(n_alpha, d_alpha)
            assert_percentile_close(n_scale, d_scale)
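
For context, the same forward-difference check can be illustrated on a toy function, independent of the library; this is only a generic sketch of the technique, not part of the test suite.

import numpy as np
import tensorflow as tf

# Analytical gradient from tf.GradientTape vs. a forward difference on
# f(x) = log(1 + x**2), whose derivative is 2x / (1 + x**2).
x = tf.constant(np.linspace(-3., 3., 7), tf.float64)
with tf.GradientTape() as tape:
  tape.watch(x)
  y = tf.math.log(1. + tf.square(x))
  total = tf.reduce_sum(y)
d_analytic = tape.gradient(total, x)

step = 1e-6
d_numeric = (tf.math.log(1. + tf.square(x + step)) - y) / step
np.testing.assert_allclose(d_analytic, d_numeric, atol=1e-4)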
Code Example #3
            def while_body(samples, accepted):
                """Generate N proposal samples, and then perform rejection sampling."""
                # Draw N samples from a Cauchy, our proposal distribution.
                cauchy_sample = tf.cast(cauchy.sample(shape), float_dtype)

                # Compute the likelihood of each sample under its target distribution.
                nll = self.nllfun(cauchy_sample, alpha,
                                  tf.cast(1, float_dtype))
                # Bound the NLL. We don't use the approximate loss as it may cause
                # unpredictable behavior in the context of sampling.
                nll_bound = general.lossfun(
                    cauchy_sample,
                    tf.cast(0, float_dtype),
                    tf.cast(1, float_dtype),
                    approximate=False) + self.log_base_partition_function(
                        alpha)

                # Draw N samples from a uniform distribution, and use each uniform
                # sample to decide whether or not to accept each proposal sample.
                uniform_sample = tf.cast(uniform.sample(shape), float_dtype)
                accept = uniform_sample <= tf.math.exp(nll_bound - nll)

                # If a sample is accepted, replace its element in `samples` with the
                # proposal sample, and set its bit in `accepted` to True.
                samples = tf.where(accept, cauchy_sample, samples)
                accepted = accept | accepted
                return (samples, accepted)
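
The accept/reject rule above is standard rejection sampling: a proposal is kept with probability exp(nll_bound - nll), i.e. the target density divided by a bounding density. A generic NumPy/SciPy sketch of the same pattern, targeting a standard normal with a Cauchy proposal purely for illustration (the constant m = sqrt(2*pi/e) is the standard bound on normal_pdf / cauchy_pdf for this pair, not the library's bound), might look like:

import numpy as np
from scipy import stats

def rejection_sample_normal(n, seed=0):
  """Fill an array of n standard-normal samples via a Cauchy proposal."""
  rng = np.random.default_rng(seed)
  m = np.sqrt(2. * np.pi / np.e)  # normal_pdf(x) <= m * cauchy_pdf(x) for all x
  samples = np.zeros(n)
  accepted = np.zeros(n, dtype=bool)
  while not np.all(accepted):
    proposal = rng.standard_cauchy(n)
    u = rng.uniform(size=n)
    # Accept when u <= target(x) / (m * proposal_density(x)).
    accept = u <= stats.norm.pdf(proposal) / (m * stats.cauchy.pdf(proposal))
    samples = np.where(accept & ~accepted, proposal, samples)
    accepted |= accept
  return samples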
Code Example #4
  def testLossAndGradientsAreFinite(self, float_dtype):
    # Test that the loss and its approximation both give finite losses and
    # derivatives everywhere that they should for a wide range of values.
    for approximate in [False, True]:
      num_samples = 100000

      # Normally distributed inputs.
      x = float_dtype(np.random.normal(size=num_samples))

      # Uniformly distributed values in (-16, 3), quantized to the nearest
      # 0.1 to ensure that we hit the special cases at 0, 2.
      alpha = float_dtype(
          np.round(np.random.uniform(-16, 3, num_samples) * 10) / 10.)

      # Random log-normally distributed values in approx (1e-5, 100000):
      scale = float_dtype(
          np.exp(np.random.normal(size=num_samples) * 4.) + 1e-5)

      # Compute the loss and its derivative with respect to all three inputs.
      x, alpha, scale = [tf.convert_to_tensor(z) for z in (x, alpha, scale)]
      with tf.GradientTape(persistent=True) as tape:
        for z in (x, alpha, scale):
          tape.watch(z)
        loss = general.lossfun(x, alpha, scale, approximate=approximate)
        d_x, d_alpha, d_scale = [
            tape.gradient(tf.reduce_sum(loss), z) for z in (x, alpha, scale)
        ]

      for v in [loss, d_x, d_alpha, d_scale]:
        self.assertTrue(np.all(np.isfinite(v)))
Code Example #5
    def _precompute_lossfun_inputs(self, float_dtype):
        """Precompute a loss and its derivatives for random inputs and parameters.

    Generates a large number of random inputs to the loss, and random
    shape/scale parameters for the loss function at each sample, and
    computes the loss and its derivative with respect to all inputs and
    parameters, returning everything to be used to assert various properties
    in our unit tests.

    Args:
      float_dtype: The float precision to be used (np.float32 or np.float64).

    Returns:
      A tuple containing:
       (the number (int) of samples, and the length of all following arrays,
        A np.array (float_dtype) of losses for each sample,
        A np.array (float_dtype) of residuals of each sample (the loss inputs),
        A np array (float_dtype) of shape parameters of each loss,
        A np.array (float_dtype) of scale parameters of each loss,
        A np.array (float_dtype) of derivatives of each loss wrt each x,
        A np.array (float_dtype) of derivatives of each loss wrt each alpha,
        A np.array (float_dtype) of derivatives of each loss wrt each scale)

    Typical usage example:
    (num_samples, loss, x, alpha, scale, d_x, d_alpha, d_scale)
        = self._precompute_lossfun_inputs(np.float32)
    """
        with self.session() as sess:
            num_samples = 100000
            # Normally distributed inputs.
            x = float_dtype(np.random.normal(size=num_samples))

            # Uniformly distributed values in (-16, 3), quantized to the nearest 0.1
            # to ensure that we hit the special cases at 0, 2.
            alpha = float_dtype(
                np.round(np.random.uniform(-16, 3, num_samples) * 10) / 10.)
            # Push the sampled alphas at the extents of the range to +/- infinity, so
            # that we probe those cases too.
            alpha[alpha == 3.] = float_dtype(float('inf'))
            alpha[alpha == -16.] = -float_dtype(float('inf'))

            # Random log-normally distributed values in approx (1e-5, 100000):
            scale = float_dtype(
                np.exp(np.random.normal(size=num_samples) * 4.) + 1e-5)

            # Compute the loss and its derivative with respect to all three inputs.
            x_ph = tf.placeholder(x.dtype, num_samples)
            alpha_ph = tf.placeholder(alpha.dtype, num_samples)
            scale_ph = tf.placeholder(scale.dtype, num_samples)
            lossfun_ph = general.lossfun(x_ph, alpha_ph, scale_ph)
            loss, (d_x, d_alpha, d_scale) = sess.run(
                (lossfun_ph,
                 tf.gradients(tf.reduce_sum(lossfun_ph),
                              (x_ph, alpha_ph, scale_ph))), {
                                  x_ph: x,
                                  alpha_ph: alpha,
                                  scale_ph: scale,
                              })
            return (num_samples, loss, x, alpha, scale, d_x, d_alpha, d_scale)
Code Example #6
  def testLossfunPreservesDtype(self, float_dtype):
    """Check the loss's output has the same precision as its input."""
    n = 16
    x = float_dtype(np.random.normal(size=n))
    alpha = float_dtype(np.random.normal(size=n))
    scale = float_dtype(np.exp(np.random.normal(size=n)))
    y = general.lossfun(x, alpha, scale)
    self.assertDTypeEqual(y, float_dtype)
Code Example #7
    def _lossfun_preserves_dtype(self, float_dtype):
        """Check the loss's output has the same precision as its input."""
        n = 16
        x = float_dtype(np.random.normal(size=n))
        alpha = float_dtype(np.random.normal(size=n))
        scale = float_dtype(np.exp(np.random.normal(size=n)))
        with self.session():
            y = general.lossfun(x, alpha, scale).eval()
        self.assertDTypeEqual(y, float_dtype)
Code Example #8
  def testLossIsScaleInvariant(self, float_dtype):
    # Check that loss(mult * x, alpha, mult * scale) == loss(x, alpha, scale)
    (num_samples, loss, x, alpha, scale, _, _, _) = (
        self._precompute_lossfun_inputs(float_dtype))
    # Random log-normally distributed scalings in ~(0.2, 20)
    mult = float_dtype(
        np.maximum(0.2, np.exp(np.random.normal(size=num_samples))))
    # Compute the scaled loss.
    loss_scaled = general.lossfun(mult * x, alpha, mult * scale)
    self.assertAllClose(loss, loss_scaled, atol=1e-4, rtol=1e-4)
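
The invariance holds because the loss depends on x only through the ratio x / scale, so scaling both by the same positive factor cancels. A standalone check on the Cauchy special case (alpha == 0), without the library:

import numpy as np

def cauchy_loss(x, scale):
  return np.log(0.5 * np.square(x / scale) + 1.)

x = np.linspace(-5., 5., 11)
assert np.allclose(cauchy_loss(x, 1.7), cauchy_loss(3. * x, 3. * 1.7))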
Code Example #9
  def testAlphaEqualsZero(self, float_dtype):
    # Check that alpha == 0 reproduces Cauchy aka Lorentzian loss.
    x = np.arange(-20, 20, 0.1, float_dtype)
    alpha = float_dtype(0.)
    scale = float_dtype(1.7)

    # Our loss.
    loss = general.lossfun(x, alpha, scale)

    # Cauchy/Lorentzian loss.
    loss_true = (tf.math.log(0.5 * tf.square(x / scale) + 1.))

    self._assert_all_close_according_to_type(loss, loss_true)
Code Example #10
  def testAlphaEqualsNegativeInfinity(self, float_dtype):
    # Check that alpha == -Infinity reproduces Welsch aka Leclerc loss.
    x = np.arange(-20, 20, 0.1, float_dtype)
    alpha = float_dtype(-float('inf'))
    scale = float_dtype(1.7)

    # Our loss.
    loss = general.lossfun(x, alpha, scale)

    # Welsch/Leclerc loss.
    loss_true = (1. - tf.math.exp(-0.5 * tf.square(x / scale)))

    self._assert_all_close_according_to_type(loss, loss_true)
Code Example #11
  def testAlphaEqualsInfinity(self, float_dtype):
    # Check that alpha == Infinity takes the correct form.
    x = np.arange(-20, 20, 0.1, float_dtype)
    alpha = float_dtype(float('inf'))
    scale = float_dtype(1.7)

    # Our loss.
    loss = general.lossfun(x, alpha, scale)

    # The true loss.
    loss_true = (tf.math.exp(0.5 * tf.square(x / scale)) - 1.)

    self._assert_all_close_according_to_type(loss, loss_true)
Code Example #12
  def testAlphaEqualsTwo(self, float_dtype):
    # Check that alpha == 2 reproduces L2 loss.
    x = np.arange(-20, 20, 0.1, float_dtype)
    alpha = float_dtype(2.)
    scale = float_dtype(1.7)

    # Our loss.
    loss = general.lossfun(x, alpha, scale)

    # L2 Loss.
    loss_true = (0.5 * tf.square(x / scale))

    self._assert_all_close_according_to_type(loss, loss_true)
Code Example #13
  def testAlphaEqualsOne(self, float_dtype):
    # Check that alpha == 1 reproduces Charbonnier aka pseudo-Huber loss.
    x = np.arange(-20, 20, 0.1, float_dtype)
    alpha = float_dtype(1.)
    scale = float_dtype(1.7)

    # Our loss.
    loss = general.lossfun(x, alpha, scale)

    # Charbonnier loss.
    loss_true = (tf.sqrt(tf.square(x / scale) + 1.) - 1.)

    self._assert_all_close_according_to_type(loss, loss_true)
Code Example #14
  def testAlphaEqualsNegativeTwo(self, float_dtype):
    # Check that alpha == -2 reproduces Geman-McClure loss.
    x = np.arange(-20, 20, 0.1, float_dtype)
    alpha = float_dtype(-2.)
    scale = float_dtype(1.7)

    # Our loss.
    loss = general.lossfun(x, alpha, scale)

    # Geman-McClure loss.
    loss_true = (2. * tf.square(x / scale) / (tf.square(x / scale) + 4.))

    self._assert_all_close_according_to_type(loss, loss_true)
Code Example #15
    def _alpha_equals_two(self, float_dtype):
        # Check that alpha == 2 reproduces L2 loss.
        with self.session():
            x = np.arange(-20, 20, 0.1, float_dtype)
            alpha = float_dtype(2.)
            scale = float_dtype(1.7)

            # Our loss.
            loss = general.lossfun(x, alpha, scale).eval()

            # L2 Loss.
            loss_true = (0.5 * tf.square(x / scale)).eval()

            self._assert_all_close_according_to_type(loss, loss_true)
Code Example #16
    def _alpha_equals_zero(self, float_dtype):
        # Check that alpha == 0 reproduces Cauchy aka Lorentzian loss.
        with self.session():
            x = np.arange(-20, 20, 0.1, float_dtype)
            alpha = float_dtype(0.)
            scale = float_dtype(1.7)

            # Our loss.
            loss = general.lossfun(x, alpha, scale).eval()

            # Cauchy/Lorentzian loss.
            loss_true = (tf.log(0.5 * tf.square(x / scale) + 1.)).eval()

            self._assert_all_close_according_to_type(loss, loss_true)
Code Example #17
  def testAlphaEqualsFour(self, float_dtype):
    # Check that alpha == 4 reproduces a quartic.
    x = np.arange(-20, 20, 0.1, float_dtype)
    alpha = float_dtype(4.)
    scale = float_dtype(1.7)

    # Our loss.
    loss = general.lossfun(x, alpha, scale)

    # The true loss.
    loss_true = (
        tf.square(tf.square(x / scale)) / 8. + tf.square(x / scale) / 2.)

    self._assert_all_close_according_to_type(loss, loss_true)
Code Example #18
    def _alpha_equals_four(self, float_dtype):
        # Check that alpha == 4 reproduces a quartic.
        with self.session():
            x = np.arange(-20, 20, 0.1, float_dtype)
            alpha = float_dtype(4.)
            scale = float_dtype(1.7)

            # Our loss.
            loss = general.lossfun(x, alpha, scale).eval()

            # The true loss.
            loss_true = (tf.square(tf.square(x / scale)) / 8. +
                         tf.square(x / scale) / 2.).eval()

            self._assert_all_close_according_to_type(loss, loss_true)
Code Example #19
    def _alpha_equals_negative_two(self, float_dtype):
        # Check that alpha == -2 reproduces Geman-McClure loss.
        with self.session():
            x = np.arange(-20, 20, 0.1, float_dtype)
            alpha = float_dtype(-2.)
            scale = float_dtype(1.7)

            # Our loss.
            loss = general.lossfun(x, alpha, scale).eval()

            # Geman-McClure loss.
            loss_true = (2. * tf.square(x / scale) /
                         (tf.square(x / scale) + 4.)).eval()

            self._assert_all_close_according_to_type(loss, loss_true)
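
Taken together, the special cases above (alpha = 0, ±inf, 1, 2, -2, 4) are all instances of one closed form. Below is a minimal NumPy reference for that general form, assuming the parameterization from Barron's "A General and Adaptive Robust Loss Function", which these tests appear to exercise; general.lossfun is the actual implementation, with numerically stable handling of alpha near 0 and 2 that this sketch omits.

import numpy as np

def reference_lossfun(x, alpha, scale):
  """Illustrative only: rho(x, a, c) = (|a-2|/a) * (((x/c)**2/|a-2| + 1)**(a/2) - 1)."""
  z2 = np.square(x / scale)
  if alpha == 2:             # L2
    return 0.5 * z2
  if alpha == 0:             # Cauchy / Lorentzian
    return np.log1p(0.5 * z2)
  if alpha == -np.inf:       # Welsch / Leclerc
    return 1. - np.exp(-0.5 * z2)
  if alpha == np.inf:
    return np.exp(0.5 * z2) - 1.
  b = abs(alpha - 2.)
  return (b / alpha) * ((z2 / b + 1.)**(0.5 * alpha) - 1.)

# Spot-check against the Charbonnier form used in testAlphaEqualsOne.
x = np.arange(-20, 20, 0.1)
assert np.allclose(reference_lossfun(x, 1., 1.7),
                   np.sqrt(np.square(x / 1.7) + 1.) - 1.)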
Code Example #20
def numerical_base_partition_function(alpha):
  """Numerically approximate the partition function Z(alpha)."""
  # Generate `num_samples` values in [-x_max, x_max], with more samples
  # near the origin as `power` is set to larger values.
  num_samples = 2**24 + 1  # We want an odd value so that 0 gets sampled.
  x_max = 10**10
  power = 6
  t = tf.linspace(
      tf.constant(-1, tf.float64), tf.constant(1, tf.float64), num_samples)
  t = tf.sign(t) * tf.abs(t)**power
  x = t * x_max

  # Compute losses for the values, then exponentiate the negative losses and
  # integrate with the trapezoid rule to get the partition function.
  losses = general.lossfun(x, alpha, np.float64(1))
  y = tf.math.exp(-losses)
  partition = tf.reduce_sum((y[1:] + y[:-1]) * (x[1:] - x[:-1])) / 2.
  return partition
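
The quantity being approximated is Z(alpha) = integral over x of exp(-lossfun(x, alpha, 1)) dx, and the final reduce_sum is the trapezoid rule applied to the nonuniform grid produced by the power-law warping of t. A quick standalone sanity check of the same trapezoid-rule idea for alpha = 2, where exp(-lossfun(x, 2, 1)) = exp(-0.5 * x**2) and Z(2) = sqrt(2 * pi) in closed form:

import numpy as np

x = np.linspace(-20., 20., 100001)
y = np.exp(-0.5 * x**2)
z_approx = np.sum((y[1:] + y[:-1]) * (x[1:] - x[:-1])) / 2.
assert np.isclose(z_approx, np.sqrt(2. * np.pi))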
Code Example #21
    def nllfun(self, x, alpha, scale):
        r"""Implements the negative log-likelihood (NLL).

    Specifically, we implement -log(p(x | 0, \alpha, c) of Equation 16 in the
    paper as nllfun(x, alpha, shape).

    Args:
      x: The residual for which the NLL is being computed. x can have any shape,
        and alpha and scale will be broadcasted to match x's shape if necessary.
        Must be a tensorflow tensor or numpy array of floats.
      alpha: The shape parameter of the NLL (\alpha in the paper), where more
        negative values cause outliers to "cost" more and inliers to "cost"
        less. Alpha can be any non-negative value, but the gradient of the NLL
        with respect to alpha has singularities at 0 and 2 so you may want to
        limit usage to (0, 2) during gradient descent. Must be a tensorflow
        tensor or numpy array of floats. Varying alpha in that range allows for
        smooth interpolation between a Cauchy distribution (alpha = 0) and a
        Normal distribution (alpha = 2) similar to a Student's T distribution.
      scale: The scale parameter of the loss. When |x| < scale, the NLL is like
        that of a (possibly unnormalized) normal distribution, and when |x| >
        scale the NLL takes on a different shape according to alpha. Must be a
        tensorflow tensor or numpy array of floats.

    Returns:
      The NLLs for each element of x, in the same shape as x. This is returned
      as a TensorFlow graph node of floats with the same precision as x.
    """
        # `scale` and `alpha` must have the same type as `x`.
        tf.debugging.assert_type(scale, x.dtype)
        tf.debugging.assert_type(alpha, x.dtype)
        assert_ops = [
            # `scale` must be > 0.
            tf.Assert(tf.reduce_all(scale > 0.), [scale]),
            # `alpha` must be >= 0.
            tf.Assert(tf.reduce_all(alpha >= 0.), [alpha]),
        ]
        with tf.control_dependencies(assert_ops):
            loss = general.lossfun(x, alpha, scale, approximate=False)
            log_partition = (tf.math.log(scale) +
                             self.log_base_partition_function(alpha))
            nll = loss + log_partition
            return nll
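
Written out, the density implied by this NLL is p(x | 0, alpha, c) = exp(-lossfun(x, alpha, c)) / (c * Z(alpha)), so the NLL decomposes as -log p = lossfun(x, alpha, c) + log(c) + log(Z(alpha)); the last two terms are exactly the `log_partition` added to `loss` above.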
Code Example #22
    def _loss_is_scale_invariant(self, float_dtype):
        # Check that loss(mult * x, alpha, mult * scale) == loss(x, alpha, scale)
        (num_samples, loss, x, alpha, scale, _, _,
         _) = self._precompute_lossfun_inputs(float_dtype)
        with self.session() as sess:
            # Random log-normally distributed scalings in ~(0.2, 20)
            mult = float_dtype(
                np.maximum(0.2, np.exp(np.random.normal(size=num_samples))))

            # Compute the scaled loss.
            x_ph = tf.placeholder(x.dtype, num_samples)
            alpha_ph = tf.placeholder(alpha.dtype, num_samples)
            scale_ph = tf.placeholder(scale.dtype, num_samples)
            lossfun_ph = general.lossfun(x_ph, alpha_ph, scale_ph)
            loss_scaled = sess.run(lossfun_ph, {
                x_ph: mult * x,
                scale_ph: mult * scale,
                alpha_ph: alpha
            })
            self.assertAllClose(loss, loss_scaled, atol=1e-4, rtol=1e-4)
Code Example #23
    def _loss_and_gradients_are_finite(self, float_dtype):
        # Test that the loss and its approximation both give finite losses and
        # derivatives everywhere that they should for a wide range of values.
        for approximate in [False, True]:
            with self.session() as sess:
                num_samples = 100000

                # Normally distributed inputs.
                x = float_dtype(np.random.normal(size=num_samples))

                # Uniformly distributed values in (-16, 3), quantized to the nearest
                # 0.1 to ensure that we hit the special cases at 0, 2.
                alpha = float_dtype(
                    np.round(np.random.uniform(-16, 3, num_samples) * 10) /
                    10.)

                # Random log-normally distributed values in approx (1e-5, 100000):
                scale = float_dtype(
                    np.exp(np.random.normal(size=num_samples) * 4.) + 1e-5)

                # Compute the loss and its derivative with respect to all three inputs.
                x_ph = tf.placeholder(x.dtype, num_samples)
                alpha_ph = tf.placeholder(alpha.dtype, num_samples)
                scale_ph = tf.placeholder(scale.dtype, num_samples)
                lossfun_ph = general.lossfun(x_ph,
                                             alpha_ph,
                                             scale_ph,
                                             approximate=approximate)
                loss, (d_x, d_alpha, d_scale) = sess.run(
                    (lossfun_ph,
                     tf.gradients(tf.reduce_sum(lossfun_ph),
                                  (x_ph, alpha_ph, scale_ph))), {
                                      x_ph: x,
                                      scale_ph: scale,
                                      alpha_ph: alpha
                                  })

                for v in [loss, d_x, d_alpha, d_scale]:
                    self.assertTrue(np.all(np.isfinite(v)))
Code Example #24
    def _gradient_matches_finite_differences(self, float_dtype):
        # Test that the loss and its approximation both return gradients that are
        # close to the numerical gradient from finite differences, with forward
        # differencing. Returning correct gradients is TensorFlow's job, so this is
        # just an aggressive sanity check in case some implementation detail causes
        # gradients to incorrectly go to zero due to quantization or stop_gradients
        # in some op that is used by the loss.
        for approximate in [False, True]:
            with self.session() as sess:
                num_samples = 100000

                # Normally distributed inputs.
                x = float_dtype(np.random.normal(size=num_samples))

                # Uniformly distributed values in (-16, 3), quantized to the nearest
                # 0.1 and then shifted by 0.05 so that we avoid the special cases at
                # 0 and 2 where the analytical gradient won't match finite differences.
                alpha = float_dtype(
                    np.round(np.random.uniform(-16, 3, num_samples) * 10) /
                    10. + 0.05)

                # Random uniformly distributed values in [0.5, 1.5].
                scale = float_dtype(np.random.uniform(0.5, 1.5, num_samples))

                # Compute the loss and its derivative with respect to all three inputs.
                x_ph = tf.placeholder(x.dtype, num_samples)
                alpha_ph = tf.placeholder(alpha.dtype, num_samples)
                scale_ph = tf.placeholder(scale.dtype, num_samples)
                lossfun_ph = general.lossfun(x_ph,
                                             alpha_ph,
                                             scale_ph,
                                             approximate=approximate)
                loss, (d_x, d_alpha, d_scale) = sess.run(
                    (lossfun_ph,
                     tf.gradients(tf.reduce_sum(lossfun_ph),
                                  (x_ph, alpha_ph, scale_ph))), {
                                      x_ph: x,
                                      alpha_ph: alpha,
                                      scale_ph: scale
                                  })

                step_size = float_dtype(1e-3)

                # Assert that the 95th percentile of errors is <= 1e-2.
                def assert_percentile_close(v1, v2):
                    self.assertLessEqual(np.percentile(np.abs(v1 - v2), 95),
                                         1e-2)

                n_x = (sess.run(lossfun_ph, {
                    x_ph: x + step_size,
                    alpha_ph: alpha,
                    scale_ph: scale
                }) - loss) / step_size
                assert_percentile_close(n_x, d_x)

                n_alpha = (
                    sess.run(lossfun_ph, {
                        x_ph: x,
                        alpha_ph: alpha + step_size,
                        scale_ph: scale
                    }) - loss) / step_size
                assert_percentile_close(n_alpha, d_alpha)

                n_scale = (
                    sess.run(lossfun_ph, {
                        x_ph: x,
                        alpha_ph: alpha,
                        scale_ph: scale + step_size
                    }) - loss) / step_size
                assert_percentile_close(n_scale, d_scale)