    def testGradientMatchesFiniteDifferences(self, float_dtype):
        # Test that the loss and its approximation both return gradients that are
        # close to the numerical gradient from finite differences, with forward
        # differencing. Returning correct gradients is TensorFlow's job, so this is
        # just an aggressive sanity check in case some implementation detail causes
        # gradients to incorrectly go to zero due to quantization or stop_gradients
        # in some op that is used by the loss.
        for approximate in [False, True]:
            num_samples = 100000

            # Normally distributed inputs.
            x = float_dtype(np.random.normal(size=num_samples))

            # Uniformly distributed values in (-16, 3), quantized to the nearest
            # 0.1 and then shifted by 0.05 so that we avoid the special cases at
            # 0 and 2 where the analytical gradient won't match finite differences.
            alpha = float_dtype(
                np.round(np.random.uniform(-16, 3, num_samples) * 10) /
                10.) + 0.05

            # Random uniformly distributed values in [0.5, 1.5]
            scale = float_dtype(np.random.uniform(0.5, 1.5, num_samples))

            # Compute the loss and its derivative with respect to all three inputs.
            x, alpha, scale = [
                tf.convert_to_tensor(z) for z in (x, alpha, scale)
            ]
            with tf.GradientTape(persistent=True) as tape:
                for z in (x, alpha, scale):
                    tape.watch(z)
                loss = general.lossfun(x,
                                       alpha,
                                       scale,
                                       approximate=approximate)
                d_x, d_alpha, d_scale = [
                    tape.gradient(tf.reduce_sum(loss), z)
                    for z in (x, alpha, scale)
                ]

            step_size = float_dtype(1e-3)
            # Use forward differences of the same (exact or approximate) loss
            # whose analytic gradients are being checked.
            n_x = (general.lossfun(x + step_size, alpha, scale,
                                   approximate=approximate) - loss) / step_size
            n_alpha = (general.lossfun(x, alpha + step_size, scale,
                                       approximate=approximate) - loss) / step_size
            n_scale = (general.lossfun(x, alpha, scale + step_size,
                                       approximate=approximate) - loss) / step_size

            self.assertAllClose(n_x, d_x, rtol=1e-2, atol=1e-2)
            self.assertAllClose(n_alpha, d_alpha, rtol=1e-2, atol=1e-2)
            self.assertAllClose(n_scale, d_scale, rtol=1e-2, atol=1e-2)
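    # A minimal, self-contained sketch of the forward-difference pattern used
    # above, applied to a toy quadratic rather than general.lossfun. The method
    # name and defaults here are illustrative, not part of the suite under test.
    def _sketchForwardDifferenceOnQuadratic(self, float_dtype=np.float64):
        x = tf.convert_to_tensor(float_dtype(np.random.normal(size=10)))
        with tf.GradientTape() as tape:
            tape.watch(x)
            y = 0.5 * tf.square(x)  # Analytic gradient is x.
        d_x = tape.gradient(tf.reduce_sum(y), x)
        step_size = float_dtype(1e-3)
        # Forward difference: (f(x + h) - f(x)) / h.
        n_x = (0.5 * tf.square(x + step_size) - y) / step_size
        self.assertAllClose(n_x, d_x, rtol=1e-2, atol=1e-2)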
    def testApproximateLossIsAccurate(self, float_dtype):
        # Check that the approximate loss (lossfun() with approximate=True,
        # which internally uses epsilon=1e-6) reasonably approximates the exact
        # loss (approximate=False, the default) for a range of values of alpha
        # (skipping alpha=0, where the approximation is poor).
        x = np.arange(-10, 10, 0.1, float_dtype)
        scale = float_dtype(1.7)
        for alpha in [-4, -2, -0.2, -0.01, 0.01, 0.2, 1, 1.99, 2, 2.01, 4]:
            alpha = float_dtype(alpha)
            loss = general.lossfun(x, alpha, scale)
            loss_approx = general.lossfun(x, alpha, scale, approximate=True)
            self.assertAllClose(loss,
                                loss_approx,
                                rtol=1e-5,
                                atol=1e-4,
                                msg='alpha=%g' % alpha)
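    # Hedged diagnostic sketch (reports rather than asserts): the worst-case gap
    # between the exact and approximate losses over the grid used above, relying
    # only on what the test already uses, general.lossfun(..., approximate=...).
    def _sketchReportApproximationGap(self, float_dtype=np.float32):
        x = np.arange(-10, 10, 0.1, float_dtype)
        scale = float_dtype(1.7)
        for alpha in [-4., -0.2, 0.2, 1., 2., 4.]:
            alpha = float_dtype(alpha)
            gap = tf.reduce_max(
                tf.abs(general.lossfun(x, alpha, scale) -
                       general.lossfun(x, alpha, scale, approximate=True)))
            print('alpha=%g: max |exact - approx| = %g' % (alpha, float(gap)))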
    def testLossfunPreservesDtype(self, float_dtype):
        """Check the loss's output has the same precision as its input."""
        n = 16
        x = float_dtype(np.random.normal(size=n))
        alpha = float_dtype(np.random.normal(size=n))
        scale = float_dtype(np.exp(np.random.normal(size=n)))
        y = general.lossfun(x, alpha, scale)
        self.assertDTypeEqual(y, float_dtype)
    def testLossIsScaleInvariant(self, float_dtype):
        # Check that loss(mult * x, alpha, mult * scale) == loss(x, alpha, scale).
        (num_samples, loss, x, alpha, scale, _, _, _) = (
            self._precompute_lossfun_inputs(float_dtype))
        # Random log-normally distributed scalings in ~(0.2, 20).
        mult = float_dtype(
            np.maximum(0.2, np.exp(np.random.normal(size=num_samples))))
        # Compute the scaled loss.
        loss_scaled = general.lossfun(mult * x, alpha, mult * scale)
        self.assertAllClose(loss, loss_scaled, atol=1e-4, rtol=1e-4)
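    # Deterministic spot-check sketch of the same invariance with fixed
    # multipliers, which can be easier to debug than the randomized test above.
    # Illustrative only; the parameter values here are arbitrary.
    def _sketchScaleInvarianceSpotCheck(self, float_dtype=np.float32):
        x = float_dtype(np.random.normal(size=1000))
        alpha = float_dtype(1.5)
        scale = float_dtype(0.7)
        loss = general.lossfun(x, alpha, scale)
        for mult in [0.25, 1., 7.]:
            mult = float_dtype(mult)
            loss_scaled = general.lossfun(mult * x, alpha, mult * scale)
            self.assertAllClose(loss, loss_scaled, atol=1e-4, rtol=1e-4)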
    def _precompute_lossfun_inputs(self, float_dtype):
        """Precompute a loss and its derivatives for random inputs and parameters.

    Generates a large number of random inputs to the loss, and random
    shape/scale parameters for the loss function at each sample, and
    computes the loss and its derivative with respect to all inputs and
    parameters, returning everything to be used to assert various properties
    in our unit tests.

    Args:
      float_dtype: The float precision to be used (np.float32 or np.float64).

    Returns:
      A tuple containing:
       (the number (int) of samples, and the length of all following arrays,
        A np.array (float_dtype) of losses for each sample,
        A np.array (float_dtype) of residuals of each sample (the loss inputs),
        A np array (float_dtype) of shape parameters of each loss,
        A np.array (float_dtype) of scale parameters of each loss,
        A np.array (float_dtype) of derivatives of each loss wrt each x,
        A np.array (float_dtype) of derivatives of each loss wrt each alpha,
        A np.array (float_dtype) of derivatives of each loss wrt each scale)

    Typical usage example:
    (num_samples, loss, x, alpha, scale, d_x, d_alpha, d_scale)
        = self._precompute_lossfun_inputs(np.float32)
    """
        num_samples = 100000
        # Normally distributed inputs.
        x = float_dtype(np.random.normal(size=num_samples))

        # Uniformly distributed values in (-16, 3), quantized to the nearest 0.1
        # to ensure that we hit the special cases at 0, 2.
        alpha = float_dtype(
            np.round(np.random.uniform(-16, 3, num_samples) * 10) / 10.)
        # Push the sampled alphas at the extents of the range to +/- infinity, so
        # that we probe those cases too.
        alpha[alpha == 3.] = float_dtype(float('inf'))
        alpha[alpha == -16.] = -float_dtype(float('inf'))

        # Random log-normally distributed values in approx (1e-5, 100000):
        scale = float_dtype(
            np.exp(np.random.normal(size=num_samples) * 4.) + 1e-5)

        x, alpha, scale = [tf.convert_to_tensor(z) for z in (x, alpha, scale)]
        with tf.GradientTape(persistent=True) as tape:
            for z in (x, alpha, scale):
                tape.watch(z)
            loss = general.lossfun(x, alpha, scale)
            d_x, d_alpha, d_scale = [
                tape.gradient(tf.reduce_sum(loss), z)
                for z in (x, alpha, scale)
            ]
        return (num_samples, loss, x, alpha, scale, d_x, d_alpha, d_scale)
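    # Hedged sketch of how the precomputed bundle above might be consumed: the
    # general loss is non-decreasing in |x|, so its derivative w.r.t. x should
    # never point away from zero. This is an illustrative property check, not
    # one of the tests reproduced here; non-finite derivatives (possible at the
    # alpha = +/-inf extremes) are masked out.
    def _sketchGradientSignMatchesX(self, float_dtype=np.float32):
        _, _, x, _, _, d_x, _, _ = self._precompute_lossfun_inputs(float_dtype)
        x, d_x = np.array(x), np.array(d_x)
        mask = np.isfinite(d_x)
        self.assertTrue(np.all(np.sign(x[mask]) * np.sign(d_x[mask]) >= 0))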
    def testAlphaEqualsInfinity(self, float_dtype):
        # Check that alpha == Infinity takes the correct form.
        x = np.arange(-20, 20, 0.1, float_dtype)
        alpha = float_dtype(float('inf'))
        scale = float_dtype(1.7)

        # Our loss.
        loss = general.lossfun(x, alpha, scale)

        # The true loss.
        loss_true = (tf.math.exp(0.5 * tf.square(x / scale)) - 1.)

        self._assert_all_close_according_to_type(loss, loss_true)
    def testAlphaEqualsTwo(self, float_dtype):
        # Check that alpha == 2 reproduces L2 loss.
        x = np.arange(-20, 20, 0.1, float_dtype)
        alpha = float_dtype(2.)
        scale = float_dtype(1.7)

        # Our loss.
        loss = general.lossfun(x, alpha, scale)

        # L2 Loss.
        loss_true = (0.5 * tf.square(x / scale))

        self._assert_all_close_according_to_type(loss, loss_true)
    def testAlphaEqualsOne(self, float_dtype):
        # Check that alpha == 1 reproduces Charbonnier aka pseudo-Huber loss.
        x = np.arange(-20, 20, 0.1, float_dtype)
        alpha = float_dtype(1.)
        scale = float_dtype(1.7)

        # Our loss.
        loss = general.lossfun(x, alpha, scale)

        # Charbonnier loss.
        loss_true = (tf.sqrt(tf.square(x / scale) + 1.) - 1.)

        self._assert_all_close_according_to_type(loss, loss_true)
    def testAlphaEqualsZero(self, float_dtype):
        # Check that alpha == 0 reproduces Cauchy aka Lorentzian loss.
        x = np.arange(-20, 20, 0.1, float_dtype)
        alpha = float_dtype(0.)
        scale = float_dtype(1.7)

        # Our loss.
        loss = general.lossfun(x, alpha, scale)

        # Cauchy/Lorentzian loss.
        loss_true = (tf.math.log(0.5 * tf.square(x / scale) + 1.))

        self._assert_all_close_according_to_type(loss, loss_true)
    def testAlphaEqualsNegativeTwo(self, float_dtype):
        # Check that alpha == -2 reproduces Geman-McClure loss.
        x = np.arange(-20, 20, 0.1, float_dtype)
        alpha = float_dtype(-2.)
        scale = float_dtype(1.7)

        # Our loss.
        loss = general.lossfun(x, alpha, scale)

        # Geman-McClure loss.
        loss_true = (2. * tf.square(x / scale) / (tf.square(x / scale) + 4.))

        self._assert_all_close_according_to_type(loss, loss_true)
    def testAlphaEqualsNegativeInfinity(self, float_dtype):
        # Check that alpha == -Infinity reproduces Welsch aka Leclerc loss.
        x = np.arange(-20, 20, 0.1, float_dtype)
        alpha = float_dtype(-float('inf'))
        scale = float_dtype(1.7)

        # Our loss.
        loss = general.lossfun(x, alpha, scale)

        # Welsch/Leclerc loss.
        loss_true = (1. - tf.math.exp(-0.5 * tf.square(x / scale)))

        self._assert_all_close_according_to_type(loss, loss_true)
    def testAlphaEqualsFour(self, float_dtype):
        # Check that alpha == 4 reproduces a quartic.
        x = np.arange(-20, 20, 0.1, float_dtype)
        alpha = float_dtype(4.)
        scale = float_dtype(1.7)

        # Our loss.
        loss = general.lossfun(x, alpha, scale)

        # The true loss.
        loss_true = (tf.square(tf.square(x / scale)) / 8. +
                     tf.square(x / scale) / 2.)

        self._assert_all_close_according_to_type(loss, loss_true)
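# For reference, a compact sketch collecting the closed forms exercised by the
# special-case tests above, with r = x / scale. This is an illustrative NumPy
# helper, not part of the library under test.
def reference_loss_for_special_alpha(x, alpha, scale):
    r2 = np.square(np.asarray(x, dtype=np.float64) / scale)
    if alpha == -float('inf'):
        return 1. - np.exp(-0.5 * r2)        # Welsch / Leclerc.
    if alpha == -2.:
        return 2. * r2 / (r2 + 4.)           # Geman-McClure.
    if alpha == 0.:
        return np.log(0.5 * r2 + 1.)         # Cauchy / Lorentzian.
    if alpha == 1.:
        return np.sqrt(r2 + 1.) - 1.         # Charbonnier / pseudo-Huber.
    if alpha == 2.:
        return 0.5 * r2                      # L2.
    if alpha == 4.:
        return np.square(r2) / 8. + r2 / 2.  # Quartic.
    if alpha == float('inf'):
        return np.exp(0.5 * r2) - 1.
    raise ValueError('No closed form listed here for alpha=%g' % alpha)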
def numerical_base_partition_function(alpha):
    """Numerically approximate the partition function Z(alpha)."""
    # Generate `num_samples` values in [-x_max, x_max], with more samples
    # near the origin as `power` is set to larger values.
    num_samples = 2**24 + 1  # We want an odd value so that 0 gets sampled.
    x_max = 10**10
    power = 6
    t = tf.linspace(tf.constant(-1, tf.float64), tf.constant(1, tf.float64),
                    num_samples)
    t = tf.sign(t) * tf.abs(t)**power
    x = t * x_max

    # Compute losses for the values, then exponentiate the negative losses and
    # integrate with the trapezoid rule to get the partition function.
    losses = general.lossfun(x, alpha, np.float64(1))
    y = tf.math.exp(-losses)
    partition = tf.reduce_sum((y[1:] + y[:-1]) * (x[1:] - x[:-1])) / 2.
    return partition
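# Hedged usage sketch for the helper above: a couple of alphas have simple
# closed-form partition functions (Z(2) = sqrt(2 * pi), Z(0) = pi * sqrt(2)),
# which give a quick sanity check of the numerical approximation.
def check_numerical_base_partition_function():
    np.testing.assert_allclose(
        numerical_base_partition_function(np.float64(2.)),
        np.sqrt(2. * np.pi), rtol=1e-4)
    np.testing.assert_allclose(
        numerical_base_partition_function(np.float64(0.)),
        np.pi * np.sqrt(2.), rtol=1e-4)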
    def testLossAndGradientsAreFinite(self, float_dtype):
        # Test that the loss and its approximation both give finite losses and
        # derivatives everywhere that they should for a wide range of values.
        for approximate in [False, True]:
            num_samples = 100000

            # Normally distributed inputs.
            x = float_dtype(np.random.normal(size=num_samples))

            # Uniformly distributed values in (-16, 3), quantized to the nearest
            # 0.1 to ensure that we hit the special cases at 0, 2.
            alpha = float_dtype(
                np.round(np.random.uniform(-16, 3, num_samples) * 10) / 10.)

            # Random log-normally distributed values in approx (1e-5, 100000):
            scale = float_dtype(
                np.exp(np.random.normal(size=num_samples) * 4.) + 1e-5)

            # Compute the loss and its derivative with respect to all three inputs.
            x, alpha, scale = [
                tf.convert_to_tensor(z) for z in (x, alpha, scale)
            ]
            with tf.GradientTape(persistent=True) as tape:
                for z in (x, alpha, scale):
                    tape.watch(z)
                loss = general.lossfun(x,
                                       alpha,
                                       scale,
                                       approximate=approximate)
                d_x, d_alpha, d_scale = [
                    tape.gradient(tf.reduce_sum(loss), z)
                    for z in (x, alpha, scale)
                ]

            for v in [loss, d_x, d_alpha, d_scale]:
                self.assertTrue(np.all(np.isfinite(v)))
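            # An equivalent in-graph alternative (a sketch, not used here):
            # tf.debugging.assert_all_finite raises tf.errors.InvalidArgumentError
            # if any element is NaN or Inf, e.g.
            #   for v in [loss, d_x, d_alpha, d_scale]:
            #       tf.debugging.assert_all_finite(v, 'non-finite loss or gradient')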