def test_can_instantiate(self):
     noisy = uniform_noise.NoisyNormal(loc=0., scale=1.)
     em = ContinuousBatchedEntropyModel(noisy, 1)
     self.assertIs(em.prior, noisy)
     self.assertEqual(em.coding_rank, 1)
     self.assertEqual(em.tail_mass, 2**-8)
     self.assertEqual(em.dtype, noisy.dtype)
Example #2
 def test_can_instantiate_statelessly(self):
     noisy = uniform_noise.NoisyNormal(loc=.25, scale=1.)
     em = ContinuousBatchedEntropyModel(noisy,
                                        coding_rank=1,
                                        compression=True)
     self.assertEqual(em.compression, True)
     self.assertEqual(em.stateless, False)
     self.assertAllEqual(.25, em.quantization_offset)
     em = ContinuousBatchedEntropyModel(
         compression=True,
         stateless=True,
         coding_rank=1,
         prior_shape=noisy.batch_shape,
         cdf=em.cdf,
         cdf_offset=em.cdf_offset,
         quantization_offset=em.quantization_offset,
     )
     self.assertEqual(em.compression, True)
     self.assertEqual(em.stateless, True)
     self.assertAllEqual(.25, em.quantization_offset)
     with self.assertRaises(RuntimeError):
         em.prior  # pylint:disable=pointless-statement
     self.assertEqual(em.coding_rank, 1)
     self.assertEqual(em.tail_mass, 2**-8)
     self.assertEqual(em.range_coder_precision, 12)
     self.assertEqual(em.bottleneck_dtype, tf.float32)
Example #3
 def test_quantizes_to_integers_modulo_offset(self):
     noisy = uniform_noise.NoisyNormal(loc=.25, scale=10.)
     em = ContinuousBatchedEntropyModel(noisy, 1)
     x = tf.range(-20., 20.) + .25
     x_perturbed = x + tf.random.uniform(x.shape, -.49, .49)
     x_quantized = em.quantize(x_perturbed)
     self.assertAllEqual(x, x_quantized)
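A minimal sketch of the quantization being tested, assuming `em.quantize` rounds to the integer grid shifted by the prior's quantization offset (here .25, matching the mode of the prior, which is what the assertion above implies):

import tensorflow as tf

def quantize_with_offset(x, offset=.25):
  # Shift so grid points land on integers, round, then undo the shift.
  return tf.round(x - offset) + offset

x = tf.range(-20., 20.) + .25
assert tf.reduce_all(quantize_with_offset(x + .3) == x)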
Example #4
 def test_compression_consistent_with_quantization(self):
     noisy = uniform_noise.NoisyNormal(loc=.25, scale=10.)
     em = ContinuousBatchedEntropyModel(noisy, 1, compression=True)
     x = noisy.base.sample([100])
     x_quantized = em.quantize(x)
     x_decompressed = em.decompress(em.compress(x), [100])
     self.assertAllEqual(x_decompressed, x_quantized)
Example #5
 def test_bitstring_length_matches_estimates(self, training, prior):
     priors = {
         "deep_factorized":
         deep_factorized.NoisyDeepFactorized(batch_shape=(16, )),
         "normal":
         uniform_noise.NoisyNormal(loc=tf.range(16.0), scale=1.0)
     }
     prior = priors[prior]
     em = universal.UniversalBatchedEntropyModel(prior,
                                                 coding_rank=2,
                                                 compression=True)
     num_symbols = 1000
      # The source distribution is fixed as Gaussian.
     source = priors["normal"].base
     x = source.sample((3, num_symbols), seed=0)
     x_perturbed, bits_estimate = em(x, training=training)
     bitstring = em.compress(x)
     x_decoded = em.decompress(bitstring, (num_symbols, ))
     bitstring_bits = tf.reshape(
         [len(b) * 8 for b in bitstring.numpy().flatten()], bitstring.shape)
     # Max error 1% and 2 bytes.
     self.assertAllClose(bits_estimate, bitstring_bits, atol=16, rtol=0.01)
     # Quantization noise should be between -.5 and .5
     self.assertAllLessEqual(tf.abs(x - x_decoded), 0.5)
     self.assertAllLessEqual(tf.abs(x - x_perturbed), 0.5)
Example #6
 def test_small_cdfs_for_dirac_prior_without_quantization_offset(self):
     prior = uniform_noise.NoisyNormal(loc=100 * tf.range(16.0),
                                       scale=1e-10)
     prior._quantization_offset = lambda: 0.0
     em = ContinuousBatchedEntropyModel(prior,
                                        coding_rank=2,
                                        compression=True)
     self.assertAllLessEqual(em._cdf_length, 10)
Example #7
 def test_default_kwargs_throw_error_on_compression(self):
     noisy = uniform_noise.NoisyNormal(loc=.25, scale=10.)
     em = ContinuousBatchedEntropyModel(noisy, 1)
     x = tf.zeros(10)
     with self.assertRaises(RuntimeError):
         em.compress(x)
     s = tf.zeros(10, dtype=tf.string)
     with self.assertRaises(RuntimeError):
         em.decompress(s, [10])
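The RuntimeError above goes away when the model is built with `compression=True`, which precomputes the CDF tables the range coder needs; a minimal sketch:

em = ContinuousBatchedEntropyModel(noisy, 1, compression=True)
bitstring = em.compress(tf.zeros(10))
x_hat = em.decompress(bitstring, [10])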
Example #8
 def test_can_instantiate(self):
   noisy = uniform_noise.NoisyNormal(loc=0., scale=1.)
   em = ContinuousBatchedEntropyModel(noisy, 1)
   self.assertIs(em.prior, noisy)
   self.assertEqual(em.coding_rank, 1)
   self.assertEqual(em.likelihood_bound, 1e-9)
   self.assertEqual(em.tail_mass, 2**-8)
   self.assertEqual(em.range_coder_precision, 12)
   self.assertEqual(em.dtype, noisy.dtype)
Example #9
 def test_small_cdfs_for_dirac_prior_without_quantization_offset(self):
     prior = uniform_noise.NoisyNormal(loc=100. * tf.range(16.),
                                       scale=1e-10)
     em = ContinuousBatchedEntropyModel(prior,
                                        coding_rank=2,
                                        offset_heuristic=False,
                                        compression=True)
     self.assertEqual(em.cdf_offset.shape[0], 16)
     self.assertLessEqual(em.cdf.shape[0], 16 * 6)
Example #10
 def test_gradients_are_straight_through(self):
     noisy = uniform_noise.NoisyNormal(loc=0, scale=1)
     em = ContinuousBatchedEntropyModel(noisy, 1)
     x = tf.range(-20., 20.)
     x_perturbed = x + tf.random.uniform(x.shape, -.49, .49)
     with tf.GradientTape() as tape:
         tape.watch(x_perturbed)
         x_quantized = em.quantize(x_perturbed)
     gradients = tape.gradient(x_quantized, x_perturbed)
     self.assertAllEqual(gradients, tf.ones_like(gradients))
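The identity gradients checked above are the hallmark of a straight-through estimator; a minimal sketch of the standard trick, assuming plain rounding with no offset:

import tensorflow as tf

def round_straight_through(x):
  # Forward pass rounds; backward pass is identity, because stop_gradient
  # removes the non-differentiable rounding term from the gradient.
  return x + tf.stop_gradient(tf.round(x) - x)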
Example #11
 def test_high_entropy_bounds(self):
   # For high entropy distributions, the training bound should be very tight,
   # and the overhead of range coding manageable.
   noisy = uniform_noise.NoisyNormal(loc=0., scale=100.)
   em = ContinuousBatchedEntropyModel(noisy, 1, compression=True)
   x = noisy.base.sample([10000])
   bits_eval = em.bits(x, training=False)
   bits_training = em.bits(x, training=True)
   bits_compressed = 8 * len(em.compress(x).numpy())
   self.assertAllClose(bits_training, bits_eval, atol=0, rtol=5e-5)
   self.assertAllClose(bits_compressed, bits_eval, atol=0, rtol=5e-3)
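As a rough check on the magnitudes involved: for unit-width quantization bins, the discrete entropy of a wide Gaussian approaches its differential entropy, so each sample from N(0, 100^2) should cost about 8.7 bits; a minimal sketch of that arithmetic:

import math

# Differential entropy of N(0, sigma^2) in bits; for sigma >> 1 this closely
# approximates the rate after quantization to unit-width bins.
sigma = 100.
print(0.5 * math.log2(2 * math.pi * math.e * sigma ** 2))  # ~8.7 bits/sample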
Example #12
 def test_can_instantiate(self):
     noisy = uniform_noise.NoisyNormal(loc=0., scale=1.)
     em = ContinuousBatchedEntropyModel(noisy, 1)
     self.assertIs(em.distribution, noisy)
     self.assertEqual(em.coding_rank, 1)
     self.assertEqual(em.likelihood_bound, 1e-9)
     self.assertEqual(em.tail_mass, 2**-8)
     self.assertEqual(em.range_coder_precision, 12)
     self.assertEqual(em.dtype, noisy.dtype)
     self.assertEqual(em.quantization_offset(), 0)
     self.assertEqual(em.upper_tail(), 2.885635)
     self.assertEqual(em.lower_tail(), -2.885635)
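The tail values in this example can be sanity-checked independently: with `tail_mass = 2**-8` split evenly between the two tails, `upper_tail` should be approximately the standard normal quantile at 1 - 2**-9. A minimal sketch using scipy (scipy is an assumption of this note, not a dependency of the library):

from scipy.stats import norm

# Each tail carries half of tail_mass = 2**-8, i.e. 2**-9 of probability mass.
print(norm.ppf(1 - 2 ** -9))  # ~2.8856, matching upper_tail above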
Example #13
 def test_compression_works_after_serialization_no_offset(self):
     noisy = uniform_noise.NoisyNormal(loc=0, scale=5.)
     em = ContinuousBatchedEntropyModel(noisy, 1, compression=True)
     self.assertIs(em._quantization_offset, None)
     json = tf.keras.utils.serialize_keras_object(em)
     weights = em.get_weights()
     x = noisy.base.sample([100])
     x_quantized = em.quantize(x)
     x_compressed = em.compress(x)
     em = tf.keras.utils.deserialize_keras_object(json)
     em.set_weights(weights)
     self.assertAllEqual(em.compress(x), x_compressed)
     self.assertAllEqual(em.decompress(x_compressed, [100]), x_quantized)
Example #14
 def test_information_bounds(self):
   # `bits(training=True)` should be greater than `bits(training=False)`
   # because it is defined as an upper bound (albeit for infinite data). The
   # actual length of the bit string should always be greater than
   # `bits(training=False)` because range coding is only asymptotically
   # optimal, and because it operates on quantized probabilities.
   for scale in 2 ** tf.linspace(-2., 7., 10):
     noisy = uniform_noise.NoisyNormal(loc=0., scale=scale)
     em = ContinuousBatchedEntropyModel(noisy, 1, compression=True)
     x = noisy.base.sample([10000])
     bits_eval = em.bits(x, training=False)
     bits_training = em.bits(x, training=True)
     bits_compressed = 8 * len(em.compress(x).numpy())
     self.assertGreater(bits_training, .9975 * bits_eval)
     self.assertGreater(bits_compressed, bits_eval)
Example #15
 def test_small_bitcost_for_dirac_prior(self):
   prior = uniform_noise.NoisyNormal(loc=100 * tf.range(16.0), scale=1e-10)
   em = ContinuousBatchedEntropyModel(
       prior, coding_rank=2, compression=True)
   num_symbols = 1000
   source = prior.base
   x = source.sample((3, num_symbols))
   _, bits_estimate = em(x, training=True)
   bitstring = em.compress(x)
   x_decoded = em.decompress(bitstring, (num_symbols,))
   bitstring_bits = tf.reshape(
       [len(b) * 8 for b in bitstring.numpy().flatten()], bitstring.shape)
   # Max 2 bytes.
   self.assertAllLessEqual(bits_estimate, 16)
   self.assertAllLessEqual(bitstring_bits, 16)
   # Quantization noise should be between -.5 and .5
   self.assertAllLessEqual(tf.abs(x - x_decoded), 0.5)
Example #16
 def test_information_bounds(self, scale):
     # Off-center prior to test quantization offset heuristic. Without it, it
     # should be harder to achieve the bounds below.
     prior = uniform_noise.NoisyNormal(loc=.5, scale=scale)
     em = ContinuousBatchedEntropyModel(prior,
                                        coding_rank=1,
                                        compression=True)
     x = prior.base.sample([1000000])
     _, bits_eval = em(x, training=False)
     _, bits_training = em(x, training=True)
     bits_compressed = 8 * len(em.compress(x).numpy())
     # Asymptotically, the entropy estimate with `training=True` is an upper
     # bound on the entropy estimate with `training=False`. (With limited data,
     # fluctuations are possible.)
     with self.subTest("training bits > eval bits"):
         # Sample size is too small for the bound to be asymptotic. Increasing it
         # would make tests run too long.
         self.assertGreater(bits_training, 0.999999 * bits_eval)
     # Asymptotically, the length of the bit string should be greater than the
     # entropy estimate with `training=False` because range coding is only
     # asymptotically optimal, and because it operates on quantized
     # probabilities.
     with self.subTest("compressed bits > eval bits"):
         self.assertGreater(bits_compressed, bits_eval)
     # For low entropy distributions, the training bound can be very loose.
     if scale <= .5:
         with self.subTest("training bound loose"):
             self.assertAllClose(bits_training,
                                 bits_eval,
                                 atol=0,
                                 rtol=1.25)
             self.assertNotAllClose(bits_training,
                                    bits_eval,
                                    atol=0,
                                    rtol=1e-2)
     # For high entropy distributions, the training bound should be tight.
     if scale >= 64:
         with self.subTest("training bound tight"):
             self.assertAllClose(bits_training,
                                 bits_eval,
                                 atol=0,
                                 rtol=1e-5)
     # The overhead of range coding should always be manageable.
     with self.subTest("range coding overhead"):
         self.assertAllClose(bits_compressed, bits_eval, atol=0, rtol=5e-3)
Example #17
  def test_bitstring_length_matches_entropy_normal(self, scale=1e-8):
    prior = uniform_noise.NoisyNormal(loc=100 * tf.range(15.0), scale=scale)
    base_df = prior.base
    em = universal.UniversalBatchedEntropyModel(
        prior, coding_rank=2, compression=True)
    num_samples = 100000
    x = base_df.sample(num_samples, seed=0)
    bitstring = em.compress(x)
    x_decoded = em.decompress(bitstring, (num_samples,))
    bits = len(bitstring.numpy()) * 8
    bits_per_sample = bits / num_samples
    # Quantization noise should be between -.5 and .5
    self.assertAllLessEqual(tf.abs(x - x_decoded), 0.5)

    # Let's estimate the entropy by sampling from the distribution.
    samples = prior.sample(num_samples, seed=0)
    log_probs = prior.log_prob(samples) / tf.math.log(2.0)
    # Average over samples so this matches `bits_per_sample` above.
    entropy_bits = -tf.reduce_sum(log_probs) / num_samples
    rtol = 0.01  # Maximum relative error 1%.
    atol = 16  # Maximum 2 bytes absolute error.
    self.assertLessEqual(bits_per_sample, entropy_bits * (1 + rtol) + atol)
Example #18
  def test_compression_works_in_tf_function(self):
    noisy = uniform_noise.NoisyNormal(loc=0, scale=5.)
    sample = noisy.base.sample([100])

    # Since tf.function traces each function twice, and only allows variable
    # creation in the first call, we need to have a stateful object in which we
    # create the entropy model only the first time the function is called, and
    # store it for the second time.

    class Compressor(object):

      def compress(self, values):
        if not hasattr(self, "em"):
          self.em = ContinuousBatchedEntropyModel(noisy, 1, compression=True)
        compressed = self.em.compress(values)
        decompressed = self.em.decompress(compressed, [100])
        return decompressed

    values_eager = Compressor().compress(sample)
    values_function = tf.function(Compressor().compress)(sample)
    self.assertAllEqual(values_eager, values_function)
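An alternative to the stateful wrapper is to create the entropy model eagerly, before tracing, so that tf.function never has to create variables; a minimal sketch reusing `noisy` and `sample` from above:

# Sketch: building the model eagerly sidesteps the variable-creation
# restriction, since tf.function then only traces compress/decompress.
em = ContinuousBatchedEntropyModel(noisy, 1, compression=True)

@tf.function
def roundtrip(values):
  return em.decompress(em.compress(values), [100])

values_function = roundtrip(sample)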
Example #19
 def test_expected_grads_gives_gradients(self):
   priors = {
       "deep_factorized":
           deep_factorized.NoisyDeepFactorized(batch_shape=(16,)),
       "normal":
           uniform_noise.NoisyNormal(loc=tf.range(16.0), scale=1.0)
   }
   prior = priors["deep_factorized"]
   em = universal.UniversalBatchedEntropyModel(
       prior, coding_rank=2, compression=True, expected_grads=True)
   self.assertTrue(em._expected_grads)
   num_symbols = 1000
    # The source distribution is fixed as Gaussian.
   source = priors["normal"].base
   x = source.sample((3, num_symbols), seed=0)
   with tf.GradientTape(persistent=True) as g:
     g.watch(x)
     x2, bits = em(x, training=True)
   self.assertIsInstance(g.gradient(x2, x), tf.Tensor)
   self.assertIsInstance(g.gradient(bits, x), tf.Tensor)
   for variable in em.trainable_variables:
     self.assertIsInstance(g.gradient(bits, variable), tf.Tensor)
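For intuition about `expected_grads`: rather than backpropagating through a single noise realization, gradients can be taken of an average over the additive uniform noise. A minimal Monte Carlo sketch of that idea (an illustration only, not the library's analytic implementation):

import tensorflow as tf

def mc_expected_grad(f, x, num_samples=128):
  # Estimate the gradient of E_u[f(x + u)], u ~ U(-.5, .5), by averaging
  # over many noise draws instead of using a single perturbed sample.
  u = tf.random.uniform((num_samples,) + tuple(x.shape), -.5, .5)
  with tf.GradientTape() as tape:
    tape.watch(x)
    y = tf.reduce_mean(f(x[None] + u))
  return tape.gradient(y, x)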
Example #20
 def test_dtypes_are_correct_with_mixed_precision(self):
     tf.keras.mixed_precision.set_global_policy("mixed_float16")
     try:
         noisy = uniform_noise.NoisyNormal(
             loc=tf.constant(0, dtype=tf.float64),
             scale=tf.constant(1, dtype=tf.float64))
         em = ContinuousBatchedEntropyModel(noisy, 1, compression=True)
         self.assertEqual(em.bottleneck_dtype, tf.float16)
         self.assertEqual(em.prior.dtype, tf.float64)
         x = tf.random.stateless_normal((2, 5),
                                        seed=(0, 1),
                                        dtype=tf.float16)
         x_tilde, bits = em(x)
         bitstring = em.compress(x)
         x_hat = em.decompress(bitstring, (5, ))
         self.assertEqual(x_hat.dtype, tf.float16)
         self.assertAllClose(x, x_hat, rtol=0, atol=.5)
         self.assertEqual(x_tilde.dtype, tf.float16)
         self.assertAllClose(x, x_tilde, rtol=0, atol=.5)
         self.assertEqual(bits.dtype, tf.float64)
         self.assertEqual(bits.shape, (2, ))
         self.assertAllGreaterEqual(bits, 0.)
     finally:
         tf.keras.mixed_precision.set_global_policy(None)