def __call__(self, bottleneck, training=True): """Perturbs a tensor with (quantization) noise and estimates rate. Args: bottleneck: `tf.Tensor` containing the data to be compressed. Must have at least `self.coding_rank` dimensions, and the innermost dimensions must be broadcastable to `self.prior_shape`. training: Boolean. If `False`, computes the Shannon information of `bottleneck` under the distribution `self.prior`, which is a non-differentiable, tight *lower* bound on the number of bits needed to compress `bottleneck` using `compress()`. If `True`, returns a somewhat looser, but differentiable *upper* bound on this quantity. Returns: A tuple (bottleneck_perturbed, bits) where `bottleneck_perturbed` is `bottleneck` perturbed with (quantization) noise, and `bits` is the rate. `bits` has the same shape as `bottleneck` without the `self.coding_rank` innermost dimensions. """ bottleneck = tf.convert_to_tensor(bottleneck, dtype=self.bottleneck_dtype) log_prob_fn = functools.partial(self._log_prob, self.prior) if training: log_probs, bottleneck_perturbed = math_ops.perturb_and_apply( log_prob_fn, bottleneck, expected_grads=self.expected_grads) else: bottleneck_perturbed = self.quantize(bottleneck) log_probs = log_prob_fn(bottleneck_perturbed) axes = tuple(range(-self.coding_rank, 0)) bits = tf.reduce_sum(log_probs, axis=axes) / ( -tf.math.log(tf.constant(2, dtype=log_probs.dtype))) return bottleneck_perturbed, bits
def test_perturb_and_apply_gradient_parabola(self):
  f = lambda x, a: a * x * x
  x = tf.linspace(-2.0, 2.0, 200)
  a = 7.0
  with tf.GradientTape(persistent=True) as g:
    g.watch(x)
    y = math_ops.perturb_and_apply(f, x, a, expected_grads=True)[0]
  dx = g.gradient(y, x)
  self.assertAllClose(dx, f(x + .5, a) - f(x - .5, a))
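
# Why the test expects f(x + .5, a) - f(x - .5, a): with u ~ U(-.5, .5),
#   d/dx E_u[f(x + u)] = d/dx [F(x + .5) - F(x - .5)] = f(x + .5) - f(x - .5),
# where F is an antiderivative of f. A standalone Monte Carlo check of that
# identity (plain NumPy, independent of the library):
import numpy as np

rng = np.random.default_rng(0)
a, x0, eps = 7.0, 1.3, 1e-3
u = rng.uniform(-0.5, 0.5, size=1_000_000)
f = lambda t: a * t * t
# Central difference of the smoothed objective E_u[f(x + u)].
grad_mc = (f(x0 + eps + u).mean() - f(x0 - eps + u).mean()) / (2 * eps)
print(grad_mc, f(x0 + 0.5) - f(x0 - 0.5))  # both approx. 2 * a * x0 = 18.2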
def __call__(self, bottleneck, indexes, training=True): """Perturbs a tensor with additive uniform noise and estimates bitcost. Args: bottleneck: `tf.Tensor` containing a non-perturbed bottleneck. Must have at least `self.coding_rank` dimensions. indexes: `tf.Tensor` specifying the scalar distribution for each element in `bottleneck`. See class docstring for examples. training: Boolean. If `False`, computes the bitcost using discretized uniform noise. If `True`, estimates the differential entropy with uniform noise. Returns: A tuple (bottleneck_perturbed, bits) where `bottleneck_perturbed` is `bottleneck` perturbed with nosie and `bits` is the bitcost of transmitting such a sample having the same shape as `bottleneck` without the `self.coding_rank` innermost dimensions. """ bottleneck = tf.convert_to_tensor(bottleneck, dtype=self.bottleneck_dtype) indexes = self._normalize_indexes(indexes) if training: # Here we compute `h(bottleneck + noise)`. def log_prob_fn(bottleneck_perturbed, indexes): # When using expected_grads=True, we will use a tf.custom_gradient on # this function. In this case, all non-Variable tensors that determine # the result of this function need to be declared explicitly, i.e we # need `indexes` to be a declared argument and `prior` instantiated # here. If we would instantiate it outside this function declaration and # reference here via a closure, we would get a `None` gradient for # `indexes`. prior = self._make_prior(indexes) return self._log_prob(prior, bottleneck_perturbed) log_probs, bottleneck_perturbed = math_ops.perturb_and_apply( log_prob_fn, bottleneck, indexes, expected_grads=self._expected_grads) else: prior = self._make_prior(indexes) # Here we compute `H(round(bottleneck - noise) | noise )`. offset = _offset_indexes_to_offset( _add_offset_indexes(indexes, self._num_noise_levels)[..., 0], self._num_noise_levels, self.bottleneck_dtype) symbols = tf.round(bottleneck - offset) bottleneck_perturbed = symbols + offset log_probs = self._log_prob(prior, bottleneck_perturbed) axes = tuple(range(-self.coding_rank, 0)) bits = tf.reduce_sum(log_probs, axis=axes) / ( -tf.math.log(tf.constant(2., dtype=log_probs.dtype))) return bottleneck_perturbed, bits
def __call__(self, bottleneck, indexes, training=True): """Perturbs a tensor with (quantization) noise and estimates bitcost. Args: bottleneck: `tf.Tensor` containing the data to be compressed. indexes: `tf.Tensor` specifying the scalar distribution for each element in `bottleneck`. See class docstring for examples. training: Boolean. If `False`, computes the Shannon information of `bottleneck` under the distribution computed by `self.prior_fn`, which is a non-differentiable, tight *lower* bound on the number of bits needed to compress `bottleneck` using `compress()`. If `True`, returns a somewhat looser, but differentiable *upper* bound on this quantity. Returns: A tuple (bottleneck_perturbed, bits), where `bottleneck_perturbed` is `bottleneck` perturbed with (quantization) noise and `bits` is the bitcost with the same shape as `bottleneck` without the `self.coding_rank` innermost dimensions. """ indexes = self._normalize_indexes(indexes) prior = self._make_prior(indexes) if training: bottleneck_perturbed = bottleneck + tf.random.uniform( tf.shape(bottleneck), minval=-.5, maxval=.5, dtype=bottleneck.dtype) def log_prob_fn(bottleneck_perturbed, indexes): # When using expected_grads=True, we will use a tf.custom_gradient on # this function. In this case, all non-Variable tensors that determine # the result of this function need to be declared explicitly, i.e we # need `indexes` to be a declared argument and `prior` instantiated # here. If we would instantiate it outside this function declaration and # reference here via a closure, we would get a `None` gradient for # `indexes`. prior = self._make_prior(indexes) return self._log_prob_from_prior(prior, bottleneck_perturbed) log_probs, bottleneck_perturbed = math_ops.perturb_and_apply( log_prob_fn, bottleneck, indexes, expected_grads=self._expected_grads) else: offset = helpers.quantization_offset(prior) bottleneck_perturbed = self._quantize(bottleneck, offset) log_probs = self._log_prob_from_prior(prior, bottleneck_perturbed) axes = tuple(range(-self.coding_rank, 0)) bits = tf.reduce_sum(log_probs, axis=axes) / ( -tf.math.log(tf.constant(2, dtype=log_probs.dtype))) return bottleneck_perturbed, bits
def test_perturb_and_apply_gradient_soft_round(self):
  f = soft_round_ops.soft_round
  x = tf.linspace(-2.0, 2.0, 200)
  temperature = 7.0
  with tf.GradientTape(persistent=True) as g:
    g.watch(x)
    y = math_ops.perturb_and_apply(f, x, temperature, expected_grads=True)[0]
  dx = g.gradient(y, x)
  self.assertAllClose(dx, tf.ones_like(dx))
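
# Why a constant gradient of 1 is expected here: soft_round satisfies
# soft_round(x + 1) == soft_round(x) + 1, so the expected gradient
# f(x + .5) - f(x - .5) collapses to exactly 1 for every x. A quick check
# using the same module as the test above:
x = tf.linspace(-2.0, 2.0, 5)
diff = (soft_round_ops.soft_round(x + .5, 7.0)
        - soft_round_ops.soft_round(x - .5, 7.0))
print(diff)  # approx. [1. 1. 1. 1. 1.]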
def test_perturb_and_apply_noise(self):
  x = tf.random.normal([10000], seed=0)
  y, x_plus_u0 = math_ops.perturb_and_apply(
      tf.identity, x, expected_grads=True)
  u0 = x_plus_u0 - x
  u1 = y - x
  # Check if residuals are as expected.
  self.assertAllClose(u0, u1)
  # Check if noise has expected uniform distribution.
  _, p = scipy.stats.kstest(u0, "uniform", (-0.5, 1.0))
  self.assertAllLessEqual(tf.abs(u0), 0.5)
  self.assertGreater(p, 1e-6)
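
# Note on the kstest call above: scipy parameterizes "uniform" by
# (loc, scale), i.e. U(loc, loc + scale), so args (-0.5, 1.0) test against
# U(-0.5, 0.5), exactly the noise distribution perturb_and_apply draws from.
# Standalone illustration with plain NumPy (hypothetical data):
import numpy as np
import scipy.stats

u = np.random.default_rng(0).uniform(-0.5, 0.5, 10000)
_, p = scipy.stats.kstest(u, "uniform", (-0.5, 1.0))
print(p > 1e-6)  # True: the sample is consistent with U(-0.5, 0.5)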
def __call__(self, bottleneck, training=True): """Perturbs a tensor with additive uniform noise and estimates bitcost. Args: bottleneck: `tf.Tensor` containing a non-perturbed bottleneck. Must have at least `self.coding_rank` dimensions. training: Boolean. If `False`, computes the bitcost using discretized uniform noise. If `True`, estimates the differential entropy with uniform noise. Returns: A tuple (bottleneck_perturbed, bits) where `bottleneck_perturbed` is `bottleneck` perturbed with nosie and `bits` is the bitcost of transmitting such a sample having the same shape as `bottleneck` without the `self.coding_rank` innermost dimensions. """ log_prob_fn = functools.partial(self._log_prob_from_prior, self.prior) if training: log_probs, bottleneck_perturbed = math_ops.perturb_and_apply( log_prob_fn, bottleneck, expected_grads=self._expected_grads) else: # Here we compute `H(round(bottleneck - noise) | noise )`. input_shape = tf.shape(bottleneck) input_rank = tf.shape(input_shape)[0] _, coding_shape = tf.split( input_shape, [input_rank - self.coding_rank, self.coding_rank]) broadcast_shape = coding_shape[:self.coding_rank - len(self.prior_shape)] _, offset = self._compute_indexes_and_offset(broadcast_shape) symbols = tf.round(bottleneck - offset) bottleneck_perturbed = symbols + offset log_probs = log_prob_fn(bottleneck_perturbed) axes = tuple(range(-self.coding_rank, 0)) bits = tf.reduce_sum(log_probs, axis=axes) / ( -tf.math.log(tf.constant(2., dtype=log_probs.dtype))) return bottleneck_perturbed, bits