Example #1
  def bits(self, bottleneck, indexes, training=True):
    """Estimates the number of bits needed to compress a tensor.

    Arguments:
      bottleneck: `tf.Tensor` containing the data to be compressed.
      indexes: `tf.Tensor` specifying the scalar distribution for each element
        in `bottleneck`. See class docstring for examples.
      training: Boolean. If `False`, computes the Shannon information of
        `bottleneck` under the distribution computed by `self.prior_fn`, which
        is a non-differentiable, tight *lower* bound on the number of bits
        needed to compress `bottleneck` using `compress()`. If `True`, returns
        a somewhat looser, but differentiable *upper* bound on this quantity.

    Returns:
      A `tf.Tensor` having the same shape as `bottleneck` without the
      `self.coding_rank` innermost dimensions, containing the number of bits.
    """
    indexes = self._normalize_indexes(indexes)
    prior = self._make_prior(indexes)
    if training:
      quantized = bottleneck + tf.random.uniform(
          tf.shape(bottleneck), minval=-.5, maxval=.5, dtype=bottleneck.dtype)
    else:
      offset = helpers.quantization_offset(prior)
      quantized = self._quantize(bottleneck, offset)
    probs = prior.prob(quantized)
    probs = math_ops.lower_bound(probs, self.likelihood_bound)
    axes = tuple(range(-self.coding_rank, 0))
    bits = tf.reduce_sum(tf.math.log(probs), axis=axes) / (
        -tf.math.log(tf.constant(2., dtype=probs.dtype)))
    return bits
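A usage sketch (not from the source): assuming `em` is an entropy model instance exposing this `bits()` method with `coding_rank=1`, the training-time estimate can serve directly as a rate loss. Shapes and the `indexes` values are illustrative.

 import tensorflow as tf

 # `em` is a hypothetical entropy model instance with coding_rank=1, so
 # each row of `bottleneck` is one coding unit and `bits` has shape [8].
 bottleneck = tf.random.normal([8, 16])
 indexes = tf.random.uniform([8, 16], maxval=10, dtype=tf.int32)
 bits = em.bits(bottleneck, indexes, training=True)
 rate_loss = tf.reduce_mean(bits)  # differentiable upper bound on the rate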
Example #2
 def test_tails_and_offset_are_in_order(self):
   dist = self.dist_cls(loc=10.3, scale=1.5)
   offset = helpers.quantization_offset(dist)
   lower_tail = helpers.lower_tail(dist, 2**-8)
   upper_tail = helpers.upper_tail(dist, 2**-8)
   self.assertGreater(upper_tail, lower_tail)
   self.assertAllClose(offset, 0.3)
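The asserted 0.3 is the decimal part of the mode (`loc=10.3`). A minimal sketch of that relationship, assuming the heuristic measures the mode's distance to the nearest integer:

 import tensorflow as tf

 loc = tf.constant(10.3)
 offset = loc - tf.round(loc)  # 10.3 - 10.0 -> ~0.3, matching the assertion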
Example #3
 def test_tails_and_offset_are_in_order(self):
   dist = self.dist_cls(loc=[5.4, 8.6], scale=[1.4, 2], weight=[.6, .4])
   offset = helpers.quantization_offset(dist)
   lower_tail = helpers.lower_tail(dist, 2**-8)
   upper_tail = helpers.upper_tail(dist, 2**-8)
   self.assertGreater(upper_tail, lower_tail)
   self.assertAllClose(offset, 0.4)  # Decimal part of the peakiest mode (5.4).
Example #4
 def test_tails_and_offset_are_in_order(self):
     df = deep_factorized.NoisyDeepFactorized()
     offset = helpers.quantization_offset(df)
     lower_tail = helpers.lower_tail(df, 2**-8)
     upper_tail = helpers.upper_tail(df, 2**-8)
     self.assertGreater(upper_tail, offset)
     self.assertGreater(offset, lower_tail)
Example #5
 def test_tails_and_offset_are_in_order(self):
     dist = self.dist_cls(loc=10, scale=[1.5, 2], weight=[.5, .5])
     offset = helpers.quantization_offset(dist)
     lower_tail = helpers.lower_tail(dist, 2**-8)
     upper_tail = helpers.upper_tail(dist, 2**-8)
     self.assertGreater(upper_tail, offset)
     self.assertGreater(offset, lower_tail)
Example #6
 def _quantization_offset(self):
     # Picks the "peakiest" of the component quantization offsets.
     offsets = helpers.quantization_offset(self.components_distribution)
     rank = self.batch_shape.rank
     transposed_offsets = tf.transpose(offsets, [rank] + list(range(rank)))
     component = tf.argmax(self.log_prob(transposed_offsets), axis=0)
     return tf.gather(offsets, component, axis=-1, batch_dims=rank)
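A standalone sketch of the argmax/gather pattern above, with made-up values: for each batch element, keep the candidate offset whose log-density is largest. `batch_dims=1` pairs each batch row with its selected component.

 import tensorflow as tf

 offsets = tf.constant([[0.4, 0.6]])      # [batch, components]
 log_probs = tf.constant([[-1.2, -3.4]])  # per-candidate log-density
 component = tf.argmax(log_probs, axis=-1)                      # -> [0]
 picked = tf.gather(offsets, component, axis=-1, batch_dims=1)  # -> [0.4]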
Example #7
  def compress(self, bottleneck, indexes):
    """Compresses a floating-point tensor.

    Compresses the tensor to bit strings. `bottleneck` is first quantized
    as in `quantize()`, and then compressed using the probability tables
    derived from `indexes`. The quantized tensor can later be recovered by
    calling `decompress()`.

    The innermost `self.coding_rank` dimensions are treated as one coding
    unit, i.e. are compressed into one string each. Any additional dimensions
    to the left are treated as batch dimensions.

    Arguments:
      bottleneck: `tf.Tensor` containing the data to be compressed.
      indexes: `tf.Tensor` specifying the scalar distribution for each element
        in `bottleneck`. See class docstring for examples.

    Returns:
      A `tf.Tensor` having the same shape as `bottleneck` without the
      `self.coding_rank` innermost dimensions, containing a string for each
      coding unit.
    """
    indexes = self._normalize_indexes(indexes)
    flat_indexes = self._flatten_indexes(indexes)

    symbols_shape = tf.shape(flat_indexes)
    batch_shape = symbols_shape[:-self.coding_rank]
    flat_shape = tf.concat([[-1], symbols_shape[-self.coding_rank:]], 0)

    flat_indexes = tf.reshape(flat_indexes, flat_shape)

    offset = helpers.quantization_offset(self._make_prior(indexes))
    symbols = tf.cast(tf.round(bottleneck - offset), tf.int32)
    symbols = tf.reshape(symbols, flat_shape)

    # Prevent tensors from bouncing back and forth between host and GPU.
    with tf.device("/cpu:0"):
      cdf = self.cdf
      cdf_length = self.cdf_length
      cdf_offset = self.cdf_offset

      def loop_body(args):
        return range_coding_ops.unbounded_index_range_encode(
            args[0], args[1], cdf, cdf_length, cdf_offset,
            precision=self.range_coder_precision,
            overflow_width=4, debug_level=1)

      # TODO(jonycgn,ssjhv): Consider switching to Python control flow.
      strings = tf.map_fn(
          loop_body, (symbols, flat_indexes), dtype=tf.string, name="compress")

    strings = tf.reshape(strings, batch_shape)
    return strings
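Illustrative usage (not from the source): with `em` built with `compression=True` and `coding_rank=1`, each row of the bottleneck compresses to one bit string.

 import tensorflow as tf

 bottleneck = tf.random.normal([8, 16])
 indexes = tf.random.uniform([8, 16], maxval=10, dtype=tf.int32)
 strings = em.compress(bottleneck, indexes)  # shape [8], dtype tf.string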
Example #8
  def __call__(self, bottleneck, indexes, training=True):
    """Perturbs a tensor with (quantization) noise and estimates bitcost.

    Args:
      bottleneck: `tf.Tensor` containing the data to be compressed.
      indexes: `tf.Tensor` specifying the scalar distribution for each element
        in `bottleneck`. See class docstring for examples.
      training: Boolean. If `False`, computes the Shannon information of
        `bottleneck` under the distribution computed by `self.prior_fn`, which
        is a non-differentiable, tight *lower* bound on the number of bits
        needed to compress `bottleneck` using `compress()`. If `True`, returns
        a somewhat looser, but differentiable *upper* bound on this quantity.

    Returns:
      A tuple (bottleneck_perturbed, bits), where `bottleneck_perturbed` is
      `bottleneck` perturbed with (quantization) noise, and `bits` is the
      bitcost, having the same shape as `bottleneck` without the
      `self.coding_rank` innermost dimensions.
    """
    indexes = self._normalize_indexes(indexes)
    prior = self._make_prior(indexes)
    if training:

      def log_prob_fn(bottleneck_perturbed, indexes):
        # When using expected_grads=True, we will use a tf.custom_gradient on
        # this function. In this case, all non-Variable tensors that determine
        # the result of this function need to be declared explicitly, i.e. we
        # need `indexes` to be a declared argument and `prior` instantiated
        # here. If we were to instantiate it outside this function declaration
        # and reference it here via a closure, we would get a `None` gradient
        # for `indexes`.
        prior = self._make_prior(indexes)
        return self._log_prob_from_prior(prior, bottleneck_perturbed)

      # `perturb_and_apply` adds the uniform noise itself and returns both
      # the log probabilities and the perturbed bottleneck.
      log_probs, bottleneck_perturbed = math_ops.perturb_and_apply(
          log_prob_fn, bottleneck, indexes,
          expected_grads=self._expected_grads)
    else:
      offset = helpers.quantization_offset(prior)
      bottleneck_perturbed = self._quantize(bottleneck, offset)
      log_probs = self._log_prob_from_prior(prior, bottleneck_perturbed)
    axes = tuple(range(-self.coding_rank, 0))
    bits = tf.reduce_sum(log_probs, axis=axes) / (
        -tf.math.log(tf.constant(2, dtype=log_probs.dtype)))
    return bottleneck_perturbed, bits
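A hedged sketch of how this `__call__` typically appears in a rate-distortion training step; `analysis`, `synthesis`, `em`, `lmbda`, `x`, and `indexes` are placeholders, not part of the source.

 import tensorflow as tf

 y = analysis(x)                                # bottleneck tensor
 y_tilde, bits = em(y, indexes, training=True)  # noisy values + rate estimate
 x_hat = synthesis(y_tilde)
 loss = lmbda * tf.reduce_mean(tf.square(x - x_hat)) + tf.reduce_mean(bits)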
Example #9
  def __init__(self, prior, coding_rank, compression=False,
               likelihood_bound=1e-9, tail_mass=2**-8,
               range_coder_precision=12):
    """Initializer.

    Arguments:
      prior: A `tfp.distributions.Distribution` object. A density model fitting
        the marginal distribution of the bottleneck data with additive uniform
        noise, which is shared a priori between the sender and the receiver. For
        best results, the distribution should be flexible enough to have a
        unit-width uniform distribution as a special case, since this is the
        marginal distribution for bottleneck dimensions that are constant. The
        distribution parameters may not depend on data (they must be either
        variables or constants).
      coding_rank: Integer. Number of innermost dimensions considered a coding
        unit. Each coding unit is compressed to its own bit string, and the
        `bits()` method sums over each coding unit.
      compression: Boolean. If set to `True`, the range coding tables used by
        `compress()` and `decompress()` will be built on instantiation. This
        assumes eager mode (throws an error if in graph mode or inside a
        `tf.function` call). If set to `False`, these two methods will not be
        accessible.
      likelihood_bound: Float. Lower bound for likelihood values, to prevent
        training instabilities.
      tail_mass: Float. Approximate probability mass which is range encoded with
        less precision, by using a Golomb-like code.
      range_coder_precision: Integer. Precision passed to the range coding op.

    Raises:
      RuntimeError: when attempting to instantiate an entropy model with
        `compression=True` and not in eager execution mode.
    """
    if coding_rank < prior.batch_shape.rank:
      raise ValueError(
          "`coding_rank` can't be smaller than batch rank of prior.")
    super().__init__(
        prior, coding_rank, compression=compression,
        likelihood_bound=likelihood_bound, tail_mass=tail_mass,
        range_coder_precision=range_coder_precision)

    quantization_offset = helpers.quantization_offset(prior)
    if self.compression:
      # Optimization: if the quantization offset is zero, we don't need to
      # subtract/add it when quantizing, and we don't need to serialize its
      # value. Note that this code will only work in eager mode.
      if tf.reduce_all(tf.equal(quantization_offset, 0.)):
        quantization_offset = None
      else:
        quantization_offset = tf.broadcast_to(
            quantization_offset, self.prior_shape)
        quantization_offset = tf.Variable(
            quantization_offset, trainable=False, name="quantization_offset")
    self._quantization_offset = quantization_offset
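For illustration, an instantiation matching this signature might look as follows; the package alias and `NoisyNormal` prior follow common usage of `tensorflow_compression`, while `EntropyModel` stands in for whichever class defines this initializer.

 import tensorflow_compression as tfc

 # Prior over the bottleneck with additive uniform noise, per the docstring.
 prior = tfc.NoisyNormal(loc=0., scale=1.)
 em = EntropyModel(prior, coding_rank=1, compression=True)  # eager mode only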
Example #10
  def decompress(self, strings, indexes):
    """Decompresses a tensor.

    Reconstructs the quantized tensor from bit strings produced by
    `compress()`.

    Arguments:
      strings: `tf.Tensor` containing the compressed bit strings.
      indexes: `tf.Tensor` specifying the scalar distribution for each output
        element. See class docstring for examples.

    Returns:
      A `tf.Tensor` of the same shape as `indexes` (without the optional
      channel dimension).
    """
    indexes = self._normalize_indexes(indexes)
    flat_indexes = self._flatten_indexes(indexes)

    symbols_shape = tf.shape(flat_indexes)
    flat_shape = tf.concat([[-1], symbols_shape[-self.coding_rank:]], 0)

    flat_indexes = tf.reshape(flat_indexes, flat_shape)

    strings = tf.reshape(strings, [-1])

    # Prevent tensors from bouncing back and forth between host and GPU.
    with tf.device("/cpu:0"):
      cdf = self.cdf
      cdf_length = self.cdf_length
      cdf_offset = self.cdf_offset

      def loop_body(args):
        return range_coding_ops.unbounded_index_range_decode(
            args[0], args[1], cdf, cdf_length, cdf_offset,
            precision=self.range_coder_precision,
            overflow_width=4, debug_level=1)

      # TODO(jonycgn,ssjhv): Consider switching to Python control flow.
      symbols = tf.map_fn(
          loop_body, (strings, flat_indexes), dtype=tf.int32,
          name="decompress")

    symbols = tf.reshape(symbols, symbols_shape)
    offset = helpers.quantization_offset(self._make_prior(indexes))
    return tf.cast(symbols, self.dtype) + offset
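Continuing the `compress()` sketch above: decompressing with the same `indexes` recovers the quantized values exactly, since sender and receiver share identical tables.

 bottleneck_hat = em.decompress(strings, indexes)
 # bottleneck_hat equals em.quantize(bottleneck, indexes) elementwise.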
Example #11
 def quantization_offset(self):
   if self._quantization_offset is not None:
     return tf.convert_to_tensor(self._quantization_offset)
   if self.offset_heuristic and not self.compression:
     if self._prior is None:
       raise RuntimeError(
           "To use the offset heuristic, a `prior` needs to be provided.")
     if not tf.executing_eagerly():
       logging.warning(
           "Computing quantization offsets using offset heuristic within a "
           "tf.function. Ideally, the offset heuristic should only be used "
           "to determine offsets once after training. Depending on the prior, "
           "estimating the offset might be computationally expensive.")
     return tf.cast(
         helpers.quantization_offset(self.prior), self.bottleneck_dtype)
   return None
Example #12
 def _cache_quantization_offset(self):
     """Comptue quantization offset from prior and cache it."""
     quantization_offset = helpers.quantization_offset(self.prior)
     # Optimization: if the quantization offset is zero, we don't need to
     # subtract/add it when quantizing, and we don't need to serialize its value.
     # Note that this code will only work in eager mode.
     # TODO(jonycgn): Reconsider if this optimization is worth keeping once the
     # implementation is stable.
     if tf.executing_eagerly() and tf.reduce_all(
             tf.equal(quantization_offset, 0.)):
         quantization_offset = None
     else:
         quantization_offset = tf.broadcast_to(quantization_offset,
                                               self.prior_shape_tensor)
         if self.compression and not self.no_variables:
             quantization_offset = tf.Variable(quantization_offset,
                                               trainable=False,
                                               name="quantization_offset")
     self._quantization_offset = quantization_offset
Example #13
  def quantize(self, bottleneck, indexes):
    """Quantizes a floating-point tensor.

    To use this entropy model as an information bottleneck during training,
    pass a tensor through this function. The tensor is rounded to integer
    values modulo a quantization offset, which depends on `indexes`. For
    instance, for Gaussian distributions, the returned values are rounded to
    the location of the mode of the distributions plus or minus an integer.

    The gradient of this rounding operation is overridden with the identity
    (straight-through gradient estimator).

    Arguments:
      bottleneck: `tf.Tensor` containing the data to be quantized.
      indexes: `tf.Tensor` specifying the scalar distribution for each element
        in `bottleneck`. See class docstring for examples.

    Returns:
      A `tf.Tensor` containing the quantized values.
    """
    indexes = self._normalize_indexes(indexes)
    offset = helpers.quantization_offset(self._make_prior(indexes))
    return self._quantize(bottleneck, offset)
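A numeric sketch of the rounding rule described above: values snap to the integer grid shifted by the offset (a made-up offset of 0.3).

 import tensorflow as tf

 offset = 0.3
 x = tf.constant([1.9, 2.2, -0.6])
 x_hat = tf.round(x - offset) + offset  # -> [2.3, 2.3, -0.7]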
Example #14
 def _offset_from_indexes(self, indexes):
     """Compute the quantization offset from the respective prior."""
     prior = self._make_prior(indexes)
     return helpers.quantization_offset(prior)
Example #15
 def _quantization_offset(self):
     # Same logic as for _quantile.
     if not self.invertible:
         raise NotImplementedError()
     return self.transform(helpers.quantization_offset(self.base))
Example #16
  def _build_tables(self, prior):
    """Computes integer-valued probability tables used by the range coder.

    These tables must not be re-generated independently on the sending and
    receiving side, since small numerical discrepancies between both sides can
    occur in this process. If the tables differ slightly, this in turn would
    very likely cause catastrophic error propagation during range decoding. For
    a more in-depth discussion of this, see:

    > "Integer Networks for Data Compression with Latent-Variable Models"<br />
    > J. Ballé, N. Johnston, D. Minnen<br />
    > https://openreview.net/forum?id=S1zz2i0cY7

    The tables are stored in `tf.Variable`s as attributes of this object. The
    recommended way is to train the model, instantiate an entropy model with
    `compression=True`, and then distribute the model to a sender and a
    receiver.

    Arguments:
      prior: The `tfp.distributions.Distribution` object (see initializer).
    """
    offset = helpers.quantization_offset(prior)
    lower_tail = helpers.lower_tail(prior, self.tail_mass)
    upper_tail = helpers.upper_tail(prior, self.tail_mass)

    # Largest distance observed between lower tail and median, and between
    # median and upper tail.
    minima = offset - lower_tail
    minima = tf.cast(tf.math.ceil(minima), tf.int32)
    minima = tf.math.maximum(minima, 0)
    maxima = upper_tail - offset
    maxima = tf.cast(tf.math.ceil(maxima), tf.int32)
    maxima = tf.math.maximum(maxima, 0)

    # PMF starting positions and lengths.
    pmf_start = offset - tf.cast(minima, self.dtype)
    pmf_length = maxima + minima + 1

    # Sample the densities in the computed ranges, possibly computing more
    # samples than necessary at the upper end.
    max_length = tf.math.reduce_max(pmf_length)
    if max_length > 2048:
      logging.warning(
          "Very wide PMF with %d elements may lead to out of memory issues. "
          "Consider priors with smaller dispersion or increasing `tail_mass` "
          "parameter.", int(max_length))
    samples = tf.range(tf.cast(max_length, self.dtype), dtype=self.dtype)
    samples = tf.reshape(samples, [-1] + len(self.prior_shape) * [1])
    samples += pmf_start
    pmf = prior.prob(samples)

    # Collapse batch dimensions of distribution.
    pmf = tf.reshape(pmf, [max_length, -1])
    pmf = tf.transpose(pmf)

    pmf_length = tf.broadcast_to(pmf_length, self.prior_shape)
    pmf_length = tf.reshape(pmf_length, [-1])
    cdf_length = pmf_length + 2
    cdf_offset = tf.broadcast_to(-minima, self.prior_shape)
    cdf_offset = tf.reshape(cdf_offset, [-1])

    # Prevent tensors from bouncing back and forth between host and GPU.
    with tf.device("/cpu:0"):
      def loop_body(args):
        prob, length = args
        prob = prob[:length]
        prob = tf.concat([prob, 1 - tf.reduce_sum(prob, keepdims=True)], axis=0)
        cdf = range_coding_ops.pmf_to_quantized_cdf(
            prob, precision=self.range_coder_precision)
        return tf.pad(
            cdf, [[0, max_length - length]], mode="CONSTANT", constant_values=0)

      # TODO(jonycgn,ssjhv): Consider switching to Python control flow.
      cdf = tf.map_fn(
          loop_body, (pmf, pmf_length), dtype=tf.int32, name="pmf_to_cdf")

    self._cdf = tf.Variable(cdf, trainable=False, name="cdf")
    self._cdf_offset = tf.Variable(
        cdf_offset, trainable=False, name="cdf_offset")
    self._cdf_length = tf.Variable(
        cdf_length, trainable=False, name="cdf_length")
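A worked example of the table-sizing arithmetic above, with made-up tail values (offset 0.2, lower tail -4.7, upper tail 5.3):

 import math

 minima = max(math.ceil(0.2 - (-4.7)), 0)  # ceil(4.9) -> 5
 maxima = max(math.ceil(5.3 - 0.2), 0)     # ceil(5.1) -> 6
 pmf_start = 0.2 - minima                  # -4.8: first sampling location
 pmf_length = maxima + minima + 1          # 12 grid points span both tails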
Example #17
 def _offset_from_prior(self, prior):
   """Computes quantization offset from the prior distribution."""
   return helpers.quantization_offset(prior)
Example #18
 def test_quantization_offset_is_zero(self):
     df = deep_factorized.NoisyDeepFactorized()
     self.assertEqual(helpers.quantization_offset(df), 0)
Example #19
 def test_cauchy_quantizes_to_mode(self):
     dist = tfp.distributions.Cauchy(loc=1.5, scale=3.)
     self.assertEqual(helpers.quantization_offset(dist), 1.5)
Example #20
  def __init__(self,
               prior=None,
               coding_rank=None,
               compression=False,
               stateless=False,
               expected_grads=False,
               tail_mass=2**-8,
               range_coder_precision=12,
               bottleneck_dtype=None,
               prior_shape=None,
               cdf=None,
               cdf_offset=None,
               cdf_shapes=None,
               offset_heuristic=True,
               quantization_offset=None,
               laplace_tail_mass=0):
    """Initializes the instance.

    Args:
      prior: A `tfp.distributions.Distribution` object. A density model fitting
        the marginal distribution of the bottleneck data with additive uniform
        noise, which is shared a priori between the sender and the receiver. For
        best results, the distribution should be flexible enough to have a
        unit-width uniform distribution as a special case, since this is the
        marginal distribution for bottleneck dimensions that are constant. The
        distribution parameters may not depend on data (they must be either
        variables or constants).
      coding_rank: Integer. Number of innermost dimensions considered a coding
        unit. Each coding unit is compressed to its own bit string, and the
        bits in the `__call__()` method are summed over each coding unit.
      compression: Boolean. If set to `True`, the range coding tables used by
        `compress()` and `decompress()` will be built on instantiation. If set
        to `False`, these two methods will not be accessible.
      stateless: Boolean. If `False`, range coding tables are created as
        `Variable`s. This allows the entropy model to be serialized using the
        `SavedModel` protocol, so that both the encoder and the decoder use
        identical tables when loading the stored model. If `True`, creates range
        coding tables as `Tensor`s. This makes the entropy model stateless and
        allows it to be constructed within a `tf.function` body, for when the
        range coding tables are provided manually. If `compression=False`, then
        `stateless=True` is implied and the provided value is ignored.
      expected_grads: If True, will use analytical expected gradients during
        backpropagation w.r.t. additive uniform noise.
      tail_mass: Float. Approximate probability mass which is encoded using an
        Elias gamma code embedded into the range coder.
      range_coder_precision: Integer. Precision passed to the range coding op.
      bottleneck_dtype: `tf.dtypes.DType`. Data type of bottleneck tensor.
        Defaults to `tf.keras.mixed_precision.global_policy().compute_dtype`.
      prior_shape: Batch shape of the prior (dimensions which are not assumed
        i.i.d.). Must be provided if `prior` is omitted.
      cdf: `tf.Tensor` or `None`. If provided, is used for range coding rather
        than tables built from the prior.
      cdf_offset: `tf.Tensor` or `None`. Must be provided along with `cdf`.
      cdf_shapes: Shapes of `cdf` and `cdf_offset`. If provided, empty range
        coding tables are created, which can then be restored using
        `set_weights`. Requires `compression=True` and `stateless=False`.
      offset_heuristic: Boolean. Whether to quantize to non-integer offsets
        heuristically determined from mode/median of prior. Set this to `False`
        if you are using soft quantization during training.
      quantization_offset: `tf.Tensor` or `None`. The quantization offsets to
        use. If provided (not `None`), then `offset_heuristic` is ineffective.
      laplace_tail_mass: Float. If positive, will augment the prior with a
        Laplace mixture for training stability. (experimental)
    """
    if (prior is None) == (prior_shape is None):
      raise ValueError("Either `prior` or `prior_shape` must be provided.")
    if (prior is None) + (cdf_shapes is None) + (cdf is None) != 2:
      raise ValueError(
          "Must provide exactly one of `prior`, `cdf`, or `cdf_shapes`.")
    if not compression and not (
        cdf is None and cdf_offset is None and cdf_shapes is None):
      raise ValueError("CDFs can't be provided with `compression=False`")
    if prior is not None and prior.event_shape.rank:
      raise ValueError("`prior` must be a (batch of) scalar distribution(s).")

    super().__init__(
        coding_rank=coding_rank,
        compression=compression,
        stateless=stateless,
        expected_grads=expected_grads,
        tail_mass=tail_mass,
        bottleneck_dtype=bottleneck_dtype,
        laplace_tail_mass=laplace_tail_mass,
    )
    self._prior = prior
    self._offset_heuristic = bool(offset_heuristic)
    self._prior_shape = tf.TensorShape(
        prior_shape if prior is None else prior.batch_shape)
    if self.coding_rank < self.prior_shape.rank:
      raise ValueError("`coding_rank` can't be smaller than `prior_shape`.")

    with self.name_scope:
      if cdf_shapes is not None:
        # `cdf_shapes` being set indicates that we are using the `SavedModel`
        # protocol, which can only provide JSON datatypes. So create a
        # placeholder value depending on whether `quantization_offset` was
        # `None` or not. For this purpose, we expect a Boolean (when in all
        # other cases, we expect either `None` or a tensor).
        assert isinstance(quantization_offset, bool)
        assert self.compression
        if quantization_offset:
          quantization_offset = tf.zeros(self.prior_shape_tensor)
        else:
          quantization_offset = None
      elif quantization_offset is not None:
        # If quantization offset is passed in manually, use it.
        pass
      elif self.offset_heuristic and self.compression:
        # For compression, we need to fix the offset value, so compute it here.
        if self._prior is None:
          raise ValueError(
              "To use the offset heuristic, a `prior` needs to be provided.")
        quantization_offset = helpers.quantization_offset(self.prior)
        # Optimization: if the quantization offset is zero, we don't need to
        # subtract/add it when quantizing, and we don't need to serialize its
        # value. Note that this code will only work in eager mode.
        if (tf.executing_eagerly() and
            tf.reduce_all(tf.equal(quantization_offset, 0.))):
          quantization_offset = None
        else:
          quantization_offset = tf.broadcast_to(
              quantization_offset, self.prior_shape_tensor)
      else:
        quantization_offset = None
      if quantization_offset is None:
        self._quantization_offset = None
      elif self.compression and not self.stateless:
        quantization_offset = tf.cast(
            quantization_offset, self.bottleneck_dtype)
        self._quantization_offset = tf.Variable(
            quantization_offset, trainable=False, name="quantization_offset")
      else:
        quantization_offset = tf.cast(
            quantization_offset, self.bottleneck_dtype)
        self._quantization_offset = tf.convert_to_tensor(
            quantization_offset, name="quantization_offset")
      if self.compression:
        if cdf is None and cdf_shapes is None:
          cdf, cdf_offset = self._build_tables(
              self.prior, range_coder_precision, offset=quantization_offset)
        self._init_compression(cdf, cdf_offset, cdf_shapes)
Example #21
  def __init__(self,
               prior=None,
               coding_rank=None,
               compression=False,
               stateless=False,
               expected_grads=False,
               tail_mass=2**-8,
               range_coder_precision=12,
               dtype=None,
               prior_shape=None,
               cdf=None,
               cdf_offset=None,
               cdf_length=None,
               cdf_max_length=None,
               non_integer_offsets=True,
               quantization_offset=None,
               laplace_tail_mass=0):
    """Initializes the instance.

    Args:
      prior: A `tfp.distributions.Distribution` object. A density model fitting
        the marginal distribution of the bottleneck data with additive uniform
        noise, which is shared a priori between the sender and the receiver.
        For best results, the distribution should be flexible enough to have a
        unit-width uniform distribution as a special case, since this is the
        marginal distribution for bottleneck dimensions that are constant. The
        distribution parameters may not depend on data (they must be either
        variables or constants).
      coding_rank: Integer. Number of innermost dimensions considered a coding
        unit. Each coding unit is compressed to its own bit string, and the
        bits in the `__call__()` method are summed over each coding unit.
      compression: Boolean. If set to `True`, the range coding tables used by
        `compress()` and `decompress()` will be built on instantiation. If set
        to `False`, these two methods will not be accessible.
      stateless: Boolean. If `False`, range coding tables are created as
        `Variable`s. This allows the entropy model to be serialized using the
        `SavedModel` protocol, so that both the encoder and the decoder use
        identical tables when loading the stored model. If `True`, creates
        range coding tables as `Tensor`s. This makes the entropy model
        stateless and allows it to be constructed within a `tf.function` body,
        for when the range coding tables are provided manually. If
        `compression=False`, then `stateless=True` is implied and the provided
        value is ignored.
      expected_grads: If True, will use analytical expected gradients during
        backpropagation w.r.t. additive uniform noise.
      tail_mass: Float. Approximate probability mass which is range encoded
        with less precision, by using a Golomb-like code.
      range_coder_precision: Integer. Precision passed to the range coding op.
      dtype: Data type of prior. Must be provided when `prior` is omitted.
      prior_shape: Batch shape of the prior (dimensions which are not assumed
        i.i.d.). Must be provided when `prior` is omitted.
      cdf: `tf.Tensor` or `None`. When provided, is used for range coding
        rather than tables built from the prior.
      cdf_offset: `tf.Tensor` or `None`. Must be provided along with `cdf`.
      cdf_length: `tf.Tensor` or `None`. Must be provided along with `cdf`.
      cdf_max_length: Maximum `cdf_length`. When provided, an empty range
        coding table is created, which can then be restored using
        `set_weights`. Requires `compression=True` and `stateless=False`.
      non_integer_offsets: Boolean. Whether to quantize to non-integer offsets
        heuristically determined from mode/median of prior. Set to `False`
        when using soft quantization during training.
      quantization_offset: `tf.Tensor` or `None`. If `cdf` is provided and
        `non_integer_offsets=True`, must be provided.
      laplace_tail_mass: Float. If positive, will augment the prior with a
        Laplace mixture for training stability. (experimental)

    Raises:
      RuntimeError: when attempting to instantiate an entropy model with
        `compression=True` and not in eager execution mode.
    """
    super().__init__(
        prior=prior,
        coding_rank=coding_rank,
        compression=compression,
        stateless=stateless,
        expected_grads=expected_grads,
        tail_mass=tail_mass,
        range_coder_precision=range_coder_precision,
        dtype=dtype,
        prior_shape=prior_shape,
        cdf=cdf,
        cdf_offset=cdf_offset,
        cdf_length=cdf_length,
        cdf_max_length=cdf_max_length,
        laplace_tail_mass=laplace_tail_mass,
    )
    self._non_integer_offsets = bool(non_integer_offsets)
    if self.coding_rank < self.prior_shape.rank:
      raise ValueError("`coding_rank` can't be smaller than `prior_shape`.")

    with self.name_scope:
      if not self.non_integer_offsets:
        quantization_offset = None
      elif prior is not None:
        quantization_offset = helpers.quantization_offset(self.prior)
        # Optimization: if the quantization offset is zero, we don't need to
        # subtract/add it when quantizing, and we don't need to serialize its
        # value. Note that this code will only work in eager mode.
        if (tf.executing_eagerly() and
            tf.reduce_all(tf.equal(quantization_offset, 0.))):
          quantization_offset = None
        else:
          quantization_offset = tf.broadcast_to(
              quantization_offset, self.prior_shape_tensor)
      elif cdf_max_length is not None:
        quantization_offset = tf.zeros(
            self.prior_shape_tensor, dtype=self.dtype)
      else:
        assert cdf is not None
        if quantization_offset is None:
          raise ValueError(
              "When providing `cdf` and `non_integer_offsets=True`, must "
              "also provide `quantization_offset`.")
      if quantization_offset is None:
        self._quantization_offset = None
      elif self.compression and not self.stateless:
        self._quantization_offset = tf.Variable(
            quantization_offset, dtype=self.dtype, trainable=False,
            name="quantization_offset")
      else:
        self._quantization_offset = tf.convert_to_tensor(
            quantization_offset, dtype=self.dtype,
            name="quantization_offset")
Example #22
 def test_logistic_quantizes_to_mode(self):
     dist = tfp.distributions.Logistic(loc=-3., scale=1.)
     self.assertEqual(helpers.quantization_offset(dist), -3.)
Example #23
 def test_laplace_quantizes_to_mode(self):
     dist = tfp.distributions.Laplace(loc=-2., scale=5.)
     self.assertEqual(helpers.quantization_offset(dist), -2.)
Example #24
 def test_lognormal_quantizes_to_mode(self):
     dist = tfp.distributions.LogNormal(loc=4., scale=1.)
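      # Mode of LogNormal(loc, scale) is exp(loc - scale**2) = exp(3.) here.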
     self.assertEqual(helpers.quantization_offset(dist), tf.exp(3.))
Example #25
 def _quantization_offset(self):
     return helpers.quantization_offset(self.base)
Example #26
 def test_lognormal_quantizes_to_mode_decimal_part(self):
     dist = tfp.distributions.LogNormal(loc=4., scale=1.)
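      # Mode is exp(4. - 1.) = exp(3.) ~ 20.086; its decimal part is ~ 0.086.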
     self.assertAllClose(helpers.quantization_offset(dist),
                         tf.exp(3.) - 20.0)
Example #27
 def quantization_offset(self):
     """Distribution-dependent quantization offset."""
     return helpers.quantization_offset(self.prior)
Example #28
 def test_gamma_quantizes_to_mode(self):
     dist = tfp.distributions.Gamma(concentration=5., rate=1.)
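      # Mode of Gamma(concentration, rate) is (concentration - 1) / rate = 4.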
     self.assertEqual(helpers.quantization_offset(dist), 4.)
Example #29
 def test_cauchy_quantizes_to_mode_decimal_part(self):
     dist = tfp.distributions.Cauchy(loc=1.4, scale=3.)
     self.assertAllClose(helpers.quantization_offset(dist), 0.4)
Example #30
 def test_normal_quantizes_to_mode(self):
     dist = tfp.distributions.Normal(loc=3., scale=5.)
     self.assertEqual(helpers.quantization_offset(dist), 3.)