Example #1
 def __init__(self,
              df,
              validate_args=False,
              allow_nan_stats=True,
              name='Chi2WithAbsDf'):
   parameters = dict(locals())
   with tf.name_scope(name) as name:
     super(Chi2WithAbsDf, self).__init__(
         df=tf.floor(tf.abs(df, name='abs_df'), name='floor_abs_df'),
         validate_args=validate_args,
         allow_nan_stats=allow_nan_stats,
         name=name)
   self._parameters = parameters
  def __call__(self, step):
    with tf.name_scope(self.name or "ExponentialDecay") as name:
      initial_learning_rate = tf.convert_to_tensor(
          self.initial_learning_rate, name="initial_learning_rate")
      dtype = initial_learning_rate.dtype
      decay_steps = tf.cast(self.decay_steps, dtype)
      decay_rate = tf.cast(self.decay_rate, dtype)

      global_step_recomp = tf.cast(step, dtype)
      p = global_step_recomp / decay_steps
      if self.staircase:
        p = tf.floor(p)
      return tf.multiply(
          initial_learning_rate, tf.pow(decay_rate, p), name=name)
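
A minimal usage sketch (not from the source): assuming the `__call__` above mirrors `tf.keras.optimizers.schedules.ExponentialDecay`, the decayed rate at `step` is `initial_learning_rate * decay_rate ** (step / decay_steps)`.

import tensorflow as tf

schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.1, decay_steps=1000, decay_rate=0.5)
print(schedule(0).numpy())     # 0.1
print(schedule(1000).numpy())  # 0.05
print(schedule(2000).numpy())  # 0.025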
Example #3
    def _log_prob(self, x):
        probs = self._probs_parameter_no_checks()
        if not self.validate_args:
            # For consistency with cdf, we take the floor.
            x = tf.floor(x)

        log_probs = tf.math.xlog1py(x, -probs) + tf.math.log(probs)

        if self.force_probs_to_zero_outside_support:
            # Set log_prob = -inf when value is less than 0, i.e. prob = 0.
            log_probs = tf.where(x < 0.,
                                 dtype_util.as_numpy_dtype(x.dtype)(-np.inf),
                                 log_probs)
        return log_probs
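
A quick numerical check (not from the source): for a Geometric distribution with success probability `p`, counting failures before the first success, the expression above is `log P(X = k) = k * log(1 - p) + log(p)`.

import numpy as np

p, k = 0.3, np.arange(5)
log_probs = k * np.log1p(-p) + np.log(p)
np.testing.assert_allclose(np.exp(log_probs), (1. - p) ** k * p)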
    def __call__(self, step):
        with tf.name_scope(self.name or "InverseTimeDecay") as name:
            initial_learning_rate = tf.convert_to_tensor(
                self.initial_learning_rate, name="initial_learning_rate")
            dtype = initial_learning_rate.dtype
            decay_steps = tf.cast(self.decay_steps, dtype)
            decay_rate = tf.cast(self.decay_rate, dtype)

            global_step_recomp = tf.cast(step, dtype)
            p = global_step_recomp / decay_steps
            if self.staircase:
                p = tf.floor(p)
            const = tf.cast(tf.constant(1), dtype)
            denom = tf.add(const, tf.multiply(decay_rate, p))
            return tf.divide(initial_learning_rate, denom, name=name)
    def _prob(self, y):
        # Changes of mass are only at the integers, so we must use tf.floor in our
        # computation of log_cdf/log_sf.  Floor now, since
        # tf.floor(y - 1) can incur unwanted rounding near powers of two, but
        # tf.floor(y) - 1 can't.
        y = tf.floor(y)

        if not hasattr(self.distribution, '_cdf'):
            raise NotImplementedError(
                '`prob` not implemented unless the base distribution implements '
                '`cdf`')
        try:
            return self._prob_with_sf_and_cdf(y)
        except NotImplementedError:
            return self._prob_with_cdf(y)
Example #6
 def _get_indices(interp_type):
     """Get values of y at the indices implied by interp_type."""
     # Note `lower` <--> ceiling.  Confusing, huh?  Due to the fact that
     # _sort_tensor sorts highest to lowest, tf.ceil corresponds to the higher
     # index, but the lower value of y!
     if interp_type == 'lower':
         indices = tf.math.ceil((d - 1) * frac_at_q_or_above)
     elif interp_type == 'higher':
         indices = tf.floor((d - 1) * frac_at_q_or_above)
     elif interp_type == 'nearest':
         indices = tf.round((d - 1) * frac_at_q_or_above)
     # d - 1 will be distinct from d in int32, but not necessarily double.
     # So clip to avoid out of bounds errors.
     return tf.clip_by_value(tf.cast(indices, tf.int32), 0,
                             tf.shape(input=y)[-1] - 1)
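
A small numpy sketch of the index logic (not from the source): with `y` sorted highest to lowest, `ceil` of the fractional index lands on the smaller value ('lower') and `floor` lands on the larger value ('higher').

import numpy as np

x = np.array([1., 3., 5., 7.])
y = np.sort(x)[::-1]                      # sorted highest to lowest: [7, 5, 3, 1]
d = y.size
q = 30.                                   # 30th percentile
frac_at_q_or_above = 1. - q / 100.
idx_lower = int(np.ceil((d - 1) * frac_at_q_or_above))
idx_higher = int(np.floor((d - 1) * frac_at_q_or_above))
assert y[idx_lower] == 1. and y[idx_higher] == 3.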
Example #7
    def _cdf(self, x):
        # CDF(x) at positive integer x is the probability that the Zipf variable is
        # less than or equal to x; given by the formula:
        #     CDF(x) = 1 - (zeta(power, x + 1) / Z)
        # For fractional x, the CDF is equal to the CDF at n = floor(x).
        # For x < 1, the CDF is zero.

        # If force_probs_to_zero_outside_support is False, we return a continuous
        # relaxation which agrees with the CDF at integer points.
        power = tf.convert_to_tensor(self.power)
        x = tf.cast(x, power.dtype)
        safe_x = tf.maximum(
            tf.floor(x) if self.force_probs_to_zero_outside_support else x, 0.)

        cdf = 1. - (tf.math.zeta(power, safe_x + 1.) / tf.math.zeta(power, 1.))
        return tf.where(x < 1., tf.zeros_like(cdf), cdf)
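
A hedged numerical check of the formula in the comments (using `scipy.special.zeta` as the Hurwitz zeta, the analogue of `tf.math.zeta`): `CDF(x) = 1 - zeta(power, floor(x) + 1) / zeta(power, 1)` matches the direct sum of the normalized pmf `k**(-power) / zeta(power, 1)`.

import numpy as np
from scipy.special import zeta

power, x = 3., 4.
cdf = 1. - zeta(power, np.floor(x) + 1.) / zeta(power, 1.)
pmf_sum = sum(k ** -power for k in range(1, int(x) + 1)) / zeta(power, 1.)
np.testing.assert_allclose(cdf, pmf_sum)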
Example #8
    def _sample_n(self, n, seed=None):
        # Uniform variates must be sampled from the open-interval `(0, 1)` rather
        # than `[0, 1)`. To do so, we use `np.finfo(self.dtype.as_numpy_dtype).tiny`
        # because it is the smallest, positive, "normal" number. A "normal" number
        # is such that the mantissa has an implicit leading 1. Normal, positive
        # numbers x, y have the reasonable property that, `x + y >= max(x, y)`. In
        # this case, a subnormal number (i.e., np.nextafter) can cause us to sample
        # 0.
        sampled = tf.random.uniform(
            tf.concat([[n], tf.shape(input=self._probs)], 0),
            minval=np.finfo(self.dtype.as_numpy_dtype).tiny,
            maxval=1.,
            seed=seed,
            dtype=self.dtype)

        return tf.floor(tf.math.log(sampled) / tf.math.log1p(-self.probs))
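
A small numpy sketch of the same inverse-CDF trick (not from the source): if `U ~ Uniform(0, 1)`, then `floor(log(U) / log(1 - p))` is Geometric(`p`), counting failures before the first success, so its mean is `(1 - p) / p`.

import numpy as np

rng = np.random.default_rng(0)
p = 0.25
u = rng.uniform(np.finfo(np.float64).tiny, 1., size=100_000)
samples = np.floor(np.log(u) / np.log1p(-p))
print(samples.mean(), (1. - p) / p)  # both close to 3.0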
Example #9
  def _cdf(self, x):
    # CDF(x) at positive integer x is the probability that the Zipf variable is
    # less than or equal to x; given by the formula:
    #     CDF(x) = 1 - (zeta(power, x + 1) / Z)
    # For fractional x, the CDF is equal to the CDF at n = floor(x).
    # For x < 1, the CDF is zero.

    # If interpolate_nondiscrete is True, we return a continuous relaxation
    # which agrees with the CDF at integer points.
    power = tf.convert_to_tensor(self.power)
    x = tf.cast(x, power.dtype)
    safe_x = tf.maximum(x if self.interpolate_nondiscrete else tf.floor(x), 0.)

    cdf = 1. - (
        tf.math.zeta(power, safe_x + 1.) / tf.math.zeta(power, 1.))
    return tf.where(x < 1., tf.zeros_like(cdf), cdf)
Example #10
        def loop_body(should_continue, k, seed):
            """Resample the non-accepted points."""
            u_seed, next_seed = samplers.split_seed(seed)
            # Uniform variates must be sampled from the open-interval `(0, 1)` rather
            # than `[0, 1)`. To do so, we use
            # `np.finfo(dtype_util.as_numpy_dtype(self.dtype)).tiny`
            # because it is the smallest, positive, 'normal' number. A 'normal' number
            # is such that the mantissa has an implicit leading 1. Normal, positive
            # numbers x, y have the reasonable property that, `x + y >= max(x, y)`. In
            # this case, a subnormal number (i.e., np.nextafter) can cause us to
            # sample 0.
            u = samplers.uniform(
                shape,
                minval=np.finfo(dtype_util.as_numpy_dtype(power.dtype)).tiny,
                maxval=numpy_dtype(1.),
                dtype=power.dtype,
                seed=u_seed)
            # We use (1 - u) * maxval_u + u * minval_u rather than the other way
            # around, since we want to draw samples in (minval_u, maxval_u].
            u = maxval_u + (minval_u - maxval_u) * u
            # set_shape needed here because of b/139013403
            tensorshape_util.set_shape(u, should_continue.shape)

            # Sample the point X from the continuous density h(x) \propto x^(-power).
            x = self._hat_integral_inverse(u, power=power)

            # Rejection-inversion requires a `hat` function, h(x) such that
            # \int_{k - .5}^{k + .5} h(x) dx >= pmf(k + 1) for points k in the
            # support. A natural hat function for us is h(x) = x^(-power).
            #
            # After sampling X from h(x), suppose it lies in the interval
            # (K - .5, K + .5) for integer K. Then the corresponding K is accepted if
            # it lies to the left of x_K, where x_K is defined by:
            #   \int_{x_k}^{K + .5} h(x) dx = H(x_K) - H(K + .5) = pmf(K + 1),
            # where H(x) = \int_x^inf h(x) dx.

            # Solving for x_K, we find that x_K = H_inverse(H(K + .5) + pmf(K + 1)).
            # Or, the acceptance condition is X <= H_inverse(H(K + .5) + pmf(K + 1)).
            # Since X = H_inverse(U), this simplifies to U <= H(K + .5) + pmf(K + 1).

            # Update the non-accepted points.
            # Since X \in (K - .5, K + .5), the sample K is chosen as floor(X + 0.5).
            k = tf.where(should_continue, tf.floor(x + 0.5), k)
            accept = (u <= self._hat_integral(k + .5, power=power) +
                      tf.exp(self._log_prob(k + 1, power=power)))

            return [should_continue & (~accept), k, next_seed]
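
A hedged sketch of the hat-function pieces the loop assumes, derived from the comment `H(x) = \int_x^inf h(x) dx` with `h(x) = x^(-power)`; the real `_hat_integral` and `_hat_integral_inverse` may differ in details such as shifts or clipping.

import numpy as np

def hat_integral(x, power):
    # H(x) = x**(1 - power) / (power - 1), valid for power > 1.
    return x ** (1. - power) / (power - 1.)

def hat_integral_inverse(u, power):
    # Solve u = H(x) for x.
    return (u * (power - 1.)) ** (1. / (1. - power))

x, power = 3.7, 2.5
np.testing.assert_allclose(hat_integral_inverse(hat_integral(x, power), power), x)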
Example #11
  def _log_prob(self, x, power=None):
    # The log probability at positive integer points x is log(x^(-power) / Z)
    # where Z is the normalization constant. For x < 1 and non-integer points,
    # the log-probability is -inf.
    #
    # However, if interpolate_nondiscrete is True, we return the natural
    # continuous relaxation for x >= 1 which agrees with the log probability at
    # positive integer points.
    power = power if power is not None else tf.convert_to_tensor(self.power)
    x = tf.cast(x, power.dtype)
    log_normalization = tf.math.log(tf.math.zeta(power, 1.))

    safe_x = tf.maximum(x if self.interpolate_nondiscrete else tf.floor(x), 1.)
    y = -power * tf.math.log(safe_x)
    log_unnormalized_prob = tf.where(
        tf.equal(x, safe_x), y, dtype_util.as_numpy_dtype(y.dtype)(-np.inf))

    return log_unnormalized_prob - log_normalization
Example #12
    def _sample_n(self, n, seed=None):
        # Uniform variates must be sampled from the open-interval `(0, 1)` rather
        # than `[0, 1)`. To do so, we use
        # `np.finfo(dtype_util.as_numpy_dtype(self.dtype)).tiny`
        # because it is the smallest, positive, 'normal' number. A 'normal' number
        # is such that the mantissa has an implicit leading 1. Normal, positive
        # numbers x, y have the reasonable property that, `x + y >= max(x, y)`. In
        # this case, a subnormal number (i.e., np.nextafter) can cause us to sample
        # 0.
        probs = self._probs_parameter_no_checks()
        sampled = samplers.uniform(
            ps.concat([[n], ps.shape(probs)], 0),
            minval=np.finfo(dtype_util.as_numpy_dtype(self.dtype)).tiny,
            maxval=1.,
            seed=seed,
            dtype=self.dtype)

        return tf.floor(tf.math.log(sampled) / tf.math.log1p(-probs))
 def preprocess(x):
     img = tf.cast(x['image'], tf.float32)
     aug = None
     if augment:  # NOTE: this makes training nondeterministic
         if self._randflip:
             augment_img = tf.image.flip_left_right(img)
             aug = tf.random.uniform(shape=[]) > 0.5
             img = tf.where(aug, augment_img, img)
         if self._rot90:
             u = tf.random.uniform(shape=[])
             k = tf.cast(tf.floor(4. * u), tf.int32)
             img = tf.image.rot90(img, k=k)
             # Guard against `aug` still being None when random flipping is off.
             aug = (k > 0) if aug is None else (aug | (k > 0))
     if aug is None:
         aug = tf.convert_to_tensor(False, dtype=tf.bool)
     out = {'image': img, 'augmented': aug}
     if self._class_conditional:
         out['label'] = tf.cast(x['label'], tf.int32)
     return out
Example #14
    def _log_prob(self, x):
        # The log-probability at negative points is always -inf.
        # Catch such x's and set the output value accordingly.
        lr1, r1, lr2, r2 = self._all_rate_parameters()

        safe_x = tf.floor(x) if self.force_probs_to_zero_outside_support else x
        y = tf.math.multiply_no_nan(0.5 * (lr1 - lr2), safe_x)
        numpy_dtype = dtype_util.as_numpy_dtype(y.dtype)

        # When both rates are zero, the above computation gives a NaN, whereas
        # it should give zero.
        y = tf.where(
            tf.math.equal(r1, 0.) & tf.math.equal(r2, 0.), numpy_dtype(0.), y)
        y = y + tfp_math.log_bessel_ive(safe_x, 2. * tf.math.sqrt(
            r1 * r2)) - tf.math.square(tf.math.sqrt(r1) - tf.math.sqrt(r2))
        y = tf.where(tf.math.equal(x, safe_x), y, numpy_dtype(-np.inf))
        if self.force_probs_to_zero_outside_support:
            # Ensure the gradient wrt `rate` is zero at non-integer points.
            y = tf.where((y < 0.) & tf.math.is_inf(y), numpy_dtype(-np.inf), y)
        return y
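
A hedged check against scipy (not from the source): the Skellam pmf is `p(k) = exp(-(r1 + r2)) * (r1 / r2)**(k / 2) * I_k(2 * sqrt(r1 * r2))`, which is what the exponentially-scaled Bessel form above evaluates.

import numpy as np
from scipy import special, stats

r1, r2, k = 3., 2., np.arange(-3, 4)
z = 2. * np.sqrt(r1 * r2)
log_p = (0.5 * k * (np.log(r1) - np.log(r2))
         + np.log(special.ive(k, z)) - (np.sqrt(r1) - np.sqrt(r2)) ** 2)
np.testing.assert_allclose(log_p, stats.skellam.logpmf(k, r1, r2), rtol=1e-6)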
Example #15
def _base_expansion_size(num, bases):
    """Computes the number of terms in the place value expansion.

  Let num = a_0 + a_1 b + a_2 b^2 + ... + a_k b^k be the place value expansion
  of `num` in base b (with a_k != 0). This function computes and returns `k+1`
  for each base `b` specified in `bases`.

  This can be inferred from the base `b` logarithm of `num` as follows:
    $$k + 1 = Floor(log_b(num)) + 1 = Floor(log(num) / log(b)) + 1$$

  Args:
    num: Scalar `Tensor` of dtype either `float32` or `float64`. The number to
      compute the base expansion size of.
    bases: `Tensor` of the same dtype as num. The bases to compute the size
      against.

  Returns:
    Tensor of same dtype and shape as `bases` containing the size of num when
    written in that base.
  """
    return tf.floor(tf.math.log(num) / tf.math.log(bases)) + 1
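
A quick sanity check (not from the source): 255 has 8 binary digits and 3 decimal digits, i.e. `floor(log_b(num)) + 1` terms.

import numpy as np

num = np.float64(255.)
bases = np.array([2., 10.])
print(np.floor(np.log(num) / np.log(bases)) + 1.)  # [8. 3.]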
Example #16
    def loop_body(should_continue, k, seed):
      """Resample the non-accepted points."""
      u_seed, next_seed = samplers.split_seed(seed)
      # The range of U is chosen so that the resulting sample K lies in
      # [0, tf.int64.max). The final sample, if accepted, is K + 1.
      u = samplers.uniform(
          shape,
          minval=minval_u,
          maxval=maxval_u,
          dtype=power.dtype,
          seed=u_seed)
      # set_shape needed here because of b/139013403
      tensorshape_util.set_shape(u, should_continue.shape)

      # Sample the point X from the continuous density h(x) \propto x^(-power).
      x = self._hat_integral_inverse(u, power=power)

      # Rejection-inversion requires a `hat` function, h(x) such that
      # \int_{k - .5}^{k + .5} h(x) dx >= pmf(k + 1) for points k in the
      # support. A natural hat function for us is h(x) = x^(-power).
      #
      # After sampling X from h(x), suppose it lies in the interval
      # (K - .5, K + .5) for integer K. Then the corresponding K is accepted if
      # it lies to the left of x_K, where x_K is defined by:
      #   \int_{x_k}^{K + .5} h(x) dx = H(x_K) - H(K + .5) = pmf(K + 1),
      # where H(x) = \int_x^inf h(x) dx.

      # Solving for x_K, we find that x_K = H_inverse(H(K + .5) + pmf(K + 1)).
      # Or, the acceptance condition is X <= H_inverse(H(K + .5) + pmf(K + 1)).
      # Since X = H_inverse(U), this simplifies to U <= H(K + .5) + pmf(K + 1).

      # Update the non-accepted points.
      # Since X \in (K - .5, K + .5), the sample K is chosen as floor(X + 0.5).
      k = tf.where(should_continue, tf.floor(x + 0.5), k)
      accept = (u <= self._hat_integral(k + .5, power=power) + tf.exp(
          self._log_prob(k + 1, power=power)))

      return [should_continue & (~accept), k, next_seed]
 def _mode(self):
     total_count = tf.convert_to_tensor(self.total_count)
     adjusted_count = tf.where(1. < total_count, total_count - 1.,
                               tf.zeros_like(total_count))
     return tf.floor(adjusted_count *
                     tf.exp(self._logits_parameter_no_checks()))
Example #18
 def _mode(self):
     return tf.floor((1. + self.total_count) * self.probs)
Example #19
 def _mode(self):
     return tf.floor(
         (1. + self._total_count) * self._probs_parameter_no_checks())
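
A hedged check (not from the source) that `floor((1 + total_count) * probs)` indeed maximizes the Binomial pmf:

import numpy as np
from scipy import stats

n, p = 10, 0.37
mode = np.floor((1. + n) * p)
assert mode == np.argmax(stats.binom.pmf(np.arange(n + 1), n, p))  # both 4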
Example #20
 'rate':
     tfp_hps.softplus_plus_eps(),
 'scale':
     tfp_hps.softplus_plus_eps(),
 'Wishart.scale':
     tfp_hps.positive_definite,
 'scale_diag':
     tfp_hps.softplus_plus_eps(),
 'scale_identity_multiplier':
     tfp_hps.softplus_plus_eps(),
 'scale_tril':
     tfp_hps.lower_tril_positive_definite,
 'temperature':
     tfp_hps.softplus_plus_eps(),
 'total_count':
     lambda x: tf.floor(tf.sigmoid(x / 100) * 100) + 1,
 'Bernoulli':
     lambda d: dict(d, dtype=tf.float32),
 'LKJ':
     fix_lkj,
 'Triangular':
     fix_triangular,
 'TruncatedNormal':
     lambda d: dict(d, high=ensure_high_gt_low(d['low'], d['high'])),
 'Uniform':
     lambda d: dict(d, high=ensure_high_gt_low(d['low'], d['high'])),
 'Wishart':
     fix_wishart,
 'Zipf':
     lambda d: dict(d, dtype=tf.float32),
 'FiniteDiscrete':
Example #21
 def _survival_function(self, x):
   safe_x = tf.maximum(
       tf.floor(x) if self.force_probs_to_zero_outside_support else x, 0.)
   survival = tf.math.igamma(1. + safe_x, self._rate_parameter_no_checks())
   return tf.where(x < 0., tf.ones_like(survival), survival)
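
A hedged check of the identity used above (not from the source): for a Poisson(`rate`) variable, the survival function `P(X > k)` equals the regularized lower incomplete gamma `igamma(k + 1, rate)`.

import numpy as np
from scipy import special, stats

rate, k = 4.2, np.arange(10)
np.testing.assert_allclose(special.gammainc(k + 1., rate),
                           stats.poisson.sf(k, rate))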
Example #22
 def _mode(self):
   adjusted_count = tf.where(1. < self.total_count, self.total_count - 1.,
                             tf.zeros_like(self.total_count))
   return tf.floor(adjusted_count * tf.exp(self.logits))
Example #23
 def _mode(self):
   return tf.floor(self._rate_parameter_no_checks())
Example #24
 def _mode(self):
     return tf.floor(self.rate)
def resample_deterministic_minimum_error(
        log_probs,
        event_size,
        sample_shape,
        seed=None,
        name='resample_deterministic_minimum_error'):
    """Deterministic minimum error resampler for sequential Monte Carlo.

    The return value of this function is similar to sampling with

    ```python
    expanded_sample_shape = tf.concat([sample_shape, [event_size]], axis=-1)
    tfd.Categorical(logits=log_probs).sample(expanded_sample_shape)
    ```

    but with values chosen deterministically so that the empirical distribution
    is as close as possible to the specified distribution.
    (Note that the empirical distribution can only exactly equal the requested
    distribution if multiplying every probability by `event_size` gives
    an integer. So in general this is a biased "sampler".)
    It is intended to provide a good representative sample, suitable for use
    with some Sequential Monte Carlo algorithms.

  This function is based on Algorithm #3 in [Maskell et al. (2006)][1].

  Args:
    log_probs: a tensor-valued batch of discrete log probability distributions.
    event_size: the dimension of the vector considered a single draw.
    sample_shape: the `sample_shape` determining the number of draws. Because
      this resampler is deterministic it simply replicates the draw you
      would get for `sample_shape=[1]`.
    seed: This argument is unused but is present so that this function shares
      its interface with the other resampling functions.
      Default value: None
    name: Python `str` name for ops created by this method.
      Default value: `None` (i.e., `'resample_deterministic_minimum_error'`).

  Returns:
    resampled_indices: a tensor of samples.

  #### References
  [1]: S. Maskell, B. Alun-Jones and M. Macleod. A Single Instruction Multiple
       Data Particle Filter.
       In 2006 IEEE Nonlinear Statistical Signal Processing Workshop.
       http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf

  """
    del seed

    with tf.name_scope(name or 'resample_deterministic_minimum_error'):
        sample_shape = tf.convert_to_tensor(sample_shape, dtype_hint=tf.int32)
        log_probs = dist_util.move_dimension(log_probs,
                                             source_idx=0,
                                             dest_idx=-1)
        probs = tf.math.exp(log_probs)
        prob_shape = ps.shape(probs)
        pdf_size = prob_shape[-1]
        # If we could draw fractional numbers of samples we would
        # choose `ideal_numbers` for the number of each element.
        ideal_numbers = event_size * probs
        # We approximate the ideal numbers by truncating to integers
        # and then repair the counts starting with the one with the
        # largest fractional error and working our way down.
        first_approximation = tf.floor(ideal_numbers)
        missing_fractions = ideal_numbers - first_approximation
        first_approximation = ps.cast(first_approximation, dtype=tf.int32)
        fraction_order = tf.argsort(missing_fractions, axis=-1)
        # We sort the integer parts and fractional parts together.
        batch_dims = ps.rank_from_shape(prob_shape) - 1
        first_approximation = tf.gather_nd(first_approximation,
                                           fraction_order[..., tf.newaxis],
                                           batch_dims=batch_dims)
        missing_fractions = tf.gather_nd(missing_fractions,
                                         fraction_order[..., tf.newaxis],
                                         batch_dims=batch_dims)
        sample_defect = event_size - tf.reduce_sum(
            first_approximation, axis=-1, keepdims=True)
        unpermuted = tf.broadcast_to(tf.range(pdf_size), prob_shape)
        increments = tf.cast(unpermuted >= pdf_size - sample_defect,
                             dtype=first_approximation.dtype)
        counts = first_approximation + increments
        samples = _samples_from_counts(fraction_order, counts, event_size)
        result_shape = tf.concat([sample_shape, prob_shape[:-1], [event_size]],
                                 axis=0)
        # Replicate sample up to batch size.
        # TODO(dpiponi): rather than replicating, spread the "error" over
        # multiple samples with a minimum-discrepancy sequence.
        resampled = tf.broadcast_to(samples, result_shape)
        return dist_util.move_dimension(resampled, source_idx=-1, dest_idx=0)
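
A small numpy sketch of the floor-and-repair idea described in the comments above (illustrative only, not the TFP implementation): take `floor(event_size * p)` for every atom, then hand the leftover draws to the atoms with the largest fractional parts.

import numpy as np

probs = np.array([0.1, 0.2, 0.3, 0.4])
event_size = 7
ideal = event_size * probs                  # [0.7, 1.4, 2.1, 2.8]
counts = np.floor(ideal).astype(int)        # [0, 1, 2, 2], two draws short
deficit = event_size - counts.sum()
order = np.argsort(ideal - counts)          # smallest fractional error first
if deficit:
    counts[order[-deficit:]] += 1           # give extras to the largest errors
print(counts, counts.sum())                 # [1 1 2 3] 7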
def constraint_for(dist=None, param=None):
    """Get bijector constraint for a given distribution's parameter."""

    constraints = {
        'atol':
            tfb.Softplus(),
        'rtol':
            tfb.Softplus(),
        'concentration':
            tfb.Softplus(),
        'GeneralizedPareto.concentration':  # Permits +ve and -ve concentrations.
            lambda x: tf.math.tanh(x) * 0.24,
        'concentration0':
            tfb.Softplus(),
        'concentration1':
            tfb.Softplus(),
        'df':
            tfb.Softplus(),
        'InverseGaussian.loc':
            tfb.Softplus(),
        'JohnsonSU.tailweight':
            tfb.Softplus(),
        'PowerSpherical.mean_direction':
            lambda x: tf.math.l2_normalize(tf.math.sigmoid(x) + 1e-6, -1),
        'ContinuousBernoulli.probs':
            tfb.Sigmoid(),
        'Geometric.logits':  # TODO(b/128410109): re-enable down to -50
            # Capping at 15. so that probability is less than 1, and entropy is
            # defined. b/147394924
            lambda x: tf.minimum(tf.maximum(x, -16.), 15.),  # works around the bug
        'Geometric.probs':
            constrain_between_eps_and_one_minus_eps(),
        'Binomial.probs':
            tfb.Sigmoid(),
        'NegativeBinomial.probs':
            tfb.Sigmoid(),
        'Bernoulli.probs':
            tfb.Sigmoid(),
        'PlackettLuce.scores':
            tfb.Softplus(),
        'ProbitBernoulli.probs':
            tfb.Sigmoid(),
        'RelaxedBernoulli.probs':
            tfb.Sigmoid(),
        'cutpoints':  # Permit values that aren't too large
            lambda x: tfb.Ordered().inverse(10. * tf.math.tanh(x)),
        'log_rate':
            lambda x: tf.maximum(x, -16.),
        'mixing_concentration':
            tfb.Softplus(),
        'mixing_rate':
            tfb.Softplus(),
        'rate':
            tfb.Softplus(),
        'scale':
            tfb.Softplus(),
        'scale_diag':
            tfb.Softplus(),
        'scale_identity_multiplier':
            tfb.Softplus(),
        'tailweight':
            tfb.Softplus(),
        'temperature':
            tfb.Softplus(),
        'total_count':
            lambda x: tf.floor(tfb.Sigmoid()(x / 100.) * 100.) + 1.,
        'Bernoulli':
            lambda d: dict(d, dtype=tf.float32),
        'CholeskyLKJ':
            fix_lkj,
        'LKJ':
            fix_lkj,
        'Zipf':
            lambda d: dict(d, dtype=tf.float32),
        'GeneralizedNormal.power':
            tfb.Softplus(),
    }

    if param is not None:
        return constraints.get('{}.{}'.format(dist, param),
                               constraints.get(param, tfb.Identity()))
    return constraints.get(dist, tfb.Identity())
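
A hedged usage sketch (not from the source; assumes `tfb` is `tfp.bijectors` as in the surrounding helpers). Lookups fall back from `'Distribution.param'` to `'param'` to `tfb.Identity()`:

constraint_for('Geometric', 'probs')   # constrain_between_eps_and_one_minus_eps()
constraint_for('Normal', 'scale')      # tfb.Softplus()
constraint_for('Normal', 'loc')        # tfb.Identity(), since no entry matches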
def resample_minimum_variance(log_probs,
                              event_size,
                              sample_shape,
                              seed=None,
                              name=None):
    """Minimum variance resampler for sequential Monte Carlo.

  This function is based on Algorithm #2 in [Maskell et al. (2006)][1].

  Args:
    log_probs: A tensor-valued batch of discrete log probability distributions.
    event_size: the dimension of the vector considered a single draw.
    sample_shape: the `sample_shape` determining the number of draws.
    seed: Python `int` used to seed calls to `tf.random.*`.
      Default value: None (i.e. no seed).
    name: Python `str` name for ops created by this method.
      Default value: `None` (i.e., `'resample_minimum_variance'`).

  Returns:
    resampled_indices: The result is similar to sampling with
    ```python
    expanded_sample_shape = tf.concat([[event_size], sample_shape], axis=-1)
    tfd.Categorical(logits=log_probs).sample(expanded_sample_shape)
    ```
    but with values sorted along the first axis. It can be considered to be
    sampling events made up of a length-`event_size` vector of draws from
    the `Categorical` distribution. However, although the elements of
    this event have the appropriate marginal distribution, they are not
    independent of each other. Instead they have been chosen so as to form
    a good representative sample, suitable for use with Sequential Monte
    Carlo algorithms.
    The sortedness is an unintended side effect of the algorithm that is
    harmless in the context of simple SMC algorithms.

  #### References
  [1]: S. Maskell, B. Alun-Jones and M. Macleod. A Single Instruction Multiple
       Data Particle Filter.
       In 2006 IEEE Nonlinear Statistical Signal Processing Workshop.
       http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf

  """
    with tf.name_scope(name or 'resample_minimum_variance') as name:
        log_probs = tf.convert_to_tensor(log_probs, dtype_hint=tf.float32)
        log_probs = dist_util.move_dimension(log_probs,
                                             source_idx=0,
                                             dest_idx=-1)

        batch_shape = prefer_static.shape(log_probs)[:-1]
        working_shape = prefer_static.concat([sample_shape, batch_shape],
                                             axis=-1)
        log_cdf = tf.math.cumulative_logsumexp(log_probs[..., :-1], axis=-1)
        # Each resampling requires a single uniform random variable
        offset = uniform.Uniform(low=tf.constant(0., log_cdf.dtype),
                                 high=tf.constant(1., log_cdf.dtype)).sample(
                                     working_shape, seed=seed)[..., tf.newaxis]
        # It is possible for numerical error to result in a cumulative
        # sum that exceeds 1 so we need to clip.
        markers = prefer_static.cast(
            tf.floor(event_size * tf.math.exp(log_cdf) + offset), tf.int32)
        indices = markers[..., tf.newaxis]
        updates = tf.ones(prefer_static.shape(indices)[:-1], dtype=tf.int32)
        scatter_shape = prefer_static.concat([working_shape, [event_size + 1]],
                                             axis=-1)
        batch_dims = (prefer_static.rank_from_shape(sample_shape) +
                      prefer_static.rank_from_shape(batch_shape))
        x = _scatter_nd_batch(indices,
                              updates,
                              scatter_shape,
                              batch_dims=batch_dims)

        resampled = tf.cumsum(x, axis=-1)[..., :-1]
        resampled = dist_util.move_dimension(resampled,
                                             source_idx=-1,
                                             dest_idx=0)
        return resampled
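
A small numpy illustration of the marker construction in the code above (illustrative only): with a single shared uniform `u`, particle `i` receives `floor(N * F(i) + u) - floor(N * F(i - 1) + u)` copies, where `F` is the cumulative sum of the weights.

import numpy as np

probs = np.array([0.1, 0.2, 0.3, 0.4])
event_size = 8
u = 0.37                                   # one uniform draw per resampling
edges = np.floor(event_size * np.concatenate([[0.], np.cumsum(probs)]) + u)
counts = np.diff(edges).astype(int)
print(counts, counts.sum())                # [1 1 3 3] 8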
def selective_crop_and_resize(features,
                              boxes,
                              box_levels,
                              boundaries,
                              output_size=7,
                              sample_offset=0.5):
    """Crop and resize boxes on a set of feature maps.

  Given multiple features maps indexed by different levels, and a set of boxes
  where each box is mapped to a certain level, it selectively crops and resizes
  boxes from the corresponding feature maps to generate the box features.

  We follow the ROIAlign technique (see https://arxiv.org/pdf/1703.06870.pdf,
  figure 3 for reference). Specifically, for each feature map, we select an
  (output_size, output_size) set of pixels corresponding to the box location,
  and then use bilinear interpolation to select the feature value for each
  pixel.

  For performance, we perform the gather and interpolation on all layers as a
  single operation. In this op, the multi-level features are first stacked and
  gathered into [2*output_size, 2*output_size] feature points. Then bilinear
  interpolation is performed on the gathered feature points to generate
  [output_size, output_size] RoIAlign feature map.

  Here is the step-by-step algorithm:
    1. The multi-level features are gathered into a
       [batch_size, num_boxes, output_size*2, output_size*2, num_filters]
       Tensor. The Tensor contains four neighboring feature points for each
       vertex in the output grid.
    2. Compute the interpolation kernel of shape
       [batch_size, num_boxes, output_size*2, output_size*2]. The last 2 axes
       can be seen as stacking 2x2 interpolation kernels for all vertices in the
       output grid.
    3. Element-wise multiply the gathered features and interpolation kernel.
       Then apply 2x2 average pooling to reduce spatial dimension to
       output_size.

  Args:
    features: a 5-D tensor of shape
      [batch_size, num_levels, max_height, max_width, num_filters] where
      cropping and resizing are based.
    boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] encoding the
      information of each box w.r.t. the corresponding feature map.
      boxes[:, :, 0:2] are the grid position in (y, x) (float) of the top-left
      corner of each box. boxes[:, :, 2:4] are the box sizes in (h, w) (float)
      in terms of the number of pixels of the corresponding feature map size.
    box_levels: a 3-D tensor of shape [batch_size, num_boxes, 1] representing
      the 0-based corresponding feature level index of each box.
    boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2] representing
      the boundary (in (y, x)) of the corresponding feature map for each box.
      Any resampled grid points that go beyond the boundary will be clipped.
    output_size: a scalar indicating the output crop size.
    sample_offset: a float number in [0, 1] indicating the subpixel sample offset
      from grid point.

  Returns:
    features_per_box: a 5-D tensor of shape
      [batch_size, num_boxes, output_size, output_size, num_filters]
      representing the cropped features.
  """
    (batch_size, num_levels, max_feature_height, max_feature_width,
     num_filters) = features.get_shape().as_list()
    _, num_boxes, _ = boxes.get_shape().as_list()

    # Compute the grid position w.r.t. the corresponding feature map.
    box_grid_x = []
    box_grid_y = []
    for i in range(output_size):
        box_grid_x.append(boxes[:, :, 1] +
                          (i + sample_offset) * boxes[:, :, 3] / output_size)
        box_grid_y.append(boxes[:, :, 0] +
                          (i + sample_offset) * boxes[:, :, 2] / output_size)
    box_grid_x = tf.stack(box_grid_x, axis=2)
    box_grid_y = tf.stack(box_grid_y, axis=2)

    # Compute indices for gather operation.
    box_grid_y0 = tf.floor(box_grid_y)
    box_grid_x0 = tf.floor(box_grid_x)
    box_grid_x0 = tf.maximum(0., box_grid_x0)
    box_grid_y0 = tf.maximum(0., box_grid_y0)
    box_gridx0x1 = tf.stack([
        tf.minimum(box_grid_x0, tf.expand_dims(boundaries[:, :, 1], -1)),
        tf.minimum(box_grid_x0 + 1, tf.expand_dims(boundaries[:, :, 1], -1))
    ],
                            axis=3)
    box_gridy0y1 = tf.stack([
        tf.minimum(box_grid_y0, tf.expand_dims(boundaries[:, :, 0], -1)),
        tf.minimum(box_grid_y0 + 1, tf.expand_dims(boundaries[:, :, 0], -1))
    ],
                            axis=3)

    x_indices = tf.cast(tf.reshape(box_gridx0x1,
                                   [batch_size, num_boxes, output_size * 2]),
                        dtype=tf.int32)
    y_indices = tf.cast(tf.reshape(box_gridy0y1,
                                   [batch_size, num_boxes, output_size * 2]),
                        dtype=tf.int32)

    height_dim_offset = max_feature_width
    level_dim_offset = max_feature_height * height_dim_offset
    batch_dim_offset = num_levels * level_dim_offset
    indices = tf.reshape(
        tf.tile(
            tf.reshape(
                tf.range(batch_size) * batch_dim_offset,
                [batch_size, 1, 1, 1]),
            [1, num_boxes, output_size * 2, output_size * 2]) + tf.tile(
                tf.reshape(box_levels * level_dim_offset,
                           [batch_size, num_boxes, 1, 1]),
                [1, 1, output_size * 2, output_size * 2]) + tf.tile(
                    tf.reshape(y_indices * height_dim_offset,
                               [batch_size, num_boxes, output_size * 2, 1]),
                    [1, 1, 1, output_size * 2]) +
        tf.tile(
            tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]),
            [1, 1, output_size * 2, 1]), [-1])

    features = tf.reshape(features, [-1, num_filters])
    features_per_box = tf.reshape(
        tf.gather(features, indices),
        [batch_size, num_boxes, output_size * 2, output_size * 2, num_filters])

    # The RoIAlign feature f can be computed by bilinear interpolation of four
    # neighboring feature points f0, f1, f2, and f3.
    # f(y, x) = [hy, ly] * [[f00, f01],
    #                       [f10, f11]] * [hx, lx]^T
    # f(y, x) = (hy*hx)f00 + (hy*lx)f01 + (ly*hx)f10 + (lx*ly)f11
    # f(y, x) = w00*f00 + w01*f01 + w10*f10 + w11*f11
    ly = box_grid_y - box_grid_y0
    lx = box_grid_x - box_grid_x0
    hy = 1.0 - ly
    hx = 1.0 - lx
    kernel_x = tf.reshape(tf.stack([hx, lx], axis=3),
                          [batch_size, num_boxes, 1, output_size * 2])
    kernel_y = tf.reshape(tf.stack([hy, ly], axis=3),
                          [batch_size, num_boxes, output_size * 2, 1])
    # Uses implicit broadcast to generate the interpolation kernel. The
    # multiplier `4` is for avg pooling.
    interpolation_kernel = kernel_y * kernel_x * 4

    # Interpolates the gathered features with computed interpolation kernels.
    features_per_box *= tf.cast(tf.expand_dims(interpolation_kernel, axis=4),
                                dtype=features_per_box.dtype)
    features_per_box = tf.reshape(features_per_box, [
        batch_size * num_boxes, output_size * 2, output_size * 2, num_filters
    ])
    features_per_box = tf.nn.avg_pool2d(input=features_per_box,
                                        ksize=[1, 2, 2, 1],
                                        strides=[1, 2, 2, 1],
                                        padding='VALID')
    features_per_box = tf.reshape(
        features_per_box,
        [batch_size, num_boxes, output_size, output_size, num_filters])

    return features_per_box
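
A hedged usage sketch with dummy shapes (not from the source; shapes follow the docstring above):

import tensorflow as tf

batch, levels, height, width, filters, num_boxes = 2, 3, 32, 32, 16, 5
features = tf.random.normal([batch, levels, height, width, filters])
boxes = tf.random.uniform([batch, num_boxes, 4], minval=0., maxval=8.)
box_levels = tf.zeros([batch, num_boxes, 1], dtype=tf.int32)
boundaries = tf.fill([batch, num_boxes, 2], 31.)
roi_features = selective_crop_and_resize(
    features, boxes, box_levels, boundaries, output_size=7)
# roi_features.shape == [2, 5, 7, 7, 16]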
Example #29
 def _mode(self):
   total_count = tf.convert_to_tensor(self._total_count)
   probs = self._probs_parameter_no_checks(total_count=total_count)
   return tf.math.minimum(
       total_count, tf.floor((1. + total_count) * probs))
 def _log_prob(self, x):
   probs = self._probs_parameter_no_checks()
   if not self.validate_args:
     # For consistency with cdf, we take the floor.
     x = tf.floor(x)
   return tf.math.xlog1py(x, -probs) + tf.math.log(probs)