Ejemplo n.º 1
0
def process_batchwise_mention_targets(
    dense_span_starts: tf.Tensor,
    dense_span_ends: tf.Tensor,
    dense_mention_ids: tf.Tensor,
    dense_linked_mention_mask: tf.Tensor,
    dense_is_masked: tf.Tensor,
    max_mentions: int,
    max_mention_targets: int,
) -> Dict[str, tf.Tensor]:
    """Picks mentions and linking targets jointly over a whole batch.

    All positions of the batch are flattened into a single axis and the
    `max_mentions` highest-priority span starts are kept. Linked span starts
    outrank unlinked ones, and ties are broken by uniform random noise, so
    subsampling only happens when the batch holds more mentions than
    `max_mentions`. The first `max_mention_targets` selected entries are
    additionally exposed as targets for the mention objectives. Each selected
    mention is described by its batch row and its start/end offset in that
    row, so the caller can map it back to the dense layout.

    Args:
      dense_span_starts: dense mention start positions.
      dense_span_ends: dense mention end positions.
      dense_mention_ids: dense entity ids for linked mentions in passage.
      dense_linked_mention_mask: dense mask for linked mentions in passage.
      dense_is_masked: dense mask for masked positions in passage.
      max_mentions: number of mention slots selected (the `k` of the top-k).
      max_mention_targets: number of leading mention slots used as targets
        for the linking loss.

    Returns:
      A dict holding `mention_batch_positions`, `mention_start_positions`,
      `mention_end_positions`, `mention_mask`, `mention_is_masked`,
      `mention_target_ids`, `mention_target_indices` and
      `mention_target_is_masked`, merged with every entry returned by
      `prepare_mention_target_features`.
    """

    tokens_per_row = tf.shape(dense_span_starts)[1]

    # The linking mask covers every token of a mention; restrict it to the
    # token at which the mention starts.
    linked_starts = dense_span_starts * dense_linked_mention_mask

    # Sampling priority per position (assuming 0/1 input masks):
    #   0 -> not a span start, 1 -> span start, 2 -> linked span start.
    priority = tf.cast(dense_span_starts + linked_starts, tf.float32)

    # Uniform noise in [0; 1) turns the top-k below into a uniform draw within
    # each priority level when there are more mentions than `max_mentions`.
    priority = priority + tf.random.uniform(tf.shape(priority))

    _, picked_starts = tf.math.top_k(_flatten(priority), k=max_mentions)

    flat_starts = _flatten(dense_span_starts)
    flat_ends_at_starts = get_dense_span_ends_from_starts(
        flat_starts, _flatten(dense_span_ends))
    picked_ends = tf.gather(flat_ends_at_starts, picked_starts)

    out_dtype = dense_span_starts.dtype

    def _as_out(tensor):
        # All outputs share the dtype of `dense_span_starts`.
        return tf.cast(tensor, dtype=out_dtype)

    # Split flat indices back into (row in batch, offset in row).
    mention_batch_positions = _as_out(
        tf.math.floordiv(picked_starts, tokens_per_row))
    mention_start_positions = _as_out(
        tf.math.floormod(picked_starts, tokens_per_row))
    mention_end_positions = _as_out(
        tf.math.floormod(picked_ends, tokens_per_row))

    # Slots that the top-k filled with non-start positions get mask 0 and have
    # their coordinates zeroed.
    mention_mask = _as_out(tf.gather(flat_starts, picked_starts))
    mention_batch_positions = mention_batch_positions * mention_mask
    mention_start_positions = mention_start_positions * mention_mask
    mention_end_positions = mention_end_positions * mention_mask

    # Only linked mentions among the leading slots carry a non-zero weight.
    mention_target_weights = tf.gather(_flatten(linked_starts), picked_starts)
    mention_target_weights = _as_out(
        mention_target_weights[:max_mention_targets])

    mention_target_indices = (
        tf.range(max_mention_targets, dtype=out_dtype) *
        mention_target_weights)

    mention_target_ids = tf.gather(_flatten(dense_mention_ids), picked_starts)
    mention_target_ids = (
        _as_out(mention_target_ids[:max_mention_targets]) *
        mention_target_weights)

    # Look up the masking flag at each selected mention's start token.
    batch_and_start = tf.stack(
        (mention_batch_positions, mention_start_positions), axis=1)
    mention_is_masked = tf.gather_nd(dense_is_masked, batch_and_start)

    features = {
        'mention_batch_positions': mention_batch_positions,
        'mention_start_positions': mention_start_positions,
        'mention_end_positions': mention_end_positions,
        'mention_mask': mention_mask,
        'mention_is_masked': mention_is_masked,
        'mention_target_ids': mention_target_ids,
        'mention_target_indices': mention_target_indices,
        'mention_target_is_masked': mention_is_masked[:max_mention_targets],
    }
    features.update(
        prepare_mention_target_features(
            mention_batch_positions, mention_start_positions,
            mention_end_positions, mention_mask, mention_target_weights,
            mention_target_indices))
    return features
Ejemplo n.º 2
0
 def test_slice_first_element_with_from_tensor(self):
     """Runs `slice_first_element_with_from_tensor` on a float32 range."""
     module = self.modules.tensorlist.all
     values = tf.range(STATIC_SIZE, dtype=tf.float32)
     trace = module.slice_first_element_with_from_tensor(values)
     # Print the result, then assert that all compared results are close.
     trace.print().assert_all_close()
    def checkAllIntegrals(self, prior_scale, likelihood_scale,
                          inverse_temperatures, iid_chain_ndims):
        """Checks `remc_thermodynamic_integrals` on a Normal-Normal model.

        Runs `ReplicaExchangeMC` (with an HMC kernel per replica) using a
        zero-mean Normal prior as the untempered density and a zero-mean
        Normal likelihood as the tempered density. It then asserts that:

          * adjacent swaps are accepted more than half of the times they are
            proposed, and the mean per-replica acceptance probability exceeds
            0.5;
          * both integrals have shape `posterior.batch_shape`;
          * `log_normalizing_constant_ratio` matches the closed-form value;
          * `cross_entropy_difference` matches an iid Monte Carlo estimate.

        Tolerances shrink with `sqrt(min effective sample size * number of
        inverse temperatures)`.

        Args:
          prior_scale: Scale of the prior; converted with
            `tf.convert_to_tensor`.
          likelihood_scale: Scale of the likelihood; converted with
            `tf.convert_to_tensor`.
          inverse_temperatures: Inverse temperatures handed to
            `ReplicaExchangeMC`. `.shape[0]` is read directly, so this must
            be an object exposing a static `shape` (presumably an ndarray or
            `Tensor` -- confirm against callers).
          iid_chain_ndims: Python `int`, number of iid chain dimensions in the
            sampled state; each such dimension has size 2.
        """
        prior_scale = tf.convert_to_tensor(prior_scale, name='prior_scale')
        likelihood_scale = tf.convert_to_tensor(likelihood_scale,
                                                name='likelihood_scale')

        # Create (normalized) prior and likelihood. Their product of course is not
        # normalized. In particular, there is a number `normalizing_const` such that
        #   posterior(x) = prior.prob(x) * likelihood.prob(x) / normalizing_const
        # is normalized. For two zero-mean Normals the posterior is again a
        # zero-mean Normal whose precision is the sum of the two precisions.
        prior = tfd.Normal(0., prior_scale)
        likelihood = tfd.Normal(0., likelihood_scale)
        posterior = tfd.Normal(0., (prior_scale**-2 +
                                    likelihood_scale**-2)**(-0.5))

        # Get a good step size, custom for every replica/batch member.
        bcast_inv_temperatures = bu.left_justified_expand_dims_to(
            inverse_temperatures,
            # Broadcast over replicas.
            1 +
            # Broadcast over chains.
            iid_chain_ndims +
            # Broadcast over batch dims.
            tf.rank(likelihood_scale))
        tempered_posteriors = tfd.Normal(
            0.,
            # One tempered posterior for every inverse_temperature.
            (prior_scale**-2 + bcast_inv_temperatures * likelihood_scale**-2
             )**(-0.5))
        step_size = 0.71234 * tempered_posteriors.stddev()

        # A single leapfrog count is shared by all replicas; it is sized from
        # the smallest step size.
        num_leapfrog_steps = tf.cast(
            tf.math.ceil(1.567 / tf.reduce_min(step_size)), tf.int32)

        def make_kernel_fn(target_log_prob_fn):
            # Builds the per-replica HMC kernel used inside ReplicaExchangeMC.
            return tfp.mcmc.HamiltonianMonteCarlo(
                target_log_prob_fn=target_log_prob_fn,
                step_size=step_size,
                num_leapfrog_steps=num_leapfrog_steps,
            )

        remc = tfp.mcmc.ReplicaExchangeMC(
            target_log_prob_fn=None,
            untempered_log_prob_fn=prior.log_prob,
            tempered_log_prob_fn=likelihood.log_prob,
            inverse_temperatures=inverse_temperatures,
            state_includes_replicas=False,
            make_kernel_fn=make_kernel_fn,
            swap_proposal_fn=tfp.mcmc.even_odd_swap_proposal_fn(1.),
        )

        def trace_fn(state, results):  # pylint: disable=unused-argument
            # Keep only the kernel results needed by the assertions below.
            return {
                'replica_log_accept_ratio':
                results.post_swap_replica_results.log_accept_ratio,
                'is_swap_accepted_adjacent': results.is_swap_accepted_adjacent,
                'is_swap_proposed_adjacent': results.is_swap_proposed_adjacent,
                'potential_energy': results.potential_energy,
            }

        # Fewer results are drawn when executing eagerly.
        if tf.executing_eagerly():
            num_results = 100
        else:
            num_results = 1000
        num_burnin_steps = num_results // 10

        n_samples_per_chain = 2
        initial_sample_shape = [n_samples_per_chain] * iid_chain_ndims

        unused_replica_states, trace = self.evaluate(
            tfp.mcmc.sample_chain(
                num_results=num_results,
                # The initial state is a draw from the prior.
                # NOTE(review): the previous comment here spoke of starting "at
                # one of the modes" to force mode jumping, which does not match
                # this single zero-mean Normal target -- confirm and drop.
                current_state=prior.sample(initial_sample_shape,
                                           seed=test_util.test_seed()),
                kernel=remc,
                num_burnin_steps=num_burnin_steps,
                trace_fn=trace_fn,
                seed=test_util.test_seed()))

        # Tolerance depends on samples * replicas * number of (iid) chains.
        # ess.shape = [n_replica, ...]
        # We will sum over batch dims, then take min over replica.
        ess = tfp.mcmc.effective_sample_size(trace['potential_energy'])
        if iid_chain_ndims:
            ess = tf.reduce_sum(ess, axis=tf.range(1, 1 + iid_chain_ndims))
        min_ess = self.evaluate(tf.reduce_min(ess))

        n_combined_results = min_ess * inverse_temperatures.shape[0]

        # Make sure sampling worked well enough, for every replica/chain.
        # Fraction of proposed adjacent swaps that were accepted, summed over
        # the results axis.
        conditional_swap_prob = (
            np.sum(trace['is_swap_accepted_adjacent'], axis=0) /
            np.sum(trace['is_swap_proposed_adjacent'], axis=0))
        self.assertAllGreater(conditional_swap_prob, 0.5)

        # Acceptance probability is exp(min(0, log_accept_ratio)).
        replica_mean_accept_prob = np.mean(np.exp(
            np.minimum(0, trace['replica_log_accept_ratio'])),
                                           axis=0)
        self.assertAllGreater(replica_mean_accept_prob, 0.5)

        integrals = self.evaluate(
            tfp.experimental.mcmc.remc_thermodynamic_integrals(
                inverse_temperatures,
                trace['potential_energy'],
                iid_chain_ndims=iid_chain_ndims,
            ))

        self.assertAllEqual(posterior.batch_shape,
                            integrals.log_normalizing_constant_ratio.shape)
        # Closed-form reference: log(prior * likelihood / posterior) is the
        # same at every point, so evaluate it at 0.
        actual_log_normalizing_const = self.evaluate(
            # Use arbitrary point, 0, to find the constant.
            prior.log_prob(0.) + likelihood.log_prob(0.) -
            posterior.log_prob(0.))
        self.assertAllClose(integrals.log_normalizing_constant_ratio,
                            actual_log_normalizing_const,
                            rtol=10 / np.sqrt(n_combined_results))

        self.assertAllEqual(posterior.batch_shape,
                            integrals.cross_entropy_difference.shape)

        def cross_entropy(dist):
            # Monte Carlo estimate of E_dist[likelihood.log_prob(z)] from
            # 50000 iid draws.
            z = dist.sample(50000, seed=test_util.test_seed())
            return tf.reduce_mean(likelihood.log_prob(z), axis=0)

        iid_cross_entropy_difference = self.evaluate(
            cross_entropy(posterior) - cross_entropy(prior))
        self.assertAllClose(integrals.cross_entropy_difference,
                            iid_cross_entropy_difference,
                            rtol=30 / np.sqrt(n_combined_results))
Ejemplo n.º 4
0
 def target_log_prob(x):
     """Counts the call, then returns the log-gamma log-prob of `x`."""
     counter['target_calls'] += 1
     # Every axis after the first `independent_chain_ndims` is an event axis.
     return self._log_gamma_log_prob(
         x, tf.range(independent_chain_ndims, tf.rank(x)))
Ejemplo n.º 5
0
def auto_correlation(x,
                     axis=-1,
                     max_lags=None,
                     center=True,
                     normalize=True,
                     name='auto_correlation'):
    """Auto correlation along one axis.

    Given a `1-D` wide sense stationary (WSS) sequence `X`, the auto
    correlation `RXX` may be defined as  (with `E` expectation and `Conj`
    complex conjugate)

    ```
    RXX[m] := E{ W[m] Conj(W[0]) } = E{ W[0] Conj(W[-m]) },
    W[n]   := (X[n] - MU) / S,
    MU     := E{ X[0] },
    S**2   := E{ (X[0] - MU) Conj(X[0] - MU) }.
    ```

    This function takes the viewpoint that `x` is (along one axis) a finite
    sub-sequence of a realization of (WSS) `X`, and then uses `x` to produce
    an estimate of `RXX[m]` as follows:

    After extending `x` from length `L` to `inf` by zero padding, the auto
    correlation estimate `rxx[m]` is computed for `m = 0, 1, ..., max_lags` as

    ```
    rxx[m] := (L - m)**-1 sum_n w[n + m] Conj(w[n]),
    w[n]   := (x[n] - mu) / s,
    mu     := L**-1 sum_n x[n],
    s**2   := L**-1 sum_n (x[n] - mu) Conj(x[n] - mu)
    ```

    The error in this estimate is proportional to `1 / sqrt(len(x) - m)`, so
    users often set `max_lags` small enough so that the entire output is
    meaningful.

    Note that since `mu` is an imperfect estimate of `E{ X[0] }`, and we
    divide by `len(x) - m` rather than `len(x) - m - 1`, our estimate of auto
    correlation contains a slight bias, which goes to zero as
    `len(x) - m --> infinity`.

    Args:
      x:  Floating-point or complex `Tensor` (e.g. `float32`, `complex64`).
      axis:  Python `int`. The axis number along which to compute correlation.
        Other dimensions index different batch members.
      max_lags:  Positive `int` tensor.  The maximum value of `m` to consider
        (in equation above).  If `max_lags >= x.shape[axis]`, we effectively
        re-set `max_lags` to `x.shape[axis] - 1`.  `None` (the default) means
        `x.shape[axis] - 1`.
      center:  Python `bool`.  If `False`, do not subtract the mean estimate
        `mu` from `x[n]` when forming `w[n]`.
      normalize:  Python `bool`.  If `False`, do not divide by the variance
        estimate `s**2` when forming `w[n]`.
      name:  `String` name to prepend to created ops.

    Returns:
      `rxx`: `Tensor` of same `dtype` as `x`.  `rxx.shape[i] = x.shape[i]` for
        `i != axis`, and `rxx.shape[axis] = max_lags + 1`.

    Raises:
      TypeError:  If `x` has neither a floating-point nor a complex dtype.
        This is checked before any computation on `x` is set up.
    """
    # Implementation details:
    # Extend length N / 2 1-D array x to length N by zero padding onto the end.
    # Then, set
    #   F[x]_k := sum_n x_n exp{-i 2 pi k n / N }.
    # It is not hard to see that
    #   F[x]_k Conj(F[x]_k) = F[R]_k, where
    #   R_m := sum_n x_n Conj(x_{(n - m) mod N}).
    # One can also check that R_m / (N / 2 - m) is an unbiased estimate of RXX[m].

    # Since F[x] is the DFT of x, this leads us to a zero-padding and FFT/IFFT
    # based version of estimating RXX.
    # Note that this is a special case of the Wiener-Khinchin Theorem.
    with tf.name_scope(name):
        x = tf.convert_to_tensor(x, name='x')

        # Validate the dtype before touching the data. Centering and padding
        # below would otherwise run first on unsupported dtypes and could fail
        # with an unrelated error instead of the documented TypeError.
        dtype = x.dtype
        is_complex = dtype_util.is_complex(dtype)
        if not is_complex and not dtype_util.is_floating(dtype):
            raise TypeError(
                'Argument x must have either float or complex dtype'
                ' found: {}'.format(dtype))
        real_dtype = dtype_util.real_dtype(dtype)

        # Rotate dimensions of x in order to put axis at the rightmost dim.
        # FFT op requires this.
        rank = prefer_static.rank(x)
        if axis < 0:
            axis = rank + axis
        shift = rank - 1 - axis
        # Suppose x.shape[axis] = T, so there are T 'time' steps.
        #   ==> x_rotated.shape = B + [T],
        # where B is x_rotated's batch shape.
        x_rotated = distribution_util.rotate_transpose(x, shift)

        if center:
            x_rotated -= tf.reduce_mean(x_rotated, axis=-1, keepdims=True)

        # x_len = N / 2 from above explanation.  The length of x along axis.
        # Get a value for x_len that works in all cases.
        x_len = prefer_static.shape(x_rotated)[-1]

        # TODO(langmore) Investigate whether this zero padding helps or hurts.  At
        # the moment is necessary so that all FFT implementations work.
        # Zero pad to the next power of 2 greater than 2 * x_len, which equals
        # 2**(ceil(Log_2(2 * x_len))).  Note: Log_2(X) = Log_e(X) / Log_e(2).
        x_len_float64 = tf.cast(x_len, np.float64)
        target_length = tf.pow(
            np.float64(2.),
            tf.math.ceil(tf.math.log(x_len_float64 * 2) / np.log(2.)))
        pad_length = tf.cast(target_length - x_len_float64, np.int32)

        # We should have:
        # x_rotated_pad.shape = x_rotated.shape[:-1] + [T + pad_length]
        #                     = B + [T + pad_length]
        x_rotated_pad = distribution_util.pad(x_rotated,
                                              axis=-1,
                                              back=True,
                                              count=pad_length)

        # The FFT below is applied to complex input, so promote real input by
        # attaching a zero imaginary part.
        if not is_complex:
            x_rotated_pad = tf.complex(
                x_rotated_pad,
                dtype_util.as_numpy_dtype(real_dtype)(0.))

        # Autocorrelation is IFFT of power-spectral density (up to some scaling).
        fft_x_rotated_pad = tf.signal.fft(x_rotated_pad)
        spectral_density = fft_x_rotated_pad * tf.math.conj(fft_x_rotated_pad)
        # shifted_product is R[m] from above detailed explanation.
        # It is the inner product sum_n X[n] * Conj(X[n - m]).
        shifted_product = tf.signal.ifft(spectral_density)

        # Cast back to real-valued if x was real to begin with.
        shifted_product = tf.cast(shifted_product, dtype)

        # Figure out if we can deduce the final static shape, and set max_lags.
        # Use x_rotated as a reference, because it has the time dimension in the far
        # right, and was created before we performed all sorts of crazy shape
        # manipulations.
        know_static_shape = True
        if not tensorshape_util.is_fully_defined(x_rotated.shape):
            know_static_shape = False
        if max_lags is None:
            max_lags = x_len - 1
        else:
            max_lags = tf.convert_to_tensor(max_lags, name='max_lags')
            max_lags_ = tf.get_static_value(max_lags)
            if max_lags_ is None or not know_static_shape:
                know_static_shape = False
                max_lags = tf.minimum(x_len - 1, max_lags)
            else:
                max_lags = min(x_len - 1, max_lags_)

        # Chop off the padding.
        # We allow users to provide a huge max_lags, but cut it off here.
        # shifted_product_chopped.shape = x_rotated.shape[:-1] + [max_lags + 1]
        shifted_product_chopped = shifted_product[..., :max_lags + 1]

        # If possible, set shape.
        if know_static_shape:
            chopped_shape = tensorshape_util.as_list(x_rotated.shape)
            chopped_shape[-1] = min(x_len, max_lags + 1)
            shifted_product_chopped.set_shape(chopped_shape)

        # Recall R[m] is a sum of N / 2 - m nonzero terms x[n] Conj(x[n - m]).  The
        # other terms were zeros arising only due to zero padding.
        # `denominator = (N / 2 - m)` (defined below) is the proper term to
        # divide by to make this an unbiased estimate of the expectation
        # E[X[n] Conj(X[n - m])].
        x_len = tf.cast(x_len, real_dtype)
        max_lags = tf.cast(max_lags, real_dtype)
        denominator = x_len - tf.range(0., max_lags + 1.)
        denominator = tf.cast(denominator, dtype)
        shifted_product_rotated = shifted_product_chopped / denominator

        # Normalizing divides every lag by the lag-0 value, so rxx[0] == 1.
        if normalize:
            shifted_product_rotated /= shifted_product_rotated[..., :1]

        # Transpose dimensions back to those of x.
        return distribution_util.rotate_transpose(shifted_product_rotated,
                                                  -shift)
Ejemplo n.º 6
0
def box_matching(boxes, gt_boxes, gt_classes):
    """Match boxes to groundtruth boxes.

    Given the proposal boxes and the groundtruth boxes and classes, perform
    the groundtruth matching by taking the argmax of the IoU between boxes and
    groundtruth boxes.

    Args:
      boxes: a tensor of shape of [batch_size, N, 4] representing the box
        coordinates to be matched to groundtruth boxes.
      gt_boxes: a tensor of shape of [batch_size, MAX_INSTANCES, 4]
        representing the groundtruth box coordinates. It is padded with -1s to
        indicate the invalid boxes. Any dtype accepted by
        `box_utils.bbox_overlap` may be used; the matched boxes are returned
        in the same dtype.
      gt_classes: [batch_size, MAX_INSTANCES] representing the groundtruth box
        classes. It is padded with -1s to indicate the invalid classes.

    Returns:
      matched_gt_boxes: a tensor of shape of [batch_size, N, 4], representing
        the matched groundtruth box coordinates for each input box. If the box
        does not overlap with any groundtruth boxes, the matched boxes of it
        will be set to all 0s.
      matched_gt_classes: a tensor of shape of [batch_size, N], representing
        the matched groundtruth classes for each input box. If the box does
        not overlap with any groundtruth boxes, the matched box classes of it
        will be set to 0, which corresponds to the background class.
      matched_gt_indices: a tensor of shape of [batch_size, N], representing
        the indices of the matched groundtruth boxes in the original gt_boxes
        tensor. If the box does not overlap with any groundtruth boxes, the
        index of the matched groundtruth will be set to -1.
      matched_iou: a tensor of shape of [batch_size, N], representing the IoU
        between the box and its matched groundtruth box. The matched IoU is
        the maximum IoU of the box and all the groundtruth boxes.
      iou: a tensor of shape of [batch_size, N, K], representing the IoU
        matrix between boxes and the groundtruth boxes. The IoU between a box
        and the invalid groundtruth boxes whose coordinates are
        [-1, -1, -1, -1] is -1.
    """
    # Compute IoU between boxes and gt_boxes.
    # iou <- [batch_size, N, K]
    iou = box_utils.bbox_overlap(boxes, gt_boxes)

    # matched_iou <- [batch_size, N]
    # A best IoU of 0.0 means the box overlaps no groundtruth box; -1.0 means
    # every groundtruth box it was compared with is padding (see `iou` above).
    matched_iou = tf.reduce_max(iou, axis=-1)

    # background_box_mask <- bool, [batch_size, N]
    # True for both cases above, i.e. whenever there is no positive overlap.
    background_box_mask = tf.less_equal(matched_iou, 0.0)

    argmax_iou_indices = tf.argmax(iou, axis=-1, output_type=tf.int32)

    # Pair every argmax with its batch index so gather_nd can pick
    # gt[b, argmax[b, n]] for all (b, n).
    argmax_iou_indices_shape = tf.shape(argmax_iou_indices)
    batch_indices = (
        tf.expand_dims(tf.range(argmax_iou_indices_shape[0]), axis=-1) *
        tf.ones([1, argmax_iou_indices_shape[-1]], dtype=tf.int32))
    gather_nd_indices = tf.stack([batch_indices, argmax_iou_indices], axis=-1)

    matched_gt_boxes = tf.gather_nd(gt_boxes, gather_nd_indices)
    # The zeros take the dtype of `gt_boxes` rather than a hard-coded float32:
    # tf.where needs both branches to share a dtype, so a fixed float32 would
    # fail for e.g. bfloat16 or float64 groundtruth boxes.
    matched_gt_boxes = tf.where(
        tf.tile(tf.expand_dims(background_box_mask, axis=-1), [1, 1, 4]),
        tf.zeros_like(matched_gt_boxes), matched_gt_boxes)

    matched_gt_classes = tf.gather_nd(gt_classes, gather_nd_indices)
    matched_gt_classes = tf.where(background_box_mask,
                                  tf.zeros_like(matched_gt_classes),
                                  matched_gt_classes)

    matched_gt_indices = tf.where(background_box_mask,
                                  -tf.ones_like(argmax_iou_indices),
                                  argmax_iou_indices)

    return (matched_gt_boxes, matched_gt_classes, matched_gt_indices,
            matched_iou, iou)
Ejemplo n.º 7
0
 def target_log_prob(x):
     """Returns the log-gamma log-prob of `x` over its event dimensions."""
     # Every axis after the first `independent_chain_ndims` is an event axis.
     return self._log_gamma_log_prob(
         x, tf.range(independent_chain_ndims, tf.rank(x)))
Ejemplo n.º 8
0
  def __init__(self,
               avg_length=200,
               midi_std=0.5,
               amps_on_center=1.5,
               amps_on_scale=0.5,
               amps_off_center=0.0,
               amps_off_scale=0.1,
               n_timesteps=1000,
               n_pitches=128,
               weight=1.0,
               **kwargs):
    """Builds an HMM with one hidden state per midi pitch.

    Each state emits a 2-D observation (f0 in midi, amplitude) from a diagonal
    multivariate normal. State 0 uses the "off" amplitude parameters together
    with a broad pitch distribution (loc `n_pitches / 2`, scale `n_pitches`);
    states `1 .. n_pitches - 1` are centered on their own pitch index and use
    the "on" amplitude parameters.

    Args:
      avg_length: Prior over average note length between transitions. The
        probability of staying in a state is derived from `1 - 1/avg_length`.
      midi_std: Scale of f0 (in midi) around each discrete pitch state.
      amps_on_center: Center amplitude of the "on" states.
      amps_on_scale: Amplitude scale of the "on" states.
      amps_off_center: Center amplitude of the "off" state.
      amps_off_scale: Amplitude scale of the "off" state.
      n_timesteps: Number of timesteps in the batch to unroll the HMM.
      n_pitches: Number of pitches (starting from 0) to use as HMM states.
      weight: Weighting of the nll loss term.
      **kwargs: Other kwargs for the distribution such as name.
    """
    tfd = tfp.distributions

    # Every state is equally likely at the first timestep.
    uniform_probs = tf.ones([n_pitches]) / n_pitches
    initial_distribution = tfd.Categorical(probs=uniform_probs)

    # Transition matrix: `stay_prob` on the diagonal, `switch_prob` everywhere
    # else; rows are then normalized to sum to one.
    stay_prob = 1.0 - 1.0/avg_length
    switch_prob = (1.0 - stay_prob) / (n_pitches - 1)
    diagonal_part = (stay_prob - switch_prob) * tf.eye(n_pitches)
    uniform_part = switch_prob * tf.ones([n_pitches, n_pitches])
    transitions = diagonal_part + uniform_part
    transitions = transitions / tf.reduce_sum(
        transitions, axis=1, keepdims=True)
    transition_distribution = tfd.Categorical(probs=transitions)

    # Pitch component of the observations: normally distributed around the
    # MIDI pitch of the state, with a broad entry prepended for state 0.
    on_pitch_loc = tf.range(1, n_pitches, dtype=tf.float32)
    on_pitch_scale = tf.ones([n_pitches - 1]) * midi_std
    pitch_loc = tf.concat(
        [tf.ones([1]) * n_pitches / 2.0, on_pitch_loc], axis=0)
    pitch_scale = tf.concat(
        [tf.ones([1]) * n_pitches, on_pitch_scale], axis=0)

    # Amplitude component: "off" parameters for state 0, "on" for the rest.
    amps_loc = tf.concat(
        [tf.ones([1]) * amps_off_center,
         tf.ones(n_pitches - 1) * amps_on_center],
        axis=0)
    amps_scale = tf.concat(
        [tf.ones([1]) * amps_off_scale,
         tf.ones(n_pitches - 1) * amps_on_scale],
        axis=0)

    # Stack into per-state (pitch, amplitude) parameters.
    observation_distribution = tfd.MultivariateNormalDiag(
        loc=tf.stack([pitch_loc, amps_loc], axis=-1),
        scale_diag=tf.stack([pitch_scale, amps_scale], axis=-1))

    super().__init__(
        initial_distribution=initial_distribution,
        transition_distribution=transition_distribution,
        observation_distribution=observation_distribution,
        num_steps=n_timesteps,
        **kwargs
    )

    # Keep the component distributions and hyperparameters on the instance.
    self.initial_distribution = initial_distribution
    self.transition_distribution = transition_distribution
    self.observation_distribution = observation_distribution
    self.avg_length = avg_length
    self.midi_std = midi_std
    self.n_timesteps = n_timesteps
    self.n_pitches = n_pitches
    self.weight = weight
Ejemplo n.º 9
0
def sample_and_preprocess(video,
                          labels,
                          seq_label,
                          seq_len,
                          name,
                          num_steps,
                          augment,
                          sample_all=False,
                          sample_all_stride=1,
                          add_shape=False):
    """Selects frame indices for one video and builds the model-ready tensors.

    Two sampling modes exist. With `sample_all=True` every
    `sample_all_stride`-th frame index in `[0, seq_len)` is used and `seq_len`
    is replaced by the number of indices kept. Otherwise `num_steps` indices
    are drawn according to `CONFIG.DATA.SAMPLING_STRATEGY` (`'stride'` or
    `'offset_uniform'`), optionally paired with a random earlier frame when
    `'tcn'` is in `CONFIG.TRAINING_ALGO`, then expanded through `get_steps`
    and clamped to `[0, seq_len - 1]`.

    Args:
      video: indexable along its first axis by frame; the gathered entries are
        passed to `tf.image.decode_jpeg`.
      labels: per-frame labels; gathered at the sampled indices only when
        `CONFIG.DATA.FRAME_LABELS` is truthy, otherwise returned untouched.
      seq_label: returned unchanged under `'seq_labels'`.
      seq_len: number of frames in the video.
      name: returned unchanged under `'name'`.
      num_steps: number of frames to sample when `sample_all` is False.
      augment: forwarded to `preprocess_input`.
      sample_all: use the strided whole-video mode described above.
      sample_all_stride: stride used by the `sample_all` mode.
      add_shape: when True, static shapes are set on the frames (and on the
        gathered labels).

    Returns:
      A dict with keys `'frames'`, `'frame_labels'`, `'chosen_steps'`,
      `'seq_lens'`, `'seq_labels'` and `'name'`.

    Raises:
      ValueError: if `CONFIG.DATA.SAMPLING_STRATEGY` is neither `'stride'`
        nor `'offset_uniform'` (only checked when `sample_all` is False).
    """
    if sample_all:
        # Sub-sampling keeps very long videos within memory, but any
        # evaluation then only covers the retained frames. Doing this on
        # validation videos when reporting performance is not recommended.
        steps = tf.range(0, seq_len, sample_all_stride)
        seq_len = tf.shape(steps)[0]
        chosen_steps = steps
        pair_with_positives = False
    else:
        stride = CONFIG.DATA.STRIDE
        sampling_strategy = CONFIG.DATA.SAMPLING_STRATEGY

        # TODO(debidatta) : More flexible sampling
        if sampling_strategy not in ('stride', 'offset_uniform'):
            raise ValueError(
                'Sampling strategy %s is unknown. Supported values are '
                'stride, offset_uniform .' % sampling_strategy)

        if sampling_strategy == 'stride':
            # The start may be anywhere from 0 up to the last position that
            # still covers the video without padding (long sequences).
            max_offset = tf.maximum(
                tf.cast(1, tf.int64), seq_len - stride * num_steps)
            start = tf.random.uniform(
                shape=(), minval=0, maxval=max_offset, dtype=tf.int64)
            # Short sequences are handled by repeating the last frame. This is
            # not ideal for the way alignment training batches are created.
            last_frame = seq_len - 1
            strided = tf.range(start, start + num_steps * stride + 1, stride)
            steps = tf.minimum(last_frame, strided)[:num_steps]
        else:
            # 'offset_uniform': among the frames at or after a configured
            # offset, pick `num_steps` at random and return them sorted.
            length_ok = tf.debugging.assert_greater_equal(
                seq_len,
                tf.cast(CONFIG.DATA.RANDOM_OFFSET, tf.int64),
                message='Random offset is more than sequence length.')
            wanted = tf.cast(num_steps, tf.int64)
            available = seq_len - tf.cast(CONFIG.DATA.RANDOM_OFFSET, tf.int64)
            enough_frames = tf.less_equal(wanted, available)

            def _random_subset():
                # Make the sampling ops depend on the assertion's output.
                with tf.control_dependencies(
                        [tf.identity(length_ok.outputs[0])]):
                    first = CONFIG.DATA.RANDOM_OFFSET
                    shuffled = tf.random.shuffle(tf.range(first, seq_len))
                    picked = tf.gather(shuffled, tf.range(0, num_steps))
                    # top_k yields descending order; reverse it to ascending.
                    ascending = tf.nn.top_k(picked, k=num_steps).indices[::-1]
                    return tf.gather(picked, ascending)

            def _leading_frames():
                # Fallback when too few frames follow the offset.
                return tf.range(0, num_steps, dtype=tf.int64)

            steps = tf.cond(enough_frames, _random_subset, _leading_frames)

        pair_with_positives = 'tcn' in CONFIG.TRAINING_ALGO
        if pair_with_positives:
            pos_window = CONFIG.TCN.POSITIVE_WINDOW

            def _draw_positive(step):
                # A random index in [step - pos_window, step).
                return tf.random.uniform(
                    (), minval=step - pos_window, maxval=step, dtype=tf.int64)

            positives = tf.map_fn(_draw_positive, steps)
            paired = tf.stack([positives, steps])
            # Interleave as [pos_0, step_0, pos_1, step_1, ...].
            steps = tf.reshape(tf.transpose(paired), (-1, ))

        # Remember the indices chosen before context expansion.
        chosen_steps = steps
        # Expand each chosen index via `get_steps`, then clamp into the video.
        steps = tf.reshape(tf.map_fn(get_steps, steps), [-1])
        steps = tf.maximum(tf.cast(0, tf.int64), steps)
        steps = tf.minimum(seq_len - 1, steps)

    shape_all_steps = CONFIG.DATA.NUM_STEPS * num_steps
    if pair_with_positives:
        shape_all_steps *= 2

    # Pick the frames at the sampled indices.
    frames = tf.gather(video, steps)
    # Decode the JPEG-encoded frames.
    frames = tf.map_fn(tf.image.decode_jpeg,
                       frames,
                       parallel_iterations=FLAGS.num_parallel_calls,
                       dtype=tf.uint8)
    # Take images in range [0, 255] and normalize to [0, 1].
    frames = tf.map_fn(normalize_input,
                       frames,
                       parallel_iterations=FLAGS.num_parallel_calls,
                       dtype=tf.float32)
    # Perform data-augmentation and return images in range [-1, 1].
    frames = preprocess_input(frames, augment)
    if add_shape:
        frames.set_shape(
            [shape_all_steps, CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE, 3])

    if CONFIG.DATA.FRAME_LABELS:
        labels = tf.gather(labels, steps)
        if add_shape:
            labels.set_shape([shape_all_steps])

    return {
        'frames': frames,
        'frame_labels': labels,
        'chosen_steps': chosen_steps,
        'seq_lens': seq_len,
        'seq_labels': seq_label,
        'name': name
    }
Ejemplo n.º 10
0
    def _sample_n(self, n, seed):
        """Draws `n` samples from the distribution.

        A lower-triangular matrix is filled with standard-normal draws below
        the diagonal and square roots of Gamma draws on the diagonal, then
        left-multiplied by `self.scale_operator`. Unless
        `self.input_output_cholesky` is set, each resulting factor `x` is
        returned as `x @ adjoint(x)`.

        Args:
          n: number of samples to draw.
          seed: seed handed to `seed_stream.SeedStream` (salt "Wishart").

        Returns:
          A tensor laid out as `[n] + batch_shape + event_shape`.
        """
        batch_dims = self.batch_shape_tensor()
        event_dims = self.event_shape_tensor()
        num_batch_axes = tf.shape(input=batch_dims)[0]

        # One sample axis plus two event axes on top of the batch axes.
        rank = num_batch_axes + 3
        draw_shape = tf.concat([[n], batch_dims, event_dims], 0)
        seeds = seed_stream.SeedStream(seed, salt="Wishart")

        # Complexity: O(nbk**2)
        normals = tf.random.normal(shape=draw_shape,
                                   mean=0.,
                                   stddev=1.,
                                   dtype=self.dtype,
                                   seed=seeds())

        # Complexity: O(nbk)
        # This parametrization is equivalent to Chi2, i.e.,
        # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
        df_per_batch = self.df * tf.ones(
            self.scale_operator.batch_shape_tensor(),
            dtype=dtype_util.base_dtype(self.df.dtype))
        gamma_alpha = self._multi_gamma_sequence(0.5 * df_per_batch,
                                                 self.dimension)
        diag_squared = tf.random.gamma(shape=[n],
                                       alpha=gamma_alpha,
                                       beta=0.5,
                                       dtype=self.dtype,
                                       seed=seeds())

        # Complexity: O(nbk**2)
        factor = tf.linalg.band_part(normals, -1, 0)  # Keep lower triangle.

        # Complexity: O(nbk)
        factor = tf.linalg.set_diag(factor, tf.sqrt(diag_squared))

        # Move the sample axis to the back and fold it into the last event
        # axis so the operator can be applied in one batched matmul.
        # Complexity: O(nbk**2)
        sample_axis_to_back = tf.concat([tf.range(1, rank), [0]], 0)
        factor = tf.transpose(a=factor, perm=sample_axis_to_back)
        folded_shape = tf.concat(
            [batch_dims, [event_dims[0]], [event_dims[1] * n]], 0)
        factor = tf.reshape(factor, folded_shape)

        # Complexity: O(nbM) where M is the complexity of the operator solving
        # a vector system. For LinearOperatorLowerTriangular, each matmul is
        # O(k^3) so this step has complexity O(nbk^3).
        factor = self.scale_operator.matmul(factor)

        # Unfold the sample axis again and bring it back to the front.
        # Complexity: O(nbk**2)
        unfolded_shape = tf.concat([batch_dims, event_dims, [n]], 0)
        factor = tf.reshape(factor, unfolded_shape)
        sample_axis_to_front = tf.concat(
            [[rank - 1], tf.range(0, rank - 1)], 0)
        factor = tf.transpose(a=factor, perm=sample_axis_to_front)

        if self.input_output_cholesky:
            return factor

        # Complexity: O(nbk**3)
        return tf.matmul(factor, factor, adjoint_b=True)
Ejemplo n.º 11
0
    def _log_prob(self, x):
        """Computes the log-density of `x` under this distribution.

        When `self.input_output_cholesky` is True, `x` is used directly as the
        Cholesky factor; otherwise `tf.linalg.cholesky(x)` is taken first.

        Args:
          x: tensor whose trailing two axes hold the event matrix.
            NOTE(review): presumably `[..., k, k]` with `k = self.dimension`
            - confirm against callers.

        Returns:
          `(df - dimension - 1) * half_log_det_x - 0.5 * trace_scale_inv_x
          - self.log_normalization()`, with the two trailing event axes
          reduced away.
        """
        if self.input_output_cholesky:
            x_sqrt = x
        else:
            # Complexity: O(nbk**3)
            x_sqrt = tf.linalg.cholesky(x)

        batch_shape = self.batch_shape_tensor()
        event_shape = self.event_shape_tensor()
        x_ndims = tf.rank(input=x_sqrt)
        # If `x` has fewer axes than batch + event, left-pad its shape with
        # ones so the sample/batch/event split below is well defined.
        num_singleton_axes_to_prepend = (
            tf.maximum(tf.size(input=batch_shape) + 2, x_ndims) - x_ndims)
        x_with_prepended_singletons_shape = tf.concat([
            tf.ones([num_singleton_axes_to_prepend], dtype=tf.int32),
            tf.shape(input=x_sqrt)
        ], 0)
        x_sqrt = tf.reshape(x_sqrt, x_with_prepended_singletons_shape)
        ndims = tf.rank(x_sqrt)
        # sample_ndims = ndims - batch_ndims - event_ndims
        sample_ndims = ndims - tf.size(input=batch_shape) - 2
        sample_shape = tf.shape(input=x_sqrt)[:sample_ndims]

        # We need to be able to pre-multiply each matrix by its corresponding
        # batch scale matrix. Since a Distribution Tensor supports multiple
        # samples per batch, this means we need to reshape the input matrix `x`
        # so that the first b dimensions are batch dimensions and the last two
        # are of shape [dimension, dimensions*number_of_samples]. Doing these
        # gymnastics allows us to do a batch_solve.
        #
        # After we're done with sqrt_solve (the batch operation) we need to undo
        # this reshaping so what we're left with is a Tensor partitionable by
        # sample, batch, event dimensions.

        # Complexity: O(nbk**2) since transpose must access every element.
        scale_sqrt_inv_x_sqrt = x_sqrt
        # Rotate the leading sample axes to the back: [batch, event, sample].
        perm = tf.concat(
            [tf.range(sample_ndims, ndims),
             tf.range(0, sample_ndims)], 0)
        scale_sqrt_inv_x_sqrt = tf.transpose(a=scale_sqrt_inv_x_sqrt,
                                             perm=perm)
        # Fold all sample axes into the last event axis.
        last_dim_size = (
            tf.cast(self.dimension, dtype=tf.int32) * tf.reduce_prod(
                input_tensor=x_with_prepended_singletons_shape[:sample_ndims]))
        shape = tf.concat([
            x_with_prepended_singletons_shape[sample_ndims:-2],
            [tf.cast(self.dimension, dtype=tf.int32), last_dim_size]
        ],
                          axis=0)
        scale_sqrt_inv_x_sqrt = tf.reshape(scale_sqrt_inv_x_sqrt, shape)

        # Complexity: O(nbM*k) where M is the complexity of the operator solving a
        # vector system. For LinearOperatorLowerTriangular, each solve is O(k**2) so
        # this step has complexity O(nbk^3).
        scale_sqrt_inv_x_sqrt = self.scale_operator.solve(
            scale_sqrt_inv_x_sqrt)

        # Undo make batch-op ready.
        # Complexity: O(nbk**2)
        shape = tf.concat([
            tf.shape(input=scale_sqrt_inv_x_sqrt)[:-2], event_shape,
            sample_shape
        ],
                          axis=0)
        scale_sqrt_inv_x_sqrt = tf.reshape(scale_sqrt_inv_x_sqrt, shape)
        # Rotate the trailing sample axes back to the front.
        perm = tf.concat([
            tf.range(ndims - sample_ndims, ndims),
            tf.range(0, ndims - sample_ndims)
        ], 0)
        scale_sqrt_inv_x_sqrt = tf.transpose(a=scale_sqrt_inv_x_sqrt,
                                             perm=perm)

        # Write V = SS', X = LL'. Then:
        # tr[inv(V) X] = tr[inv(S)' inv(S) L L']
        #              = tr[inv(S) L L' inv(S)']
        #              = tr[(inv(S) L) (inv(S) L)']
        #              = sum_{ik} (inv(S) L)_{ik}**2
        # The second equality follows from the cyclic permutation property.
        # Complexity: O(nbk**2)
        trace_scale_inv_x = tf.reduce_sum(
            input_tensor=tf.square(scale_sqrt_inv_x_sqrt), axis=[-2, -1])

        # Sum of the logs of the diagonal of the Cholesky factor `x_sqrt`.
        # Complexity: O(nbk)
        half_log_det_x = tf.reduce_sum(input_tensor=tf.math.log(
            tf.linalg.diag_part(x_sqrt)),
                                       axis=[-1])

        # Complexity: O(nbk**2)
        log_prob = ((self.df - self.dimension - 1.) * half_log_det_x -
                    0.5 * trace_scale_inv_x - self.log_normalization())

        # Set shape hints.
        # Try to merge what we know from the input x with what we know from the
        # parameters of this distribution.
        if tensorshape_util.rank(
                x.shape) is not None and tensorshape_util.rank(
                    self.batch_shape) is not None:
            log_prob.set_shape(
                tf.broadcast_static_shape(x.shape[:-2], self.batch_shape))

        return log_prob
Ejemplo n.º 12
0
  def bootstrap_results(self, init_state):
    """Returns an object with the same type as returned by `one_step`.

    The initial state is replicated once per inverse temperature (unless the
    kernel was configured so that the state already includes the replica
    axis), the inner kernel is built and bootstrapped on the replicated
    states, and the results are packaged as if a "null swap" had just been
    accepted.

    Args:
      init_state: `Tensor` or Python `list` of `Tensor`s representing the
        initial state(s) of the Markov chain(s).

    Returns:
      kernel_results: A (possibly nested) `tuple`, `namedtuple` or `list` of
        `Tensor`s representing internal calculations made within this function.
        This includes replica states.

    Raises:
      ValueError: if the state already includes replicas and the statically
        known leading dimension of the first state part differs from the
        statically known number of inverse temperatures.
    """
    with tf.name_scope(mcmc_util.make_name(
        self.name, 'remc', 'bootstrap_results')):
      init_state, unused_is_multipart_state = mcmc_util.prepare_state_parts(
          init_state)

      inverse_temperatures = tf.convert_to_tensor(
          self.inverse_temperatures,
          name='inverse_temperatures')

      if self._state_includes_replicas:
        it_n_replica = inverse_temperatures.shape[0]
        state_n_replica = init_state[0].shape[0]
        if ((it_n_replica is not None) and (state_n_replica is not None) and
            (it_n_replica != state_n_replica)):
          # The first placeholder reports the state's replica count and the
          # second the inverse_temperatures' count, matching the message text.
          raise ValueError(
              'Number of replicas implied by initial state ({}) must equal '
              'number of replicas implied by inverse_temperatures ({}), but '
              'did not'.format(state_n_replica, it_n_replica))

      # We will now replicate each of a possible batch of initial states, one
      # for each inverse_temperature. So if init_state=[x, y] of shapes
      # [Sx, Sy] then the new shape is [(T, Sx), (T, Sy)] where (a, b) means
      # concatenation and T=shape(inverse_temperature).
      num_replica = prefer_static.size0(inverse_temperatures)
      replica_shape = tf.convert_to_tensor([num_replica])

      if self._state_includes_replicas:
        replica_states = init_state
      else:
        replica_states = [
            tf.broadcast_to(  # pylint: disable=g-complex-comprehension
                x,
                prefer_static.concat([replica_shape, prefer_static.shape(x)],
                                     axis=0),
                name='replica_states')
            for x in init_state
        ]

      target_log_prob_for_inner_kernel = _make_replica_target_log_prob_fn(
          self.target_log_prob_fn,
          inverse_temperatures)
      # Seed handling complexity is due to users possibly expecting an old-style
      # stateful seed to be passed to `self.make_kernel_fn`.
      # In other words:
      # - We try `make_kernel_fn` without a seed first; this is the future. The
      #   kernel will receive a seed later, as part of `one_step`.
      # - If the user code doesn't like that (Python complains about a missing
      #   required argument), we fall back to the previous behavior and warn.
      try:
        inner_kernel = self.make_kernel_fn(  # pylint: disable=not-callable
            target_log_prob_for_inner_kernel)
      except TypeError as e:
        # Only a "missing argument"-style TypeError triggers the fallback;
        # any other TypeError is propagated unchanged.
        if 'argument' not in str(e):
          raise
        warnings.warn(
            'The second (`seed`) argument to `ReplicaExchangeMC`s '
            '`make_kernel_fn` is deprecated. `TransitionKernel` instances now '
            'receive seeds via `bootstrap_results` and `one_step`. This '
            'fallback may become an error 2020-09-20.')
        inner_kernel = self.make_kernel_fn(  # pylint: disable=not-callable
            target_log_prob_for_inner_kernel, self._seed_stream())

      replica_results = inner_kernel.bootstrap_results(replica_states)

      pre_swap_replica_target_log_prob = _get_field(
          replica_results, 'target_log_prob')

      replica_and_batch_shape = prefer_static.shape(
          pre_swap_replica_target_log_prob)
      batch_shape = replica_and_batch_shape[1:]

      inverse_temperatures = mcmc_util.left_justified_broadcast_to(
          inverse_temperatures, replica_and_batch_shape)

      # Pretend we did a "null swap", which will always be accepted.
      swaps = mcmc_util.left_justified_broadcast_to(
          tf.range(num_replica), replica_and_batch_shape)
      # is_swap_accepted.shape = [n_replica, n_replica] + batch_shape.
      is_swap_accepted = distribution_util.rotate_transpose(
          tf.eye(num_replica, batch_shape=batch_shape, dtype=tf.bool),
          shift=2)

      # With a null swap the "swapped" temperatures equal the originals and
      # the per-tensor swap function is the identity.
      post_swap_replica_results = _make_post_swap_replica_results(
          replica_results,
          inverse_temperatures,
          inverse_temperatures,
          is_swap_accepted[0],
          lambda x: x,
      )

      return ReplicaExchangeMCKernelResults(
          post_swap_replica_states=replica_states,
          pre_swap_replica_results=replica_results,
          post_swap_replica_results=post_swap_replica_results,
          is_swap_proposed=is_swap_accepted,
          is_swap_accepted=is_swap_accepted,
          is_swap_proposed_adjacent=_sub_diag(is_swap_accepted),
          is_swap_accepted_adjacent=_sub_diag(is_swap_accepted),
          inverse_temperatures=self.inverse_temperatures,
          swaps=swaps,
          step_count=tf.zeros(shape=(), dtype=tf.int32),
          seed=samplers.zeros_seed(),
      )
Ejemplo n.º 13
0
  def one_step(self, current_state, previous_kernel_results, seed=None):
    """Takes one step of the TransitionKernel.

    Args:
      current_state: `Tensor` or Python `list` of `Tensor`s representing the
        current state(s) of the Markov chain(s).
      previous_kernel_results: A (possibly nested) `tuple`, `namedtuple` or
        `list` of `Tensor`s representing internal calculations made within the
        previous call to this function (or as returned by `bootstrap_results`).
      seed: Optional, a seed for reproducible sampling.

    Returns:
      next_state: `Tensor` or Python `list` of `Tensor`s representing the
        next state(s) of the Markov chain(s).
      kernel_results: A (possibly nested) `tuple`, `namedtuple` or `list` of
        `Tensor`s representing internal calculations made within this function.
        This includes replica states.
    """

    # The code below propagates one step states of shape
    #  [n_replica] + batch_shape + event_shape.
    #
    # The step is done in three parts:
    #  1) Call one_step to transition states via a tempered version of
    #     self.target_log_prob_fn (see _replica_target_log_prob).
    #  2) Permute values in states
    #  3) Update state-dependent values, such as log_probs.
    #
    # We chose to swap states, rather than temperatures, because...
    # (i)  If swapping temperatures, you *still* have to swap log_probs to
    #      determine acceptance, as well as states (for kernel results).
    #      So it's just as difficult to swap temperatures.
    # (ii) If swapping temperatures, you have to take care to swap any user-
    #      supplied temperature related things (like step size).
    #      A-priori, we don't know what else will need to be swapped!
    # (iii)In both cases, the kernel results need to be updated in a non-trivial
    #      manner....so we either special-case, or use bootstrap.

    with tf.name_scope(mcmc_util.make_name(self.name, 'remc', 'one_step')):
      # Force a read in case the `inverse_temperatures` is a `tf.Variable`.
      inverse_temperatures = tf.convert_to_tensor(
          previous_kernel_results.inverse_temperatures,
          name='inverse_temperatures')

      target_log_prob_for_inner_kernel = _make_replica_target_log_prob_fn(
          self.target_log_prob_fn,
          inverse_temperatures)
      # Seed handling complexity is due to users possibly expecting an old-style
      # stateful seed to be passed to `self.make_kernel_fn`, and no seed
      # expected by `kernel.one_step`.
      # In other words:
      # - We try `make_kernel_fn` without a seed first; this is the future. The
      #   kernel will receive a seed later, as part of `one_step`.
      # - If the user code doesn't like that (Python complains about a missing
      #   required argument), we warn and fall back to the previous behavior.
      try:
        inner_kernel = self.make_kernel_fn(  # pylint: disable=not-callable
            target_log_prob_for_inner_kernel)
      except TypeError as e:
        # Only TypeErrors whose message mentions 'argument' trigger the
        # fallback; anything else is re-raised unchanged.
        if 'argument' not in str(e):
          raise
        warnings.warn(
            'The `seed` argument to `ReplicaExchangeMC`s `make_kernel_fn` is '
            'deprecated. `TransitionKernel` instances now receive seeds via '
            '`one_step`.')
        inner_kernel = self.make_kernel_fn(  # pylint: disable=not-callable
            target_log_prob_for_inner_kernel, self._seed_stream())

      # Now that we've constructed the TransitionKernel instance:
      # - If we were given a seed, we sanitize it to stateless and pass along
      #   to `kernel.one_step`. If it doesn't like that, we crash and propagate
      #   the error.  Rationale: The contract is stateless sampling given
      #   seed, and doing otherwise would not meet it.
      # - If not given a seed, we don't pass one along. This avoids breaking
      #   underlying kernels lacking a `seed` arg on `one_step`.
      # TODO(b/159636942): Clean up after 2020-09-20.
      if seed is not None:
        seed = samplers.sanitize_seed(seed)
        inner_seed, swap_seed, logu_seed = samplers.split_seed(
            seed, n=3, salt='remc_one_step')
        inner_kwargs = dict(seed=inner_seed)
      else:
        if self._seed_stream.original_seed is not None:
          warnings.warn(mcmc_util.SEED_CTOR_ARG_DEPRECATION_MSG)
        inner_kwargs = {}
        # No explicit seed: draw one from the kernel's own seed stream and
        # split it for the swap proposal and the acceptance uniforms.
        swap_seed, logu_seed = samplers.split_seed(self._seed_stream())
      [
          pre_swap_replica_states,
          pre_swap_replica_results,
      ] = inner_kernel.one_step(
          previous_kernel_results.post_swap_replica_states,
          previous_kernel_results.post_swap_replica_results,
          **inner_kwargs)

      pre_swap_replica_target_log_prob = _get_field(
          # These are tempered log probs (have been divided by temperature).
          pre_swap_replica_results, 'target_log_prob')

      dtype = pre_swap_replica_target_log_prob.dtype
      replica_and_batch_shape = prefer_static.shape(
          pre_swap_replica_target_log_prob)
      batch_shape = replica_and_batch_shape[1:]
      replica_and_batch_rank = prefer_static.rank(
          pre_swap_replica_target_log_prob)
      num_replica = prefer_static.size0(inverse_temperatures)

      inverse_temperatures = mcmc_util.left_justified_broadcast_to(
          inverse_temperatures, replica_and_batch_shape)

      # Now that each replica has done one_step, it is time to consider swaps.

      # swap.shape = [n_replica], and is a "once only" permutation, meaning it
      # is achievable by a sequence of pairwise permutations, where each element
      # is moved at most once.
      # E.g. if swaps = [1, 0, 2], we will consider swapping temperatures 0 and
      # 1, keeping 2 fixed.  This exact same swap is considered for *every*
      # batch member.  Of course some batch members may accept and some reject.
      try:
        swaps = tf.cast(
            self.swap_proposal_fn(  # pylint: disable=not-callable
                num_replica,
                batch_shape=batch_shape,
                seed=swap_seed,
                step_count=previous_kernel_results.step_count),
            dtype=tf.int32)
      except TypeError as e:
        # Fall back only when the error is about the `step_count` keyword.
        if 'step_count' not in str(e):
          raise
        warnings.warn(
            'The `swap_proposal_fn` given to ReplicaExchangeMC did not accept '
            'the `step_count` argument. Falling back to omitting the '
            'argument. This fallback will be removed after 24-Oct-2020.')
        swaps = tf.cast(
            self.swap_proposal_fn(  # pylint: disable=not-callable
                num_replica,
                batch_shape=batch_shape,
                seed=swap_seed),
            dtype=tf.int32)

      # The identity permutation, expanded to the same rank as `swaps`.
      null_swaps = mcmc_util.left_justified_expand_dims_like(
          tf.range(num_replica, dtype=swaps.dtype), swaps)
      swaps = _maybe_embed_swaps_validation(swaps, null_swaps,
                                            self.validate_args)

      # Un-temper the log probs.  E.g., for replica k, at point x_k, this is
      # Log[p(x_k)], and *not* Log[p_x(x_k)] = Log[p(x_k)] * beta_k.
      untempered_pre_swap_replica_target_log_prob = (
          pre_swap_replica_target_log_prob / inverse_temperatures)

      # Since `swaps` is its own inverse permutation we automatically know the
      # swap counterpart: range(num_replica). We use this idea to compute the
      # acceptance in a vectorized manner at the cost of wasting roughly half
      # our computation. Although we could use `unique` to solve this problem,
      # we expect the cost of `unique` to be higher than the dozens of wasted
      # arithmetic calculations. Worse, it'd mean we need dynamic sized Tensors
      # (eg, using `tf.where(bool)`) and so we wouldn't be able to XLA compile.

      # Note: diffs would normally be "proposed - current" however energy is
      # flipped since `energy == -log_prob`.
      energy_diff = (
          untempered_pre_swap_replica_target_log_prob -
          mcmc_util.index_remapping_gather(
              untempered_pre_swap_replica_target_log_prob,
              swaps, name='gather_swap_tlp'))
      swapped_inverse_temperatures = mcmc_util.index_remapping_gather(
          inverse_temperatures, swaps, name='gather_swap_temps')
      inverse_temp_diff = swapped_inverse_temperatures - inverse_temperatures

      # If i and j are swapping, log_accept_ratio[i] and log_accept_ratio[j]
      # are equal (both factors of the product flip sign together).
      log_accept_ratio = (
          energy_diff * mcmc_util.left_justified_expand_dims_to(
              inverse_temp_diff, replica_and_batch_rank))

      # Replace any non-finite ratio (NaN or +/-inf) with -inf.
      log_accept_ratio = tf.where(
          tf.math.is_finite(log_accept_ratio),
          log_accept_ratio, tf.constant(-np.inf, dtype=dtype))

      # Produce Log[Uniform] draws that are identical at swapped indices.
      log_uniform = tf.math.log(
          samplers.uniform(shape=replica_and_batch_shape,
                           dtype=dtype,
                           seed=logu_seed))
      # Both members of a swapping pair read the draw at the smaller of
      # their two indices.
      anchor_swaps = tf.minimum(swaps, null_swaps)
      log_uniform = mcmc_util.index_remapping_gather(log_uniform, anchor_swaps)

      is_swap_accepted_mask = tf.less(
          log_uniform,
          log_accept_ratio,
          name='is_swap_accepted_mask')

      def _swap_tensor(x):
        # Where the swap was accepted take the permuted entry, else keep `x`.
        return mcmc_util.choose(
            is_swap_accepted_mask,
            mcmc_util.index_remapping_gather(x, swaps), x)

      post_swap_replica_states = [
          _swap_tensor(s) for s in pre_swap_replica_states]

      expanded_null_swaps = mcmc_util.left_justified_broadcast_to(
          null_swaps, replica_and_batch_shape)
      is_swap_proposed = _compute_swap_notmatrix(
          # Broadcast both so they have shape [num_replica] + batch_shape.
          # This (i) makes them have same shape as is_swap_accepted, and
          # (ii) keeps shape consistent if someday swaps has a batch shape.
          expanded_null_swaps,
          mcmc_util.left_justified_broadcast_to(swaps, replica_and_batch_shape))

      # To get is_swap_accepted in ordered position, we use
      # _compute_swap_notmatrix on current and next replica positions.
      post_swap_replica_position = _swap_tensor(expanded_null_swaps)

      is_swap_accepted = _compute_swap_notmatrix(
          post_swap_replica_position,
          expanded_null_swaps)

      if self._state_includes_replicas:
        post_swap_states = post_swap_replica_states
      else:
        # Return only the slice at replica index 0 of each state part.
        post_swap_states = [s[0] for s in post_swap_replica_states]

      post_swap_replica_results = _make_post_swap_replica_results(
          pre_swap_replica_results,
          inverse_temperatures,
          swapped_inverse_temperatures,
          is_swap_accepted_mask,
          _swap_tensor)

      if mcmc_util.is_list_like(current_state):
        # We *always* canonicalize the states in the kernel results.
        states = post_swap_states
      else:
        states = post_swap_states[0]

      post_swap_kernel_results = ReplicaExchangeMCKernelResults(
          post_swap_replica_states=post_swap_replica_states,
          pre_swap_replica_results=pre_swap_replica_results,
          post_swap_replica_results=post_swap_replica_results,
          is_swap_proposed=is_swap_proposed,
          is_swap_accepted=is_swap_accepted,
          is_swap_proposed_adjacent=_sub_diag(is_swap_proposed),
          is_swap_accepted_adjacent=_sub_diag(is_swap_accepted),
          # Store the original pkr.inverse_temperatures in case its a
          # `tf.Variable`.
          inverse_temperatures=previous_kernel_results.inverse_temperatures,
          swaps=swaps,
          step_count=previous_kernel_results.step_count + 1,
          seed=samplers.zeros_seed() if seed is None else seed,
      )

      return states, post_swap_kernel_results
Ejemplo n.º 14
0
def _orthogonal_complement_e_i(vectors, i, gram_schmidt_iters):
    """Returns an orthonormal basis of `span(vectors)` orthogonal to `e_i`.

    The orthogonal complement of the coordinate vector `e_i` within a vector
    space `V` consists of every vector of `V` whose `i`-th coordinate is zero.

    The procedure is:
      1. Pick the column `j` of `vectors` whose entry in row `i` has the
         largest magnitude (the pivot column).
      2. Subtract a multiple of the pivot column from every column so that
         row `i` becomes zero; the pivot column itself becomes all zeros.
      3. Move the zeroed pivot column to the last position.
      4. Orthonormalize with (modified) Gram-Schmidt, running only as many
         iterations as the caller requests.

    Worked example with `i = 1` on the column vectors

    ```none
    [ 2  4  7 ]
    [ 4  2  4 ]
    [ 6  6  3 ]
    ```

    Row 1 is `[4, 2, 4]`, so the pivot is column `j = 0` and the per-column
    weights are `[4, 2, 4] / 4 = [1, 0.5, 1]`. Subtracting
    `[2, 4, 6]^T * [1, 0.5, 1]` gives

    ```none
    [ 2  4  7 ]   [ 2  1  2 ]   [ 0  3  5 ]
    [ 4  2  4 ] - [ 4  2  4 ] = [ 0  0  0 ]
    [ 6  6  3 ]   [ 6  3  6 ]   [ 0  3 -3 ]
    ```

    After rotating the zero column to the end and applying Gram-Schmidt the
    result is

    ```none
    [ sqrt(.5)  sqrt(.5) 0 ]
    [     0        0     0 ]
    [ sqrt(.5) -sqrt(.5) 0 ]
    ```

    Args:
      vectors: A Tensor of shape `[..., d, n]` holding the column vectors to
        orthogonalize.
      i: The coordinate (index into dimension `d`) to orthogonalize against.
      gram_schmidt_iters: Number of Gram-Schmidt iterations to run, generally
        `n_vectors - iter_num`. Each sampling iteration removes one nonzero
        column (in the `n` dim), so later calls need fewer iterations.

    Returns:
      orthogonal: A Tensor of shape `[..., d, n]`, orthonormalized, spanning
        the part of `span(vectors)` orthogonal to `e_i`. Compared with the
        input it has at least one more zero row (`i`) and zero column
        (exactly one more when all nonzero input columns are linearly
        independent).
    """
    coord = tf.convert_to_tensor(i, dtype_hint=tf.int32)
    pivot_row = tf.gather(vectors, coord, axis=-2,
                          batch_dims=len(coord.shape))
    # Largest-magnitude entry is chosen for numerical stability.
    pivot_idx = tf.argmax(tf.abs(pivot_row), axis=-1)
    pivot_batch_dims = len(pivot_idx.shape)
    pivot_col = tf.gather(vectors, pivot_idx, axis=-1,
                          batch_dims=pivot_batch_dims)
    pivot_val = tf.gather(pivot_row, pivot_idx, axis=-1,
                          batch_dims=pivot_batch_dims)

    # Outer product of the pivot column with the per-column weights; removing
    # it zeroes row `i` in every column.
    ratios = pivot_row / pivot_val[..., tf.newaxis]
    correction = ratios[..., tf.newaxis, :] * pivot_col[..., :, tf.newaxis]
    cleared = vectors - correction

    # Rotate the new zero column to the end.
    num_rows = ps.shape(vectors)[-2]
    num_cols = ps.shape(vectors)[-1]
    keep_row = tf.not_equal(tf.range(num_rows, dtype=coord.dtype),
                            coord[..., tf.newaxis])[..., tf.newaxis]
    # Source column for each output slot: slots before the pivot keep their
    # index, slots at or after it read from one column further right, and the
    # final slot (which would overflow) reads the pivot column.
    positions = tf.range(num_cols, dtype=pivot_idx.dtype)
    skipped = positions + tf.cast(
        positions >= pivot_idx[..., tf.newaxis], pivot_idx.dtype)
    col_order = tf.where(skipped >= tf.cast(num_cols, pivot_idx.dtype),
                         pivot_idx[..., tf.newaxis], skipped)
    rotated = tf.gather(cleared,
                        col_order,
                        axis=-1,
                        batch_dims=len(col_order.shape) - 1)

    # Force row `i` and the last column to be exactly zero.
    keep_col = tf.not_equal(tf.range(num_cols), num_cols - 1)
    rotated = tf.where(keep_row & keep_col, rotated, 0)

    # Orthonormalize. Equivalent to, but faster than, tf.linalg.qr(...).q
    return tfp_math.gram_schmidt(rotated, gram_schmidt_iters)
Ejemplo n.º 15
0
def assign_and_sample_proposals(proposed_boxes,
                                gt_boxes,
                                gt_classes,
                                num_samples_per_image=512,
                                mix_gt_boxes=True,
                                fg_fraction=0.25,
                                fg_iou_thresh=0.5,
                                bg_iou_thresh_hi=0.5,
                                bg_iou_thresh_lo=0.0):
    """Matches proposals to groundtruth and subsamples a fixed number of RoIs.

  The function proceeds in three steps:
    1. Match every candidate box to a groundtruth box with `box_matching`
       (the docstring of the original describes this as picking the largest
       IoU overlap).
    2. Label each candidate as foreground, background or ignored from its
       matched IoU, and reset the class and groundtruth index of background
       candidates to 0.
    3. Sample `num_samples_per_image` candidates per image with a balanced
       positive/negative sampler and gather the corresponding boxes, classes
       and indices.

  Args:
    proposed_boxes: a tensor of shape [batch_size, N, 4]. N is the number of
      proposals before groundtruth assignment. The last dimension holds the
      box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
      format.
    gt_boxes: a tensor of shape [batch_size, MAX_NUM_INSTANCES, 4] with the
      groundtruth boxes in pixel coordinates of the scaled image. It may be
      padded with -1 to mark invalid boxes.
    gt_classes: a tensor of shape [batch_size, MAX_NUM_INSTANCES]. It may be
      padded with -1 to mark invalid classes.
    num_samples_per_image: an integer, the RoI minibatch size per image.
    mix_gt_boxes: a bool; when true the groundtruth boxes are appended to the
      proposals (along axis 1) before matching and sampling.
    fg_fraction: a float, the target fraction of the RoI minibatch that is
      labeled foreground (i.e., class > 0).
    fg_iou_thresh: a float; a candidate is foreground when its matched IoU is
      strictly greater than this value. NOTE: with the default thresholds an
      IoU of exactly `fg_iou_thresh == bg_iou_thresh_hi` is neither
      foreground nor background and is therefore never a sample candidate.
    bg_iou_thresh_hi: a float; exclusive upper IoU bound for background
      (class = 0 if IoU in [LO, HI)).
    bg_iou_thresh_lo: a float; inclusive lower IoU bound for background
      (class = 0 if IoU in [LO, HI)).

  Returns:
    sampled_rois: a tensor of shape [batch_size, K, 4] with the coordinates of
      the sampled RoIs, where K = num_samples_per_image.
    sampled_gt_boxes: a tensor of shape [batch_size, K, 4] with the box
      coordinates of the groundtruth boxes matched to the sampled RoIs.
    sampled_gt_classes: a tensor of shape [batch_size, K] with the classes of
      the groundtruth boxes matched to the sampled RoIs.
    sampled_gt_indices: a tensor of shape [batch_size, K] with the indices of
      the matched groundtruth boxes in the original `gt_boxes` tensor, i.e.
      gt_boxes[sampled_gt_indices[:, i]] = sampled_gt_boxes[:, i].
  """

    with tf.name_scope('sample_proposals'):
        # Optionally let the groundtruth boxes compete as candidates too.
        boxes = (tf.concat([proposed_boxes, gt_boxes], axis=1)
                 if mix_gt_boxes else proposed_boxes)

        (matched_gt_boxes, matched_gt_classes, matched_gt_indices, matched_iou,
         _) = box_matching(boxes, gt_boxes, gt_classes)

        is_foreground = tf.greater(matched_iou, fg_iou_thresh)
        is_background = tf.logical_and(
            tf.greater_equal(matched_iou, bg_iou_thresh_lo),
            tf.less(matched_iou, bg_iou_thresh_hi))
        # A negative IoU presumably marks an invalid/padded match coming out
        # of `box_matching` -- TODO confirm against that helper.
        is_ignored = tf.less(matched_iou, 0.0)

        # Background candidates are re-labeled with class 0 and index 0.
        matched_gt_classes = tf.where(is_background,
                                      tf.zeros_like(matched_gt_classes),
                                      matched_gt_classes)
        matched_gt_indices = tf.where(is_background,
                                      tf.zeros_like(matched_gt_indices),
                                      matched_gt_indices)

        sample_candidates = tf.logical_and(
            tf.logical_or(is_foreground, is_background),
            tf.logical_not(is_ignored))

        sampler = (
            balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
                positive_fraction=fg_fraction, is_static=True))

        # The sampler works on one image at a time, so the batch dimension
        # has to be statically known in order to unroll over it.
        batch_size, _ = sample_candidates.get_shape().as_list()
        sampled_indicators = tf.stack([
            sampler.subsample(sample_candidates[image_idx],
                              num_samples_per_image,
                              is_foreground[image_idx])
            for image_idx in range(batch_size)
        ])

        # Turn the boolean indicators into K indices per image: top_k over
        # the 0/1 indicator returns the positions flagged by the sampler.
        _, sampled_indices = tf.nn.top_k(
            tf.cast(sampled_indicators, dtype=tf.int32),
            k=num_samples_per_image,
            sorted=True)

        # Pair every sampled index with its batch position for gather_nd.
        indices_shape = tf.shape(sampled_indices)
        batch_indices = (
            tf.expand_dims(tf.range(indices_shape[0]), axis=-1) *
            tf.ones([1, indices_shape[-1]], dtype=tf.int32))
        gather_nd_indices = tf.stack([batch_indices, sampled_indices], axis=-1)

        sampled_rois = tf.gather_nd(boxes, gather_nd_indices)
        sampled_gt_boxes = tf.gather_nd(matched_gt_boxes, gather_nd_indices)
        sampled_gt_classes = tf.gather_nd(matched_gt_classes,
                                          gather_nd_indices)
        sampled_gt_indices = tf.gather_nd(matched_gt_indices,
                                          gather_nd_indices)

        return (sampled_rois, sampled_gt_boxes, sampled_gt_classes,
                sampled_gt_indices)
Ejemplo n.º 16
0
 def _sample_all():
     """Returns every step index in `[0, num_steps)` as an int64 range.

     `num_steps` is taken from the enclosing scope.
     """
     every_step_index = tf.range(0, num_steps, dtype=tf.int64)
     return every_step_index
Ejemplo n.º 17
0
def sample_and_crop_foreground_masks(candidate_rois,
                                     candidate_gt_boxes,
                                     candidate_gt_classes,
                                     candidate_gt_indices,
                                     gt_masks,
                                     num_mask_samples_per_image=128,
                                     mask_target_size=28):
    """Picks foreground RoIs and crops their groundtruth masks for training.

  Args:
    candidate_rois: a tensor of shape [batch_size, N, 4], where N is the
      number of candidate RoIs considered for mask sampling. It contains both
      positive and negative RoIs.
    candidate_gt_boxes: a tensor of shape [batch_size, N, 4] with the
      groundtruth boxes corresponding to `candidate_rois`.
    candidate_gt_classes: a tensor of shape [batch_size, N] with the
      groundtruth classes corresponding to `candidate_rois`. 0 denotes the
      background class, i.e. negative RoIs.
    candidate_gt_indices: a tensor of shape [batch_size, N] with the
      groundtruth instance index of each entry of `candidate_gt_boxes`, i.e.
      gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i], where
      gt_boxes has shape [batch_size, MAX_INSTANCES, 4].
    gt_masks: a tensor of shape
      [batch_size, MAX_INSTANCES, mask_height, mask_width] holding all the
      groundtruth masks the sampled masks are drawn from.
    num_mask_samples_per_image: an integer, the number of masks to sample
      per image.
    mask_target_size: an integer, the side length of the cropped masks. The
      output masks are resized w.r.t. the sampled RoIs.

  Returns:
    foreground_rois: a tensor of shape [batch_size, K, 4] with the RoIs that
      correspond to the sampled foreground masks, where
      K = num_mask_samples_per_image.
    foreground_classes: a tensor of shape [batch_size, K] with the classes
      corresponding to the sampled foreground masks.
    cropped_foreground_masks: a tensor of shape
      [batch_size, K, mask_target_size, mask_target_size] with the cropped
      foreground masks used for training.
  """

    def _with_batch_index(per_image_indices):
        # Pairs each [batch, K] index with its batch position, producing the
        # [batch, K, 2] index tensor that tf.gather_nd expects.
        indices_shape = tf.shape(per_image_indices)
        batch_indices = (
            tf.expand_dims(tf.range(indices_shape[0]), axis=-1) *
            tf.ones([1, indices_shape[-1]], dtype=tf.int32))
        return tf.stack([batch_indices, per_image_indices], axis=-1)

    with tf.name_scope('sample_and_crop_foreground_masks'):
        # top_k over the 0/1 foreground indicator puts foreground candidates
        # first; it always returns K indices per image.
        foreground_indicator = tf.cast(tf.greater(candidate_gt_classes, 0),
                                       dtype=tf.int32)
        _, fg_instance_indices = tf.nn.top_k(foreground_indicator,
                                             k=num_mask_samples_per_image)

        candidate_lookup = _with_batch_index(fg_instance_indices)
        foreground_rois = tf.gather_nd(candidate_rois, candidate_lookup)
        foreground_boxes = tf.gather_nd(candidate_gt_boxes, candidate_lookup)
        foreground_classes = tf.gather_nd(candidate_gt_classes,
                                          candidate_lookup)
        foreground_gt_indices = tf.gather_nd(candidate_gt_indices,
                                             candidate_lookup)

        # Fetch the groundtruth mask of the instance matched to each RoI.
        mask_lookup = _with_batch_index(foreground_gt_indices)
        foreground_masks = tf.gather_nd(gt_masks, mask_lookup)

        cropped_foreground_masks = spatial_transform_ops.crop_mask_in_target_box(
            foreground_masks, foreground_boxes, foreground_rois,
            mask_target_size)

        return foreground_rois, foreground_classes, cropped_foreground_masks
Ejemplo n.º 18
0
 def log_prob0(x):
     """Squeezed log-density of six independent unit-scale normals.

     The component means are `0., 1., ..., 5.`; the last axis of `x` is
     treated as the event axis.
     """
     component_normals = tfd.Normal(tf.range(6, dtype=tf.float32),
                                    tf.constant(1.))
     joint = tfd.Independent(component_normals, reinterpreted_batch_ndims=1)
     return tf.squeeze(joint.log_prob(x))
Ejemplo n.º 19
0
 def proposal_log_prob(x):
     """Standard-normal log-density of `x`, summed over the event axes.

     Every axis after the leading `independent_chain_ndims` axes (a name
     taken from the enclosing scope) is reduced.
     """
     event_dims = tf.range(independent_chain_ndims, tf.rank(x))
     # Per element: -0.5 * (x**2 + log(2 * pi)).
     per_element_terms = x**2. + np.log(2 * np.pi)
     return -0.5 * tf.reduce_sum(per_element_terms, axis=event_dims)
Ejemplo n.º 20
0
def _effective_sample_size_single_state(states, filter_beyond_lag,
                                        filter_threshold,
                                        filter_beyond_positive_pairs,
                                        cross_chain_dims, validate_args):
    """ESS computation for one single Tensor argument.

  Args:
    states: `Tensor` (or value convertible to one) of samples. Axis 0 is the
      sample axis: the auto-covariance and the sample count `N` are both
      taken along it.
    filter_beyond_lag: Forwarded as `max_lags` to `stats.auto_correlation`.
      `None` means the full sequence is used.
    filter_threshold: `None`, or a value convertible to a `Tensor` of
      `states.dtype`. Only consulted when `filter_beyond_positive_pairs` is
      falsy. Weighted auto-correlation terms are zeroed from the first lag
      whose auto-correlation is below the threshold onwards.
    filter_beyond_positive_pairs: If truthy, adjacent lags are summed in pairs
      and every pair from the first pair whose auto-correlation sum is
      negative onwards is zeroed. Takes precedence over `filter_threshold`.
    cross_chain_dims: `None`, or the axis/axes of `states` that index
      independent chains. When not `None`, the multi-chain auto-correlation
      from equation 10 of Vehtari et al. (2019) is used and more than one
      chain is required.
    validate_args: If truthy and the number of chains is not statically
      known, a runtime assertion that there is more than one chain is added.

  Returns:
    `num_chains * N / (-1 + 2 * Sum_k R[k] * (N - k) / N)` with the sum taken
    over axis 0 of the (optionally filtered) weighted auto-correlation.
    `num_chains` is 1 when `cross_chain_dims` is `None`.

  Raises:
    ValueError: If `cross_chain_dims` is not `None` and the number of chains
      is statically known to be less than 2.
  """

    with tf.name_scope('effective_sample_size_single_state'):

        states = tf.convert_to_tensor(states, name='states')
        dt = states.dtype

        # filter_beyond_lag == None ==> auto_corr is the full sequence.
        auto_cov = stats.auto_correlation(states,
                                          axis=0,
                                          max_lags=filter_beyond_lag,
                                          normalize=False)
        # N in the formulas below (presumably the size of axis 0 -- see
        # `_axis_size`).
        n = _axis_size(states, axis=0)

        if cross_chain_dims is not None:
            num_chains = _axis_size(states, cross_chain_dims)
            num_chains_ = tf.get_static_value(num_chains)

            # Fail eagerly when the chain count is statically known; otherwise
            # fall back to a runtime assertion, but only if validation is on.
            assertions = []
            msg = (
                'When `cross_chain_dims` is not `None`, there must be > 1 chain '
                'in `states`.')
            if num_chains_ is not None:
                if num_chains_ < 2:
                    raise ValueError(msg)
            elif validate_args:
                assertions.append(
                    assert_util.assert_greater(num_chains, 1., message=msg))

            with tf.control_dependencies(assertions):
                # We're computing the R[k] from equation 10 of Vehtari et al.
                # (2019):
                #
                # R[k] := 1 - (W - 1/C * Sum_{c=1}^C s_c**2 R[k, c]) / (var^+),
                #
                # where:
                #   C := number of chains
                #   N := length of chains
                #   x_hat[c] := 1 / N Sum_{n=1}^N x[n, c], chain mean.
                #   x_hat := 1 / C Sum_{c=1}^C x_hat[c], overall mean.
                #   W := 1/C Sum_{c=1}^C s_c**2, within-chain variance.
                #   B := N / (C - 1) Sum_{c=1}^C (x_hat[c] - x_hat)**2, between chain
                #     variance.
                #   s_c**2 := 1 / (N - 1) Sum_{n=1}^N (x[n, c] - x_hat[c])**2, chain
                #       variance
                #   R[k, m] := auto_corr[k, m, ...], auto-correlation indexed by chain.
                #   var^+ := (N - 1) / N * W + B / N

                cross_chain_dims = ps.non_negative_axis(
                    cross_chain_dims, ps.rank(states))
                # The `cross_chain_dims - 1` below accounts for axis 0 having
                # been removed (by the mean over samples, resp. `auto_cov[0]`),
                # which shifts every remaining axis down by one.
                # B / N
                between_chain_variance_div_n = _reduce_variance(
                    tf.reduce_mean(states, axis=0),
                    biased=False,  # This makes the denominator be C - 1.
                    axis=cross_chain_dims - 1)
                # W * (N - 1) / N
                biased_within_chain_variance = tf.reduce_mean(
                    auto_cov[0], cross_chain_dims - 1)
                # var^+
                approx_variance = (biased_within_chain_variance +
                                   between_chain_variance_div_n)
                # 1/C * Sum_{c=1}^C s_c**2 R[k, c]
                mean_auto_cov = tf.reduce_mean(auto_cov, cross_chain_dims)
                auto_corr = 1. - (biased_within_chain_variance -
                                  mean_auto_cov) / approx_variance
        else:
            # Single chain: normalize the auto-covariance by its lag-0 value.
            auto_corr = auto_cov / auto_cov[:1]
            num_chains = 1

        # With R[k] := auto_corr[k, ...],
        # ESS = N / {1 + 2 * Sum_{k=1}^N R[k] * (N - k) / N}
        #     = N / {-1 + 2 * Sum_{k=0}^N R[k] * (N - k) / N} (since R[0] = 1)
        #     approx N / {-1 + 2 * Sum_{k=0}^M R[k] * (N - k) / N}
        # where M is the filter_beyond_lag truncation point chosen above.

        # Get the factor (N - k) / N, and give it shape [M, 1,...,1], having total
        # ndims the same as auto_corr
        k = tf.range(0., _axis_size(auto_corr, axis=0))
        nk_factor = (n - k) / n
        # Build the broadcast shape statically when the rank is known, and
        # dynamically otherwise.
        if tensorshape_util.rank(auto_corr.shape) is not None:
            new_shape = [-1
                         ] + [1] * (tensorshape_util.rank(auto_corr.shape) - 1)
        else:
            new_shape = tf.concat(
                ([-1], tf.ones([tf.rank(auto_corr) - 1], dtype=tf.int32)),
                axis=0)
        nk_factor = tf.reshape(nk_factor, new_shape)
        weighted_auto_corr = nk_factor * auto_corr

        if filter_beyond_positive_pairs:

            def _sum_pairs(x):
                # Sums adjacent entries (0+1, 2+3, ...) along axis 0.
                x_len = ps.shape(x)[0]
                # For odd sequences, we drop the final value.
                x = x[:x_len - x_len % 2]
                new_shape = ps.concat(
                    [[x_len // 2, 2], ps.shape(x)[1:]], axis=0)
                return tf.reduce_sum(tf.reshape(x, new_shape), 1)

            # Pairwise sums are all positive for auto-correlation spectra derived from
            # reversible MCMC chains.
            # E.g. imagine the pairwise sums are [0.2, 0.1, -0.1, -0.2]
            # Step 1: mask = [False, False, True, True]
            mask = _sum_pairs(auto_corr) < 0.
            # Step 2: mask = [0, 0, 1, 1]
            mask = tf.cast(mask, dt)
            # Step 3: mask = [0, 0, 1, 2]
            mask = tf.cumsum(mask, axis=0)
            # Step 4: mask = [1, 1, 0, 0]
            mask = tf.maximum(1. - mask, 0.)

            # N.B. this reduces the length of weighted_auto_corr by a factor of 2.
            # It still works fine in the formula below.
            weighted_auto_corr = _sum_pairs(weighted_auto_corr) * mask
        elif filter_threshold is not None:
            filter_threshold = tf.convert_to_tensor(filter_threshold,
                                                    dtype=dt,
                                                    name='filter_threshold')
            # Get a binary mask to zero out values of auto_corr below the threshold.
            #   mask[i, ...] = 1 if auto_corr[j, ...] > threshold for all j <= i,
            #   mask[i, ...] = 0, otherwise.
            # So, along dimension zero, the mask will look like [1, 1, ..., 0, 0,...]
            # Building step by step,
            #   Assume auto_corr = [1, 0.5, 0.0, 0.3], and filter_threshold = 0.2.
            # Step 1:  mask = [False, False, True, False]
            mask = auto_corr < filter_threshold
            # Step 2:  mask = [0, 0, 1, 0]
            mask = tf.cast(mask, dtype=dt)
            # Step 3:  mask = [0, 0, 1, 1]
            mask = tf.cumsum(mask, axis=0)
            # Step 4:  mask = [1, 1, 0, 0]
            mask = tf.maximum(1. - mask, 0.)
            weighted_auto_corr *= mask

        return num_chains * n / (-1 +
                                 2 * tf.reduce_sum(weighted_auto_corr, axis=0))
Ejemplo n.º 21
0
 def proposal_log_prob(x):
     """Standard-normal log-density of `x`, counting every invocation.

     Increments `counter['proposal_calls']` (from the enclosing scope) and
     sums the per-element log-density over every axis after the leading
     `independent_chain_ndims` axes.
     """
     counter['proposal_calls'] += 1
     event_dims = tf.range(independent_chain_ndims, tf.rank(x))
     elementwise_log_prob = tfd.Normal(loc=0., scale=1.).log_prob(x)
     return tf.reduce_sum(elementwise_log_prob, axis=event_dims)
Ejemplo n.º 22
0
def segment_diff(x,
                 segment_ids,
                 order=1,
                 exclusive=False,
                 dtype=None,
                 name=None):
    """Computes differences of elements `order` apart within each segment.

  For a complete description of segment_* ops see the documentation of
  `tf.math.segment_max`. This op extends the `diff` functionality to
  segmented inputs.

  Within each segment the op behaves like `diff`; the result is effectively
  the concatenation of `diff` applied to each segment separately.

  ## Example

  ```python
    x = tf.constant([2, 5, 1, 7, 9] + [32, 10, 12, 3] + [4, 8, 5])
    segments = tf.constant([0, 0, 0, 0, 0] + [1, 1, 1, 1] + [2, 2, 2])
    # First order diff. Expected result: [3, -4, 6, 2, -22, 2, -9, 4, -3]
    dx1 = segment_diff(
        x, segment_ids=segments, order=1, exclusive=True)
    # Non-exclusive, second order diff.
    # Expected result: [2, 5, -1, 2, 8, 32, 10, -20, -7, 4, 8, 1]
    dx2 = segment_diff(
        x, segment_ids=segments, order=2, exclusive=False)
  ```

  Args:
    x: A rank 1 `Tensor` of any dtype for which arithmetic operations are
      permitted.
    segment_ids: A `Tensor`. Must be one of the following types: int32, int64.
      A 1-D tensor whose size is equal to the size of `x`. Values should be
      sorted and can be repeated. If `None`, the plain `diff` of `x` is
      returned.
    order: Positive Python int. The order of the difference to compute.
      `order = 1` corresponds to the difference between successive elements.
      Default value: 1
    exclusive: Python bool. If False, the output has the same size as `x` and
      the leading (up to `order`) positions of each segment, which have no
      in-segment predecessor, hold the corresponding values of `x` (see the
      second example above). If True, those positions are dropped instead.
      Default value: False
    dtype: Optional `tf.Dtype`. If supplied, the dtype for `x` to use when
      converting to `Tensor`.
      Default value: None which maps to the default dtype inferred by TF.
    name: Python `str` name prefixed to Ops created by this class.
      Default value: None which is mapped to the default name 'segment_diff'.

  Returns:
    diffs: A `Tensor` of the same dtype as `x`. Assuming that each segment is
      of length greater than or equal to order, if `exclusive` is True,
      then the size is `n-order*k` where `n` is the size of x,
      `k` is the number of different segment ids supplied if `segment_ids` is
      not None or 1 if `segment_ids` is None. If any of the segments is of
      length less than the order, then the size is:
      `n-sum(min(order, length(segment_j)), j)` where the sum is over segments.
      If `exclusive` is False, then the size is `n`.
  """
    with tf.compat.v1.name_scope(name, default_name='segment_diff',
                                 values=[x]):
        x = tf.convert_to_tensor(x, dtype=dtype)
        unsegmented_diffs = diff_ops.diff(x, order=order, exclusive=exclusive)
        if segment_ids is None:
            return unsegmented_diffs

        # The unsegmented diffs are wrong wherever the subtraction reached
        # across a segment boundary, i.e. at positions
        # p, p+1, ..., min(p+order-1, m_p-1), where p is the first index of
        # any segment but the very first one and m_p is that segment's length.

        # Mark the positions at which a new segment begins.
        id_steps = segment_ids[1:] - segment_ids[:-1]
        starts_new_segment = tf.concat(
            [[False], tf.not_equal(id_steps, 0)], axis=0)
        # Shape [k - 1, 1]: first index of every segment except the first.
        segment_starts = tf.cast(tf.where(starts_new_segment),
                                 dtype=tf.int32)
        # A segment ends where the next one starts; the last one ends at the
        # end of the input.
        following_starts = tf.reshape(segment_starts, [-1])[1:]
        segment_ends = tf.reshape(
            tf.concat([following_starts, [tf.size(segment_ids)]], axis=0),
            [-1, 1])

        # Candidate positions p, p+1, ..., p+order-1 for every boundary.
        # Shape [k - 1, order].
        offsets = tf.range(order, dtype=segment_starts.dtype)
        candidate_positions = segment_starts + offsets
        # Drop candidates that run past the end of their (short) segment.
        inside_segment = tf.where(candidate_positions < segment_ends)
        stale_positions = tf.reshape(
            tf.gather_nd(candidate_positions, inside_segment), [-1, 1])

        # scatter_nd doesn't support bool on GPUs, so scatter int32 ones and
        # cast to bool afterwards.
        scatter_updates = tf.reshape(
            tf.ones_like(stale_positions, dtype=tf.int32), [-1])
        is_stale = tf.cast(
            tf.scatter_nd(stale_positions, scatter_updates,
                          shape=tf.shape(x)),
            dtype=tf.bool)

        if exclusive:
            # `diff` already removed the first `order` elements, so align the
            # mask by dropping its first `order` entries, then filter out the
            # stale diffs.
            return tf.boolean_mask(unsegmented_diffs,
                                   tf.logical_not(is_stale[order:]))

        # Non-exclusive: stale positions fall back to the values of x.
        return tf.where(is_stale, x, unsegmented_diffs)
Ejemplo n.º 23
0
def covariance(x,
               y=None,
               sample_axis=0,
               event_axis=-1,
               keepdims=False,
               name=None):
    """Sample covariance between observations indexed by `event_axis`.

  Given `N` samples of scalar random variables `X` and `Y`, covariance may be
  estimated as

  ```none
  Cov[X, Y] := N^{-1} sum_{n=1}^N (X_n - Xbar) Conj{(Y_n - Ybar)}
  Xbar := N^{-1} sum_{n=1}^N X_n
  Ybar := N^{-1} sum_{n=1}^N Y_n
  ```

  For vector-variate random variables `X = (X1, ..., Xd)`, `Y = (Y1, ..., Yd)`,
  one is often interested in the covariance matrix, `C_{ij} := Cov[Xi, Yj]`.

  ```python
  x = tf.random.normal(shape=(100, 2, 3))
  y = tf.random.normal(shape=(100, 2, 3))

  # cov[i, j] is the sample covariance between x[:, i, j] and y[:, i, j].
  cov = tfp.stats.covariance(x, y, sample_axis=0, event_axis=None)

  # cov_matrix[i, m, n] is the sample covariance of x[:, i, m] and y[:, i, n]
  cov_matrix = tfp.stats.covariance(x, y, sample_axis=0, event_axis=-1)
  ```

  Notice we divide by `N` (the numpy default), which does not create `NaN`
  when `N = 1`, but is slightly biased.

  Args:
    x:  A numeric `Tensor` holding samples.
    y:  Optional `Tensor` with same `dtype` and `shape` as `x`.
      Default value: `None` (`y` is effectively set to `x`).
    sample_axis: Scalar or vector `Tensor` designating axis holding samples, or
      `None` (meaning all axis hold samples). `None` is only valid together
      with `event_axis=None`.
      Default value: `0` (leftmost dimension).
    event_axis:  Scalar or vector `Tensor`, or `None` (scalar events).
      Axis indexing random events, whose covariance we are interested in.
      If a vector, entries must form a contiguous block of dims. `sample_axis`
      and `event_axis` should not intersect.
      Default value: `-1` (rightmost axis holds events).
    keepdims:  Boolean.  Whether to keep the sample axis as singletons.
    name: Python `str` name prefixed to Ops created by this function.
          Default value: `None` (i.e., `'covariance'`).

  Returns:
    cov: A `Tensor` of same `dtype` as the `x`. With `keepdims=False` its rank
      is `rank(x) - len(sample_axis) + len(event_axis)`: the sample dims are
      removed and the block of event dims appears twice. With `keepdims=True`
      the sample dims are kept with size 1. If `event_axis` is `None` the
      result is the mean of `x * conj(y)` (after centering) over
      `sample_axis`.

  Raises:
    ValueError:  If `sample_axis` is `None` while `event_axis` is not.
    ValueError:  If `sample_axis` and `event_axis` are found to overlap.
    ValueError:  If `event_axis` is found to not be contiguous.
    Whatever `tensorshape_util.assert_is_compatible_with` raises if `x` and
      `y` are found to have incompatible shapes.
  """

    with tf.name_scope(name or 'covariance'):
        x = tf.convert_to_tensor(x, name='x')
        # Covariance *only* uses the centered versions of x (and y).
        x -= tf.reduce_mean(x, axis=sample_axis, keepdims=True)

        if y is None:
            y = x
        else:
            y = tf.convert_to_tensor(y, name='y', dtype=x.dtype)
            # If x and y have different shape, sample_axis and event_axis will likely
            # be wrong for one of them!
            tensorshape_util.assert_is_compatible_with(x.shape, y.shape)
            y -= tf.reduce_mean(y, axis=sample_axis, keepdims=True)

        # Scalar events: a plain elementwise product averaged over samples.
        if event_axis is None:
            return tf.reduce_mean(x * tf.math.conj(y),
                                  axis=sample_axis,
                                  keepdims=keepdims)

        # sample_axis=None claims every axis for samples, leaving no room for
        # the requested event axes.
        if sample_axis is None:
            raise ValueError(
                'sample_axis was None, which means all axis hold samples, and '
                'this overlaps with event_axis ({})'.format(event_axis))

        event_axis = _make_positive_axis(event_axis, tf.rank(x))
        sample_axis = _make_positive_axis(sample_axis, tf.rank(x))

        # If we get lucky and axis is statically defined, we can do some checks.
        if _is_list_like(event_axis) and _is_list_like(sample_axis):
            if set(event_axis).intersection(sample_axis):
                raise ValueError(
                    'sample_axis ({}) and event_axis ({}) overlapped'.format(
                        sample_axis, event_axis))
            if (np.diff(sorted(event_axis)) > 1).any():
                raise ValueError(
                    'event_axis must be contiguous. Found: {}'.format(
                        event_axis))
            batch_axis = list(
                sorted(
                    set(range(tensorshape_util.rank(
                        x.shape))).difference(sample_axis + event_axis)))
        else:
            batch_axis = prefer_static.setdiff1d(
                tf.range(0, tf.rank(x)), tf.concat((sample_axis, event_axis),
                                                   0))

        event_axis = tf.convert_to_tensor(event_axis,
                                          name='event_axis',
                                          dtype=tf.int32)
        sample_axis = tf.convert_to_tensor(sample_axis,
                                           name='sample_axis',
                                           dtype=tf.int32)
        batch_axis = tf.convert_to_tensor(batch_axis,
                                          name='batch_axis',
                                          dtype=tf.int32)

        # Permute x/y until shape = B + E + S
        perm_for_xy = tf.concat((batch_axis, event_axis, sample_axis), 0)
        x_permed = tf.transpose(a=x, perm=perm_for_xy)
        y_permed = tf.transpose(a=y, perm=perm_for_xy)

        batch_ndims = tf.size(batch_axis)
        batch_shape = tf.shape(x_permed)[:batch_ndims]
        event_ndims = tf.size(event_axis)
        event_shape = tf.shape(x_permed)[batch_ndims:batch_ndims + event_ndims]
        sample_shape = tf.shape(x_permed)[batch_ndims + event_ndims:]
        sample_ndims = tf.size(sample_shape)
        n_samples = tf.reduce_prod(sample_shape)
        n_events = tf.reduce_prod(event_shape)

        # Flatten the event dims into one long dim and the sample dims into
        # another, giving shape = batch_shape + [n_events, n_samples]. (The
        # original first reshaped to batch_shape + event_shape + [n_samples]
        # and then discarded that result; a single reshape is equivalent.)
        flat_shape = tf.concat((batch_shape, [n_events], [n_samples]), 0)
        x_permed_flat = tf.reshape(x_permed, flat_shape)
        y_permed_flat = tf.reshape(y_permed, flat_shape)

        # After matmul, cov.shape = batch_shape + [n_events, n_events]
        cov = tf.matmul(x_permed_flat, y_permed_flat,
                        adjoint_b=True) / tf.cast(n_samples, x.dtype)

        # Insert some singletons to make
        # cov.shape = batch_shape + event_shape**2 + [1,...,1]
        # This is just like x_permed.shape, except the sample_axis is all 1's, and
        # the [n_events] became event_shape**2.
        cov = tf.reshape(
            cov,
            tf.concat(
                (
                    batch_shape,
                    # event_shape**2 used here because it is the same length as
                    # event_shape, and has the same number of elements as one
                    # batch of covariance.
                    event_shape**2,
                    tf.ones([sample_ndims], tf.int32)),
                0))
        # Permuting by the argsort inverts the permutation, making
        # cov.shape have ones in the position where there were samples, and
        # [n_events * n_events] in the event position.
        cov = tf.transpose(a=cov, perm=tf.math.invert_permutation(perm_for_xy))

        # Now expand event_shape**2 into event_shape + event_shape.
        # We here use (for the first time) the fact that we require event_axis to be
        # contiguous.
        e_start = event_axis[0]
        e_len = 1 + event_axis[-1] - event_axis[0]
        cov = tf.reshape(
            cov,
            tf.concat((tf.shape(cov)[:e_start], event_shape, event_shape,
                       tf.shape(cov)[e_start + e_len:]), 0))

        # tf.squeeze requires python ints for axis, not Tensor.  This is enough to
        # require our axis args to be constants.
        if not keepdims:
            # Duplicating the event block pushed every axis located after it
            # to the right by e_len; shift those sample axes accordingly.
            squeeze_axis = tf.where(sample_axis < e_start, sample_axis,
                                    sample_axis + e_len)
            cov = _squeeze(cov, axis=squeeze_axis)

        return cov
Ejemplo n.º 24
0
def _compute_log_acceptance_correction(current_state_parts,
                                       proposed_state_parts,
                                       current_volatility_parts,
                                       proposed_volatility_parts,
                                       current_drift_parts,
                                       proposed_drift_parts,
                                       step_size_parts,
                                       independent_chain_ndims,
                                       name=None):
    r"""Computes the log acceptance-correction for a normal proposal.

    The forward and reverse proposal densities are normal:

    ```none
    q(proposed_state | current_state) \sim N(current_state + current_drift,
                                             step_size * current_volatility**2)

    q(current_state | proposed_state) \sim N(proposed_state + proposed_drift,
                                             step_size * proposed_volatility**2)
    ```

    and the value returned is

    ```none
    log_acceptance_correction = log q(current_state | proposed_state)
                                - log q(proposed_state | current_state)
    ```

    The constant `-0.5 * log(2 * pi)` term is left out of each log-density; it
    is identical on both sides and cancels in the difference.

    Args:
      current_state_parts: Python `list` of `Tensor`s holding the current state
        of the chain.
      proposed_state_parts: Python `list` of `Tensor`s holding the proposed
        state of the chain. Must broadcast with `current_state_parts`.
      current_volatility_parts: Python `list` of `Tensor`s holding the
        volatility evaluated at the current state. Must broadcast with
        `current_state_parts`.
      proposed_volatility_parts: Python `list` of `Tensor`s holding the
        volatility evaluated at the proposed state. Must broadcast with
        `current_state_parts`.
      current_drift_parts: Python `list` of `Tensor`s holding the drift
        evaluated at the current state. Must broadcast with
        `current_state_parts`.
      proposed_drift_parts: Python `list` of `Tensor`s holding the drift
        evaluated at the proposed state. Must broadcast with
        `current_state_parts`.
      step_size_parts: Python `list` of `Tensor`s holding the step size of
        each state part. Must broadcast with `current_state_parts`.
      independent_chain_ndims: Scalar `int` `Tensor`; the number of leftmost
        dimensions that index independent chains. All remaining dimensions
        are summed out.
      name: Python `str` name prefixed to Ops created by this function.
        Default value: `None` (i.e., 'compute_log_acceptance_correction').

    Returns:
      log_acceptance_correction: `Tensor` holding the log
        acceptance-correction, one value per independent chain.
    """

    def _log_density_part(residual, scale, reduce_axes):
        # Unnormalised log N(residual; 0, scale**2), summed over `reduce_axes`.
        standardized = residual / scale
        energy = tf.reduce_sum(
            mcmc_util.safe_sum(
                [tf.math.log(scale), 0.5 * (standardized**2)]),
            axis=reduce_axes)
        return -energy

    with tf.name_scope(name or 'compute_log_acceptance_correction'):
        forward_parts = []
        reverse_parts = []

        all_parts = zip(
            current_state_parts,
            proposed_state_parts,
            current_volatility_parts,
            proposed_volatility_parts,
            current_drift_parts,
            proposed_drift_parts,
            step_size_parts,
        )
        for (state, candidate, fwd_scale, rev_scale, fwd_drift, rev_drift,
             step) in all_parts:
            # Every non-chain dimension belongs to the event and is reduced.
            event_axes = tf.range(independent_chain_ndims, tf.rank(state))
            displacement = candidate - state

            # Proposal standard deviation is volatility * sqrt(step_size).
            root_step = tf.sqrt(step)
            fwd_scale *= root_step
            rev_scale *= root_step

            # Part of `q(proposed_state | current_state)`.
            forward_parts.append(
                _log_density_part(displacement - fwd_drift, fwd_scale,
                                  event_axes))

            # Part of `q(current_state | proposed_state)`. The residual is
            # current - (proposed + proposed_drift); its sign is irrelevant
            # because it is squared.
            reverse_parts.append(
                _log_density_part(displacement + rev_drift, rev_scale,
                                  event_axes))

        # Combine the contributions of all state parts.
        forward_log_density = tf.add_n(forward_parts)
        reverse_log_density = tf.add_n(reverse_parts)

        return mcmc_util.safe_sum([reverse_log_density, -forward_log_density])
Ejemplo n.º 25
0
def one_step_predictive(model,
                        posterior_samples,
                        num_forecast_steps=0,
                        original_mean=0.,
                        original_scale=1.,
                        thin_every=10):
    """Builds a one-step-ahead predictive distribution for every timestep.

    The latent levels drawn by the Gibbs sampler are used directly, so the
    result is an equally weighted mixture of normals over the (thinned)
    posterior draws. Optionally, extra forecast steps are appended after the
    observed steps.

    The distribution returned for step `t` is
    `p(observed_time_series[t] | observed_time_series[:t], weights,
    observation_noise_scale)`. The posterior draws of the weights and noise
    scale were themselves informed by all timesteps, *including the one being
    predicted*, so this is only an approximation that assumes any single step
    has little influence on the parameter posterior.

    Args:
      model: A `tfp.sts.StructuralTimeSeries` model instance of the form
        constructed by `build_model_for_gibbs_sampling`.
      posterior_samples: A `GibbsSamplerState` instance in which each element
        is a `Tensor` with initial dimension of size `num_samples`.
      num_forecast_steps: Python `int` number of additional forecast steps to
        append.
        Default value: `0`.
      original_mean: Optional scalar float `Tensor`, added to the predictive
        mean to undo the effect of input normalization.
        Default value: `0.`
      original_scale: Optional scalar float `Tensor`, used to rescale the
        predictive mean and scale to undo the effect of input normalization.
        Default value: `1.`
      thin_every: Optional Python `int` stride used to thin the posterior
        samples (`x[::thin_every]`), reducing the number of mixture
        components.
        Default value: `10`.

    Returns:
      predictive_dist: A `tfd.MixtureSameFamily` instance of event shape
        `[num_timesteps + num_forecast_steps]` representing the predictive
        distribution of each timestep given previous timesteps.
    """
    dtype = dtype_util.common_dtype(
        [
            posterior_samples.level_scale.dtype,
            posterior_samples.observation_noise_scale.dtype,
            posterior_samples.level.dtype,
            original_mean,
            original_scale,
        ],
        dtype_hint=tf.float32)
    num_observed_steps = prefer_static.shape(posterior_samples.level)[-1]

    original_mean = tf.convert_to_tensor(original_mean, dtype=dtype)
    original_scale = tf.convert_to_tensor(original_scale, dtype=dtype)
    thinned = tf.nest.map_structure(lambda draws: draws[::thin_every],
                                    posterior_samples)

    # Under the local level model the level at step t+1 equals the level at
    # step t (plus transition noise of scale `level_scale`, which is folded
    # into the predictive scale below). So the prediction for step t is the
    # sampled level at step t-1, with step 0 predicted by its own level.
    level_pieces = [
        thinned.level[..., :1],  # t == 0
        thinned.level[..., :-1],  # 1 <= t < T
    ]
    if num_forecast_steps > 0:
        # Every forecast step repeats the last sampled level.
        num_batch_dims = prefer_static.rank_from_shape(
            prefer_static.shape(thinned.level)) - 2
        tile_multiples = tf.concat(
            [
                tf.ones([num_batch_dims + 1], dtype=tf.int32),
                [num_forecast_steps],
            ],
            axis=0)
        level_pieces.append(tf.tile(thinned.level[..., -1:], tile_multiples))
    level_pred = tf.concat(level_pieces, axis=-1)

    total_steps = num_observed_steps + num_forecast_steps
    design_matrix = _get_design_matrix(model).to_dense()[:total_steps]
    regression_effect = tf.linalg.matvec(design_matrix, thinned.weights)

    # Undo input normalization on the mean.
    normalized_mean = level_pred + regression_effect
    y_mean = (normalized_mean * original_scale[..., tf.newaxis] +
              original_mean[..., tf.newaxis])

    # Observed steps are one transition away from the level they are
    # predicted from; the k-th forecast step is k transitions away.
    num_steps_from_last_observation = tf.concat(
        [
            tf.ones([num_observed_steps], dtype=dtype),
            tf.range(1, num_forecast_steps + 1, dtype=dtype),
        ],
        axis=0)
    observation_variance = (
        thinned.observation_noise_scale[..., tf.newaxis]**2)
    level_variance = (thinned.level_scale[..., tf.newaxis]**2 *
                      num_steps_from_last_observation)
    y_scale = original_scale * tf.sqrt(observation_variance + level_variance)

    # Equal-weight mixture over posterior draws; the draw dimension is moved
    # last so that it becomes the mixture-component dimension.
    num_posterior_draws = prefer_static.shape(y_mean)[0]
    uniform_mixture = tfd.Categorical(
        logits=tf.zeros([num_posterior_draws], dtype=y_mean.dtype))
    per_draw_normals = tfd.Normal(
        loc=dist_util.move_dimension(y_mean, 0, -1),
        scale=dist_util.move_dimension(y_scale, 0, -1))
    return tfd.MixtureSameFamily(
        mixture_distribution=uniform_mixture,
        components_distribution=per_draw_normals)
Ejemplo n.º 26
0
    def update_tables(self):
        """Rebuilds the integer probability tables consumed by the range coder.

        Sender and receiver must NOT each regenerate these tables on their
        own: tiny numerical differences between the two sides can make the
        tables differ slightly, which would very likely cause catastrophic
        error propagation during range decoding. For a more in-depth
        discussion, see:

        > "Integer Networks for Data Compression with Latent-Variable Models"<br />
        > J. Ballé, N. Johnston, D. Minnen<br />
        > https://openreview.net/forum?id=S1zz2i0cY7

        The tables are stored as `tf.Tensor` attributes on this object
        (`_cdf`, `_cdf_offset`, `_cdf_length`). The recommended workflow is to
        train the model, call this method once, and then ship the model to
        both sender and receiver.
        """

        def _nonneg_ceil_int(distance):
            # Round up to a whole number of steps and clamp at zero.
            return tf.math.maximum(
                tf.cast(tf.math.ceil(distance), tf.int32), 0)

        offset = self.quantization_offset()
        lower_tail = self.lower_tail()
        upper_tail = self.upper_tail()

        # Integer distances from the offset down to the lower tail and up to
        # the upper tail.
        minima = _nonneg_ceil_int(offset - lower_tail)
        maxima = _nonneg_ceil_int(upper_tail - offset)

        # Where each PMF starts and how many entries it has.
        pmf_start = offset - tf.cast(minima, self.dtype)
        pmf_length = maxima + minima + 1

        # Evaluate the density on a grid as long as the widest PMF; narrower
        # PMFs get surplus samples at the upper end, trimmed later.
        max_length = tf.math.reduce_max(pmf_length)
        if max_length > 2048:
            logging.warning(
                "Very wide PMF with %d elements may lead to out of memory issues. "
                "Consider encoding distributions with smaller dispersion or "
                "increasing `tail_mass` parameter.", int(max_length))
        grid = tf.range(tf.cast(max_length, self.dtype), dtype=self.dtype)
        # Put the sample axis first, followed by one singleton per batch
        # dimension so the grid broadcasts against `pmf_start`.
        grid_shape = [-1] + self.distribution.batch_shape.rank * [1]
        grid = tf.reshape(grid, grid_shape)
        grid += pmf_start
        pmf = self.distribution.prob(grid)

        # Flatten the batch dimensions: one row per distribution.
        pmf = tf.transpose(tf.reshape(pmf, [max_length, -1]))

        dist_shape = self.distribution.batch_shape_tensor()
        pmf_length = tf.reshape(
            tf.broadcast_to(pmf_length, dist_shape), [-1])
        cdf_length = pmf_length + 2
        cdf_offset = tf.reshape(
            tf.broadcast_to(-minima, dist_shape), [-1])

        def _row_to_cdf(row_and_length):
            row, length = row_and_length
            trimmed = row[:length]
            # Append one extra entry holding the mass not covered by the
            # trimmed PMF.
            remainder = 1 - tf.reduce_sum(trimmed, keepdims=True)
            full_pmf = tf.concat([trimmed, remainder], axis=0)
            quantized = range_coding_ops.pmf_to_quantized_cdf(
                full_pmf, precision=self.range_coder_precision)
            # Zero-pad so all rows have equal length and can be stacked.
            return tf.pad(quantized, [[0, max_length - length]],
                          mode="CONSTANT",
                          constant_values=0)

        # Prevent tensors from bouncing back and forth between host and GPU.
        with tf.device("/cpu:0"):
            # TODO(jonycgn,ssjhv): Consider switching to Python control flow.
            cdf = tf.map_fn(_row_to_cdf, (pmf, pmf_length),
                            dtype=tf.int32,
                            name="pmf_to_cdf")

        self._cdf = cdf
        self._cdf_offset = cdf_offset
        self._cdf_length = cdf_length
Ejemplo n.º 27
0
    def _sample_n(self, n, seed=None):
        """Draws `n` samples from the mixture.

        Two code paths exist:

        * Static graph (`self._use_static_graph` is truthy): every component
          is sampled `n` times, the results are stacked along a new component
          axis, and a one-hot mask built from the categorical draws selects
          one component per sample/batch entry.
        * Otherwise: the categorical draws are used to partition the flat
          sample indices by component, each component is sampled only as many
          times as it was selected, the needed batch entries are gathered, and
          `tf.dynamic_stitch` puts the results back in the original order.

        In both paths the categorical is sampled with `seed` itself, while the
        components draw their seeds, in component order, from
        `SeedStream(seed, salt='Mixture')`.

        Args:
          n: Number of samples to draw; a Python integer or a `Tensor` (it is
            converted with `tf.convert_to_tensor` on the dynamic path).
          seed: Optional seed forwarded to the categorical and used to build
            the `SeedStream` for the components.

        Returns:
          A `Tensor` of samples. On the dynamic path it is reshaped to the
          shape of the categorical samples followed by the event shape; the
          inline shape comments on the static path indicate `[n, B, E]`.
        """
        if self._use_static_graph:
            # This sampling approach is almost the same as the approach used by
            # `MixtureSameFamily`. The differences are due to having a list of
            # `Distribution` objects rather than a single object, and maintaining
            # random seed management that is consistent with the non-static code
            # path.
            samples = []
            cat_samples = self.cat.sample(n, seed=seed)
            stream = SeedStream(seed, salt='Mixture')

            for c in range(self.num_components):
                samples.append(self.components[c].sample(n, seed=stream()))
            # Insert the component axis just before the event dimensions.
            stack_axis = -1 - tensorshape_util.rank(self._static_event_shape)
            x = tf.stack(samples, axis=stack_axis)  # [n, B, k, E]
            npdt = dtype_util.as_numpy_dtype(x.dtype)
            mask = tf.one_hot(
                indices=cat_samples,  # [n, B]
                depth=self._num_components,  # == k
                on_value=npdt(1),
                off_value=npdt(0))  # [n, B, k]
            mask = distribution_util.pad_mixture_dimensions(
                mask, self, self._cat,
                tensorshape_util.rank(
                    self._static_event_shape))  # [n, B, k, [1]*e]
            # Exactly one component is "on" per entry, so the sum selects it.
            return tf.reduce_sum(x * mask, axis=stack_axis)  # [n, B, E]

        # Prefer a Python int for `n` when its value is statically known.
        n = tf.convert_to_tensor(n, name='n')
        static_n = tf.get_static_value(n)
        n = int(static_n) if static_n is not None else n
        cat_samples = self.cat.sample(n, seed=seed)

        # For each of sample/batch/event shape: use the static shape when it
        # is fully defined, otherwise fall back to a dynamic shape `Tensor`.
        static_samples_shape = cat_samples.shape
        if tensorshape_util.is_fully_defined(static_samples_shape):
            samples_shape = tensorshape_util.as_list(static_samples_shape)
            samples_size = tensorshape_util.num_elements(static_samples_shape)
        else:
            samples_shape = tf.shape(cat_samples)
            samples_size = tf.size(cat_samples)
        static_batch_shape = self.batch_shape
        if tensorshape_util.is_fully_defined(static_batch_shape):
            batch_shape = tensorshape_util.as_list(static_batch_shape)
            batch_size = tensorshape_util.num_elements(static_batch_shape)
        else:
            batch_shape = tf.shape(cat_samples)[1:]
            batch_size = tf.reduce_prod(batch_shape)
        static_event_shape = self.event_shape
        if tensorshape_util.is_fully_defined(static_event_shape):
            event_shape = np.array(
                tensorshape_util.as_list(static_event_shape), dtype=np.int32)
        else:
            # Resolved lazily from the first component's samples in the loop
            # below.
            event_shape = None

        # Get indices into the raw cat sampling tensor. We will
        # need these to stitch sample values back out after sampling
        # within the component partitions.
        samples_raw_indices = tf.reshape(tf.range(0, samples_size),
                                         samples_shape)

        # Partition the raw indices so that we can use
        # dynamic_stitch later to reconstruct the samples from the
        # known partitions.
        partitioned_samples_indices = tf.dynamic_partition(
            data=samples_raw_indices,
            partitions=cat_samples,
            num_partitions=self.num_components)

        # Copy the batch indices n times, as we will need to know
        # these to pull out the appropriate rows within the
        # component partitions.
        batch_raw_indices = tf.reshape(tf.tile(tf.range(0, batch_size), [n]),
                                       samples_shape)

        # Explanation of the dynamic partitioning below:
        #   batch indices are i.e., [0, 1, 0, 1, 0, 1]
        # Suppose partitions are:
        #     [1 1 0 0 1 1]
        # After partitioning, batch indices are cut as:
        #     [batch_indices[x] for x in 2, 3]
        #     [batch_indices[x] for x in 0, 1, 4, 5]
        # i.e.
        #     [1 1] and [0 0 0 0]
        # Now we sample n=2 from part 0 and n=4 from part 1.
        # For part 0 we want samples from batch entries 1, 1 (samples 0, 1),
        # and for part 1 we want samples from batch entries 0, 0, 0, 0
        #   (samples 0, 1, 2, 3).
        partitioned_batch_indices = tf.dynamic_partition(
            data=batch_raw_indices,
            partitions=cat_samples,
            num_partitions=self.num_components)
        samples_class = [None for _ in range(self.num_components)]

        # Same salt as the static-graph path above, and one seed is consumed
        # per component in component order.
        stream = SeedStream(seed, salt='Mixture')

        for c in range(self.num_components):
            # Number of (sample, batch) entries assigned to component c.
            n_class = tf.size(partitioned_samples_indices[c])
            samples_class_c = self.components[c].sample(n_class, seed=stream())

            if event_shape is None:
                # Event shape was not statically known: take it from the
                # trailing dimensions of this component's samples.
                batch_ndims = prefer_static.rank_from_shape(batch_shape)
                event_shape = tf.shape(samples_class_c)[1 + batch_ndims:]

            # Pull out the correct batch entries from each index.
            # To do this, we may have to flatten the batch shape.

            # For sample s, batch element b of component c, we get the
            # partitioned batch indices from
            # partitioned_batch_indices[c]; and shift each element by
            # the sample index. The final lookup can be thought of as
            # a matrix gather along locations (s, b) in
            # samples_class_c where the n_class rows correspond to
            # samples within this component and the batch_size columns
            # correspond to batch elements within the component.
            #
            # Thus the lookup index is
            #   lookup[c, i] = batch_size * s[i] + b[c, i]
            # for i = 0 ... n_class[c] - 1.
            lookup_partitioned_batch_indices = (
                batch_size * tf.range(n_class) + partitioned_batch_indices[c])
            # Flatten (sample, batch) into one leading axis so that a single
            # gather with the flat lookup index selects the wanted rows.
            samples_class_c = tf.reshape(
                samples_class_c,
                tf.concat([[n_class * batch_size], event_shape], 0))
            samples_class_c = tf.gather(samples_class_c,
                                        lookup_partitioned_batch_indices,
                                        name='samples_class_c_gather')
            samples_class[c] = samples_class_c

        # Stitch back together the samples across the components.
        lhs_flat_ret = tf.dynamic_stitch(indices=partitioned_samples_indices,
                                         data=samples_class)
        # Reshape back to proper sample, batch, and event shape.
        ret = tf.reshape(lhs_flat_ret,
                         tf.concat([samples_shape, event_shape], 0))
        # Re-attach whatever static shape information is available.
        tensorshape_util.set_shape(
            ret,
            tensorshape_util.concatenate(static_samples_shape,
                                         self.event_shape))
        return ret
def _generate_detections_per_image(boxes,
                                   scores,
                                   max_total_size=100,
                                   nms_iou_threshold=0.3,
                                   score_threshold=0.05,
                                   pre_nms_num_boxes=5000):
  """Runs per-class NMS for one image and keeps the best detections overall.

  For every class, the top `pre_nms_num_boxes` scoring boxes are passed
  through padded non-max suppression. The per-class results are then
  concatenated and the `max_total_size` highest-scoring entries are returned.
  Padding entries carry a score of -1.

  Args:
    boxes: a tensor with shape [N, num_classes, 4] or [N, 1, 4], which stacks
      box predictions on all feature levels. N is the total number of anchors
      on all levels. With shape [N, 1, 4] the same boxes are used for every
      class.
    scores: a tensor with shape [N, num_classes], which stacks class
      probability on all feature levels. N is the total number of anchors on
      all levels and num_classes is the number of classes predicted by the
      model.
    max_total_size: a scalar representing the maximum number of boxes retained
      over all classes.
    nms_iou_threshold: a float representing the threshold for deciding whether
      boxes overlap too much with respect to IOU.
    score_threshold: a float representing the threshold for deciding when to
      remove boxes based on score.
    pre_nms_num_boxes: an int number of top candidate detections per class
      before NMS.

  Returns:
    nms_boxes: `float` Tensor of shape [max_total_size, 4] representing top
      detected boxes in [y1, x1, y2, x2].
    nms_scores: `float` Tensor of shape [max_total_size] representing sorted
      confidence scores for detected boxes; padding entries are -1.
    nms_classes: `int` Tensor of shape [max_total_size] representing classes
      for detected boxes.
    valid_detections: scalar `int` Tensor; only the top `valid_detections`
      boxes are valid detections.
  """
  per_class_boxes = []
  per_class_scores = []
  per_class_labels = []
  box_class_dim = boxes.get_shape().as_list()[1]
  score_class_dim = scores.get_shape().as_list()[1]

  for class_id in range(score_class_dim):
    # When boxes are class-agnostic ([N, 1, 4]) always read column 0.
    box_column = min(box_class_dim - 1, class_id)
    class_boxes = boxes[:, box_column]
    class_scores = scores[:, class_id]

    # Keep only the best-scoring candidates before running NMS.
    num_candidates = tf.minimum(
        tf.shape(input=class_scores)[-1], pre_nms_num_boxes)
    class_scores, candidate_idx = tf.nn.top_k(class_scores, k=num_candidates)
    class_boxes = tf.gather(class_boxes, candidate_idx)

    kept_idx, num_kept = tf.image.non_max_suppression_padded(
        tf.cast(class_boxes, tf.float32),
        tf.cast(class_scores, tf.float32),
        max_total_size,
        iou_threshold=nms_iou_threshold,
        score_threshold=score_threshold,
        pad_to_max_output_size=True,
        name='nms_detections_' + str(class_id))
    kept_boxes = tf.gather(class_boxes, kept_idx)
    kept_scores = tf.gather(class_scores, kept_idx)

    # Padding slots (positions >= num_kept) get a score of -1.
    is_real = tf.less(tf.range(max_total_size), [num_kept])
    kept_scores = tf.where(is_real, kept_scores, -tf.ones_like(kept_scores))

    per_class_boxes.append(kept_boxes)
    per_class_scores.append(kept_scores)
    per_class_labels.append(tf.fill([max_total_size], class_id))

  # Merge all classes, then keep the overall top `max_total_size` by score.
  merged_boxes = tf.concat(per_class_boxes, axis=0)
  merged_scores = tf.concat(per_class_scores, axis=0)
  merged_labels = tf.concat(per_class_labels, axis=0)

  nmsed_scores, best_idx = tf.nn.top_k(
      merged_scores, k=max_total_size, sorted=True)
  nmsed_boxes = tf.gather(merged_boxes, best_idx)
  nmsed_classes = tf.gather(merged_labels, best_idx)

  # Real detections are exactly those whose score is above the -1 sentinel.
  is_detection = tf.cast(tf.greater(nmsed_scores, -1), tf.int32)
  valid_detections = tf.reduce_sum(input_tensor=is_detection)
  return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
Ejemplo n.º 29
0
  def _setup(self, coupon_spec):
    """Precomputes flattened per-cashflow tensors from the coupon specs.

    Builds the coupon schedule, the underlying swap for every coupon period,
    and flat (contract-major) tensors of notionals, day count fractions,
    coupon basis/multiplier and contract indices, and stores them on `self`.
    The business day rule and day count convention are taken from the last
    entry of `coupon_spec`.

    Args:
      coupon_spec: Sequence of coupon specifications, one per contract. Each
        entry is read for `coupon_frequency`, `tenor`, `notional`,
        `coupon_basis`, `coupon_multiplier`, `fixed_leg` and `float_leg`.
    """
    last_spec = coupon_spec[-1]

    cpn_frequency = dates.PeriodTensor.stack(
        [spec.coupon_frequency for spec in coupon_spec], axis=0)
    cpn_dates, _ = self._generate_schedule(cpn_frequency,
                                           last_spec.businessday_rule)
    cms_term = dates.PeriodTensor.stack(
        [spec.tenor for spec in coupon_spec], axis=0)

    # Each coupon period runs from one schedule date to the next and pays at
    # the period end. The underlying swap starts with the period and ends one
    # tenor later.
    cms_start_dates = cpn_dates[:, :-1]
    tenor_per_period = cms_term.expand_dims(axis=-1).broadcast_to(
        cms_start_dates.shape)
    cms_end_dates = cpn_dates[:, :-1] + tenor_per_period
    coupon_start_dates = cpn_dates[:, :-1]
    coupon_end_dates = cpn_dates[:, 1:]
    payment_dates = cpn_dates[:, 1:]

    daycount_fractions = rc.get_daycount_fraction(
        coupon_start_dates,
        coupon_end_dates,
        last_spec.daycount_convention,
        dtype=self._dtype)

    periods_per_contract = payment_dates.shape.as_list()[-1]

    def per_cashflow(values):
      # One value per contract -> one value per cashflow (flattened).
      return tf.repeat(
          tf.convert_to_tensor(values, dtype=self._dtype),
          periods_per_contract)

    notional = per_cashflow([spec.notional for spec in coupon_spec])
    coupon_basis = per_cashflow([spec.coupon_basis for spec in coupon_spec])
    coupon_multiplier = per_cashflow(
        [spec.coupon_multiplier for spec in coupon_spec])
    contract_index = tf.repeat(
        tf.range(0, len(coupon_spec)), periods_per_contract)

    self._num_cashflows = daycount_fractions.shape.as_list()[-1]

    def per_cashflow_legs(legs):
      # Repeat each contract's leg spec once per cashflow, contract-major.
      return list(itertools.chain.from_iterable(
          itertools.repeat(leg, self._num_cashflows) for leg in legs))

    fixed_legs = per_cashflow_legs([spec.fixed_leg for spec in coupon_spec])
    float_legs = per_cashflow_legs([spec.float_leg for spec in coupon_spec])
    self._swap = irs.InterestRateSwap(
        cms_start_dates.reshape([-1]),
        cms_end_dates.reshape([-1]),
        fixed_legs,
        float_legs,
        dtype=self._dtype)

    self._coupon_start_dates = coupon_start_dates.reshape([-1])
    self._coupon_end_dates = coupon_end_dates.reshape([-1])
    self._payment_dates = payment_dates.reshape([-1])
    self._notional = notional
    self._daycount_fractions = tf.reshape(daycount_fractions, [-1])
    self._coupon_basis = coupon_basis
    self._coupon_multiplier = coupon_multiplier
    self._contract_index = contract_index

    # convexity related values
    def term_to_years(t):
      # Months count as 1/12 of a year, years as 1; any other period type
      # contributes 0.
      per_month = tf.constant(1. / 12., dtype=self._dtype)
      per_year = tf.constant(1., dtype=self._dtype)
      unsupported = tf.constant(0., dtype=self._dtype)
      year_or_zero = tf.where(
          t.period_type() == dates.PeriodType.YEAR, per_year, unsupported)
      frac = tf.where(
          t.period_type() == dates.PeriodType.MONTH, per_month, year_or_zero)
      return frac * tf.cast(t.quantity(), dtype=self._dtype)

    cms_fixed_leg_frequency = dates.PeriodTensor.stack(
        [spec.fixed_leg.coupon_frequency for spec in coupon_spec], axis=0)
    self._delta = term_to_years(cpn_frequency)
    self._tau = term_to_years(cms_fixed_leg_frequency)
    self._cms_periods = term_to_years(cms_term) / self._tau
Ejemplo n.º 30
0
    def _parse_train_data(self, data):
        """Parse data for ShapeMask training."""
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        masks = data['groundtruth_instance_masks']
        is_crowds = data['groundtruth_is_crowd']
        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training and self._is_training:
            num_groundtrtuhs = tf.shape(classes)[0]
            with tf.control_dependencies([num_groundtrtuhs, is_crowds]):
                indices = tf.cond(
                    tf.greater(tf.size(is_crowds), 0),
                    lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    lambda: tf.cast(tf.range(num_groundtrtuhs), tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)
            masks = tf.gather(masks, indices)

        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(image)[0:2]

        # If not using category, makes all categories with id = 0.
        if not self._use_category:
            classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

        # Normalizes image with mean and std pixel values.
        image = input_utils.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            image, boxes, masks = input_utils.random_horizontal_flip(
                image, boxes, masks)

        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_utils.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = input_utils.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_scale = image_info[2, :]
        offset = image_info[3, :]

        # Resizes and crops boxes and masks.
        boxes = input_utils.resize_and_crop_boxes(boxes, image_scale,
                                                  self._output_size, offset)

        # Filters out ground truth boxes that are all zeros.
        indices = input_utils.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        masks = tf.gather(masks, indices)

        # Assigns anchors.
        input_anchor = anchor.Anchor(self._min_level, self._max_level,
                                     self._num_scales, self._aspect_ratios,
                                     self._anchor_size, self._output_size)
        anchor_labeler = anchor.AnchorLabeler(input_anchor,
                                              self._match_threshold,
                                              self._unmatched_threshold)
        (cls_targets, box_targets,
         num_positives) = anchor_labeler.label_anchors(
             boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32))

        # Sample groundtruth masks/boxes/classes for mask branch.
        num_masks = tf.shape(masks)[0]
        mask_shape = tf.shape(masks)[1:3]

        # Pad sampled boxes/masks/classes to a constant batch size.
        padded_boxes = input_utils.pad_to_fixed_size(boxes,
                                                     self._num_sampled_masks)
        padded_classes = input_utils.pad_to_fixed_size(classes,
                                                       self._num_sampled_masks)
        padded_masks = input_utils.pad_to_fixed_size(masks,
                                                     self._num_sampled_masks)

        # Randomly sample groundtruth masks for mask branch training. For the image
        # without groundtruth masks, it will sample the dummy padded tensors.
        rand_indices = tf.random.shuffle(
            tf.range(tf.maximum(num_masks, self._num_sampled_masks)))
        rand_indices = tf.math.mod(rand_indices, tf.maximum(num_masks, 1))
        rand_indices = rand_indices[0:self._num_sampled_masks]
        rand_indices = tf.reshape(rand_indices, [self._num_sampled_masks])

        sampled_boxes = tf.gather(padded_boxes, rand_indices)
        sampled_classes = tf.gather(padded_classes, rand_indices)
        sampled_masks = tf.gather(padded_masks, rand_indices)
        # Jitter the sampled boxes to mimic the noisy detections.
        sampled_boxes = box_utils.jitter_boxes(
            sampled_boxes, noise_scale=self._box_jitter_scale)
        sampled_boxes = box_utils.clip_boxes(sampled_boxes, self._output_size)
        # Compute mask targets in feature crop. A feature crop fully contains a
        # sampled box.
        mask_outer_boxes = box_utils.compute_outer_boxes(
            sampled_boxes, tf.shape(image)[0:2], scale=self._outer_box_scale)
        mask_outer_boxes = box_utils.clip_boxes(mask_outer_boxes,
                                                self._output_size)
        # Compensate the offset of mask_outer_boxes to map it back to original image
        # scale.
        mask_outer_boxes_ori = mask_outer_boxes
        mask_outer_boxes_ori += tf.tile(tf.expand_dims(offset, axis=0), [1, 2])
        mask_outer_boxes_ori /= tf.tile(tf.expand_dims(image_scale, axis=0),
                                        [1, 2])
        norm_mask_outer_boxes_ori = box_utils.normalize_boxes(
            mask_outer_boxes_ori, mask_shape)

        # Set sampled_masks shape to [num_sampled_masks, height, width, 1].
        sampled_masks = tf.cast(tf.expand_dims(sampled_masks, axis=-1),
                                tf.float32)
        mask_targets = tf.image.crop_and_resize(
            sampled_masks,
            norm_mask_outer_boxes_ori,
            box_indices=tf.range(self._num_sampled_masks),
            crop_size=[self._mask_crop_size, self._mask_crop_size],
            method='bilinear',
            extrapolation_value=0,
            name='train_mask_targets')
        mask_targets = tf.where(tf.greater_equal(mask_targets, 0.5),
                                tf.ones_like(mask_targets),
                                tf.zeros_like(mask_targets))
        mask_targets = tf.squeeze(mask_targets, axis=-1)
        if self._up_sample_factor > 1:
            fine_mask_targets = tf.image.crop_and_resize(
                sampled_masks,
                norm_mask_outer_boxes_ori,
                box_indices=tf.range(self._num_sampled_masks),
                crop_size=[
                    self._mask_crop_size * self._up_sample_factor,
                    self._mask_crop_size * self._up_sample_factor
                ],
                method='bilinear',
                extrapolation_value=0,
                name='train_mask_targets')
            fine_mask_targets = tf.where(
                tf.greater_equal(fine_mask_targets, 0.5),
                tf.ones_like(fine_mask_targets),
                tf.zeros_like(fine_mask_targets))
            fine_mask_targets = tf.squeeze(fine_mask_targets, axis=-1)
        else:
            fine_mask_targets = mask_targets

        # If bfloat16 is used, casts input image to tf.bfloat16.
        if self._use_bfloat16:
            image = tf.cast(image, dtype=tf.bfloat16)

        valid_image = tf.cast(tf.not_equal(num_masks, 0), tf.int32)
        if self._mask_train_class == 'all':
            mask_is_valid = valid_image * tf.ones_like(sampled_classes,
                                                       tf.int32)
        else:
            # Get the intersection of sampled classes with training splits.
            mask_valid_classes = tf.cast(
                tf.expand_dims(
                    class_utils.coco_split_class_ids(self._mask_train_class),
                    1), sampled_classes.dtype)
            match = tf.reduce_any(
                tf.equal(tf.expand_dims(sampled_classes, 0),
                         mask_valid_classes), 0)
            mask_is_valid = valid_image * tf.cast(match, tf.int32)

        # Packs labels for model_fn outputs.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': input_anchor.multilevel_boxes,
            'num_positives': num_positives,
            'image_info': image_info,
            # For ShapeMask.
            'mask_boxes': sampled_boxes,
            'mask_outer_boxes': mask_outer_boxes,
            'mask_targets': mask_targets,
            'fine_mask_targets': fine_mask_targets,
            'mask_classes': sampled_classes,
            'mask_is_valid': mask_is_valid,
        }
        return image, labels