def prune_completely_outside_window(boxlist, window, scope=None):
    """Prunes bounding boxes that fall completely outside of the given window.

  The function clip_to_window prunes bounding boxes that fall
  completely outside the window, but also clips any bounding boxes that
  partially overflow. This function does not clip partially overflowing boxes.

  Args:
    boxlist: a BoxList holding M_in boxes.
    window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
      of the window
    scope: name scope.

  Returns:
    pruned_boxlist: a new BoxList with all bounding boxes partially or fully in
      the window.
    valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
     in the input tensor.
  """
    with tf.name_scope(scope, 'PruneCompletelyOutsideWindow'):
        y_min, x_min, y_max, x_max = tf.split(value=boxlist.get(),
                                              num_or_size_splits=4,
                                              axis=1)
        win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
        coordinate_violations = tf.concat([
            tf.greater_equal(y_min, win_y_max),
            tf.greater_equal(x_min, win_x_max),
            tf.less_equal(y_max, win_y_min),
            tf.less_equal(x_max, win_x_min)
        ], 1)
        valid_indices = tf.reshape(
            tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))),
            [-1])
        return gather(boxlist, valid_indices), valid_indices
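A minimal eager-mode sketch of the same pruning mask on plain tensors, skipping the `BoxList` and `gather` helpers from the Object Detection API (the box values below are made up):

```python
import tensorflow as tf

# Boxes as [ymin, xmin, ymax, xmax]; the last one lies entirely to the right of the window.
boxes = tf.constant([[0.1, 0.1, 0.4, 0.4],
                     [0.9, 0.9, 1.5, 1.5],
                     [0.2, 1.1, 0.5, 1.6]])
window = tf.constant([0.0, 0.0, 1.0, 1.0])

y_min, x_min, y_max, x_max = tf.split(boxes, num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
coordinate_violations = tf.concat([
    tf.greater_equal(y_min, win_y_max),
    tf.greater_equal(x_min, win_x_max),
    tf.less_equal(y_max, win_y_min),
    tf.less_equal(x_max, win_x_min),
], 1)
valid_indices = tf.reshape(
    tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
print(valid_indices.numpy())  # [0 1]: the partially overflowing box is kept, the outside box is pruned.
```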
Example #2
    def loop_body_fn(matrices, column):
        # shape: (dim, log_num_results)
        column_values = tf.gather(matrices, [column], axis=1)

        # shape: (dim, log_num_results)
        should_be_updated = tf.logical_and(
            # Columns whose index is smaller than the degree of the primitive
            # polynomial are obtained from direction numbers and should not be
            # updated.
            tf.less_equal(tf.math.maximum(degree, column + 1), indices),
            # During a given iteration, only the next `n` columns (where `n` is the
            # degree of the primitive polynomial) should be updated.
            tf.less_equal(indices, column + degree))

        # shape: (dim, log_num_results)
        updated_matrices = tf.bitwise.bitwise_xor(
            tf.where(tf.equal(indices, column + degree),
                     tf.bitwise.right_shift(column_values, degree), matrices),
            utils.filter_tensor(column_values, polynomial,
                                column + degree - indices))

        # shape: (dim, log_num_results)
        returned_matrices = tf.where(should_be_updated, updated_matrices,
                                     matrices)

        return (returned_matrices, column + 1)
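A toy, hedged sketch (all values invented) of the band mask built above: during a given iteration only the columns whose index lies in [max(degree, column + 1), column + degree] are marked for update:

```python
import tensorflow as tf

degree = 3
column = 1
indices = tf.range(8)  # stand-in for the per-column index tensor
should_be_updated = tf.logical_and(
    tf.less_equal(tf.math.maximum(degree, column + 1), indices),
    tf.less_equal(indices, column + degree))
print(should_be_updated.numpy())
# [False False False  True  True False False False]
```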
Example #3
  def proposal(seed):
    """Proposal for log-concave rejection sampler."""
    (top_lobe_fractions_seed,
     exponential_samples_seed,
     top_selector_seed,
     rademacher_seed) = samplers.split_seed(
         seed, n=4, salt='log_concave_rejection_sampler_proposal')

    top_lobe_fractions = samplers.uniform(
        mode_shape, seed=top_lobe_fractions_seed, dtype=dtype)  # V in ref [1].
    top_offsets = top_lobe_fractions * top_width / mode_height

    exponential_samples = exponential_distribution.sample(
        mode_shape, seed=exponential_samples_seed)  # E in ref [1].
    exponential_height = (exponential_distribution.prob(exponential_samples) *
                          mode_height)
    exponential_offsets = (top_width + exponential_samples) / mode_height

    top_selector = samplers.uniform(
        mode_shape, seed=top_selector_seed, dtype=dtype)  # U in ref [1].
    on_top_mask = tf.less_equal(top_selector, top_fraction)

    unsigned_offsets = tf.where(on_top_mask, top_offsets, exponential_offsets)
    offsets = tf.round(
        tfp_random.rademacher(
            mode_shape, seed=rademacher_seed, dtype=dtype) *
        unsigned_offsets)

    potential_samples = mode + offsets
    envelope_height = tf.where(on_top_mask, mode_height, exponential_height)

    return potential_samples, envelope_height
Example #4
 def test_hz_to_midi_is_accurate(self):
   """Tests converting between MIDI values and their frequencies in hertz."""
   hz = np.linspace(0.0, 20000.0, 128)
   librosa_midi = librosa.hz_to_midi(hz)
   librosa_midi = tf.where(tf.less_equal(hz, 0.0), 0.0, librosa_midi)
   tf_midi = core.hz_to_midi(hz)
   self.assertAllClose(librosa_midi, tf_midi)
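A minimal sketch of such a conversion, assuming the standard MIDI formula `69 + 12 * log2(hz / 440)` and clamping non-positive frequencies to 0 (this is an illustration, not the `core.hz_to_midi` implementation under test):

```python
import tensorflow as tf

def hz_to_midi_sketch(hz):
  hz = tf.convert_to_tensor(hz, tf.float32)
  midi = 69.0 + 12.0 * tf.math.log(hz / 440.0) / tf.math.log(2.0)
  # Non-positive frequencies would give -inf; map them to 0 like the test expects.
  return tf.where(tf.less_equal(hz, 0.0), 0.0, midi)

print(hz_to_midi_sketch([0.0, 440.0, 880.0]).numpy())  # [ 0. 69. 81.]
```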
Example #5
def to_4d(image: tf.Tensor) -> tf.Tensor:
    """Converts an input Tensor to 4 dimensions.

  4D image => [N, H, W, C] or [N, C, H, W]
  3D image => [1, H, W, C] or [1, C, H, W]
  2D image => [1, H, W, 1]

  Args:
    image: The 2/3/4D input tensor.

  Returns:
    A 4D image tensor.

  Raises:
    `TypeError` if `image` is not a 2/3/4D tensor.

  """
    shape = tf.shape(image)
    original_rank = tf.rank(image)
    left_pad = tf.cast(tf.less_equal(original_rank, 3), dtype=tf.int32)
    right_pad = tf.cast(tf.equal(original_rank, 2), dtype=tf.int32)
    new_shape = tf.concat(
        [
            tf.ones(shape=left_pad, dtype=tf.int32),
            shape,
            tf.ones(shape=right_pad, dtype=tf.int32),
        ],
        axis=0,
    )
    return tf.reshape(image, new_shape)
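A quick eager usage sketch of `to_4d`:

```python
import tensorflow as tf

image_2d = tf.zeros([28, 28])        # [H, W]
image_3d = tf.zeros([28, 28, 3])     # [H, W, C]
print(to_4d(image_2d).shape)         # (1, 28, 28, 1)
print(to_4d(image_3d).shape)         # (1, 28, 28, 3)
```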
Example #6
def from_4d(image: tf.Tensor, ndims: int) -> tf.Tensor:
    """Converts a 4D image back to `ndims` rank."""
    shape = tf.shape(image)
    begin = tf.cast(tf.less_equal(ndims, 3), dtype=tf.int32)
    end = 4 - tf.cast(tf.equal(ndims, 2), dtype=tf.int32)
    new_shape = shape[begin:end]
    return tf.reshape(image, new_shape)
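`from_4d` undoes `to_4d` given the original rank; a small round-trip sketch:

```python
import tensorflow as tf

image = tf.zeros([28, 28, 3])
batched = to_4d(image)               # (1, 28, 28, 3)
restored = from_4d(batched, ndims=3)
print(restored.shape)                # (28, 28, 3)
```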
Example #7
 def randomized_computation(seed):
   seed_stream = SeedStream(seed, 'batched_rejection_sampler')
   proposed_samples, proposed_values = proposal(seed_stream())
   good_samples_mask = tf.less_equal(
       proposed_values * tf.random.uniform(
           proposed_samples.shape, maxval=1., seed=seed_stream()),
       target(proposed_samples))
   return proposed_samples, good_samples_mask
Example #8
 def randomized_computation(seed):
   seed_stream = SeedStream(seed, 'batched_rejection_sampler')
   proposed_samples, proposed_values = proposal_fn(seed_stream())
   good_samples_mask = tf.less_equal(
       proposed_values * tf.random.uniform(
           prefer_static.shape(proposed_samples),
           seed=seed_stream(),
           dtype=dtype),
       target_fn(proposed_samples))
   return proposed_samples, good_samples_mask
Example #9
 def randomized_computation(seed):
   """Internal randomized computation."""
   proposal_seed, mask_seed = samplers.split_seed(
       seed, salt='batched_rejection_sampler')
   proposed_samples, proposed_values = proposal_fn(proposal_seed)
   good_samples_mask = tf.less_equal(
       proposed_values * samplers.uniform(
           prefer_static.shape(proposed_samples),
           seed=mask_seed,
           dtype=dtype),
       target_fn(proposed_samples))
   return proposed_samples, good_samples_mask
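A self-contained, hedged sketch of the same accept mask outside TFP: rejection-sample the triangular density target(x) = 2x on [0, 1] under a flat envelope of height 2 (all names and values here are illustrative):

```python
import tensorflow as tf

def proposal_fn(seed):
  samples = tf.random.stateless_uniform([1000], seed=seed)
  envelope_values = tf.fill([1000], 2.0)  # flat envelope dominating target(x) = 2x
  return samples, envelope_values

def target_fn(x):
  return 2.0 * x

proposed_samples, proposed_values = proposal_fn([0, 1])
u = tf.random.stateless_uniform([1000], seed=[2, 3])
good_samples_mask = tf.less_equal(proposed_values * u, target_fn(proposed_samples))
accepted = tf.boolean_mask(proposed_samples, good_samples_mask)
# Acceptance probability is target/envelope = x, so roughly half the proposals survive.
print(float(tf.reduce_mean(tf.cast(good_samples_mask, tf.float32))))
```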
Example #10
        def maybe_update_alpha():
            """Maybe update the alpha param.

      Checks if global_step is between begin_compression_step and
      end_compression_step, and if the current training step is a
      compression step.

      Returns:
        Boolean tensor indicating whether the current step is a compression step.
      """
            is_step_within_compression_range = tf.logical_and(
                tf.greater_equal(tf.cast(self._global_step, tf.int32),
                                 self._spec.begin_compression_step),
                tf.logical_or(
                    tf.less_equal(tf.cast(self._global_step, tf.int32),
                                  self._spec.end_compression_step),
                    tf.less(self._spec.end_compression_step, 0)))
            is_compression_step = tf.less_equal(
                tf.add(self.last_alpha_update_step,
                       self._spec.compression_frequency),
                tf.cast(self._global_step, tf.int32))
            return tf.logical_and(is_step_within_compression_range,
                                  is_compression_step)
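A toy eager walk-through of the same schedule check with invented step values:

```python
import tensorflow as tf

global_step = tf.constant(1200)
begin_compression_step, end_compression_step = 1000, 5000
last_alpha_update_step, compression_frequency = 1100, 100

is_step_within_compression_range = tf.logical_and(
    tf.greater_equal(global_step, begin_compression_step),
    tf.logical_or(tf.less_equal(global_step, end_compression_step),
                  tf.less(end_compression_step, 0)))
is_compression_step = tf.less_equal(
    last_alpha_update_step + compression_frequency, global_step)
print(bool(tf.logical_and(is_step_within_compression_range, is_compression_step)))  # True
```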
Example #11
 def _sample_control_dependencies(self, x):
   assertions = []
   if not self.validate_args:
     return assertions
   loc = tf.convert_to_tensor(self.loc)
   scale = tf.convert_to_tensor(self.scale)
   concentration = tf.convert_to_tensor(self.concentration)
   assertions.append(assert_util.assert_greater_equal(
       x, loc, message='Sample must be greater than or equal to `loc`.'))
   assertions.append(assert_util.assert_equal(
       tf.logical_or(tf.greater_equal(concentration, 0),
                     tf.less_equal(x, loc - scale / concentration)),
       True,
       message=('If `concentration < 0`, sample must be less than or '
                'equal to `loc - scale / concentration`.'),
       summarize=100))
   return assertions
Example #12
        def body_fn(i, partial, outputs):
            """Body function for while_loop.

      Args:
        i: integer scalar
        partial: dictionary of Tensor (partially-constructed example)
        outputs: dictionary of TensorArray

      Returns:
        A triple containing the new values of the inputs.
      """
            can_append = True
            one_example = {}
            for k in keys:
                val = tf.cast(x[k][i], tf.int32)
                val = val[:tf.
                          reduce_sum(tf.cast(tf.not_equal(val, 0), tf.int32))]
                one_example[k] = val
            for k in keys:
                can_append = tf.logical_and(
                    can_append,
                    tf.less_equal(
                        tf.size(partial[k]) + tf.size(one_example[k]),
                        length[k]))

            def false_fn():
                return write_packed_example(partial, outputs)

            def true_fn():
                return partial, outputs

            partial, outputs = tf.cond(can_append, true_fn, false_fn)
            new_partial = {}
            for k in keys:
                new_seq = one_example[k][:length[k]]
                new_seq_len = tf.size(new_seq)
                new_partial[k] = tf.concat([partial[k], new_seq], 0)
                if _annotate_key(k):
                    new_partial[k + '_position'] = tf.concat([
                        partial[k + '_position'],
                        tf.range(new_seq_len, dtype=tf.int32)
                    ], 0)
            partial = new_partial
            return i + 1, partial, outputs
Example #13
    def represent(self, waves):
        """Transform waves into a representation suited for the DS2 encoder."""
        waves = tf.squeeze(waves, -1)

        # Re-scale.
        waves = waves / (tf.reduce_max(tf.abs(waves), axis=1, keepdims=True) +
                         1e-5)
        waves *= 32767
        # To match PSF the following line should be uncommented. But it's not
        # supported by TPUs.
        # waves = tf.cast(tf.cast(waves, tf.int16), waves.dtype)  # Matching PSF.

        # Determine frame and step sizes.
        window_size = int(self.sample_freq * self.window_size)
        window_step = int(self.sample_freq * self.window_step)

        # Compute STFT.
        fft_window = tf.signal.hann_window(window_size,
                                           periodic=False,
                                           dtype=waves.dtype)
        fft_window = tf.reshape(fft_window, [1, 1, window_size])

        frames = tf.signal.frame(waves, window_size, window_step, True)
        # Do the slow DFT matmul because window size generally will not be a power
        # of 2.
        dft_w = scipy.linalg.dft(window_size).astype(np.complex64)
        stft = tf.matmul(tf.cast(fft_window * frames, dft_w.dtype), dft_w)
        mag = tf.abs(stft) / float(window_size)
        mag = tf.where(tf.less_equal(mag, 1e-30),
                       tf.ones_like(mag) * 1e-30, mag)
        log_mag = 10. * tf.math.log(mag) / tf.math.log(10.)

        # Select features and standardize.
        features = log_mag[Ellipsis, :self.num_features]

        counts, means_ss, variance_ss, _ = tf.nn.sufficient_statistics(
            features, axes=[1, 2], keepdims=True)
        mean, variance = tf.nn.normalize_moments(counts, means_ss, variance_ss,
                                                 None)
        features = (features - mean) / tf.sqrt(variance)

        return features
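The `tf.where(tf.less_equal(mag, 1e-30), ...)` line above is just a magnitude floor applied before the log; a quick sketch showing it is equivalent to `tf.maximum` (an observation, not a change to the code):

```python
import tensorflow as tf

mag = tf.constant([0.0, 1e-35, 1e-3])
floored_where = tf.where(tf.less_equal(mag, 1e-30), tf.ones_like(mag) * 1e-30, mag)
floored_max = tf.maximum(mag, 1e-30)
print(tf.reduce_all(tf.equal(floored_where, floored_max)).numpy())  # True
log_mag = 10. * tf.math.log(floored_where) / tf.math.log(10.)       # floor ends up at -300 dB
```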
Example #14
def get_shuffled_indices_and_labels(batch_size, num_samples, shuffle_fraction,
                                    num_steps):
  """Produce possibly shuffled indices and labels."""
  total_num_samples = batch_size * num_samples
  num_shuffled_examples = int(shuffle_fraction * total_num_samples)

  shuffle_labels = tf.random.shuffle(tf.cast(
      num_shuffled_examples*[1] +
      (total_num_samples - num_shuffled_examples) * [0], tf.int32))
  indices = tf.sort(random_choice_noreplace(
      total_num_samples, num_steps)[:, :5], axis=1)
  indices = randomly_reverse_indices(indices)
  shuffled_samples = tf.where(
      tf.less_equal(tf.random.uniform((total_num_samples, 1)), 0.5),
      tf.gather(indices, [1, 0, 3], axis=1),
      tf.gather(indices, [1, 4, 3], axis=1))
  ordered_samples = tf.gather(indices, [1, 2, 3], axis=1)
  indices = tf.where(tf.equal(tf.expand_dims(shuffle_labels, axis=-1), 1),
                     shuffled_samples, ordered_samples)

  return indices, shuffle_labels
Example #15
def sample_and_preprocess(video, frame_labels, seq_len, name):
    """Samples frames and prepares them for training."""

    # STEP 0: DECIDE NUMBER OF FRAMES TO SAMPLE AND AUGMENTATION STRATEGY
    # ACCORDING TO MODE (i.e. train vs test/val)
    if CONFIG.MODE == 'train':
        augment = True
        offset = 1
        max_num_steps = CONFIG.TRAIN.NUM_FRAMES
        sampling_strategy = CONFIG.DATA.SAMPLING_STRATEGY
        sample_all = False
        sample_all_stride = None
    else:
        sampling_strategy = CONFIG.DATA.SAMPLING_STRATEGY
        augment = False
        offset = 1
        if sampling_strategy == 'all':
            sample_all = True
            sample_all_stride = 1
            max_num_steps = seq_len  #400
        else:
            sample_all = False
            sample_all_stride = None
            max_num_steps = CONFIG.EVAL.NUM_FRAMES
    # choose number of steps to sample
    num_steps = max_num_steps

    # STEP 1: SAMPLE STEPS AND GET THEIR CONTEXT FRAMES FOR THE EMBEDDER
    if sample_all:
        steps = tf.range(0, seq_len, sample_all_stride)
        chosen_steps = steps
    else:
        if sampling_strategy == 'stride':
            num_steps = tf.cast(num_steps, tf.int64)
            stride = (seq_len / num_steps)
            stride = tf.cast(stride, tf.int64)
            if stride <= 0:
                stride = tf.cast(CONFIG.DATA.STRIDE, tf.int64)
            # Offset can be set between 0 and maximum location from which we can get
            # total coverage of the video without having to pad.
            offset = tf.cast(offset, tf.int64)
            if offset is None:
                offset = tf.random.uniform(
                    (),
                    0,
                    tf.maximum(tf.cast(1, tf.int64),
                               seq_len - stride * num_steps),
                    dtype=tf.int64)
            # This handles sampling over shorter sequences by padding the last frame
            # many times. This is not ideal for the way alignment training batches are
            # created.
            cur_steps = tf.minimum(
                seq_len - 1,
                tf.range(offset, offset + num_steps * stride + 1, stride))
            cur_steps = cur_steps[:num_steps]

        elif sampling_strategy == 'random':
            # Sample a random offset less than a provided max offset. Among all frames
            # higher than the chosen offset, randomly sample num_frames
            check1 = tf.debugging.assert_greater_equal(
                seq_len,
                tf.cast(CONFIG.DATA.RANDOM_OFFSET, tf.int64),
                message='Random offset is more than sequence length.')
            check2 = tf.less_equal(
                tf.cast(num_steps, tf.int64),
                seq_len - tf.cast(CONFIG.DATA.RANDOM_OFFSET, tf.int64),
            )

            def _sample_random():
                with tf.control_dependencies([tf.identity(check1.outputs[0])]):
                    offset = CONFIG.DATA.RANDOM_OFFSET
                    steps = tf.random.shuffle(tf.range(offset, seq_len))
                    steps = tf.gather(steps, tf.range(0, num_steps))
                    #steps = tf.gather(steps, tf.range(0, seq_len))
                    #steps = tf.gather(steps, tf.random.uniform(shape=(num_steps,), minval=offset, maxval=seq_len, dtype=tf.int64))
                    steps = tf.gather(
                        steps,
                        tf.nn.top_k(steps, k=num_steps).indices[::-1])
                    steps = steps[:num_steps]
                    return steps

            def _sample_all():
                return tf.range(0, num_steps, dtype=tf.int64)

            cur_steps = tf.cond(check2, _sample_random, _sample_all)
        else:
            raise ValueError(
                'Sampling strategy %s is unknown. Supported values are '
                'stride, random and all for now.' % sampling_strategy)

        # Get multiple context steps depending on config at selected steps.
        steps = tf.reshape(tf.map_fn(get_steps, cur_steps), [-1])
        # Make sure that frame indices are never less than 0 or greater than (seq_len - 1).
        steps = tf.maximum(tf.cast(0, tf.int64), steps)
        steps = tf.minimum(seq_len - 1, steps)
        # Store chosen indices.
        chosen_steps = cur_steps

    # Select data based on steps.
    video = tf.gather(video, steps)

    if CONFIG.DATA.FRAME_LABELS:
        frame_labels = tf.gather(frame_labels, steps)

    # Decode the encoded JPEG images
    video = tf.map_fn(tf.image.decode_jpeg,
                      video,
                      parallel_iterations=FLAGS.num_parallel_calls,
                      dtype=tf.uint8)
    # Take images in range [0, 255] and normalize to [0, 1]
    video = tf.map_fn(normalize_input,
                      video,
                      parallel_iterations=FLAGS.num_parallel_calls,
                      dtype=tf.float32)
    # Perform data-augmentation and return images in range [-1, 1]
    video = preprocess_input(video, augment)

    if CONFIG.MODE == 'train':
        shape_all_steps = CONFIG.DATA.NUM_CONTEXT_FRAMES * max_num_steps  # should be similar to shape of steps
        video.set_shape(
            [shape_all_steps, CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE, 3])
    if CONFIG.MODE == 'train' and CONFIG.DATA.FRAME_LABELS:
        shape_all_steps = CONFIG.DATA.NUM_CONTEXT_FRAMES * max_num_steps  # should be similar to shape of steps
        frame_labels.set_shape([shape_all_steps])

    return {
        'frames': video,
        'chosen_steps': chosen_steps,
        'seq_lens': seq_len,
        'frame_labels': frame_labels,
        'name': name,
        'num_steps': num_steps,
    }
Example #16
def find_interval_index(query_xs,
                        interval_lower_xs,
                        last_interval_is_closed=False,
                        dtype=None,
                        name=None):
    """Function to find the index of the interval where query points lies.

  Given a list of adjacent half-open intervals [x_0, x_1), [x_1, x_2), ...,
  [x_{n-1}, x_n), [x_n, inf), described by a list [x_0, x_1, ..., x_{n-1}, x_n].
  Return the index where the input query points lie. If x >= x_n, n is returned,
  and if x < x_0, -1 is returned. If `last_interval_is_closed` is set to `True`,
  the last interval [x_{n-1}, x_n] is interpreted as closed (including x_n).

  #### Example

  ```python
  interval_lower_xs = [0.25, 0.5, 1.0, 2.0, 3.0]
  query_xs = [0.25, 3.0, 5.0, 0.0, 0.5, 0.8]
  result = find_interval_index(query_xs, interval_lower_xs)
  # result == [0, 4, 4, -1, 1, 1]
  ```

  Args:
    query_xs: Rank 1 real `Tensor` of any size, the list of x coordinates for
      which the interval index is to be found.
    interval_lower_xs: Rank 1 `Tensor` of the same dtype as `query_xs`. The
      values x_0, ..., x_n that define the interval starts; they must be
      strictly increasing.
    last_interval_is_closed: If set to `True`, the last interval is interpreted
      as closed.
    dtype: Optional `tf.Dtype`. If supplied, the dtype for `query_xs` and
      `interval_lower_xs`.
      Default value: None which maps to the default dtype inferred by TensorFlow
        (float32).
    name: Optional name of the operation.

  Returns:
    A tensor that matches the shape of `query_xs` with dtype=int32 containing
    the indices of the intervals containing query points. `-1` means the query
    point lies before all intervals and `n-1` means that the point lies in the
    last half-open interval (if `last_interval_is_closed` is `False`) or that
    the point lies to the right of all intervals (if `last_interval_is_closed`
    is `True`).
  """
    with tf.compat.v1.name_scope(
            name,
            default_name='find_interval_index',
            values=[query_xs, interval_lower_xs, last_interval_is_closed]):
        # TODO(b/138988951): add ability to validate that intervals are increasing.
        # TODO(b/138988951): validate that if last_interval_is_closed, input size
        # must be > 1.
        query_xs = tf.convert_to_tensor(query_xs, dtype=dtype)
        interval_lower_xs = tf.convert_to_tensor(interval_lower_xs,
                                                 dtype=dtype)

        # Result assuming that last interval is half-open.
        indices = tf.searchsorted(interval_lower_xs, query_xs,
                                  side='right') - 1

        # Handling the branch if the last interval is closed.
        last_index = tf.shape(interval_lower_xs)[-1] - 1
        last_x = tf.gather(interval_lower_xs, [last_index], axis=-1)
        # should_cap is a boolean tensor that is True at a cell iff `indices`
        # equals the last index at that cell and the query x is <= the right
        # boundary of the last interval.
        should_cap = tf.logical_and(tf.equal(indices, last_index),
                                    tf.less_equal(query_xs, last_x))

        # cap to last_index if the query x is not in the last interval, otherwise,
        # cap to last_index - 1.
        caps = last_index - tf.cast(should_cap, dtype=tf.dtypes.int32)

        return tf.compat.v1.where(last_interval_is_closed,
                                  tf.minimum(indices, caps), indices)
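With the last interval left half-open, the whole computation reduces to a single `tf.searchsorted` call; a quick eager check against the docstring example:

```python
import tensorflow as tf

interval_lower_xs = tf.constant([0.25, 0.5, 1.0, 2.0, 3.0])
query_xs = tf.constant([0.25, 3.0, 5.0, 0.0, 0.5, 0.8])
indices = tf.searchsorted(interval_lower_xs, query_xs, side='right') - 1
print(indices.numpy())  # [ 0  4  4 -1  1  1]
```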
Example #17
def box_matching(boxes, gt_boxes, gt_classes):
    """Match boxes to groundtruth boxes.

  Given the proposal boxes and the groundtruth boxes and classes, perform the
  groundtruth matching by taking the argmax of the IoU between boxes and
  groundtruth boxes.

  Args:
    boxes: a tensor of shape of [batch_size, N, 4] representing the box
      coordinates to be matched to groundtruth boxes.
    gt_boxes: a tensor of shape of [batch_size, MAX_INSTANCES, 4] representing
      the groundtruth box coordinates. It is padded with -1s to indicate the
      invalid boxes.
    gt_classes: [batch_size, MAX_INSTANCES] representing the groundtruth box
      classes. It is padded with -1s to indicate the invalid classes.

  Returns:
    matched_gt_boxes: a tensor of shape of [batch_size, N, 4], representing
      the matched groundtruth box coordinates for each input box. If the box
      does not overlap with any groundtruth boxes, the matched boxes of it
      will be set to all 0s.
    matched_gt_classes: a tensor of shape of [batch_size, N], representing
      the matched groundtruth classes for each input box. If the box does not
      overlap with any groundtruth boxes, the matched box classes of it will
      be set to 0, which corresponds to the background class.
    matched_gt_indices: a tensor of shape of [batch_size, N], representing
      the indices of the matched groundtruth boxes in the original gt_boxes
      tensor. If the box does not overlap with any groundtruth boxes, the
      index of the matched groundtruth will be set to -1.
    matched_iou: a tensor of shape of [batch_size, N], representing the IoU
      between the box and its matched groundtruth box. The matched IoU is the
      maximum IoU of the box and all the groundtruth boxes.
    iou: a tensor of shape of [batch_size, N, K], representing the IoU matrix
      between boxes and the groundtruth boxes. The IoU between a box and the
      invalid groundtruth boxes whose coordinates are [-1, -1, -1, -1] is -1.
  """
    # Compute IoU between boxes and gt_boxes.
    # iou <- [batch_size, N, K]
    iou = box_utils.bbox_overlap(boxes, gt_boxes)

    # max_iou <- [batch_size, N]
    # 0.0 -> no match to gt, or -1.0 match to no gt
    matched_iou = tf.reduce_max(iou, axis=-1)

    # background_box_mask <- bool, [batch_size, N]
    background_box_mask = tf.less_equal(matched_iou, 0.0)

    argmax_iou_indices = tf.argmax(iou, axis=-1, output_type=tf.int32)

    argmax_iou_indices_shape = tf.shape(argmax_iou_indices)
    batch_indices = (
        tf.expand_dims(tf.range(argmax_iou_indices_shape[0]), axis=-1) *
        tf.ones([1, argmax_iou_indices_shape[-1]], dtype=tf.int32))
    gather_nd_indices = tf.stack([batch_indices, argmax_iou_indices], axis=-1)

    matched_gt_boxes = tf.gather_nd(gt_boxes, gather_nd_indices)
    matched_gt_boxes = tf.where(
        tf.tile(tf.expand_dims(background_box_mask, axis=-1), [1, 1, 4]),
        tf.zeros_like(matched_gt_boxes, dtype=tf.float32), matched_gt_boxes)

    matched_gt_classes = tf.gather_nd(gt_classes, gather_nd_indices)
    matched_gt_classes = tf.where(background_box_mask,
                                  tf.zeros_like(matched_gt_classes),
                                  matched_gt_classes)

    matched_gt_indices = tf.where(background_box_mask,
                                  -tf.ones_like(argmax_iou_indices),
                                  argmax_iou_indices)

    return (matched_gt_boxes, matched_gt_classes, matched_gt_indices,
            matched_iou, iou)
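A hedged toy walk-through of the matching and background-masking steps on a hand-made IoU matrix, so `box_utils.bbox_overlap` is not needed (one image, three proposals, two groundtruth boxes):

```python
import tensorflow as tf

iou = tf.constant([[[0.7, 0.2],
                    [0.1, 0.6],
                    [0.0, 0.0]]])          # the third proposal overlaps nothing
gt_classes = tf.constant([[3, 5]])

matched_iou = tf.reduce_max(iou, axis=-1)              # [[0.7, 0.6, 0.0]]
background_box_mask = tf.less_equal(matched_iou, 0.0)  # [[False, False, True]]
argmax_iou_indices = tf.argmax(iou, axis=-1, output_type=tf.int32)
matched_gt_classes = tf.where(
    background_box_mask,
    tf.zeros_like(argmax_iou_indices),
    tf.gather(gt_classes, argmax_iou_indices, batch_dims=1))
print(matched_gt_classes.numpy())  # [[3 5 0]] -> unmatched proposal falls back to background
```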
Example #18
    def pack_batch(x: Mapping[str, tf.Tensor]) -> Mapping[str, tf.Tensor]:
        """Internal function to map over.

    Consumes a batch of input examples and produces a variable number of output
    examples.

    Args:
      x: a batch of input examples, as a mapping from feature key to Tensor.
    Returns:
      a mapping from feature key to Tensor holding the packed examples.
    """
        keys = list(feature_lengths)
        partial = empty_example.copy()
        first_key, *_ = keys
        dynamic_batch_size = tf.shape(x[first_key])[0]
        outputs = {}
        for k in keys:
            outputs[k] = tf.TensorArray(tf.int32,
                                        size=0,
                                        dynamic_size=True,
                                        element_shape=[feature_lengths[k]])
            outputs[k + "_positions"] = tf.TensorArray(
                tf.int32,
                size=0,
                dynamic_size=True,
                element_shape=[feature_lengths[k]])

        for i in tf.range(0, dynamic_batch_size):
            tf.autograph.experimental.set_loop_options(shape_invariants=[(
                partial, {k: tf.TensorShape([None])
                          for k in keys_etc}
            ), (outputs, {k: tf.TensorShape(None)
                          for k in keys_etc})])

            can_append = True
            one_example = {}
            for k in keys:
                val = tf.cast(x[k][i], tf.int32)
                val = val[:tf.
                          reduce_sum(tf.cast(tf.not_equal(val, 0), tf.int32))]
                one_example[k] = val
            for k in keys:
                can_append = tf.logical_and(
                    can_append,
                    tf.less_equal(
                        tf.size(partial[k]) + tf.size(one_example[k]),
                        feature_lengths[k]))

            if not can_append:
                partial, outputs = _write_packed_example(partial, outputs)

            new_partial = {}
            for k in keys:
                new_seq = one_example[k][:feature_lengths[k]]
                new_seq_len = tf.size(new_seq)
                new_partial[k] = tf.concat([partial[k], new_seq], 0)
                new_partial[k + "_positions"] = tf.concat([
                    partial[k + "_positions"],
                    tf.range(new_seq_len, dtype=tf.int32)
                ], 0)
            partial = new_partial

        partial, outputs = _write_packed_example(partial, outputs)
        packed = {k: outputs[k].stack() for k in keys_etc}
        for k in keys:
            packed[k + "_segment_ids"] = (tf.cumsum(
                tf.cast(tf.equal(packed[k + "_positions"], 0), tf.int32),
                axis=1) * tf.cast(tf.not_equal(packed[k], 0), tf.int32))
        return packed
Example #19
def sample_and_preprocess(video,
                          labels,
                          seq_label,
                          seq_len,
                          name,
                          num_steps,
                          augment,
                          sample_all=False,
                          sample_all_stride=1,
                          add_shape=False):
    """Samples frames and prepares them for training."""

    if sample_all:
        # When dealing with very long videos we can choose to sub-sample to fit
        # data in memory. But be aware this also evaluates over a subset of frames.
        # Subsampling the validation set videos when reporting performance is not
        # recommended.
        steps = tf.range(0, seq_len, sample_all_stride)
        seq_len = tf.shape(steps)[0]
        chosen_steps = steps
    else:
        stride = CONFIG.DATA.STRIDE
        sampling_strategy = CONFIG.DATA.SAMPLING_STRATEGY

        # TODO(debidatta) : More flexible sampling
        if sampling_strategy == 'stride':
            # Offset can be set between 0 and maximum location from which we can get
            # total coverage of the video without having to pad.
            # This handles sampling over longer sequences.
            offset = tf.random.uniform(
                (),
                0,
                tf.maximum(tf.cast(1, tf.int64), seq_len - stride * num_steps),
                dtype=tf.int64)
            # This handles sampling over shorter sequences by padding the last frame
            # many times. This is not ideal for the way alignment training batches are
            # created.
            steps = tf.minimum(
                seq_len - 1,
                tf.range(offset, offset + num_steps * stride + 1, stride))
            steps = steps[:num_steps]
        elif sampling_strategy == 'offset_uniform':
            # Sample a random offset less than a provided max offset. Among all frames
            # higher than the chosen offset, randomly sample num_frames
            check1 = tf.debugging.assert_greater_equal(
                seq_len,
                tf.cast(CONFIG.DATA.RANDOM_OFFSET, tf.int64),
                message='Random offset is more than sequence length.')
            check2 = tf.less_equal(
                tf.cast(num_steps, tf.int64),
                seq_len - tf.cast(CONFIG.DATA.RANDOM_OFFSET, tf.int64),
            )

            def _sample_random():
                with tf.control_dependencies([tf.identity(check1.outputs[0])]):
                    offset = CONFIG.DATA.RANDOM_OFFSET
                    steps = tf.random.shuffle(tf.range(offset, seq_len))
                    steps = tf.gather(steps, tf.range(0, num_steps))
                    steps = tf.gather(
                        steps,
                        tf.nn.top_k(steps, k=num_steps).indices[::-1])
                    return steps

            def _sample_all():
                return tf.range(0, num_steps, dtype=tf.int64)

            steps = tf.cond(check2, _sample_random, _sample_all)

        else:
            raise ValueError(
                'Sampling strategy %s is unknown. Supported values are '
                'stride, offset_uniform.' % sampling_strategy)

        if not sample_all and 'tcn' in CONFIG.TRAINING_ALGO:
            pos_window = CONFIG.TCN.POSITIVE_WINDOW
            # pylint: disable=g-long-lambda
            pos_steps = tf.map_fn(
                lambda step: tf.random.uniform(
                    (), minval=step - pos_window, maxval=step, dtype=tf.int64),
                steps)
            # pylint: enable=g-long-lambda
            steps = tf.stack([pos_steps, steps])
            steps = tf.reshape(tf.transpose(steps), (-1, ))

        # Store chosen indices.
        chosen_steps = steps
        # Get multiple context steps depending on config at selected steps.
        steps = tf.reshape(tf.map_fn(get_steps, steps), [-1])
        steps = tf.maximum(tf.cast(0, tf.int64), steps)
        steps = tf.minimum(seq_len - 1, steps)

    shape_all_steps = CONFIG.DATA.NUM_STEPS * num_steps
    if not sample_all and 'tcn' in CONFIG.TRAINING_ALGO:
        shape_all_steps *= 2

    # Select data based on steps.
    video = tf.gather(video, steps)
    # Decode the encoded JPEG images
    video = tf.map_fn(tf.image.decode_jpeg,
                      video,
                      parallel_iterations=FLAGS.num_parallel_calls,
                      dtype=tf.uint8)
    # Take images in range [0, 255] and normalize to [0, 1]
    video = tf.map_fn(normalize_input,
                      video,
                      parallel_iterations=FLAGS.num_parallel_calls,
                      dtype=tf.float32)
    # Perform data-augmentation and return images in range [-1, 1]
    video = preprocess_input(video, augment)
    if add_shape:
        video.set_shape(
            [shape_all_steps, CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE, 3])

    if CONFIG.DATA.FRAME_LABELS:
        labels = tf.gather(labels, steps)
        if add_shape:
            labels.set_shape([shape_all_steps])

    return {
        'frames': video,
        'frame_labels': labels,
        'chosen_steps': chosen_steps,
        'seq_lens': seq_len,
        'seq_labels': seq_label,
        'name': name
    }
Example #20
    def _static_subsample(self, indicator, batch_size, labels):
        """Returns subsampled minibatch.

    Args:
      indicator: boolean tensor of shape [N] whose True entries can be sampled.
        N should be a compile-time constant.
      batch_size: desired batch size. This scalar cannot be None.
      labels: boolean tensor of shape [N] denoting positive(=True) and negative
        (=False) examples. N should be a compile-time constant.

    Returns:
      sampled_idx_indicator: boolean tensor of shape [N], True for entries which
        are sampled. It ensures the length of output of the subsample is always
        batch_size, even when number of examples set to True in indicator is
        less than batch_size.

    Raises:
      ValueError: if labels and indicator are not 1D boolean tensors.
    """
        # Check if indicator and labels have a static size.
        if not indicator.shape.is_fully_defined():
            raise ValueError(
                'indicator must be static in shape when is_static is '
                'True')
        if not labels.shape.is_fully_defined():
            raise ValueError('labels must be static in shape when is_static is '
                             'True')
        if not isinstance(batch_size, int):
            raise ValueError(
                'batch_size has to be an integer when is_static is '
                'True.')

        input_length = tf.shape(input=indicator)[0]

        # Set the number of examples set True in indicator to be at least
        # batch_size.
        num_true_sampled = tf.reduce_sum(
            input_tensor=tf.cast(indicator, tf.float32))
        additional_false_sample = tf.less_equal(
            tf.cumsum(tf.cast(tf.logical_not(indicator), tf.float32)),
            batch_size - num_true_sampled)
        indicator = tf.logical_or(indicator, additional_false_sample)

        # Shuffle indicator and label. Need to store the permutation to restore the
        # order post sampling.
        permutation = tf.random.shuffle(tf.range(input_length))
        indicator = ops.matmul_gather_on_zeroth_axis(
            tf.cast(indicator, tf.float32), permutation)
        labels = ops.matmul_gather_on_zeroth_axis(tf.cast(labels, tf.float32),
                                                  permutation)

        # index (starting from 1) when indicator is True, 0 when False
        indicator_idx = tf.where(tf.cast(indicator, tf.bool),
                                 tf.range(1, input_length + 1),
                                 tf.zeros(input_length, tf.int32))

        # Use -1 for negative labels and +1 for positive labels.
        signed_label = tf.where(
            tf.cast(labels, tf.bool), tf.ones(input_length, tf.int32),
            tf.scalar_mul(-1, tf.ones(input_length, tf.int32)))
        # negative of index for negative label, positive index for positive label,
        # 0 when indicator is False.
        signed_indicator_idx = tf.multiply(indicator_idx, signed_label)
        sorted_signed_indicator_idx = tf.nn.top_k(signed_indicator_idx,
                                                  input_length,
                                                  sorted=True).values

        [num_positive_samples, num_negative_samples
         ] = self._get_num_pos_neg_samples(sorted_signed_indicator_idx,
                                           batch_size)

        sampled_idx = self._get_values_from_start_and_end(
            sorted_signed_indicator_idx, num_positive_samples,
            num_negative_samples, batch_size)

        # Shift the indices to start from 0 and remove any samples that are set as
        # False.
        sampled_idx = tf.abs(sampled_idx) - tf.ones(batch_size, tf.int32)
        sampled_idx = tf.multiply(
            tf.cast(tf.greater_equal(sampled_idx, tf.constant(0)), tf.int32),
            sampled_idx)

        sampled_idx_indicator = tf.cast(
            tf.reduce_sum(input_tensor=tf.one_hot(sampled_idx,
                                                  depth=input_length),
                          axis=0), tf.bool)

        # project back the order based on stored permutations
        reprojections = tf.one_hot(permutation,
                                   depth=input_length,
                                   dtype=tf.float32)
        return tf.cast(
            tf.tensordot(tf.cast(sampled_idx_indicator, tf.float32),
                         reprojections,
                         axes=[0, 0]), tf.bool)
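A toy eager sketch (invented values) of the indicator top-up at the start of `_static_subsample`, which flips just enough False entries to True so that at least `batch_size` entries are available for sampling:

```python
import tensorflow as tf

indicator = tf.constant([True, False, True, False, False, False])
batch_size = 4
num_true_sampled = tf.reduce_sum(tf.cast(indicator, tf.float32))
additional_false_sample = tf.less_equal(
    tf.cumsum(tf.cast(tf.logical_not(indicator), tf.float32)),
    batch_size - num_true_sampled)
print(tf.logical_or(indicator, additional_false_sample).numpy())
# [ True  True  True  True False False]
```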