Example #1
    def BatchedOrientedNMSIndices(self, bboxes, scores, nms_iou_threshold,
                                  score_threshold, max_boxes_per_class):
        """Runs batched version of a Per-Class 3D (7-DOF) Non Max Suppression.

    All outputs have shape [batch_size, num_classes, max_boxes_per_class].

    Args:
      bboxes: A [batch_size, num_boxes, 7] floating point Tensor of bounding
        boxes in [x, y, z, dx, dy, dz, phi] format.
      scores: A [batch_size, num_boxes, num_classes] floating point Tensor
        containing box scores.
      nms_iou_threshold: Either a float or a list of floats of len num_classes
        with the IoU threshold to use when determining whether two boxes overlap
        for purposes of suppression.
      score_threshold: Either a float or a list of floats of len num_classes
        with the score threshold that allows NMS to quickly ignore boxes.
      max_boxes_per_class: An integer scalar with the maximum number of boxes
        per example to emit per class.

    Returns:
      A tuple of 3 tensors:

      - bbox_indices: An int32 Tensor with the indices of the chosen boxes.
        Values are in sort order until the class_idx switches.
      - bbox_scores: A float32 Tensor with the score for each box.
      - valid_mask: A float32 Tensor with 1/0 values indicating the validity of
        each box. 1 indicates valid, and 0 invalid.
    """
        bboxes = py_utils.HasShape(bboxes, [-1, -1, 7])
        batch_size, num_boxes = py_utils.GetShape(bboxes, 2)
        scores = py_utils.HasShape(scores, [batch_size, num_boxes, -1])
        _, _, num_classes = py_utils.GetShape(scores)

        # Force the thresholds to be tensors of length num_classes.
        nms_iou_threshold = tf.broadcast_to(
            tf.convert_to_tensor(nms_iou_threshold), [num_classes])
        score_threshold = tf.broadcast_to(
            tf.convert_to_tensor(score_threshold), [num_classes])

        def NMSBody(args):
            per_sample_bboxes, per_sample_scores = args
            indices, scores, mask = ops.non_max_suppression_3d(
                per_sample_bboxes,
                per_sample_scores,
                nms_iou_threshold=nms_iou_threshold,
                score_threshold=score_threshold,
                max_boxes_per_class=max_boxes_per_class)
            return indices, scores, mask

        bbox_indices, bbox_scores, valid_mask = tf.map_fn(
            fn=NMSBody,
            elems=(bboxes, scores),
            dtype=(tf.int32, tf.float32, tf.float32),
            back_prop=False)

        output_shape = [batch_size, num_classes, max_boxes_per_class]
        bbox_indices = py_utils.PadOrTrimTo(bbox_indices, output_shape)
        bbox_scores = py_utils.PadOrTrimTo(bbox_scores, output_shape)
        valid_mask = py_utils.PadOrTrimTo(valid_mask, output_shape)
        return bbox_indices, bbox_scores, valid_mask
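
For reference, here is a minimal standalone sketch of the threshold-broadcast step above, showing that a scalar and a per-class list of length num_classes are both accepted (the values are illustrative):

import tensorflow as tf

num_classes = 3
# A scalar threshold is replicated across all classes...
print(tf.broadcast_to(tf.convert_to_tensor(0.3), [num_classes]).numpy())
# -> [0.3 0.3 0.3]
# ...while a per-class list of length num_classes passes through unchanged.
print(tf.broadcast_to(tf.convert_to_tensor([0.3, 0.5, 0.7]), [num_classes]).numpy())
# -> [0.3 0.5 0.7]
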
Example #2
  def FProp(self, _, encoded_images):
    """Decodes and preprocesses the given images.

    Args:
      encoded_images: Encoded jpeg images as a [batch_size] string Tensor.

    Returns:
      The decoded images as a float32 Tensor with shape
      [batch_size, height, width, num_channels=3].
    """
    p = self.params

    def _DecodeAndPreprocessOne(encoded_image):
      image = tf.image.decode_jpeg(encoded_image, channels=3)
      image = tf.image.convert_image_dtype(image, dtype=tf.float32)
      if self.do_eval:
        return self._PreprocessForEval(image)
      else:
        return self._PreprocessForTraining(image)

    images = tf.map_fn(
        _DecodeAndPreprocessOne,
        encoded_images,
        back_prop=False,
        dtype=tf.float32,
        parallel_iterations=p.parallelism)
    return images
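
Below is a self-contained sketch of the same decode pattern in plain TensorFlow 2, with the train/eval preprocessing omitted; fn_output_signature is the TF2 replacement for the deprecated dtype= argument used above:

import tensorflow as tf

# Two tiny random uint8 images, JPEG-encoded into a [batch] string Tensor.
images = tf.cast(
    tf.random.uniform([2, 8, 8, 3], maxval=256, dtype=tf.int32), tf.uint8)
encoded = tf.map_fn(tf.image.encode_jpeg, images, fn_output_signature=tf.string)

# Decode and convert each image, mirroring _DecodeAndPreprocessOne.
decoded = tf.map_fn(
    lambda s: tf.image.convert_image_dtype(
        tf.image.decode_jpeg(s, channels=3), tf.float32),
    encoded,
    fn_output_signature=tf.float32)
print(decoded.shape)  # (2, 8, 8, 3)
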
Example #3
def IdsToStrings(self, ids, lens):
    """Takes integer matrices and returns vectors of strings."""
    ids = py_utils.with_dependencies(
        [py_utils.assert_same_dim0([ids, lens])], ids)
    return tf.map_fn(
        lambda inputs: self._wpm_encoder.Decode(inputs[0][:inputs[1]]),
        (ids, lens),
        dtype=tf.string,
        parallel_iterations=30,
        back_prop=False)
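
The (ids, lens) idiom above slices each row to its valid length before decoding. The same pattern can be sketched without a WPM encoder, with tf.strings.as_string plus tf.strings.reduce_join standing in for self._wpm_encoder.Decode:

import tensorflow as tf

ids = tf.constant([[3, 7, 9, 0], [5, 2, 0, 0]], dtype=tf.int32)
lens = tf.constant([3, 2], dtype=tf.int32)
strings = tf.map_fn(
    lambda inputs: tf.strings.reduce_join(
        tf.strings.as_string(inputs[0][:inputs[1]]), separator=' '),
    (ids, lens),
    fn_output_signature=tf.string)
print(strings.numpy())  # [b'3 7 9' b'5 2']
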
Example #4
    def IdsToStrings(self, ids, lens):
        """Takes int32 token ids and returns approximate detokenized strings."""
        ids = py_utils.with_dependencies(
            [py_utils.assert_same_dim0([ids, lens])], ids)

        def _ProcessRow(inputs):
            length = inputs[1]
            ids = tf.reshape(inputs[0][:length], [1, -1])
            tokens = self._tokenizer.detokenize(ids)
            return tf.strings.reduce_join(tokens.flat_values, separator=' ')

        return tf.map_fn(_ProcessRow, (ids, lens),
                         dtype=tf.string,
                         parallel_iterations=30,
                         back_prop=False)
Example #5
    def _ExtractBatch(self, features):
        """The subclass-defined implementation of ExtractBatch().

        Args:
          features: A dictionary of batched Tensors including tensors from this
            extractor.

        Returns:
          A NestedMap of output Tensors whose key names match self.Shape()'s keys.
        """
        # Default implementation uses map_fn.
        result = tf.map_fn(self._Extract,
                           elems=features,
                           dtype=self.DType(),
                           back_prop=False)
        return py_utils.NestedMap(result)
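
The default implementation above relies on tf.map_fn accepting arbitrary nests (here a dict of features and a matching nest of output dtypes). A minimal sketch with made-up keys:

import tensorflow as tf

features = {'a': tf.ones([4, 3]), 'b': tf.zeros([4, 2])}
out = tf.map_fn(
    lambda row: {'a': row['a'] * 2.0, 'b': row['b'] + 1.0},
    features,
    fn_output_signature={'a': tf.float32, 'b': tf.float32})
print(out['a'].shape, out['b'].shape)  # (4, 3) (4, 2)
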
Example #6
  def BatchedNMSIndices(self,
                        bboxes,
                        scores,
                        nms_iou_threshold=0.3,
                        score_threshold=0.01,
                        max_num_boxes=None):
    """Batched version of NMSIndices.

    Args:
      bboxes: A [batch_size, num_boxes, 7] floating point Tensor of bounding
        boxes in [x, y, z, dx, dy, dz, phi] format.
      scores: A [batch_size, num_boxes, num_classes] floating point Tensor
        containing box scores.
      nms_iou_threshold: IoU threshold to use when determining whether two boxes
        overlap for purposes of suppression.
      score_threshold: The score threshold passed to NMS that allows NMS to
        quickly ignore irrelevant boxes.
      max_num_boxes: The maximum number of boxes per example to emit. If None,
        this value is set to num_boxes from the shape of bboxes.

    Returns:
      The NMS indices and the mask of the padded indices for each example
      in the batch.
    """
    batch_size, num_boxes = py_utils.GetShape(bboxes, 2)

    if max_num_boxes is not None:
      max_output_size = max_num_boxes
    else:
      max_output_size = num_boxes

    output_shape = [batch_size, max_output_size]

    def NMSBody(args):
      bbox, score = args
      return self.NMSIndices(bbox, score, max_output_size, nms_iou_threshold,
                             score_threshold)

    nms_indices, valid_mask = tf.map_fn(
        fn=NMSBody,
        elems=(bboxes, scores),
        dtype=(tf.int32, tf.float32),
        back_prop=False)

    nms_indices = py_utils.PadOrTrimTo(nms_indices, output_shape)
    return nms_indices, valid_mask
Example #7
def _BatchSampleGumbel(batch_seed, time_step, src_ids, src_paddings, shape,
                       dtype):
    """Samples (standard) Gumbel noises of a given shape for each batch item.

  The random seed for the i-th batch item is determined by batch_seed[i],
  time_step, and the sum of non-padding elements of src_ids[i].

  Args:
    batch_seed: An int tensor of shape [batch] that holds a seed for each batch
      item.
    time_step: An int tensor used as a secondary seed.
    src_ids: An int tensor of shape [batch, src_seq] that represents source IDs.
      Used for turning the random seed into a function of source IDs.
    src_paddings: A 0/1 float tensor of shape [batch, src_seq] where 1 means
      that the corresponding element of src_ids is a padding.
    shape: A shape of the Gumbel noises to sample.
    dtype: A type of the Gumbel noises.

  Returns:
    A `dtype` tensor of shape [batch, ...] that holds Gumbel noises.
  """
    # Turn batch_seed into a function of the source IDs by adding the sum of the
    # source IDs. Without doing this, the same pattern of random noises would be
    # used no matter what the source sequence is, resulting in a systematic bias
    # among the output for a given seed value.
    # Mask padding IDs by 0.
    src_ids = src_ids * tf.cast(1.0 - src_paddings, dtype=src_ids.dtype)
    # Compute the sum of source IDs.
    src_ids_sum = tf.math.reduce_sum(src_ids, axis=1)  # shape: [src_batch]
    batch_seed_plus_src_ids_sum = batch_seed + src_ids_sum

    def SampleForBeam(seed):
        return -tf.math.log(-tf.math.log(
            tf.random.stateless_uniform(
                shape=shape, dtype=dtype, seed=tf.stack([seed, time_step]))))

    return tf.map_fn(SampleForBeam, batch_seed_plus_src_ids_sum, dtype=dtype)
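
SampleForBeam uses the inverse-CDF transform for the Gumbel distribution. A standalone sketch of both the transform and the determinism of stateless sampling (the seed values are arbitrary):

import tensorflow as tf

def SampleGumbel(seed_pair, shape):
  # Inverse-CDF transform: G = -log(-log(U)) with U ~ Uniform(0, 1).
  u = tf.random.stateless_uniform(shape=shape, seed=seed_pair, dtype=tf.float32)
  return -tf.math.log(-tf.math.log(u))

g1 = SampleGumbel(tf.constant([42, 7]), [2, 3])
g2 = SampleGumbel(tf.constant([42, 7]), [2, 3])
# Stateless ops are deterministic: the same seed yields the same noise.
print(bool(tf.reduce_all(tf.equal(g1, g2))))  # True
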
Example #8
    def CornersToImagePlane(self, corners, velo_to_image_plane):
        """Project 3d box corners to the image plane.

    Args:
      corners: A [batch, num_boxes, 8, 3] floating point tensor containing the 8
        corners points for each 3d bounding box.
      velo_to_image_plane: A [batch, 3, 4] batch set of projection matrices from
        velo xyz to image plane xy. After multiplication, you need to divide by
        last coordinate to recover 2D pixel locations.

    Returns:
      A [batch, num_boxes, 8, 2] floating point Tensor containing the 3D
      bounding box corners projected to the image plane.
    """
        batch_size, num_boxes, _, _ = py_utils.GetShape(corners, 4)

        def CornersToPlaneBody(args):
            """Body of function to convert each bounding box to the image plane."""
            (corners, velo_to_image_plane) = args
            # corners[i] is [num_boxes, 8, 3]: flatten the points in this batch and do
            # the conversion in one call.
            bbox_corners = tf.reshape(corners, [-1, 3])
            image_plane_corners = geometry.PointsToImagePlane(
                bbox_corners, velo_to_image_plane)
            image_plane_corners = tf.reshape(image_plane_corners, [-1, 8, 2])
            return image_plane_corners

        corners_in_image_plane = tf.map_fn(fn=CornersToPlaneBody,
                                           elems=(corners,
                                                  velo_to_image_plane),
                                           dtype=tf.float32,
                                           back_prop=False)

        corners_in_image_plane = py_utils.HasShape(
            corners_in_image_plane, [batch_size, num_boxes, 8, 2])
        return corners_in_image_plane
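
The divide-by-last-coordinate step mentioned in the docstring is the usual homogeneous projection. A toy example with a made-up [3, 4] projection matrix and a single point:

import tensorflow as tf

velo_to_image_plane = tf.constant([[100., 0., 50., 0.],
                                   [0., 100., 50., 0.],
                                   [0., 0., 1., 0.]])  # toy projection matrix
point = tf.constant([[2., 1., 4.]])
homogeneous = tf.concat([point, tf.ones([1, 1])], axis=-1)  # [1, 4]
projected = tf.matmul(homogeneous, velo_to_image_plane, transpose_b=True)  # [1, 3]
pixels = projected[:, :2] / projected[:, 2:3]
print(pixels.numpy())  # [[100.  75.]]
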
Example #9
    def _EncodeToIds(self, word):
        # Below:
        #   * a token is a wordpiece ID.
        #   * the tokens array will be merged in-place.
        #   * the candidates array is an array of size len(tokens) - 1.
        #     It contains the token for the merged wordpiece, if it exists,
        #     -1 otherwise. For instance, candidate[3] = id(token[3] + token[4]).
        # First, split into basic UTF-8 characters (letters).
        chars = tf.strings.unicode_split(word, 'UTF-8')
        tokens = self._StringToToken(chars)
        tokens = tf.where(
            tf.equal(tokens, NO_TOKEN),
            # Unseen character.
            tf.broadcast_to(self.unk_id, tf.shape(tokens)),
            tokens)
        # Create initial candidate list.
        candidates = tf.map_fn(self._MergeTokens, (tokens[:-1], tokens[1:]),
                               dtype=tokens.dtype)

        def _ShouldMerge(unused_tokens, candidates):
            """Merge until not possible, or we abort early according to merge_prob."""
            return tf.logical_and(
                tf.reduce_any(tf.not_equal(candidates, NO_TOKEN)),
                tf.random.uniform([]) < self._merge_prob)

        def _MergeOneToken(tokens, i):
            return tf.expand_dims(
                self._MergeTokens((tokens[i], tokens[i + 1])), axis=-1)

        def _MergeCandidates(tokens, candidates):
            """Merge in the reverse binary tree."""
            best_id = tf.argmin(candidates, output_type=tf.int32)
            # Perform the merge at position best_id.
            tokens = tf.concat(
                [tokens[:best_id], [candidates[best_id]], tokens[best_id + 2:]],
                axis=0)
            # Recompute the merge candidates.
            # Only the neighbors of best_id need to be recomputed.
            empty = tf.zeros([0], dtype=candidates.dtype)

            def _MergeLeft():
                return tf.concat(
                    [candidates[:best_id - 1], _MergeOneToken(tokens, best_id - 1)],
                    axis=0)

            left_candidates = tf.cond(tf.equal(best_id, 0), lambda: empty,
                                      _MergeLeft)

            def _MergeRight():
                return tf.concat(
                    [_MergeOneToken(tokens, best_id), candidates[best_id + 2:]],
                    axis=0)

            right_candidates = tf.cond(
                tf.greater_equal(best_id, tf.size(tokens) - 1), lambda: empty,
                _MergeRight)

            candidates = tf.concat([left_candidates, right_candidates], axis=0)
            return tokens, candidates

        return tf.while_loop(
            _ShouldMerge,
            _MergeCandidates, (tokens, candidates),
            parallel_iterations=1,
            back_prop=False)[0]
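
The loop above repeatedly merges the best candidate pair (the argmin, i.e. the merged wordpiece with the lowest id) until no merge applies or the merge_prob coin flip aborts early. A pure-Python sketch of the same greedy merging, with a hypothetical merge table and taking the first valid pair instead of the argmin:

merges = {('h', 'e'): 'he', ('he', 'l'): 'hel'}  # hypothetical merge table
tokens = ['h', 'e', 'l']
while True:
  # candidates[i] is the merged piece for (tokens[i], tokens[i + 1]), if any.
  candidates = [merges.get(pair) for pair in zip(tokens, tokens[1:])]
  if not any(c is not None for c in candidates):
    break
  i = next(j for j, c in enumerate(candidates) if c is not None)
  tokens = tokens[:i] + [candidates[i]] + tokens[i + 2:]
print(tokens)  # ['hel']
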
Example #10
def SegmentPool3D(points,
                  point_features,
                  pooling_idx,
                  closest_idx,
                  pooling_method='max'):
    """Performs {min/max/average} pooling over a pointcloud given indices.

  This should be functionally identical when using max to the above
  MaxPool3D function, except it turns out to be much more memory efficient
  on a TPU, and supports min/max/mean.

  Args:
    points: A float tf.Tensor of shape [N, P1, 3] with point locations.
    point_features: A float tf.Tensor of shape [N, P1, C] with point features.
    pooling_idx: A tf.int32 tf.Tensor of shape [N, P2] with the index of which
      points we want to keep. Each value should be in the range [0, P1].
    closest_idx: A tf.int32 tf.Tensor of shape [N, P1] representing which
      sampled point is closest to each original point. Each value should be in
      the range of [0, P2].
    pooling_method: A string for which pooling function to use. Should be one of
      {'min', 'max', 'mean'}.

  Returns:
    pooled_points: A float tf.Tensor of shape [N, P2, 3] with the pooled
      point locations.
    pooled_features: A float tf.Tensor of shape [N, P2, C] with the pooled
      features.
  Raises:
    ValueError: If pooling_method is not one of {min/max/mean}.
  """
    segment_pooling_functions = {
        'min': tf.unsorted_segment_min,
        'max': tf.unsorted_segment_max,
        'mean': tf.unsorted_segment_mean
    }

    if pooling_method not in segment_pooling_functions:
        raise ValueError('`pooling_method` must be one of {}.'.format(
            segment_pooling_functions.keys()))
    segment_fn = segment_pooling_functions[pooling_method]

    points = py_utils.HasShape(points, [-1, -1, 3])
    n, p1 = py_utils.GetShape(points, 2)
    point_features = py_utils.HasShape(point_features, [n, p1, -1])
    _, _, c = py_utils.GetShape(point_features)
    pooling_idx = py_utils.HasShape(pooling_idx, [n, -1])
    _, p2 = py_utils.GetShape(pooling_idx)
    closest_idx = py_utils.HasShape(closest_idx, [n, p1])

    # Subselect our output points
    pooled_points = tf.batch_gather(points, pooling_idx)

    # Loop over batch dimension of our features/indices, as unsorted_segment_X
    # does not currently support a batch dimension.
    def _LoopFn(args):
        example_features, example_closest_idx = args
        return segment_fn(example_features,
                          example_closest_idx,
                          num_segments=p2)

    pooled_features = tf.map_fn(fn=_LoopFn,
                                elems=(point_features, closest_idx),
                                dtype=tf.float32)

    return (py_utils.HasShape(pooled_points, [n, p2, 3]),
            py_utils.HasShape(pooled_features, [n, p2, c]))
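
Per example, _LoopFn is a single unsorted segment reduction. A one-example sketch of the 'max' case, using the tf.math alias of the same op:

import tensorflow as tf

features = tf.constant([[1.], [5.], [2.], [4.]])  # [P1=4, C=1]
closest_idx = tf.constant([0, 0, 1, 1])           # pooled point for each point
pooled = tf.math.unsorted_segment_max(features, closest_idx, num_segments=2)
print(pooled.numpy())  # [[5.], [4.]]
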
Example #11
def MaxPool3D(points, point_features, pooling_idx, closest_idx):
    """Apply max pooling to a point cloud with computed sampling indices.

  sampled_idx and closest_idx are the outputs of a sampler such as
  FurthestPointSampler.

  The pooling operation results in a point cloud with fewer points, where the
  pooled points are specified by pooling_idx. Each element of pooling_idx
  contains an integer in the range [0, P1) containing the index of the point in
  points/points_features.

  Max pooling is performed by assigning each point to its closest pooled point,
  and then taking a max over the features of points assigned. We assume that
  this mapping is provided by closest_idx, where each element should contain
  an integer in the range [0, P2) containing the index of the pooled point that
  each point is assigned to.

  Note: This logic for pooling assumes that there will be at least
  one value > 0 per sampled region for each feature, otherwise it will return 0.
  Additionally, it does a reduce over a masked version of the features, so
  mean and min would not work without a change in the logic.

  Args:
    points: a floating point tf.Tensor with shape [N, P1, 3]
    point_features: a floating point tf.Tensor with shape [N, P1, C]
    pooling_idx: A tf.int32 tf.Tensor of shape [N, P2] with the index of which
      points we want to keep. Each value should be in the range [0, P1].
    closest_idx: A tf.int32 tf.Tensor of shape [N, P1] representing which
      sampled point is closest to each original point. Each value should be in
      the range of [0, P2].

  Returns:
    A tuple of tf.Tensors (pooled_points, pooled_features).

    pooled_points has shape [N, P2, 3] representing the locations of each
    selected point. P2 corresponds to num_pooled_points.

    pooled_features has shape [N, P2, C] representing the pooled features at
    each point.
  """
    batch_size, num_points = py_utils.GetShape(points, 2)
    point_features = py_utils.HasShape(point_features,
                                       [batch_size, num_points, -1])
    pooling_idx = py_utils.HasShape(pooling_idx, [batch_size, -1])
    _, num_output_points = py_utils.GetShape(pooling_idx)
    _, _, feature_dims = py_utils.GetShape(point_features, 3)

    # Gather new point locations.
    pooled_points = tf.batch_gather(points, pooling_idx)

    mask = tf.one_hot(closest_idx, num_output_points)  # [N, P1, P2]
    mask = tf.transpose(mask, [2, 0, 1])  # [P2, N, P1]

    def _PartialPoolFeaturesFn(partial_mask):
        partial_mask = tf.tile(
            tf.reshape(partial_mask, [batch_size, num_points, 1]),
            [1, 1, feature_dims])
        # Note: This method of pooling assumes there will be a value > 0
        # And will only work with max under this condition.
        return tf.reduce_max(partial_mask * point_features, axis=1)

    # Performing a map_fn over the pooled points is more memory efficient.
    pooled_point_features = tf.map_fn(_PartialPoolFeaturesFn,
                                      mask)  # [P2, N, P1]
    pooled_point_features = tf.transpose(pooled_point_features, [1, 0, 2])

    return pooled_points, pooled_point_features
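
The docstring's caveat about needing a value > 0 per region can be seen directly: masking by multiplication maps unassigned points to 0, so the reduce_max is floored at 0. A tiny demonstration with made-up numbers:

import tensorflow as tf

features = tf.constant([[-3., -1.],
                        [2., 4.]])   # [P1=2, C=2]
mask = tf.constant([[1.], [0.]])     # only point 0 falls in this pooled region
# An all-negative region maxes to 0 instead of its true maximum [-3., -1.].
print(tf.reduce_max(mask * features, axis=0).numpy())  # [0. 0.]
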
Example #12
    def _AddNoise(self, batch):
        """Adding noise the src (see https://arxiv.org/pdf/1711.00043).

    This function implement 3 types of noise (hyparams defined in
    self.params.denoise):
    1) slightly shuffle the sentence following p.shuffle_tok_range
    2) randomly drop tokens with probability p.drop_tok_prob
    3) randomly mask tokens with probability p.blank_tok_prob
    The noises are added to the input with probability p.noise_sent_prob.

    Args:
      batch: a `.NestedMap` of the input batch.
    """
        def IsSpecialExample(task_ids, special_task_ids):
            """A utility function indicates whether inputs belong to specific tasks.

      Args:
        task_ids: Task ids for the input batch. Tensor of shape [batch].
        special_task_ids: A list of specified task ids.

      Returns:
        A tensor indicating whether each sample in the batch belong to the
        specified task. Return a tensor of size [batch].
      """
            batch_size = py_utils.GetShape(task_ids)[0]
            return tf.reduce_any(
                tf.equal(
                    tf.expand_dims(task_ids, -1),
                    tf.cast(
                        tf.broadcast_to(
                            special_task_ids,
                            [batch_size, len(special_task_ids)]), tf.int32)),
                -1)

        p = self.params.denoise
        batch_size = tf.shape(batch.src.ids)[0]
        source_max_len = tf.shape(batch.src.ids)[1]

        # Shuffle tokens according to p.shuffle_tok_range
        noise = tf.random.uniform([batch_size, source_max_len], 0,
                                  p.shuffle_tok_range + 1)

        # Don't shuffle eos or padding
        shuffle_tok_range = tf.fill([batch_size, source_max_len],
                                    float(p.shuffle_tok_range))
        shifted_paddings = tf.pad(batch.src.paddings[:, 1:], [[0, 0], [0, 1]],
                                  constant_values=1)
        noise = tf.where(tf.equal(shifted_paddings, 0), noise,
                         shuffle_tok_range)
        indices = tf.broadcast_to(tf.range(source_max_len, dtype=tf.int32),
                                  [batch_size, source_max_len])
        noisy_indices = tf.cast(indices, dtype=tf.float32) + noise
        permutations = tf.argsort(noisy_indices)
        stacked = tf.stack([batch.src.ids, permutations], axis=1)
        denoise_src_ids = tf.stack(tf.map_fn(lambda x: tf.gather(x[0], x[1]),
                                             stacked),
                                   axis=0)

        # Select tokens to drop with probability=p.drop_tok_prob
        random_drop_tok = tf.random.uniform([batch_size, source_max_len])
        # Don't drop eos token
        is_keep_tok = tf.math.logical_or(
            tf.greater(random_drop_tok, p.drop_tok_prob),
            tf.equal(denoise_src_ids, self._src_tokenizer.eos_id))
        denoise_src_ids = tf.ragged.boolean_mask(
            denoise_src_ids,
            is_keep_tok).to_tensor(default_value=0,
                                   shape=tf.shape(batch.src.ids))
        denoise_src_paddings = tf.ragged.boolean_mask(
            batch.src.paddings,
            is_keep_tok).to_tensor(default_value=1,
                                   shape=tf.shape(batch.src.ids))

        # Select tokens to blank with probability=p.blank_tok_prob
        # Don't blank eos token
        random_blank_tok = tf.random.uniform([batch_size, source_max_len])
        shifted_paddings = tf.pad(denoise_src_paddings[:, 1:],
                                  [[0, 0], [0, 1]],
                                  constant_values=1)
        is_blank_tok = tf.math.logical_and(
            tf.less(random_blank_tok, p.blank_tok_prob),
            tf.equal(shifted_paddings, 0))
        blank_id = tf.fill([batch_size, source_max_len], p.blank_id)
        denoise_src_ids = tf.where(is_blank_tok, blank_id, denoise_src_ids)

        # Select denoising task examples with probability=p.denoise_sent_prob
        random_uniform_sent = tf.random.uniform([batch_size])
        is_denoise_sent = tf.math.logical_and(
            tf.less(random_uniform_sent, p.noise_sent_prob),
            IsSpecialExample(self._GetTaskIds(batch.src.source_ids[:, 0]),
                             p.task_ids))
        batch.src.ids = tf.where(is_denoise_sent, denoise_src_ids,
                                 batch.src.ids)
        batch.src.paddings = tf.where(is_denoise_sent, denoise_src_paddings,
                                      batch.src.paddings)
        batch.src.ids_indicator = 1 - batch.src.paddings
        batch.src.weights = batch.src.ids_indicator
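
The shuffle noise at the top of _AddNoise bounds how far a token can move: adding U(0, shuffle_tok_range + 1) to each position before argsort lets a token move at most shuffle_tok_range places. A standalone sketch with illustrative values, ignoring the padding handling, where tf.gather with batch_dims=1 stands in for the per-row map_fn gather:

import tensorflow as tf

k = 3  # stands in for p.shuffle_tok_range
ids = tf.constant([[10, 11, 12, 13, 14, 15]])
positions = tf.cast(tf.range(6)[tf.newaxis, :], tf.float32)
noisy_positions = positions + tf.random.uniform([1, 6], 0, k + 1)
permutation = tf.argsort(noisy_positions)
shuffled = tf.gather(ids, permutation, batch_dims=1)
print(shuffled.numpy())  # e.g. [[11 10 13 12 14 15]]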