def draw_boxes(image_and_detections):
    """Draws boxes on image."""
    true_shape = image_and_detections[0]
    original_shape = image_and_detections[1]
    if true_image_shape is not None:
      image = shape_utils.pad_or_clip_nd(image_and_detections[2],
                                         [true_shape[0], true_shape[1], 3])
    if original_image_spatial_shape is not None:
      image_and_detections[2] = _resize_original_image(image, original_shape)

    image_with_boxes = tf.py_func(visualize_boxes_fn, image_and_detections[2:],
                                  tf.uint8)
    return image_with_boxes
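
For reference, pad_or_clip_nd pads each dimension with zeros or clips it to the requested size, leaving None dimensions untouched. A minimal sketch, assuming the TensorFlow Object Detection API's shape_utils is importable:

import tensorflow as tf
from object_detection.utils import shape_utils

tensor = tf.ones([2, 5], dtype=tf.float32)
# Pad dimension 0 from 2 to 4 (with zeros) and clip dimension 1 from 5 to 3.
padded = shape_utils.pad_or_clip_nd(tensor, output_shape=[4, 3])
# padded has shape [4, 3]: rows 2-3 are zeros, columns 3-4 are dropped.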
Example #2
  def test_pad_or_clip_nd_tensor(self):
    tensor_placeholder = tf.placeholder(tf.float32, [None, 5, 4, 7])
    output_tensor = shape_utils.pad_or_clip_nd(
        tensor_placeholder, [None, 3, 5, tf.constant(6)])

    self.assertAllEqual(output_tensor.shape.as_list(), [None, 3, 5, None])

    with self.test_session() as sess:
      output_tensor_np = sess.run(
          output_tensor,
          feed_dict={
              tensor_placeholder: np.random.rand(2, 5, 4, 7),
          })

    self.assertAllEqual(output_tensor_np.shape, [2, 3, 5, 6])
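
Note the split between static and dynamic shapes here: entries of output_shape that are None or a tf.Tensor come back as statically unknown dimensions ([None, 3, 5, None]), while the runtime result is fully concrete: dimension 1 is clipped from 5 to 3, dimension 2 is padded from 4 to 5, and dimension 3 is clipped from 7 to 6.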
Example #3
def pad_input_data_to_static_shapes(tensor_dict,
                                    max_num_boxes,
                                    num_classes,
                                    spatial_image_shape=None):
    """Pads input tensors to static shapes.

  Args:
    tensor_dict: Tensor dictionary of input data
    max_num_boxes: Max number of groundtruth boxes needed to compute shapes for
      padding.
    num_classes: Number of classes in the dataset needed to compute shapes for
      padding.
    spatial_image_shape: A list of two integers of the form [height, width]
      containing expected spatial shape of the image.

  Returns:
    A dictionary keyed by fields.InputDataFields containing padding shapes for
    tensors in the dataset.

  Raises:
    ValueError: If groundtruth classes is neither rank 1 nor rank 2.
  """

    if not spatial_image_shape or spatial_image_shape == [-1, -1]:
        height, width = None, None
    else:
        height, width = spatial_image_shape  # pylint: disable=unpacking-non-sequence

    num_additional_channels = 0
    if fields.InputDataFields.image_additional_channels in tensor_dict:
        num_additional_channels = tensor_dict[
            fields.InputDataFields.image_additional_channels].shape[2].value
    padding_shapes = {
        # Additional channels are merged before batching.
        fields.InputDataFields.image:
        [height, width, 3 + num_additional_channels],
        fields.InputDataFields.image_additional_channels:
        [height, width, num_additional_channels],
        fields.InputDataFields.source_id: [],
        fields.InputDataFields.filename: [],
        fields.InputDataFields.key: [],
        fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
        fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
        fields.InputDataFields.groundtruth_classes:
        [max_num_boxes, num_classes],
        fields.InputDataFields.groundtruth_instance_masks:
        [max_num_boxes, height, width],
        fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes],
        fields.InputDataFields.groundtruth_group_of: [max_num_boxes],
        fields.InputDataFields.groundtruth_area: [max_num_boxes],
        fields.InputDataFields.groundtruth_weights: [max_num_boxes],
        fields.InputDataFields.num_groundtruth_boxes: [],
        fields.InputDataFields.groundtruth_label_types: [max_num_boxes],
        fields.InputDataFields.groundtruth_label_scores: [max_num_boxes],
        fields.InputDataFields.true_image_shape: [3],
        fields.InputDataFields.multiclass_scores:
        [max_num_boxes, num_classes + 1 if num_classes is not None else None],
        fields.InputDataFields.groundtruth_image_classes: [num_classes],
    }

    if fields.InputDataFields.original_image in tensor_dict:
        padding_shapes[fields.InputDataFields.original_image] = [
            None, None, 3 + num_additional_channels
        ]
    if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
        tensor_shape = (
            tensor_dict[fields.InputDataFields.groundtruth_keypoints].shape)
        padding_shape = [
            max_num_boxes, tensor_shape[1].value, tensor_shape[2].value
        ]
        padding_shapes[
            fields.InputDataFields.groundtruth_keypoints] = padding_shape
    if fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict:
        tensor_shape = tensor_dict[
            fields.InputDataFields.groundtruth_keypoint_visibilities].shape
        padding_shape = [max_num_boxes, tensor_shape[1].value]
        padding_shapes[fields.InputDataFields.
                       groundtruth_keypoint_visibilities] = padding_shape

    padded_tensor_dict = {}
    for tensor_name in tensor_dict:
        padded_tensor_dict[tensor_name] = shape_utils.pad_or_clip_nd(
            tensor_dict[tensor_name], padding_shapes[tensor_name])

    # Make sure that the number of groundtruth boxes now reflects the
    # padded/clipped tensors.
    if fields.InputDataFields.num_groundtruth_boxes in padded_tensor_dict:
        padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = (
            tf.minimum(
                padded_tensor_dict[
                    fields.InputDataFields.num_groundtruth_boxes],
                max_num_boxes))
    return padded_tensor_dict
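
A hedged usage sketch (the tensors and sizes are invented for illustration; fields is object_detection.core.standard_fields):

import tensorflow as tf
from object_detection.core import standard_fields as fields

tensor_dict = {
    fields.InputDataFields.image: tf.zeros([480, 640, 3]),
    fields.InputDataFields.groundtruth_boxes: tf.zeros([7, 4]),
    fields.InputDataFields.groundtruth_classes: tf.zeros([7, 90]),
    fields.InputDataFields.num_groundtruth_boxes: tf.constant(7),
}
padded = pad_input_data_to_static_shapes(
    tensor_dict, max_num_boxes=100, num_classes=90,
    spatial_image_shape=[640, 640])
# groundtruth_boxes is zero-padded from [7, 4] to [100, 4]; the image is
# padded to [640, 640, 3]; num_groundtruth_boxes stays 7 (min(7, 100)).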
Example #4
def pad_parts_tensor(instance_ind):
  # num_points_cumulative, part_index, and max_points_per_instance are
  # captured from the enclosing scope; this closure pads or clips one
  # instance's DensePose part indices to a fixed static length.
  points_range_start = num_points_cumulative[instance_ind]
  points_range_end = num_points_cumulative[instance_ind + 1]
  part_inds = part_index[points_range_start:points_range_end]
  return shape_utils.pad_or_clip_nd(part_inds,
                                    output_shape=[max_points_per_instance])
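
A hedged sketch of how such a closure is typically applied, assuming TF 2.x and an int32 part_index: map it over every instance index so each instance's part ids come back at the same length.

num_instances = tf.size(num_points_cumulative) - 1
padded_part_ids = tf.map_fn(
    pad_parts_tensor,
    tf.range(num_instances),
    fn_output_signature=tf.int32)
# padded_part_ids has shape [num_instances, max_points_per_instance], with a
# static last dimension.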
Example #5
def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
                                    spatial_image_shape=None):
  """Pads input tensors to static shapes.

  Args:
    tensor_dict: Tensor dictionary of input data
    max_num_boxes: Max number of groundtruth boxes needed to compute shapes for
      padding.
    num_classes: Number of classes in the dataset needed to compute shapes for
      padding.
    spatial_image_shape: A list of two integers of the form [height, width]
      containing expected spatial shape of the image.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the input tensors
    padded/clipped to static shapes.

  Raises:
    ValueError: If groundtruth classes is neither rank 1 nor rank 2.
  """

  if not spatial_image_shape or spatial_image_shape == [-1, -1]:
    height, width = None, None
  else:
    height, width = spatial_image_shape  # pylint: disable=unpacking-non-sequence

  num_additional_channels = 0
  if fields.InputDataFields.image_additional_channels in tensor_dict:
    num_additional_channels = tensor_dict[
        fields.InputDataFields.image_additional_channels].shape[2].value
  padding_shapes = {
      # Additional channels are merged before batching.
      fields.InputDataFields.image: [
          height, width, 3 + num_additional_channels
      ],
      fields.InputDataFields.original_image_spatial_shape: [2],
      fields.InputDataFields.image_additional_channels: [
          height, width, num_additional_channels
      ],
      fields.InputDataFields.source_id: [],
      fields.InputDataFields.filename: [],
      fields.InputDataFields.key: [],
      fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
      fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
      fields.InputDataFields.groundtruth_classes: [max_num_boxes, num_classes],
      fields.InputDataFields.groundtruth_confidences: [
          max_num_boxes, num_classes],
      fields.InputDataFields.groundtruth_instance_masks: [
          max_num_boxes, height, width
      ],
      fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes],
      fields.InputDataFields.groundtruth_group_of: [max_num_boxes],
      fields.InputDataFields.groundtruth_area: [max_num_boxes],
      fields.InputDataFields.groundtruth_weights: [max_num_boxes],
      fields.InputDataFields.num_groundtruth_boxes: [],
      fields.InputDataFields.groundtruth_label_types: [max_num_boxes],
      fields.InputDataFields.groundtruth_label_scores: [max_num_boxes],
      fields.InputDataFields.true_image_shape: [3],
      fields.InputDataFields.multiclass_scores: [
          max_num_boxes, num_classes + 1 if num_classes is not None else None
      ],
      fields.InputDataFields.groundtruth_image_classes: [num_classes],
      fields.InputDataFields.groundtruth_image_confidences: [num_classes],
  }

  if fields.InputDataFields.original_image in tensor_dict:
    padding_shapes[fields.InputDataFields.original_image] = [
        height, width, 3 + num_additional_channels
    ]
  if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
    tensor_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_keypoints].shape)
    padding_shape = [max_num_boxes, tensor_shape[1].value,
                     tensor_shape[2].value]
    padding_shapes[fields.InputDataFields.groundtruth_keypoints] = padding_shape
  if fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict:
    tensor_shape = tensor_dict[fields.InputDataFields.
                               groundtruth_keypoint_visibilities].shape
    padding_shape = [max_num_boxes, tensor_shape[1].value]
    padding_shapes[fields.InputDataFields.
                   groundtruth_keypoint_visibilities] = padding_shape

  padded_tensor_dict = {}
  for tensor_name in tensor_dict:
    padded_tensor_dict[tensor_name] = shape_utils.pad_or_clip_nd(
        tensor_dict[tensor_name], padding_shapes[tensor_name])

  # Make sure that the number of groundtruth boxes now reflects the
  # padded/clipped tensors.
  if fields.InputDataFields.num_groundtruth_boxes in padded_tensor_dict:
    padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = (
        tf.minimum(
            padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes],
            max_num_boxes))
  return padded_tensor_dict
Example #6
def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
                                    spatial_image_shape=None):
  """Pads input tensors to static shapes.

  In case num_additional_channels > 0, we assume that the additional channels
  have already been concatenated to the base image.

  Args:
    tensor_dict: Tensor dictionary of input data
    max_num_boxes: Max number of groundtruth boxes needed to compute shapes for
      padding.
    num_classes: Number of classes in the dataset needed to compute shapes for
      padding.
    spatial_image_shape: A list of two integers of the form [height, width]
      containing expected spatial shape of the image.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the input tensors
    padded/clipped to static shapes.

  Raises:
    ValueError: If groundtruth classes is neither rank 1 nor rank 2, or if we
      detect that additional channels have not been concatenated yet.
  """

  if not spatial_image_shape or spatial_image_shape == [-1, -1]:
    height, width = None, None
  else:
    height, width = spatial_image_shape  # pylint: disable=unpacking-non-sequence

  num_additional_channels = 0
  if fields.InputDataFields.image_additional_channels in tensor_dict:
    num_additional_channels = shape_utils.get_dim_as_int(tensor_dict[
        fields.InputDataFields.image_additional_channels].shape[2])

  # We assume that if num_additional_channels > 0, then it has already been
  # concatenated to the base image (but not the ground truth).
  num_channels = 3
  if fields.InputDataFields.image in tensor_dict:
    num_channels = shape_utils.get_dim_as_int(
        tensor_dict[fields.InputDataFields.image].shape[2])

  if num_additional_channels:
    if num_additional_channels >= num_channels:
      raise ValueError(
          'Image must be already concatenated with additional channels.')

    if (fields.InputDataFields.original_image in tensor_dict and
        shape_utils.get_dim_as_int(
            tensor_dict[fields.InputDataFields.original_image].shape[2]) ==
        num_channels):
      raise ValueError(
          'Image must be already concatenated with additional channels.')

  padding_shapes = {
      fields.InputDataFields.image: [
          height, width, num_channels
      ],
      fields.InputDataFields.original_image_spatial_shape: [2],
      fields.InputDataFields.image_additional_channels: [
          height, width, num_additional_channels
      ],
      fields.InputDataFields.source_id: [],
      fields.InputDataFields.filename: [],
      fields.InputDataFields.key: [],
      fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
      fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
      fields.InputDataFields.groundtruth_classes: [max_num_boxes, num_classes],
      fields.InputDataFields.groundtruth_instance_masks: [
          max_num_boxes, height, width
      ],
      fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes],
      fields.InputDataFields.groundtruth_group_of: [max_num_boxes],
      fields.InputDataFields.groundtruth_area: [max_num_boxes],
      fields.InputDataFields.groundtruth_weights: [max_num_boxes],
      fields.InputDataFields.groundtruth_confidences: [
          max_num_boxes, num_classes
      ],
      fields.InputDataFields.num_groundtruth_boxes: [],
      fields.InputDataFields.groundtruth_label_types: [max_num_boxes],
      fields.InputDataFields.groundtruth_label_weights: [max_num_boxes],
      fields.InputDataFields.true_image_shape: [3],
      fields.InputDataFields.groundtruth_image_classes: [num_classes],
      fields.InputDataFields.groundtruth_image_confidences: [num_classes],
  }

  if fields.InputDataFields.original_image in tensor_dict:
    padding_shapes[fields.InputDataFields.original_image] = [
        height, width,
        shape_utils.get_dim_as_int(tensor_dict[fields.InputDataFields.
                                               original_image].shape[2])
    ]
  if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
    tensor_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_keypoints].shape)
    padding_shape = [max_num_boxes,
                     shape_utils.get_dim_as_int(tensor_shape[1]),
                     shape_utils.get_dim_as_int(tensor_shape[2])]
    padding_shapes[fields.InputDataFields.groundtruth_keypoints] = padding_shape
  if fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict:
    tensor_shape = tensor_dict[fields.InputDataFields.
                               groundtruth_keypoint_visibilities].shape
    padding_shape = [max_num_boxes, shape_utils.get_dim_as_int(tensor_shape[1])]
    padding_shapes[fields.InputDataFields.
                   groundtruth_keypoint_visibilities] = padding_shape

  padded_tensor_dict = {}
  for tensor_name in tensor_dict:
    padded_tensor_dict[tensor_name] = shape_utils.pad_or_clip_nd(
        tensor_dict[tensor_name], padding_shapes[tensor_name])

  # Make sure that the number of groundtruth boxes now reflects the
  # padded/clipped tensors.
  if fields.InputDataFields.num_groundtruth_boxes in padded_tensor_dict:
    padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = (
        tf.minimum(
            padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes],
            max_num_boxes))
  return padded_tensor_dict
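
A hedged illustration of the additional-channels contract enforced above (shapes invented): the decoded image must already contain the extra channels, so its channel count must exceed num_additional_channels.

tensor_dict = {
    fields.InputDataFields.image: tf.zeros([300, 300, 5]),  # 3 RGB + 2 extra
    fields.InputDataFields.image_additional_channels: tf.zeros([300, 300, 2]),
}
padded = pad_input_data_to_static_shapes(
    tensor_dict, max_num_boxes=10, num_classes=3,
    spatial_image_shape=[320, 320])
# image pads to [320, 320, 5]. An image with only 2 channels (i.e. not yet
# concatenated with the additional channels) would trigger the ValueError.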
Example #7
    def graph_fn(input_tensor):
      output_tensor = shape_utils.pad_or_clip_nd(
          input_tensor, [None, 3, 5, tf.constant(6)])

      return output_tensor
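
A hedged sketch of driving this graph function (the test harness around it is not shown in the excerpt):

fn = tf.function(graph_fn)
result = fn(tf.random.uniform([2, 5, 4, 7]))
# result has runtime shape [2, 3, 5, 6], matching the earlier test.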
Example #8
def pad_input_data_to_static_shapes(tensor_dict,
                                    max_num_boxes,
                                    num_classes,
                                    spatial_image_shape=None):

    if not spatial_image_shape or spatial_image_shape == [-1, -1]:
        height, width = None, None
    else:
        height, width = spatial_image_shape  # pylint: disable=unpacking-non-sequence

    num_additional_channels = 0
    if fields.InputDataFields.image_additional_channels in tensor_dict:
        num_additional_channels = shape_utils.get_dim_as_int(tensor_dict[
            fields.InputDataFields.image_additional_channels].shape[2])

    # We assume that if num_additional_channels > 0, then it has already been
    # concatenated to the base image (but not the ground truth).
    num_channels = 3
    if fields.InputDataFields.image in tensor_dict:
        num_channels = shape_utils.get_dim_as_int(
            tensor_dict[fields.InputDataFields.image].shape[2])

    if num_additional_channels:
        if num_additional_channels >= num_channels:
            raise ValueError(
                'Image must be already concatenated with additional channels.')

        if (fields.InputDataFields.original_image in tensor_dict
                and shape_utils.get_dim_as_int(tensor_dict[
                    fields.InputDataFields.original_image].shape[2])
                == num_channels):
            raise ValueError(
                'Image must be already concatenated with additional channels.')

    padding_shapes = {
        fields.InputDataFields.image: [height, width, num_channels],
        fields.InputDataFields.original_image_spatial_shape: [2],
        fields.InputDataFields.image_additional_channels:
        [height, width, num_additional_channels],
        fields.InputDataFields.source_id: [],
        fields.InputDataFields.filename: [],
        fields.InputDataFields.key: [],
        fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
        fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
        fields.InputDataFields.groundtruth_classes:
        [max_num_boxes, num_classes],
        fields.InputDataFields.groundtruth_instance_masks:
        [max_num_boxes, height, width],
        fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes],
        fields.InputDataFields.groundtruth_group_of: [max_num_boxes],
        fields.InputDataFields.groundtruth_area: [max_num_boxes],
        fields.InputDataFields.groundtruth_weights: [max_num_boxes],
        fields.InputDataFields.groundtruth_confidences:
        [max_num_boxes, num_classes],
        fields.InputDataFields.num_groundtruth_boxes: [],
        fields.InputDataFields.groundtruth_label_types: [max_num_boxes],
        fields.InputDataFields.groundtruth_label_weights: [max_num_boxes],
        fields.InputDataFields.true_image_shape: [3],
        fields.InputDataFields.groundtruth_image_classes: [num_classes],
        fields.InputDataFields.groundtruth_image_confidences: [num_classes],
    }

    if fields.InputDataFields.original_image in tensor_dict:
        padding_shapes[fields.InputDataFields.original_image] = [
            height, width,
            shape_utils.get_dim_as_int(
                tensor_dict[fields.InputDataFields.original_image].shape[2])
        ]
    if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
        tensor_shape = (
            tensor_dict[fields.InputDataFields.groundtruth_keypoints].shape)
        padding_shape = [
            max_num_boxes,
            shape_utils.get_dim_as_int(tensor_shape[1]),
            shape_utils.get_dim_as_int(tensor_shape[2])
        ]
        padding_shapes[
            fields.InputDataFields.groundtruth_keypoints] = padding_shape
    if fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict:
        tensor_shape = tensor_dict[
            fields.InputDataFields.groundtruth_keypoint_visibilities].shape
        padding_shape = [
            max_num_boxes,
            shape_utils.get_dim_as_int(tensor_shape[1])
        ]
        padding_shapes[fields.InputDataFields.
                       groundtruth_keypoint_visibilities] = padding_shape

    padded_tensor_dict = {}
    for tensor_name in tensor_dict:
        padded_tensor_dict[tensor_name] = shape_utils.pad_or_clip_nd(
            tensor_dict[tensor_name], padding_shapes[tensor_name])

    # Make sure that the number of groundtruth boxes now reflects the
    # padded/clipped tensors.
    if fields.InputDataFields.num_groundtruth_boxes in padded_tensor_dict:
        padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = (
            tf.minimum(
                padded_tensor_dict[
                    fields.InputDataFields.num_groundtruth_boxes],
                max_num_boxes))
    return padded_tensor_dict
Example #9
def pad_input_data_to_static_shapes(tensor_dict,
                                    max_num_boxes,
                                    num_classes,
                                    spatial_image_shape=None,
                                    max_num_context_features=None,
                                    context_feature_length=None,
                                    max_dp_points=336):
  """Pads input tensors to static shapes.

  In case num_additional_channels > 0, we assume that the additional channels
  have already been concatenated to the base image.

  Args:
    tensor_dict: Tensor dictionary of input data
    max_num_boxes: Max number of groundtruth boxes needed to compute shapes for
      padding.
    num_classes: Number of classes in the dataset needed to compute shapes for
      padding.
    spatial_image_shape: A list of two integers of the form [height, width]
      containing expected spatial shape of the image.
    max_num_context_features (optional): The maximum number of context
      features, needed to compute padding shapes.
    context_feature_length (optional): The length of each context feature.
    max_dp_points (optional): The maximum number of DensePose sampled points
      per instance. The default (336) is selected since the original DensePose
      paper (https://arxiv.org/pdf/1802.00434.pdf) indicates that the maximum
      number of samples per part is 14, and therefore 24 * 14 = 336 is the
      maximum number of samples per instance.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the input tensors
    padded/clipped to static shapes.

  Raises:
    ValueError: If groundtruth classes is neither rank 1 nor rank 2, or if we
      detect that additional channels have not been concatenated yet, or if
      max_num_context_features is not specified and context_features is in the
      tensor dict.
  """

  if not spatial_image_shape or spatial_image_shape == [-1, -1]:
    height, width = None, None
  else:
    height, width = spatial_image_shape  # pylint: disable=unpacking-non-sequence

  num_additional_channels = 0
  if fields.InputDataFields.image_additional_channels in tensor_dict:
    num_additional_channels = shape_utils.get_dim_as_int(tensor_dict[
        fields.InputDataFields.image_additional_channels].shape[2])

  # We assume that if num_additional_channels > 0, then it has already been
  # concatenated to the base image (but not the ground truth).
  num_channels = 3
  if fields.InputDataFields.image in tensor_dict:
    num_channels = shape_utils.get_dim_as_int(
        tensor_dict[fields.InputDataFields.image].shape[2])

  if num_additional_channels:
    if num_additional_channels >= num_channels:
      raise ValueError(
          'Image must be already concatenated with additional channels.')

    if (fields.InputDataFields.original_image in tensor_dict and
        shape_utils.get_dim_as_int(
            tensor_dict[fields.InputDataFields.original_image].shape[2]) ==
        num_channels):
      raise ValueError(
          'Image must be already concatenated with additional channels.')

  if fields.InputDataFields.context_features in tensor_dict and (
      max_num_context_features is None):
    raise ValueError('max_num_context_features must be specified in the model '
                     'config if include_context is specified in the input '
                     'config')

  padding_shapes = {
      fields.InputDataFields.image: [height, width, num_channels],
      fields.InputDataFields.original_image_spatial_shape: [2],
      fields.InputDataFields.image_additional_channels: [
          height, width, num_additional_channels
      ],
      fields.InputDataFields.source_id: [],
      fields.InputDataFields.filename: [],
      fields.InputDataFields.key: [],
      fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
      fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
      fields.InputDataFields.groundtruth_classes: [max_num_boxes, num_classes],
      fields.InputDataFields.groundtruth_instance_masks: [
          max_num_boxes, height, width
      ],
      fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes],
      fields.InputDataFields.groundtruth_group_of: [max_num_boxes],
      fields.InputDataFields.groundtruth_area: [max_num_boxes],
      fields.InputDataFields.groundtruth_weights: [max_num_boxes],
      fields.InputDataFields.groundtruth_confidences: [
          max_num_boxes, num_classes
      ],
      fields.InputDataFields.num_groundtruth_boxes: [],
      fields.InputDataFields.groundtruth_label_types: [max_num_boxes],
      fields.InputDataFields.groundtruth_label_weights: [max_num_boxes],
      fields.InputDataFields.true_image_shape: [3],
      fields.InputDataFields.groundtruth_image_classes: [num_classes],
      fields.InputDataFields.groundtruth_image_confidences: [num_classes],
      fields.InputDataFields.groundtruth_labeled_classes: [num_classes],
  }

  if fields.InputDataFields.original_image in tensor_dict:
    padding_shapes[fields.InputDataFields.original_image] = [
        height, width,
        shape_utils.get_dim_as_int(tensor_dict[fields.InputDataFields.
                                               original_image].shape[2])
    ]
  if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
    tensor_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_keypoints].shape)
    padding_shape = [max_num_boxes,
                     shape_utils.get_dim_as_int(tensor_shape[1]),
                     shape_utils.get_dim_as_int(tensor_shape[2])]
    padding_shapes[fields.InputDataFields.groundtruth_keypoints] = padding_shape
  if fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict:
    tensor_shape = tensor_dict[fields.InputDataFields.
                               groundtruth_keypoint_visibilities].shape
    padding_shape = [max_num_boxes, shape_utils.get_dim_as_int(tensor_shape[1])]
    padding_shapes[fields.InputDataFields.
                   groundtruth_keypoint_visibilities] = padding_shape

  if fields.InputDataFields.groundtruth_keypoint_weights in tensor_dict:
    tensor_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_keypoint_weights].shape)
    padding_shape = [max_num_boxes, shape_utils.get_dim_as_int(tensor_shape[1])]
    padding_shapes[fields.InputDataFields.
                   groundtruth_keypoint_weights] = padding_shape
  if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
    padding_shapes[
        fields.InputDataFields.groundtruth_dp_num_points] = [max_num_boxes]
    padding_shapes[
        fields.InputDataFields.groundtruth_dp_part_ids] = [
            max_num_boxes, max_dp_points]
    padding_shapes[
        fields.InputDataFields.groundtruth_dp_surface_coords] = [
            max_num_boxes, max_dp_points, 4]

  # Prepare for ContextRCNN related fields.
  if fields.InputDataFields.context_features in tensor_dict:
    padding_shape = [max_num_context_features, context_feature_length]
    padding_shapes[fields.InputDataFields.context_features] = padding_shape

    tensor_shape = tf.shape(
        tensor_dict[fields.InputDataFields.context_features])
    tensor_dict[fields.InputDataFields.valid_context_size] = tensor_shape[0]
    padding_shapes[fields.InputDataFields.valid_context_size] = []
  if fields.InputDataFields.context_feature_length in tensor_dict:
    padding_shapes[fields.InputDataFields.context_feature_length] = []

  if fields.InputDataFields.is_annotated in tensor_dict:
    padding_shapes[fields.InputDataFields.is_annotated] = []

  padded_tensor_dict = {}
  for tensor_name in tensor_dict:
    padded_tensor_dict[tensor_name] = shape_utils.pad_or_clip_nd(
        tensor_dict[tensor_name], padding_shapes[tensor_name])

  # Make sure that the number of groundtruth boxes now reflects the
  # padded/clipped tensors.
  if fields.InputDataFields.num_groundtruth_boxes in padded_tensor_dict:
    padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = (
        tf.minimum(
            padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes],
            max_num_boxes))
  return padded_tensor_dict
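
A hedged sketch of the Context R-CNN path (sizes invented): when context_features is present, max_num_context_features and context_feature_length must be supplied so the features can be padded to a static shape.

tensor_dict = {
    fields.InputDataFields.image: tf.zeros([300, 300, 3]),
    fields.InputDataFields.context_features: tf.zeros([50, 2057]),
}
padded = pad_input_data_to_static_shapes(
    tensor_dict, max_num_boxes=10, num_classes=3,
    spatial_image_shape=[320, 320],
    max_num_context_features=100, context_feature_length=2057)
# context_features pads to [100, 2057]; valid_context_size records the
# original row count (50) so the model can mask the padding.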