def draw_boxes(image_and_detections):
  """Draws boxes on image."""
  # Note: `true_image_shape`, `original_image_spatial_shape`,
  # `visualize_boxes_fn` and `_resize_original_image` are not defined here;
  # they are captured from the enclosing function's scope.
  true_shape = image_and_detections[0]
  original_shape = image_and_detections[1]
  if true_image_shape is not None:
    image = shape_utils.pad_or_clip_nd(image_and_detections[2],
                                       [true_shape[0], true_shape[1], 3])
  if original_image_spatial_shape is not None:
    image_and_detections[2] = _resize_original_image(image, original_shape)
  image_with_boxes = tf.py_func(visualize_boxes_fn, image_and_detections[2:],
                                tf.uint8)
  return image_with_boxes
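# Hedged aside (not part of the snippet above): what pad_or_clip_nd does for
# `draw_boxes` is recover the valid pixels of a batch-padded image by clipping
# back to its true spatial shape. A minimal, self-contained sketch, assuming
# the TF Object Detection API is importable as `object_detection`:
import tensorflow as tf
from object_detection.utils import shape_utils

batch_padded = tf.ones([640, 640, 3], dtype=tf.uint8)  # padded to batch shape
true_shape = tf.constant([480, 512, 3])                # actual image extent
# Oversized dims are clipped; undersized dims would be zero-padded at the end.
valid_image = shape_utils.pad_or_clip_nd(
    batch_padded, [true_shape[0], true_shape[1], 3])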
def test_pad_or_clip_nd_tensor(self):
  tensor_placeholder = tf.placeholder(tf.float32, [None, 5, 4, 7])
  output_tensor = shape_utils.pad_or_clip_nd(
      tensor_placeholder, [None, 3, 5, tf.constant(6)])

  self.assertAllEqual(output_tensor.shape.as_list(), [None, 3, 5, None])

  with self.test_session() as sess:
    output_tensor_np = sess.run(
        output_tensor,
        feed_dict={
            tensor_placeholder: np.random.rand(2, 5, 4, 7),
        })

  self.assertAllEqual(output_tensor_np.shape, [2, 3, 5, 6])
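# The test above pins down the per-axis semantics: a target smaller than the
# input clips (5 -> 3), a larger target zero-pads at the end (4 -> 5), None
# leaves an axis alone, and a Tensor target (tf.constant(6)) makes that static
# dim unknown (None) even though it evaluates to 6. A hedged eager-mode sketch
# of the same call:
import numpy as np
import tensorflow as tf
from object_detection.utils import shape_utils

x = tf.constant(np.random.rand(2, 5, 4, 7), dtype=tf.float32)
y = shape_utils.pad_or_clip_nd(x, [None, 3, 5, 6])
# y.shape == (2, 3, 5, 6): axis 1 dropped rows 3..4, axis 2 gained a zero
# column, axis 3 dropped its final element.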
def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
                                    spatial_image_shape=None):
  """Pads input tensors to static shapes.

  Args:
    tensor_dict: Tensor dictionary of input data
    max_num_boxes: Max number of groundtruth boxes needed to compute shapes for
      padding.
    num_classes: Number of classes in the dataset needed to compute shapes for
      padding.
    spatial_image_shape: A list of two integers of the form [height, width]
      containing expected spatial shape of the image.

  Returns:
    A dictionary keyed by fields.InputDataFields in which each tensor has been
    padded or clipped to a static shape.

  Raises:
    ValueError: If groundtruth classes is neither rank 1 nor rank 2.
  """

  if not spatial_image_shape or spatial_image_shape == [-1, -1]:
    height, width = None, None
  else:
    height, width = spatial_image_shape  # pylint: disable=unpacking-non-sequence

  num_additional_channels = 0
  if fields.InputDataFields.image_additional_channels in tensor_dict:
    num_additional_channels = tensor_dict[
        fields.InputDataFields.image_additional_channels].shape[2].value

  padding_shapes = {
      # Additional channels are merged before batching.
      fields.InputDataFields.image: [
          height, width, 3 + num_additional_channels
      ],
      fields.InputDataFields.image_additional_channels: [
          height, width, num_additional_channels
      ],
      fields.InputDataFields.source_id: [],
      fields.InputDataFields.filename: [],
      fields.InputDataFields.key: [],
      fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
      fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
      fields.InputDataFields.groundtruth_classes: [max_num_boxes, num_classes],
      fields.InputDataFields.groundtruth_instance_masks: [
          max_num_boxes, height, width
      ],
      fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes],
      fields.InputDataFields.groundtruth_group_of: [max_num_boxes],
      fields.InputDataFields.groundtruth_area: [max_num_boxes],
      fields.InputDataFields.groundtruth_weights: [max_num_boxes],
      fields.InputDataFields.num_groundtruth_boxes: [],
      fields.InputDataFields.groundtruth_label_types: [max_num_boxes],
      fields.InputDataFields.groundtruth_label_scores: [max_num_boxes],
      fields.InputDataFields.true_image_shape: [3],
      fields.InputDataFields.multiclass_scores: [
          max_num_boxes, num_classes + 1 if num_classes is not None else None
      ],
      fields.InputDataFields.groundtruth_image_classes: [num_classes],
  }

  if fields.InputDataFields.original_image in tensor_dict:
    padding_shapes[fields.InputDataFields.original_image] = [
        None, None, 3 + num_additional_channels
    ]
  if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
    tensor_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_keypoints].shape)
    padding_shape = [
        max_num_boxes, tensor_shape[1].value, tensor_shape[2].value
    ]
    padding_shapes[
        fields.InputDataFields.groundtruth_keypoints] = padding_shape
  if fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict:
    tensor_shape = tensor_dict[
        fields.InputDataFields.groundtruth_keypoint_visibilities].shape
    padding_shape = [max_num_boxes, tensor_shape[1].value]
    padding_shapes[fields.InputDataFields.
                   groundtruth_keypoint_visibilities] = padding_shape

  padded_tensor_dict = {}
  for tensor_name in tensor_dict:
    padded_tensor_dict[tensor_name] = shape_utils.pad_or_clip_nd(
        tensor_dict[tensor_name], padding_shapes[tensor_name])

  # Make sure that the number of groundtruth boxes now reflects the
  # padded/clipped tensors.
  if fields.InputDataFields.num_groundtruth_boxes in padded_tensor_dict:
    padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = (
        tf.minimum(
            padded_tensor_dict[
                fields.InputDataFields.num_groundtruth_boxes],
            max_num_boxes))
  return padded_tensor_dict
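# A hedged usage sketch for pad_input_data_to_static_shapes; the tensors and
# sizes below are illustrative stand-ins, not from any real dataset.
import tensorflow as tf
from object_detection.core import standard_fields as fields

tensor_dict = {
    fields.InputDataFields.image: tf.zeros([320, 320, 3]),
    fields.InputDataFields.groundtruth_boxes:
        tf.constant([[0.1, 0.1, 0.5, 0.5]]),
    fields.InputDataFields.groundtruth_classes: tf.one_hot([2], depth=90),
    fields.InputDataFields.num_groundtruth_boxes: tf.constant(1),
}
padded = pad_input_data_to_static_shapes(
    tensor_dict, max_num_boxes=100, num_classes=90,
    spatial_image_shape=[320, 320])
# groundtruth_boxes now has static shape [100, 4]: the one real box is row 0
# and rows 1..99 are zero padding; num_groundtruth_boxes stays 1.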
def pad_parts_tensor(instance_ind):
  # `num_points_cumulative`, `part_index` and `max_points_per_instance` come
  # from the enclosing scope; this helper pads one instance's part indices.
  points_range_start = num_points_cumulative[instance_ind]
  points_range_end = num_points_cumulative[instance_ind + 1]
  part_inds = part_index[points_range_start:points_range_end]
  return shape_utils.pad_or_clip_nd(part_inds,
                                    output_shape=[max_points_per_instance])
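# Hedged sketch of how a per-instance padder like this is typically applied:
# map it over instance indices with tf.map_fn so every instance ends up with
# the same static number of part indices. All values below are illustrative.
import tensorflow as tf
from object_detection.utils import shape_utils

part_index = tf.constant([0, 3, 3, 7, 1], dtype=tf.int32)  # flattened parts
num_points_cumulative = tf.constant([0, 2, 5], dtype=tf.int32)
max_points_per_instance = 4
num_instances = 2

def pad_parts_tensor(instance_ind):
  start = num_points_cumulative[instance_ind]
  end = num_points_cumulative[instance_ind + 1]
  return shape_utils.pad_or_clip_nd(
      part_index[start:end], output_shape=[max_points_per_instance])

padded_parts = tf.map_fn(
    pad_parts_tensor, tf.range(num_instances), dtype=tf.int32)
# padded_parts has static shape [2, 4]; instance 0 contributes [0, 3, 0, 0]
# and instance 1 contributes [3, 7, 1, 0].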
def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
                                    spatial_image_shape=None):
  """Pads input tensors to static shapes.

  Args:
    tensor_dict: Tensor dictionary of input data
    max_num_boxes: Max number of groundtruth boxes needed to compute shapes for
      padding.
    num_classes: Number of classes in the dataset needed to compute shapes for
      padding.
    spatial_image_shape: A list of two integers of the form [height, width]
      containing expected spatial shape of the image.

  Returns:
    A dictionary keyed by fields.InputDataFields in which each tensor has been
    padded or clipped to a static shape.

  Raises:
    ValueError: If groundtruth classes is neither rank 1 nor rank 2.
  """

  if not spatial_image_shape or spatial_image_shape == [-1, -1]:
    height, width = None, None
  else:
    height, width = spatial_image_shape  # pylint: disable=unpacking-non-sequence

  num_additional_channels = 0
  if fields.InputDataFields.image_additional_channels in tensor_dict:
    num_additional_channels = tensor_dict[
        fields.InputDataFields.image_additional_channels].shape[2].value

  padding_shapes = {
      # Additional channels are merged before batching.
      fields.InputDataFields.image: [
          height, width, 3 + num_additional_channels
      ],
      fields.InputDataFields.original_image_spatial_shape: [2],
      fields.InputDataFields.image_additional_channels: [
          height, width, num_additional_channels
      ],
      fields.InputDataFields.source_id: [],
      fields.InputDataFields.filename: [],
      fields.InputDataFields.key: [],
      fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
      fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
      fields.InputDataFields.groundtruth_classes: [max_num_boxes, num_classes],
      fields.InputDataFields.groundtruth_confidences: [
          max_num_boxes, num_classes
      ],
      fields.InputDataFields.groundtruth_instance_masks: [
          max_num_boxes, height, width
      ],
      fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes],
      fields.InputDataFields.groundtruth_group_of: [max_num_boxes],
      fields.InputDataFields.groundtruth_area: [max_num_boxes],
      fields.InputDataFields.groundtruth_weights: [max_num_boxes],
      fields.InputDataFields.num_groundtruth_boxes: [],
      fields.InputDataFields.groundtruth_label_types: [max_num_boxes],
      fields.InputDataFields.groundtruth_label_scores: [max_num_boxes],
      fields.InputDataFields.true_image_shape: [3],
      fields.InputDataFields.multiclass_scores: [
          max_num_boxes, num_classes + 1 if num_classes is not None else None
      ],
      fields.InputDataFields.groundtruth_image_classes: [num_classes],
      fields.InputDataFields.groundtruth_image_confidences: [num_classes],
  }

  if fields.InputDataFields.original_image in tensor_dict:
    padding_shapes[fields.InputDataFields.original_image] = [
        height, width, 3 + num_additional_channels
    ]
  if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
    tensor_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_keypoints].shape)
    padding_shape = [max_num_boxes, tensor_shape[1].value,
                     tensor_shape[2].value]
    padding_shapes[fields.InputDataFields.groundtruth_keypoints] = padding_shape
  if fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict:
    tensor_shape = tensor_dict[fields.InputDataFields.
                               groundtruth_keypoint_visibilities].shape
    padding_shape = [max_num_boxes, tensor_shape[1].value]
    padding_shapes[fields.InputDataFields.
                   groundtruth_keypoint_visibilities] = padding_shape

  padded_tensor_dict = {}
  for tensor_name in tensor_dict:
    padded_tensor_dict[tensor_name] = shape_utils.pad_or_clip_nd(
        tensor_dict[tensor_name], padding_shapes[tensor_name])

  # Make sure that the number of groundtruth boxes now reflects the
  # padded/clipped tensors.
  if fields.InputDataFields.num_groundtruth_boxes in padded_tensor_dict:
    padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = (
        tf.minimum(
            padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes],
            max_num_boxes))
  return padded_tensor_dict
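# Why the tf.minimum at the end matters (standalone illustration, values are
# made up): if an example carries more boxes than max_num_boxes,
# pad_or_clip_nd silently clips the box tensor, so the recorded count must be
# clamped to stay consistent with it.
import tensorflow as tf

num_groundtruth_boxes = tf.constant(120)  # boxes present before clipping
max_num_boxes = 100
clamped = tf.minimum(num_groundtruth_boxes, max_num_boxes)
# Downstream code that slices the first `clamped` rows of the padded [100, 4]
# box tensor now stays within bounds.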
def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
                                    spatial_image_shape=None):
  """Pads input tensors to static shapes.

  In case num_additional_channels > 0, we assume that the additional channels
  have already been concatenated to the base image.

  Args:
    tensor_dict: Tensor dictionary of input data
    max_num_boxes: Max number of groundtruth boxes needed to compute shapes for
      padding.
    num_classes: Number of classes in the dataset needed to compute shapes for
      padding.
    spatial_image_shape: A list of two integers of the form [height, width]
      containing expected spatial shape of the image.

  Returns:
    A dictionary keyed by fields.InputDataFields in which each tensor has been
    padded or clipped to a static shape.

  Raises:
    ValueError: If groundtruth classes is neither rank 1 nor rank 2, or if we
      detect that additional channels have not been concatenated yet.
  """

  if not spatial_image_shape or spatial_image_shape == [-1, -1]:
    height, width = None, None
  else:
    height, width = spatial_image_shape  # pylint: disable=unpacking-non-sequence

  num_additional_channels = 0
  if fields.InputDataFields.image_additional_channels in tensor_dict:
    num_additional_channels = shape_utils.get_dim_as_int(tensor_dict[
        fields.InputDataFields.image_additional_channels].shape[2])

  # We assume that if num_additional_channels > 0, then it has already been
  # concatenated to the base image (but not the ground truth).
  num_channels = 3
  if fields.InputDataFields.image in tensor_dict:
    num_channels = shape_utils.get_dim_as_int(
        tensor_dict[fields.InputDataFields.image].shape[2])

  if num_additional_channels:
    if num_additional_channels >= num_channels:
      raise ValueError(
          'Image must be already concatenated with additional channels.')

    if (fields.InputDataFields.original_image in tensor_dict and
        shape_utils.get_dim_as_int(
            tensor_dict[fields.InputDataFields.original_image].shape[2]) ==
        num_channels):
      raise ValueError(
          'Image must be already concatenated with additional channels.')

  padding_shapes = {
      fields.InputDataFields.image: [
          height, width, num_channels
      ],
      fields.InputDataFields.original_image_spatial_shape: [2],
      fields.InputDataFields.image_additional_channels: [
          height, width, num_additional_channels
      ],
      fields.InputDataFields.source_id: [],
      fields.InputDataFields.filename: [],
      fields.InputDataFields.key: [],
      fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
      fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
      fields.InputDataFields.groundtruth_classes: [max_num_boxes, num_classes],
      fields.InputDataFields.groundtruth_instance_masks: [
          max_num_boxes, height, width
      ],
      fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes],
      fields.InputDataFields.groundtruth_group_of: [max_num_boxes],
      fields.InputDataFields.groundtruth_area: [max_num_boxes],
      fields.InputDataFields.groundtruth_weights: [max_num_boxes],
      fields.InputDataFields.groundtruth_confidences: [
          max_num_boxes, num_classes
      ],
      fields.InputDataFields.num_groundtruth_boxes: [],
      fields.InputDataFields.groundtruth_label_types: [max_num_boxes],
      fields.InputDataFields.groundtruth_label_weights: [max_num_boxes],
      fields.InputDataFields.true_image_shape: [3],
      fields.InputDataFields.groundtruth_image_classes: [num_classes],
      fields.InputDataFields.groundtruth_image_confidences: [num_classes],
  }

  if fields.InputDataFields.original_image in tensor_dict:
    padding_shapes[fields.InputDataFields.original_image] = [
        height, width,
        shape_utils.get_dim_as_int(tensor_dict[fields.InputDataFields.
                                               original_image].shape[2])
    ]
  if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
    tensor_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_keypoints].shape)
    padding_shape = [max_num_boxes,
                     shape_utils.get_dim_as_int(tensor_shape[1]),
                     shape_utils.get_dim_as_int(tensor_shape[2])]
    padding_shapes[fields.InputDataFields.groundtruth_keypoints] = padding_shape
  if fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict:
    tensor_shape = tensor_dict[fields.InputDataFields.
                               groundtruth_keypoint_visibilities].shape
    padding_shape = [max_num_boxes,
                     shape_utils.get_dim_as_int(tensor_shape[1])]
    padding_shapes[fields.InputDataFields.
                   groundtruth_keypoint_visibilities] = padding_shape

  padded_tensor_dict = {}
  for tensor_name in tensor_dict:
    padded_tensor_dict[tensor_name] = shape_utils.pad_or_clip_nd(
        tensor_dict[tensor_name], padding_shapes[tensor_name])

  # Make sure that the number of groundtruth boxes now reflects the
  # padded/clipped tensors.
  if fields.InputDataFields.num_groundtruth_boxes in padded_tensor_dict:
    padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = (
        tf.minimum(
            padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes],
            max_num_boxes))
  return padded_tensor_dict
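# Hedged illustration of the channel sanity check introduced in this version:
# with 3 additional channels, the image entering the function must already be
# the 6-channel concatenation; a bare 3-channel image trips the ValueError.
import tensorflow as tf
from object_detection.core import standard_fields as fields

bad_dict = {
    fields.InputDataFields.image: tf.zeros([64, 64, 3]),  # not concatenated
    fields.InputDataFields.image_additional_channels: tf.zeros([64, 64, 3]),
}
# num_additional_channels (3) >= num_channels (3), so this call would raise
# ValueError('Image must be already concatenated with additional channels.'):
# pad_input_data_to_static_shapes(bad_dict, max_num_boxes=10, num_classes=5)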
def graph_fn(input_tensor):
  output_tensor = shape_utils.pad_or_clip_nd(
      input_tensor, [None, 3, 5, tf.constant(6)])
  return output_tensor
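# Hedged usage note: a graph_fn like this is typically traced once and then
# run on concrete inputs by a test harness; outside such a harness, wrapping
# it in tf.function gives the same trace-once behavior. Sketch only:
import numpy as np
import tensorflow as tf
from object_detection.utils import shape_utils

@tf.function
def graph_fn(input_tensor):
  return shape_utils.pad_or_clip_nd(input_tensor, [None, 3, 5, tf.constant(6)])

result = graph_fn(tf.constant(np.random.rand(2, 5, 4, 7), dtype=tf.float32))
# result.shape == (2, 3, 5, 6)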
def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
                                    spatial_image_shape=None):
  """Pads input tensors to static shapes."""
  if not spatial_image_shape or spatial_image_shape == [-1, -1]:
    height, width = None, None
  else:
    height, width = spatial_image_shape  # pylint: disable=unpacking-non-sequence

  num_additional_channels = 0
  if fields.InputDataFields.image_additional_channels in tensor_dict:
    num_additional_channels = shape_utils.get_dim_as_int(tensor_dict[
        fields.InputDataFields.image_additional_channels].shape[2])

  # We assume that if num_additional_channels > 0, then it has already been
  # concatenated to the base image (but not the ground truth).
  num_channels = 3
  if fields.InputDataFields.image in tensor_dict:
    num_channels = shape_utils.get_dim_as_int(
        tensor_dict[fields.InputDataFields.image].shape[2])

  if num_additional_channels:
    if num_additional_channels >= num_channels:
      raise ValueError(
          'Image must be already concatenated with additional channels.')

    if (fields.InputDataFields.original_image in tensor_dict and
        shape_utils.get_dim_as_int(tensor_dict[
            fields.InputDataFields.original_image].shape[2]) == num_channels):
      raise ValueError(
          'Image must be already concatenated with additional channels.')

  padding_shapes = {
      fields.InputDataFields.image: [height, width, num_channels],
      fields.InputDataFields.original_image_spatial_shape: [2],
      fields.InputDataFields.image_additional_channels: [
          height, width, num_additional_channels
      ],
      fields.InputDataFields.source_id: [],
      fields.InputDataFields.filename: [],
      fields.InputDataFields.key: [],
      fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
      fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
      fields.InputDataFields.groundtruth_classes: [max_num_boxes, num_classes],
      fields.InputDataFields.groundtruth_instance_masks: [
          max_num_boxes, height, width
      ],
      fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes],
      fields.InputDataFields.groundtruth_group_of: [max_num_boxes],
      fields.InputDataFields.groundtruth_area: [max_num_boxes],
      fields.InputDataFields.groundtruth_weights: [max_num_boxes],
      fields.InputDataFields.groundtruth_confidences: [
          max_num_boxes, num_classes
      ],
      fields.InputDataFields.num_groundtruth_boxes: [],
      fields.InputDataFields.groundtruth_label_types: [max_num_boxes],
      fields.InputDataFields.groundtruth_label_weights: [max_num_boxes],
      fields.InputDataFields.true_image_shape: [3],
      fields.InputDataFields.groundtruth_image_classes: [num_classes],
      fields.InputDataFields.groundtruth_image_confidences: [num_classes],
  }

  if fields.InputDataFields.original_image in tensor_dict:
    padding_shapes[fields.InputDataFields.original_image] = [
        height, width,
        shape_utils.get_dim_as_int(
            tensor_dict[fields.InputDataFields.original_image].shape[2])
    ]
  if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
    tensor_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_keypoints].shape)
    padding_shape = [
        max_num_boxes,
        shape_utils.get_dim_as_int(tensor_shape[1]),
        shape_utils.get_dim_as_int(tensor_shape[2])
    ]
    padding_shapes[
        fields.InputDataFields.groundtruth_keypoints] = padding_shape
  if fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict:
    tensor_shape = tensor_dict[
        fields.InputDataFields.groundtruth_keypoint_visibilities].shape
    padding_shape = [
        max_num_boxes, shape_utils.get_dim_as_int(tensor_shape[1])
    ]
    padding_shapes[fields.InputDataFields.
                   groundtruth_keypoint_visibilities] = padding_shape

  padded_tensor_dict = {}
  for tensor_name in tensor_dict:
    padded_tensor_dict[tensor_name] = shape_utils.pad_or_clip_nd(
        tensor_dict[tensor_name], padding_shapes[tensor_name])

  # Make sure that the number of groundtruth boxes now reflects the
  # padded/clipped tensors.
  if fields.InputDataFields.num_groundtruth_boxes in padded_tensor_dict:
    padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = (
        tf.minimum(
            padded_tensor_dict[
                fields.InputDataFields.num_groundtruth_boxes],
            max_num_boxes))
  return padded_tensor_dict
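# The get_dim_as_int helper used throughout this version papers over the
# TF1/TF2 TensorShape difference: TF1 dimensions are Dimension objects with a
# .value attribute, while TF2 dimensions are plain ints (or None). A minimal
# sketch of what such a helper needs to do:
def get_dim_as_int(dim):
  """Returns a TensorShape dimension as a plain int (or None)."""
  try:
    return dim.value  # TF1-style Dimension object
  except AttributeError:
    return dim        # TF2: already an int or None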
def pad_input_data_to_static_shapes(tensor_dict,
                                    max_num_boxes,
                                    num_classes,
                                    spatial_image_shape=None,
                                    max_num_context_features=None,
                                    context_feature_length=None,
                                    max_dp_points=336):
  """Pads input tensors to static shapes.

  In case num_additional_channels > 0, we assume that the additional channels
  have already been concatenated to the base image.

  Args:
    tensor_dict: Tensor dictionary of input data
    max_num_boxes: Max number of groundtruth boxes needed to compute shapes for
      padding.
    num_classes: Number of classes in the dataset needed to compute shapes for
      padding.
    spatial_image_shape: A list of two integers of the form [height, width]
      containing expected spatial shape of the image.
    max_num_context_features (optional): The maximum number of context
      features needed to compute shapes for padding.
    context_feature_length (optional): The length of the context feature.
    max_dp_points (optional): The maximum number of DensePose sampled points
      per instance. The default (336) is selected since the original DensePose
      paper (https://arxiv.org/pdf/1802.00434.pdf) indicates that the maximum
      number of samples per part is 14, and therefore 24 * 14 = 336 is the
      maximum number of samples per instance.

  Returns:
    A dictionary keyed by fields.InputDataFields in which each tensor has been
    padded or clipped to a static shape.

  Raises:
    ValueError: If groundtruth classes is neither rank 1 nor rank 2, or if we
      detect that additional channels have not been concatenated yet, or if
      max_num_context_features is not specified and context_features is in the
      tensor dict.
  """

  if not spatial_image_shape or spatial_image_shape == [-1, -1]:
    height, width = None, None
  else:
    height, width = spatial_image_shape  # pylint: disable=unpacking-non-sequence

  num_additional_channels = 0
  if fields.InputDataFields.image_additional_channels in tensor_dict:
    num_additional_channels = shape_utils.get_dim_as_int(tensor_dict[
        fields.InputDataFields.image_additional_channels].shape[2])

  # We assume that if num_additional_channels > 0, then it has already been
  # concatenated to the base image (but not the ground truth).
  num_channels = 3
  if fields.InputDataFields.image in tensor_dict:
    num_channels = shape_utils.get_dim_as_int(
        tensor_dict[fields.InputDataFields.image].shape[2])

  if num_additional_channels:
    if num_additional_channels >= num_channels:
      raise ValueError(
          'Image must be already concatenated with additional channels.')

    if (fields.InputDataFields.original_image in tensor_dict and
        shape_utils.get_dim_as_int(
            tensor_dict[fields.InputDataFields.original_image].shape[2]) ==
        num_channels):
      raise ValueError(
          'Image must be already concatenated with additional channels.')

  if fields.InputDataFields.context_features in tensor_dict and (
      max_num_context_features is None):
    raise ValueError('max_num_context_features must be specified in the model '
                     'config if include_context is specified in the input '
                     'config')

  padding_shapes = {
      fields.InputDataFields.image: [height, width, num_channels],
      fields.InputDataFields.original_image_spatial_shape: [2],
      fields.InputDataFields.image_additional_channels: [
          height, width, num_additional_channels
      ],
      fields.InputDataFields.source_id: [],
      fields.InputDataFields.filename: [],
      fields.InputDataFields.key: [],
      fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
      fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
      fields.InputDataFields.groundtruth_classes: [max_num_boxes, num_classes],
      fields.InputDataFields.groundtruth_instance_masks: [
          max_num_boxes, height, width
      ],
      fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes],
      fields.InputDataFields.groundtruth_group_of: [max_num_boxes],
      fields.InputDataFields.groundtruth_area: [max_num_boxes],
      fields.InputDataFields.groundtruth_weights: [max_num_boxes],
      fields.InputDataFields.groundtruth_confidences: [
          max_num_boxes, num_classes
      ],
      fields.InputDataFields.num_groundtruth_boxes: [],
      fields.InputDataFields.groundtruth_label_types: [max_num_boxes],
      fields.InputDataFields.groundtruth_label_weights: [max_num_boxes],
      fields.InputDataFields.true_image_shape: [3],
      fields.InputDataFields.groundtruth_image_classes: [num_classes],
      fields.InputDataFields.groundtruth_image_confidences: [num_classes],
      fields.InputDataFields.groundtruth_labeled_classes: [num_classes],
  }

  if fields.InputDataFields.original_image in tensor_dict:
    padding_shapes[fields.InputDataFields.original_image] = [
        height, width,
        shape_utils.get_dim_as_int(tensor_dict[fields.InputDataFields.
                                               original_image].shape[2])
    ]
  if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
    tensor_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_keypoints].shape)
    padding_shape = [max_num_boxes,
                     shape_utils.get_dim_as_int(tensor_shape[1]),
                     shape_utils.get_dim_as_int(tensor_shape[2])]
    padding_shapes[fields.InputDataFields.groundtruth_keypoints] = padding_shape
  if fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict:
    tensor_shape = tensor_dict[fields.InputDataFields.
                               groundtruth_keypoint_visibilities].shape
    padding_shape = [max_num_boxes,
                     shape_utils.get_dim_as_int(tensor_shape[1])]
    padding_shapes[fields.InputDataFields.
                   groundtruth_keypoint_visibilities] = padding_shape

  if fields.InputDataFields.groundtruth_keypoint_weights in tensor_dict:
    tensor_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_keypoint_weights].shape)
    padding_shape = [max_num_boxes,
                     shape_utils.get_dim_as_int(tensor_shape[1])]
    padding_shapes[fields.InputDataFields.
                   groundtruth_keypoint_weights] = padding_shape
  if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
    padding_shapes[
        fields.InputDataFields.groundtruth_dp_num_points] = [max_num_boxes]
    padding_shapes[
        fields.InputDataFields.groundtruth_dp_part_ids] = [
            max_num_boxes, max_dp_points]
    padding_shapes[
        fields.InputDataFields.groundtruth_dp_surface_coords] = [
            max_num_boxes, max_dp_points, 4]

  # Prepare for ContextRCNN related fields.
  if fields.InputDataFields.context_features in tensor_dict:
    padding_shape = [max_num_context_features, context_feature_length]
    padding_shapes[fields.InputDataFields.context_features] = padding_shape

    tensor_shape = tf.shape(
        tensor_dict[fields.InputDataFields.context_features])
    tensor_dict[fields.InputDataFields.valid_context_size] = tensor_shape[0]
    padding_shapes[fields.InputDataFields.valid_context_size] = []
  if fields.InputDataFields.context_feature_length in tensor_dict:
    padding_shapes[fields.InputDataFields.context_feature_length] = []

  if fields.InputDataFields.is_annotated in tensor_dict:
    padding_shapes[fields.InputDataFields.is_annotated] = []

  padded_tensor_dict = {}
  for tensor_name in tensor_dict:
    padded_tensor_dict[tensor_name] = shape_utils.pad_or_clip_nd(
        tensor_dict[tensor_name], padding_shapes[tensor_name])

  # Make sure that the number of groundtruth boxes now reflects the
  # padded/clipped tensors.
  if fields.InputDataFields.num_groundtruth_boxes in padded_tensor_dict:
    padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = (
        tf.minimum(
            padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes],
            max_num_boxes))
  return padded_tensor_dict
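# Hedged end-to-end sketch of the Context R-CNN path in this final version:
# context_features is padded to [max_num_context_features,
# context_feature_length] while the true row count survives in
# valid_context_size. All sizes below are illustrative stand-ins.
import tensorflow as tf
from object_detection.core import standard_fields as fields

tensor_dict = {
    fields.InputDataFields.image: tf.zeros([300, 300, 3]),
    fields.InputDataFields.context_features: tf.zeros([7, 2057]),
}
padded = pad_input_data_to_static_shapes(
    tensor_dict, max_num_boxes=100, num_classes=90,
    spatial_image_shape=[300, 300],
    max_num_context_features=2000, context_feature_length=2057)
# padded[fields.InputDataFields.context_features] has static shape
# [2000, 2057]; padded[fields.InputDataFields.valid_context_size] is 7.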