def test_filter_groundtruth_with_nan_box_coordinates(self):
  """Checks that entries whose boxes contain NaNs are dropped from all fields."""
  nan_box = [np.nan, np.nan, np.nan, np.nan]
  valid_box = [0.2, 0.4, 0.1, 0.8]
  input_tensors = {
      fields.InputDataFields.groundtruth_boxes: [nan_box, valid_box],
      fields.InputDataFields.groundtruth_classes: [1, 2],
      fields.InputDataFields.groundtruth_is_crowd: [False, True],
      fields.InputDataFields.groundtruth_area: [100.0, 238.7]
  }
  # Only the second (valid) entry survives the filter, in every field.
  expected_tensors = {
      fields.InputDataFields.groundtruth_boxes: [valid_box],
      fields.InputDataFields.groundtruth_classes: [2],
      fields.InputDataFields.groundtruth_is_crowd: [True],
      fields.InputDataFields.groundtruth_area: [238.7]
  }
  output_tensors = ops.filter_groundtruth_with_nan_box_coordinates(
      input_tensors)
  close_keys = (fields.InputDataFields.groundtruth_boxes,
                fields.InputDataFields.groundtruth_area)
  exact_keys = (fields.InputDataFields.groundtruth_classes,
                fields.InputDataFields.groundtruth_is_crowd)
  with self.test_session() as sess:
    output_tensors = sess.run(output_tensors)
    # Float-valued fields get tolerance-based comparison.
    for key in close_keys:
      self.assertAllClose(expected_tensors[key], output_tensors[key])
    # Integer/bool fields must match exactly.
    for key in exact_keys:
      self.assertAllEqual(expected_tensors[key], output_tensors[key])
def test_filter_groundtruth_with_nan_box_coordinates(self):
  """Tests that groundtruth rows with NaN box coordinates are filtered out.

  The first groundtruth box is all-NaN; the filter is expected to drop it
  together with its class, is_crowd flag and area, keeping only the second
  (valid) entry in every field.
  """
  input_tensors = {
      fields.InputDataFields.groundtruth_boxes:
          [[np.nan, np.nan, np.nan, np.nan], [0.2, 0.4, 0.1, 0.8]],
      fields.InputDataFields.groundtruth_classes: [1, 2],
      fields.InputDataFields.groundtruth_is_crowd: [False, True],
      fields.InputDataFields.groundtruth_area: [100.0, 238.7]
  }
  expected_tensors = {
      fields.InputDataFields.groundtruth_boxes: [[0.2, 0.4, 0.1, 0.8]],
      fields.InputDataFields.groundtruth_classes: [2],
      fields.InputDataFields.groundtruth_is_crowd: [True],
      fields.InputDataFields.groundtruth_area: [238.7]
  }
  output_tensors = ops.filter_groundtruth_with_nan_box_coordinates(
      input_tensors)
  with self.test_session() as sess:
    output_tensors = sess.run(output_tensors)
    # Float-valued fields are compared with numeric tolerance.
    for key in [fields.InputDataFields.groundtruth_boxes,
                fields.InputDataFields.groundtruth_area]:
      self.assertAllClose(expected_tensors[key], output_tensors[key])
    # Integer/bool fields must match exactly.
    for key in [fields.InputDataFields.groundtruth_classes,
                fields.InputDataFields.groundtruth_is_crowd]:
      self.assertAllEqual(expected_tensors[key], output_tensors[key])
def _build_training_batch_dict(batch_sequences_with_states, unroll_length,
                               batch_size):
  """Builds training batch samples.

  Args:
    batch_sequences_with_states: A batch_sequences_with_states object.
    unroll_length: Unrolled length for LSTM training.
    batch_size: Batch size for queue outputs.

  Returns:
    A dictionary of tensors based on items in input_reader_config.
  """
  sequences = batch_sequences_with_states.sequences
  images = []
  boxes = []
  classes = []
  # Flatten the (batch, unroll step) grid, step-major then batch index.
  for step in range(unroll_length):
    for sample in range(batch_size):
      groundtruth = util_ops.filter_groundtruth_with_nan_box_coordinates({
          fields.InputDataFields.groundtruth_boxes:
              sequences['groundtruth_boxes'][sample][step],
          fields.InputDataFields.groundtruth_classes:
              sequences['groundtruth_classes'][sample][step],
      })
      groundtruth = util_ops.retain_groundtruth_with_positive_classes(
          groundtruth)
      images.append(sequences['image'][sample][step])
      boxes.append(groundtruth[fields.InputDataFields.groundtruth_boxes])
      classes.append(groundtruth[fields.InputDataFields.groundtruth_classes])
  return {
      fields.InputDataFields.image: tuple(images),
      fields.InputDataFields.groundtruth_boxes: tuple(boxes),
      fields.InputDataFields.groundtruth_classes: tuple(classes),
      'batch': batch_sequences_with_states,
  }
def _build_training_batch_dict(batch_sequences_with_states, unroll_length,
                               batch_size):
  """Builds training batch samples.

  Args:
    batch_sequences_with_states: A batch_sequences_with_states object.
    unroll_length: Unrolled length for LSTM training.
    batch_size: Batch size for queue outputs.

  Returns:
    A dictionary of tensors based on items in input_reader_config.
  """
  seq_tensors_dict = {
      fields.InputDataFields.image: [],
      fields.InputDataFields.groundtruth_boxes: [],
      fields.InputDataFields.groundtruth_classes: [],
      'batch': batch_sequences_with_states,
  }
  # Flatten the [batch_size, unroll_length] grid of sequence steps into flat
  # per-field lists, ordered by unroll step first, batch index second.
  for i in range(unroll_length):
    for j in range(batch_size):
      # Drop groundtruth rows whose boxes contain NaN coordinates.
      filtered_dict = util_ops.filter_groundtruth_with_nan_box_coordinates({
          fields.InputDataFields.groundtruth_boxes: (
              batch_sequences_with_states.sequences['groundtruth_boxes'][j][i]),
          fields.InputDataFields.groundtruth_classes: (
              batch_sequences_with_states.sequences['groundtruth_classes'][j][i]
          ),
      })
      # Keep only rows with positive (valid) class labels.
      filtered_dict = util_ops.retain_groundtruth_with_positive_classes(
          filtered_dict)
      seq_tensors_dict[fields.InputDataFields.image].append(
          batch_sequences_with_states.sequences['image'][j][i])
      seq_tensors_dict[fields.InputDataFields.groundtruth_boxes].append(
          filtered_dict[fields.InputDataFields.groundtruth_boxes])
      seq_tensors_dict[fields.InputDataFields.groundtruth_classes].append(
          filtered_dict[fields.InputDataFields.groundtruth_classes])
  # Freeze the accumulated lists into tuples.
  seq_tensors_dict[fields.InputDataFields.image] = tuple(
      seq_tensors_dict[fields.InputDataFields.image])
  seq_tensors_dict[fields.InputDataFields.groundtruth_boxes] = tuple(
      seq_tensors_dict[fields.InputDataFields.groundtruth_boxes])
  seq_tensors_dict[fields.InputDataFields.groundtruth_classes] = tuple(
      seq_tensors_dict[fields.InputDataFields.groundtruth_classes])
  return seq_tensors_dict
def transform_input_data(tensor_dict,
                         model_preprocess_fn,
                         image_resizer_fn,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False,
                         use_multiclass_scores=False,
                         use_bfloat16=False,
                         retain_original_image_additional_channels=False):
  """A single function that is responsible for all input data transformations.

  Data transformation functions are applied in the following order.
  1. If key fields.InputDataFields.image_additional_channels is present in
     tensor_dict, the additional channels will be merged into
     fields.InputDataFields.image.
  2. data_augmentation_fn (optional): applied on tensor_dict.
  3. model_preprocess_fn: applied only on image tensor in tensor_dict.
  4. image_resizer_fn: applied on original image and instance mask tensor in
     tensor_dict.
  5. one_hot_encoding: applied to classes tensor in tensor_dict.
  6. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
     same they can be merged into a single box with an associated k-hot class
     label.

  Args:
    tensor_dict: dictionary containing input tensors keyed by
      fields.InputDataFields.
    model_preprocess_fn: model's preprocess function to apply on image tensor.
      This function must take in a 4-D float tensor and return a 4-D preprocess
      float tensor and a tensor containing the true image shape.
    image_resizer_fn: image resizer function to apply on groundtruth instance
      `masks. This function must take a 3-D float tensor of an image and a 3-D
      tensor of instance masks and return a resized version of these along with
      the true shapes.
    num_classes: number of max classes to one-hot (or k-hot) encode the class
      labels.
    data_augmentation_fn: (optional) data augmentation function to apply on
      input `tensor_dict`.
    merge_multiple_boxes: (optional) whether to merge multiple groundtruth
      boxes and classes for a given image if the boxes are exactly the same.
    retain_original_image: (optional) whether to retain original image in the
      output dictionary.
    use_multiclass_scores: whether to use multiclass scores as class targets
      instead of one-hot encoding of `groundtruth_classes`. When this is True
      and multiclass_scores is empty, one-hot encoding of `groundtruth_classes`
      is used as a fallback.
    use_bfloat16: (optional) a bool, whether to use bfloat16 in training.
    retain_original_image_additional_channels: (optional) Whether to retain
      original image additional channels in the output dictionary.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the tensors obtained
    after applying all the transformations.
  """
  # Work on a shallow copy so the caller's dict is not mutated.
  out_tensor_dict = tensor_dict.copy()
  if fields.InputDataFields.multiclass_scores in out_tensor_dict:
    # Reshape flat multiclass scores (falling back to one-hot labels when the
    # scores are empty — see use_multiclass_scores in the docstring).
    out_tensor_dict[
        fields.InputDataFields
        .multiclass_scores] = _multiclass_scores_or_one_hot_labels(
            out_tensor_dict[fields.InputDataFields.multiclass_scores],
            out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
            out_tensor_dict[fields.InputDataFields.groundtruth_classes],
            num_classes)

  if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
    # Drop NaN boxes and classes outside the label map before any other
    # groundtruth processing.
    out_tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates(
        out_tensor_dict)
    out_tensor_dict = util_ops.filter_unrecognized_classes(out_tensor_dict)

  if retain_original_image:
    # Keep a resized uint8 copy of the un-preprocessed image.
    out_tensor_dict[fields.InputDataFields.original_image] = tf.cast(
        image_resizer_fn(out_tensor_dict[fields.InputDataFields.image],
                         None)[0], tf.uint8)

  if fields.InputDataFields.image_additional_channels in out_tensor_dict:
    # Merge extra channels onto the image along the channel axis.
    channels = out_tensor_dict[fields.InputDataFields.image_additional_channels]
    out_tensor_dict[fields.InputDataFields.image] = tf.concat(
        [out_tensor_dict[fields.InputDataFields.image], channels], axis=2)
    if retain_original_image_additional_channels:
      out_tensor_dict[
          fields.InputDataFields.image_additional_channels] = tf.cast(
              image_resizer_fn(channels, None)[0], tf.uint8)

  # Apply data augmentation ops.
  if data_augmentation_fn is not None:
    out_tensor_dict = data_augmentation_fn(out_tensor_dict)

  # Apply model preprocessing ops and resize instance masks.
  image = out_tensor_dict[fields.InputDataFields.image]
  preprocessed_resized_image, true_image_shape = model_preprocess_fn(
      tf.expand_dims(tf.cast(image, dtype=tf.float32), axis=0))
  if use_bfloat16:
    preprocessed_resized_image = tf.cast(
        preprocessed_resized_image, tf.bfloat16)
  out_tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      preprocessed_resized_image, axis=0)
  out_tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
      true_image_shape, axis=0)
  if fields.InputDataFields.groundtruth_instance_masks in out_tensor_dict:
    masks = out_tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
    _, resized_masks, _ = image_resizer_fn(image, masks)
    if use_bfloat16:
      resized_masks = tf.cast(resized_masks, tf.bfloat16)
    out_tensor_dict[
        fields.InputDataFields.groundtruth_instance_masks] = resized_masks

  # Class labels arrive 1-indexed; shift to 0-indexed for one-hot encoding.
  label_offset = 1
  zero_indexed_groundtruth_classes = out_tensor_dict[
      fields.InputDataFields.groundtruth_classes] - label_offset
  if use_multiclass_scores:
    out_tensor_dict[
        fields.InputDataFields.groundtruth_classes] = out_tensor_dict[
            fields.InputDataFields.multiclass_scores]
  else:
    out_tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
        zero_indexed_groundtruth_classes, num_classes)
  # multiclass_scores has been consumed either way; remove it if present.
  out_tensor_dict.pop(fields.InputDataFields.multiclass_scores, None)

  if fields.InputDataFields.groundtruth_confidences in out_tensor_dict:
    groundtruth_confidences = out_tensor_dict[
        fields.InputDataFields.groundtruth_confidences]
    # Map the confidences to the one-hot encoding of classes
    out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        tf.reshape(groundtruth_confidences, [-1, 1]) *
        out_tensor_dict[fields.InputDataFields.groundtruth_classes])
  else:
    groundtruth_confidences = tf.ones_like(
        zero_indexed_groundtruth_classes, dtype=tf.float32)
    out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        out_tensor_dict[fields.InputDataFields.groundtruth_classes])

  if merge_multiple_boxes:
    # Collapse exactly-identical boxes into one k-hot labeled box.
    merged_boxes, merged_classes, merged_confidences, _ = (
        util_ops.merge_boxes_with_multiple_labels(
            out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
            zero_indexed_groundtruth_classes,
            groundtruth_confidences,
            num_classes))
    merged_classes = tf.cast(merged_classes, tf.float32)
    out_tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes
    out_tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes
    out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        merged_confidences)
  if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
    out_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
        out_tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]

  return out_tensor_dict
def transform_input_data(tensor_dict,
                         model_preprocess_fn,
                         image_resizer_fn,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False):
  """A single function that is responsible for all input data transformations.

  Data transformation functions are applied in the following order.
  1. If key fields.InputDataFields.image_additional_channels is present in
     tensor_dict, the additional channels will be merged into
     fields.InputDataFields.image.
  2. data_augmentation_fn (optional): applied on tensor_dict.
  3. model_preprocess_fn: applied only on image tensor in tensor_dict.
  4. image_resizer_fn: applied on original image and instance mask tensor in
     tensor_dict.
  5. one_hot_encoding: applied to classes tensor in tensor_dict.
  6. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
     same they can be merged into a single box with an associated k-hot class
     label.

  Args:
    tensor_dict: dictionary containing input tensors keyed by
      fields.InputDataFields.
    model_preprocess_fn: model's preprocess function to apply on image tensor.
      This function must take in a 4-D float tensor and return a 4-D preprocess
      float tensor and a tensor containing the true image shape.
    image_resizer_fn: image resizer function to apply on groundtruth instance
      `masks. This function must take a 3-D float tensor of an image and a 3-D
      tensor of instance masks and return a resized version of these along with
      the true shapes.
    num_classes: number of max classes to one-hot (or k-hot) encode the class
      data.
    data_augmentation_fn: (optional) data augmentation function to apply on
      input `tensor_dict`.
    merge_multiple_boxes: (optional) whether to merge multiple groundtruth
      boxes and classes for a given image if the boxes are exactly the same.
    retain_original_image: (optional) whether to retain original image in the
      output dictionary.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the tensors obtained
    after applying all the transformations.
  """
  if fields.InputDataFields.groundtruth_boxes in tensor_dict:
    # Drop groundtruth rows whose boxes contain NaN coordinates.
    tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates(
        tensor_dict)
  if fields.InputDataFields.image_additional_channels in tensor_dict:
    # Merge extra channels onto the image along the channel axis.
    channels = tensor_dict[fields.InputDataFields.image_additional_channels]
    tensor_dict[fields.InputDataFields.image] = tf.concat(
        [tensor_dict[fields.InputDataFields.image], channels], axis=2)
  if retain_original_image:
    # NOTE: unlike later revisions of this function, the original image is
    # retained un-resized here.
    tensor_dict[fields.InputDataFields.original_image] = tf.cast(
        tensor_dict[fields.InputDataFields.image], tf.uint8)

  # Apply data augmentation ops.
  if data_augmentation_fn is not None:
    tensor_dict = data_augmentation_fn(tensor_dict)

  # Apply model preprocessing ops and resize instance masks.
  image = tensor_dict[fields.InputDataFields.image]
  # tf.cast replaces the deprecated tf.to_float, matching the other
  # transform_input_data variants in this file.
  preprocessed_resized_image, true_image_shape = model_preprocess_fn(
      tf.expand_dims(tf.cast(image, dtype=tf.float32), axis=0))
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      preprocessed_resized_image, axis=0)
  tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
      true_image_shape, axis=0)
  if fields.InputDataFields.groundtruth_instance_masks in tensor_dict:
    masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
    _, resized_masks, _ = image_resizer_fn(image, masks)
    tensor_dict[fields.InputDataFields.
                groundtruth_instance_masks] = resized_masks

  # Transform groundtruth classes to one hot encodings.
  # Class labels arrive 1-indexed; shift to 0-indexed for one-hot encoding.
  label_offset = 1
  zero_indexed_groundtruth_classes = tensor_dict[
      fields.InputDataFields.groundtruth_classes] - label_offset
  tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
      zero_indexed_groundtruth_classes, num_classes)

  if merge_multiple_boxes:
    # Collapse exactly-identical boxes into one k-hot labeled box.
    merged_boxes, merged_classes, _ = util_ops.merge_boxes_with_multiple_labels(
        tensor_dict[fields.InputDataFields.groundtruth_boxes],
        zero_indexed_groundtruth_classes, num_classes)
    merged_classes = tf.cast(merged_classes, tf.float32)
    tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes
    tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes

  return tensor_dict
def transform_input_data(tensor_dict,
                         model_preprocess_fn,
                         image_resizer_fn,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False,
                         use_multiclass_scores=False):
  """A single function that is responsible for all input data transformations.

  Data transformation functions are applied in the following order.
  1. If key fields.InputDataFields.image_additional_channels is present in
     tensor_dict, the additional channels will be merged into
     fields.InputDataFields.image.
  2. data_augmentation_fn (optional): applied on tensor_dict.
  3. model_preprocess_fn: applied only on image tensor in tensor_dict.
  4. image_resizer_fn: applied on original image and instance mask tensor in
     tensor_dict.
  5. one_hot_encoding: applied to classes tensor in tensor_dict.
  6. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
     same they can be merged into a single box with an associated k-hot class
     label.

  This variant additionally resizes a set of custom per-pixel groundtruth
  channels (groundtruth_bel_F, groundtruth_bel_O, groundtruth_bel_U,
  groundtruth_z_min/max_detections, groundtruth_z_min_observations,
  groundtruth_detections_drivingCorridor, groundtruth_intensity).
  NOTE(review): these appear to be occupancy-grid style [H, W, 1] maps —
  confirm against the dataset decoder.

  Args:
    tensor_dict: dictionary containing input tensors keyed by
      fields.InputDataFields.
    model_preprocess_fn: model's preprocess function to apply on image tensor.
      This function must take in a 4-D float tensor and return a 4-D preprocess
      float tensor and a tensor containing the true image shape.
    image_resizer_fn: image resizer function to apply on groundtruth instance
      `masks. This function must take a 3-D float tensor of an image and a 3-D
      tensor of instance masks and return a resized version of these along with
      the true shapes.
    num_classes: number of max classes to one-hot (or k-hot) encode the class
      labels.
    data_augmentation_fn: (optional) data augmentation function to apply on
      input `tensor_dict`.
    merge_multiple_boxes: (optional) whether to merge multiple groundtruth
      boxes and classes for a given image if the boxes are exactly the same.
    retain_original_image: (optional) whether to retain original image in the
      output dictionary.
    use_multiclass_scores: whether to use multiclass scores as class targets
      instead of one-hot encoding of `groundtruth_classes`.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the tensors obtained
    after applying all the transformations.
  """
  # Reshape flattened multiclass scores tensor into a 2D tensor of shape
  # [num_boxes, num_classes].
  if fields.InputDataFields.multiclass_scores in tensor_dict:
    tensor_dict[fields.InputDataFields.multiclass_scores] = tf.reshape(
        tensor_dict[fields.InputDataFields.multiclass_scores], [
            tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0],
            num_classes
        ])
  if fields.InputDataFields.groundtruth_boxes in tensor_dict:
    # Drop NaN boxes and classes outside the label map.
    tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates(
        tensor_dict)
    tensor_dict = util_ops.filter_unrecognized_classes(tensor_dict)

  if retain_original_image:
    # Keep a resized uint8 copy of the un-preprocessed image.
    tensor_dict[fields.InputDataFields.original_image] = tf.cast(
        image_resizer_fn(tensor_dict[fields.InputDataFields.image])[0],
        tf.uint8)

  if fields.InputDataFields.image_additional_channels in tensor_dict:
    channels = tensor_dict[fields.InputDataFields.image_additional_channels]
    tensor_dict[fields.InputDataFields.image] = tf.concat(
        [tensor_dict[fields.InputDataFields.image], channels], axis=2)

  # Abandoned experiment: derive a box mask / detection mask groundtruth.
  # # Create gt_boxes_masks
  # height, width, _ = tf.unstack(
  #     tf.shape(tensor_dict[fields.InputDataFields.image]))
  # # image_template = tf.squeeze(
  # #     tensor_dict[fields.InputDataFields.groundtruth_bel_O], axis=2)
  # # image_template = tensor_dict[fields.InputDataFields.groundtruth_bel_O]
  # label_boxes_list = tensor_dict[fields.InputDataFields.groundtruth_boxes]
  # print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
  # # print(image_template)
  # print(label_boxes_list)
  # boxes_mask = boxes2mask(label_boxes_list)
  # tensor_dict[fields.InputDataFields.groundtruth_boxes_mask] = (
  #     tf.stop_gradient(boxes_mask))
  # # Create detection masks
  # det_mask = tf.squeeze(
  #     tensor_dict[fields.InputDataFields.groundtruth_bel_O], axis=2)
  # zeros = tf.zeros_like(det_mask)
  # ones = tf.ones_like(det_mask)
  # tensor_dict[fields.InputDataFields.groundtruth_boxes_mask] = (
  #     tf.stop_gradient(tf.where(det_mask > 0, ones, zeros)))

  # Apply data augmentation ops.
  if data_augmentation_fn is not None:
    tensor_dict = data_augmentation_fn(tensor_dict)  # todo first without data augm

  # Apply model preprocessing ops and resize instance masks.
  image = tensor_dict[fields.InputDataFields.image]
  preprocessed_resized_image, true_image_shape = model_preprocess_fn(
      tf.expand_dims(tf.cast(image, dtype=tf.float32), axis=0))
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      preprocessed_resized_image, axis=0)
  tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
      true_image_shape, axis=0)

  # Resize every custom per-pixel groundtruth channel with image_resizer_fn.
  # Each channel is squeezed from [H, W, 1] to [H, W] and given a leading dim
  # so the resizer treats it like a stack of instance masks, then restored to
  # [H', W', 1] afterwards.
  groundtruth_bel_F = tensor_dict[fields.InputDataFields.groundtruth_bel_F]
  groundtruth_bel_O = tensor_dict[fields.InputDataFields.groundtruth_bel_O]
  groundtruth_z_max_detections = tensor_dict[
      fields.InputDataFields.groundtruth_z_max_detections]
  groundtruth_z_min_observations = tensor_dict[
      fields.InputDataFields.groundtruth_z_min_observations]
  groundtruth_bel_U = tensor_dict[fields.InputDataFields.groundtruth_bel_U]
  groundtruth_z_min_detections = tensor_dict[
      fields.InputDataFields.groundtruth_z_min_detections]
  groundtruth_detections_drivingCorridor = tensor_dict[
      fields.InputDataFields.groundtruth_detections_drivingCorridor]
  groundtruth_intensity = tensor_dict[
      fields.InputDataFields.groundtruth_intensity]

  groundtruth_bel_F = tf.expand_dims(
      tf.squeeze(groundtruth_bel_F, axis=2), axis=0)
  _, resized_groundtruth_bel_F, _ = image_resizer_fn(image, groundtruth_bel_F)
  # resized_groundtruth_bel_F = image_resizer_fn(groundtruth_bel_F)

  groundtruth_bel_O = tf.expand_dims(
      tf.squeeze(groundtruth_bel_O, axis=2), axis=0)
  _, resized_groundtruth_bel_O, _ = image_resizer_fn(image, groundtruth_bel_O)
  # resized_groundtruth_bel_O = image_resizer_fn(groundtruth_bel_O)

  groundtruth_z_max_detections = tf.expand_dims(
      tf.squeeze(groundtruth_z_max_detections, axis=2), axis=0)
  _, resized_groundtruth_z_max_detections, _ = image_resizer_fn(
      image, groundtruth_z_max_detections)

  groundtruth_z_min_observations = tf.expand_dims(
      tf.squeeze(groundtruth_z_min_observations, axis=2), axis=0)
  _, resized_groundtruth_z_min_observations, _ = image_resizer_fn(
      image, groundtruth_z_min_observations)

  groundtruth_bel_U = tf.expand_dims(
      tf.squeeze(groundtruth_bel_U, axis=2), axis=0)
  _, resized_groundtruth_bel_U, _ = image_resizer_fn(image, groundtruth_bel_U)

  groundtruth_z_min_detections = tf.expand_dims(
      tf.squeeze(groundtruth_z_min_detections, axis=2), axis=0)
  _, resized_groundtruth_z_min_detections, _ = image_resizer_fn(
      image, groundtruth_z_min_detections)

  groundtruth_detections_drivingCorridor = tf.expand_dims(
      tf.squeeze(groundtruth_detections_drivingCorridor, axis=2), axis=0)
  _, resized_groundtruth_detections_drivingCorridor, _ = image_resizer_fn(
      image, groundtruth_detections_drivingCorridor)

  groundtruth_intensity = tf.expand_dims(
      tf.squeeze(groundtruth_intensity, axis=2), axis=0)
  _, resized_groundtruth_intensity, _ = image_resizer_fn(
      image, groundtruth_intensity)

  tensor_dict[fields.InputDataFields.groundtruth_bel_F] = tf.expand_dims(
      tf.squeeze(resized_groundtruth_bel_F, axis=0), axis=2)
  tensor_dict[fields.InputDataFields.groundtruth_bel_O] = tf.expand_dims(
      tf.squeeze(resized_groundtruth_bel_O, axis=0), axis=2)
  tensor_dict[
      fields.InputDataFields.groundtruth_z_min_observations] = tf.expand_dims(
          tf.squeeze(resized_groundtruth_z_min_observations, axis=0), axis=2)
  tensor_dict[
      fields.InputDataFields.groundtruth_z_max_detections] = tf.expand_dims(
          tf.squeeze(resized_groundtruth_z_max_detections, axis=0), axis=2)
  tensor_dict[fields.InputDataFields.groundtruth_bel_U] = tf.expand_dims(
      tf.squeeze(resized_groundtruth_bel_U, axis=0), axis=2)
  tensor_dict[
      fields.InputDataFields
      .groundtruth_detections_drivingCorridor] = tf.expand_dims(
          tf.squeeze(resized_groundtruth_detections_drivingCorridor, axis=0),
          axis=2)
  tensor_dict[
      fields.InputDataFields.groundtruth_z_min_detections] = tf.expand_dims(
          tf.squeeze(resized_groundtruth_z_min_detections, axis=0), axis=2)
  tensor_dict[fields.InputDataFields.groundtruth_intensity] = tf.expand_dims(
      tf.squeeze(resized_groundtruth_intensity, axis=0), axis=2)

  # Transform groundtruth classes to one hot encodings.
  label_offset = 1
  zero_indexed_groundtruth_classes = tensor_dict[
      fields.InputDataFields.groundtruth_classes] - label_offset
  tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
      zero_indexed_groundtruth_classes, num_classes)

  if use_multiclass_scores:
    tensor_dict[fields.InputDataFields.groundtruth_classes] = tensor_dict[
        fields.InputDataFields.multiclass_scores]
  # multiclass_scores has been consumed either way; remove it if present.
  tensor_dict.pop(fields.InputDataFields.multiclass_scores, None)

  if fields.InputDataFields.groundtruth_confidences in tensor_dict:
    groundtruth_confidences = tensor_dict[
        fields.InputDataFields.groundtruth_confidences]
    # Map the confidences to the one-hot encoding of classes
    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        tf.reshape(groundtruth_confidences, [-1, 1]) *
        tensor_dict[fields.InputDataFields.groundtruth_classes])
  else:
    groundtruth_confidences = tf.ones_like(
        zero_indexed_groundtruth_classes, dtype=tf.float32)
    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        tensor_dict[fields.InputDataFields.groundtruth_classes])

  if merge_multiple_boxes:
    # Collapse exactly-identical boxes into one k-hot labeled box.
    merged_boxes, merged_classes, merged_confidences, _ = (
        util_ops.merge_boxes_with_multiple_labels(
            tensor_dict[fields.InputDataFields.groundtruth_boxes],
            zero_indexed_groundtruth_classes,
            groundtruth_confidences,
            num_classes))
    merged_classes = tf.cast(merged_classes, tf.float32)
    tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes
    tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes
    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        merged_confidences)
  if fields.InputDataFields.groundtruth_boxes in tensor_dict:
    tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
        tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]

  # Abandoned experiment: concatenating the bel channels onto themselves
  # failed with "ValueError: Can't concatenate scalars (use tf.stack
  # instead)"; kept for reference.
  # if fields.InputDataFields.groundtruth_bel_F in tensor_dict:
  #   channels = tensor_dict[fields.InputDataFields.groundtruth_bel_F]
  #   tensor_dict[fields.InputDataFields.groundtruth_bel_F] = tf.concat(
  #       [tensor_dict[fields.InputDataFields.groundtruth_bel_F], channels],
  #       axis=2)
  # if fields.InputDataFields.groundtruth_bel_O in tensor_dict:
  #   channels = tensor_dict[fields.InputDataFields.groundtruth_bel_O]
  #   tensor_dict[fields.InputDataFields.groundtruth_bel_O] = tf.concat(
  #       [tensor_dict[fields.InputDataFields.groundtruth_bel_O], channels],
  #       axis=2)

  return tensor_dict
def transform_input_data(tensor_dict,
                         model_preprocess_fn,
                         image_resizer_fn,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False,
                         use_bfloat16=False):
  """A single function that is responsible for all input data transformations.

  Data transformation functions are applied in the following order.
  1. If key fields.InputDataFields.image_additional_channels is present in
     tensor_dict, the additional channels will be merged into
     fields.InputDataFields.image.
  2. data_augmentation_fn (optional): applied on tensor_dict.
  3. model_preprocess_fn: applied only on image tensor in tensor_dict.
  4. image_resizer_fn: applied on original image and instance mask tensor in
     tensor_dict.
  5. one_hot_encoding: applied to classes tensor in tensor_dict.
  6. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
     same they can be merged into a single box with an associated k-hot class
     label.

  Args:
    tensor_dict: dictionary containing input tensors keyed by
      fields.InputDataFields.
    model_preprocess_fn: model's preprocess function to apply on image tensor.
      This function must take in a 4-D float tensor and return a 4-D preprocess
      float tensor and a tensor containing the true image shape.
    image_resizer_fn: image resizer function to apply on groundtruth instance
      `masks. This function must take a 3-D float tensor of an image and a 3-D
      tensor of instance masks and return a resized version of these along with
      the true shapes.
    num_classes: number of max classes to one-hot (or k-hot) encode the class
      labels.
    data_augmentation_fn: (optional) data augmentation function to apply on
      input `tensor_dict`.
    merge_multiple_boxes: (optional) whether to merge multiple groundtruth
      boxes and classes for a given image if the boxes are exactly the same.
    retain_original_image: (optional) whether to retain original image in the
      output dictionary.
    use_bfloat16: (optional) a bool, whether to use bfloat16 in training.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the tensors obtained
    after applying all the transformations.
  """
  if fields.InputDataFields.groundtruth_boxes in tensor_dict:
    # Drop groundtruth rows whose boxes contain NaN coordinates.
    tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates(
        tensor_dict)
  if fields.InputDataFields.image_additional_channels in tensor_dict:
    # Merge extra channels onto the image along the channel axis.
    channels = tensor_dict[fields.InputDataFields.image_additional_channels]
    tensor_dict[fields.InputDataFields.image] = tf.concat(
        [tensor_dict[fields.InputDataFields.image], channels], axis=2)
  if retain_original_image:
    # Keep a resized uint8 copy of the un-preprocessed image.
    tensor_dict[fields.InputDataFields.original_image] = tf.cast(
        image_resizer_fn(tensor_dict[fields.InputDataFields.image], None)[0],
        tf.uint8)

  # Apply data augmentation ops.
  if data_augmentation_fn is not None:
    tensor_dict = data_augmentation_fn(tensor_dict)

  # Apply model preprocessing ops and resize instance masks.
  image = tensor_dict[fields.InputDataFields.image]
  # tf.cast replaces the deprecated tf.to_float, matching the other
  # transform_input_data variants in this file.
  preprocessed_resized_image, true_image_shape = model_preprocess_fn(
      tf.expand_dims(tf.cast(image, dtype=tf.float32), axis=0))
  if use_bfloat16:
    preprocessed_resized_image = tf.cast(
        preprocessed_resized_image, tf.bfloat16)
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      preprocessed_resized_image, axis=0)
  tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
      true_image_shape, axis=0)
  if fields.InputDataFields.groundtruth_instance_masks in tensor_dict:
    masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
    _, resized_masks, _ = image_resizer_fn(image, masks)
    if use_bfloat16:
      resized_masks = tf.cast(resized_masks, tf.bfloat16)
    tensor_dict[fields.InputDataFields.
                groundtruth_instance_masks] = resized_masks

  # Transform groundtruth classes to one hot encodings.
  # Class labels arrive 1-indexed; shift to 0-indexed for one-hot encoding.
  label_offset = 1
  zero_indexed_groundtruth_classes = tensor_dict[
      fields.InputDataFields.groundtruth_classes] - label_offset
  tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
      zero_indexed_groundtruth_classes, num_classes)

  if fields.InputDataFields.groundtruth_confidences in tensor_dict:
    groundtruth_confidences = tensor_dict[
        fields.InputDataFields.groundtruth_confidences]
    # Scatter per-box confidences into a dense [num_classes] vector.
    # NOTE(review): tf.sparse_to_dense is deprecated; later revisions of this
    # function use a reshape-and-multiply against the one-hot classes instead.
    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        tf.sparse_to_dense(
            zero_indexed_groundtruth_classes, [num_classes],
            groundtruth_confidences,
            validate_indices=False))
  else:
    groundtruth_confidences = tf.ones_like(
        zero_indexed_groundtruth_classes, dtype=tf.float32)
    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        tensor_dict[fields.InputDataFields.groundtruth_classes])

  if merge_multiple_boxes:
    # Collapse exactly-identical boxes into one k-hot labeled box.
    merged_boxes, merged_classes, merged_confidences, _ = (
        util_ops.merge_boxes_with_multiple_labels(
            tensor_dict[fields.InputDataFields.groundtruth_boxes],
            zero_indexed_groundtruth_classes,
            groundtruth_confidences,
            num_classes))
    merged_classes = tf.cast(merged_classes, tf.float32)
    tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes
    tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes
    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        merged_confidences)
  if fields.InputDataFields.groundtruth_boxes in tensor_dict:
    tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
        tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]

  return tensor_dict
def transform_input_data(tensor_dict,
                         model_preprocess_fn,
                         image_resizer_fn,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False,
                         use_multiclass_scores=False,
                         use_bfloat16=False,
                         retain_original_image_additional_channels=False,
                         keypoint_type_weight=None):
  """A single function that is responsible for all input data transformations.

  Data transformation functions are applied in the following order.
  1. If key fields.InputDataFields.image_additional_channels is present in
     tensor_dict, the additional channels will be merged into
     fields.InputDataFields.image.
  2. data_augmentation_fn (optional): applied on tensor_dict.
  3. model_preprocess_fn: applied only on image tensor in tensor_dict.
  4. keypoint_type_weight (optional): If groundtruth keypoints are in the
     tensor dictionary, per-keypoint weights are produced. These weights are
     initialized by `keypoint_type_weight` (or ones if left None). Then, for
     all keypoints that are not visible, the weights are set to 0 (to avoid
     penalizing the model in a loss function).
  5. image_resizer_fn: applied on original image and instance mask tensor in
     tensor_dict.
  6. one_hot_encoding: applied to classes tensor in tensor_dict.
  7. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
     same they can be merged into a single box with an associated k-hot class
     label.

  Args:
    tensor_dict: dictionary containing input tensors keyed by
      fields.InputDataFields.
    model_preprocess_fn: model's preprocess function to apply on image tensor.
      This function must take in a 4-D float tensor and return a 4-D
      preprocess float tensor and a tensor containing the true image shape.
    image_resizer_fn: image resizer function to apply on groundtruth instance
      `masks. This function must take a 3-D float tensor of an image and a 3-D
      tensor of instance masks and return a resized version of these along
      with the true shapes.
    num_classes: number of max classes to one-hot (or k-hot) encode the class
      labels.
    data_augmentation_fn: (optional) data augmentation function to apply on
      input `tensor_dict`.
    merge_multiple_boxes: (optional) whether to merge multiple groundtruth
      boxes and classes for a given image if the boxes are exactly the same.
    retain_original_image: (optional) whether to retain original image in the
      output dictionary.
    use_multiclass_scores: whether to use multiclass scores as class targets
      instead of one-hot encoding of `groundtruth_classes`. When this is True
      and multiclass_scores is empty, one-hot encoding of
      `groundtruth_classes` is used as a fallback.
    use_bfloat16: (optional) a bool, whether to use bfloat16 in training.
    retain_original_image_additional_channels: (optional) Whether to retain
      original image additional channels in the output dictionary.
    keypoint_type_weight: A list (of length num_keypoints) containing
      groundtruth loss weights to use for each keypoint. If None, will use a
      weight of 1.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the tensors
    obtained after applying all the transformations.
  """
  out_tensor_dict = tensor_dict.copy()

  if fields.InputDataFields.multiclass_scores in out_tensor_dict:
    out_tensor_dict[
        fields.InputDataFields.
        multiclass_scores] = _multiclass_scores_or_one_hot_labels(
            out_tensor_dict[fields.InputDataFields.multiclass_scores],
            out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
            out_tensor_dict[fields.InputDataFields.groundtruth_classes],
            num_classes)

  # Drop groundtruth rows with NaN box coordinates and rows whose class is
  # not recognized, before any augmentation sees them.
  if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
    out_tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates(
        out_tensor_dict)
    out_tensor_dict = util_ops.filter_unrecognized_classes(out_tensor_dict)

  if retain_original_image:
    out_tensor_dict[fields.InputDataFields.original_image] = tf.cast(
        image_resizer_fn(out_tensor_dict[fields.InputDataFields.image],
                         None)[0], tf.uint8)

  if fields.InputDataFields.image_additional_channels in out_tensor_dict:
    channels = out_tensor_dict[
        fields.InputDataFields.image_additional_channels]
    out_tensor_dict[fields.InputDataFields.image] = tf.concat(
        [out_tensor_dict[fields.InputDataFields.image], channels], axis=2)
    if retain_original_image_additional_channels:
      out_tensor_dict[
          fields.InputDataFields.image_additional_channels] = tf.cast(
              image_resizer_fn(channels, None)[0], tf.uint8)

  # Apply data augmentation ops.
  if data_augmentation_fn is not None:
    out_tensor_dict = data_augmentation_fn(out_tensor_dict)

  # Apply model preprocessing ops and resize instance masks.
  image = out_tensor_dict[fields.InputDataFields.image]
  preprocessed_resized_image, true_image_shape = model_preprocess_fn(
      tf.expand_dims(tf.cast(image, dtype=tf.float32), axis=0))

  preprocessed_shape = tf.shape(preprocessed_resized_image)
  new_height, new_width = preprocessed_shape[1], preprocessed_shape[2]

  # Normalized box covering the valid (unpadded) region of the preprocessed
  # image, used to realign boxes/keypoints after resizing.
  # NOTE: tf.cast(..., tf.float32) is used instead of the deprecated
  # tf.to_float (removed in TF 2.x); they are equivalent.
  im_box = tf.stack([
      0.0, 0.0,
      tf.cast(new_height, tf.float32) / tf.cast(
          true_image_shape[0, 0], tf.float32),
      tf.cast(new_width, tf.float32) / tf.cast(
          true_image_shape[0, 1], tf.float32)
  ])

  if fields.InputDataFields.groundtruth_boxes in tensor_dict:
    bboxes = out_tensor_dict[fields.InputDataFields.groundtruth_boxes]
    boxlist = box_list.BoxList(bboxes)
    realigned_bboxes = box_list_ops.change_coordinate_frame(boxlist, im_box)
    out_tensor_dict[
        fields.InputDataFields.groundtruth_boxes] = realigned_bboxes.get()

  if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
    keypoints = out_tensor_dict[fields.InputDataFields.groundtruth_keypoints]
    realigned_keypoints = keypoint_ops.change_coordinate_frame(keypoints,
                                                               im_box)
    out_tensor_dict[
        fields.InputDataFields.groundtruth_keypoints] = realigned_keypoints
    flds_gt_kpt = fields.InputDataFields.groundtruth_keypoints
    flds_gt_kpt_vis = fields.InputDataFields.groundtruth_keypoint_visibilities
    flds_gt_kpt_weights = fields.InputDataFields.groundtruth_keypoint_weights
    # Default: every keypoint is treated as visible when no visibility
    # tensor is provided.
    if flds_gt_kpt_vis not in out_tensor_dict:
      out_tensor_dict[flds_gt_kpt_vis] = tf.ones_like(
          out_tensor_dict[flds_gt_kpt][:, :, 0], dtype=tf.bool)
    out_tensor_dict[flds_gt_kpt_weights] = (
        keypoint_ops.keypoint_weights_from_visibilities(
            out_tensor_dict[flds_gt_kpt_vis], keypoint_type_weight))

  if use_bfloat16:
    preprocessed_resized_image = tf.cast(preprocessed_resized_image,
                                         tf.bfloat16)
  out_tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      preprocessed_resized_image, axis=0)
  out_tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
      true_image_shape, axis=0)
  if fields.InputDataFields.groundtruth_instance_masks in out_tensor_dict:
    masks = out_tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
    # Masks are resized against the pre-preprocessing image so their frame
    # matches the realigned boxes.
    _, resized_masks, _ = image_resizer_fn(image, masks)
    if use_bfloat16:
      resized_masks = tf.cast(resized_masks, tf.bfloat16)
    out_tensor_dict[
        fields.InputDataFields.groundtruth_instance_masks] = resized_masks

  # Transform groundtruth classes (1-indexed) to zero-indexed one-hot
  # encodings, unless multiclass scores are used as soft targets.
  label_offset = 1
  zero_indexed_groundtruth_classes = out_tensor_dict[
      fields.InputDataFields.groundtruth_classes] - label_offset
  if use_multiclass_scores:
    out_tensor_dict[
        fields.InputDataFields.groundtruth_classes] = out_tensor_dict[
            fields.InputDataFields.multiclass_scores]
  else:
    out_tensor_dict[
        fields.InputDataFields.groundtruth_classes] = tf.one_hot(
            zero_indexed_groundtruth_classes, num_classes)
  out_tensor_dict.pop(fields.InputDataFields.multiclass_scores, None)

  if fields.InputDataFields.groundtruth_confidences in out_tensor_dict:
    groundtruth_confidences = out_tensor_dict[
        fields.InputDataFields.groundtruth_confidences]
    # Map the confidences to the one-hot encoding of classes
    out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        tf.reshape(groundtruth_confidences, [-1, 1]) *
        out_tensor_dict[fields.InputDataFields.groundtruth_classes])
  else:
    groundtruth_confidences = tf.ones_like(
        zero_indexed_groundtruth_classes, dtype=tf.float32)
    out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        out_tensor_dict[fields.InputDataFields.groundtruth_classes])

  if merge_multiple_boxes:
    merged_boxes, merged_classes, merged_confidences, _ = (
        util_ops.merge_boxes_with_multiple_labels(
            out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
            zero_indexed_groundtruth_classes,
            groundtruth_confidences,
            num_classes))
    merged_classes = tf.cast(merged_classes, tf.float32)
    out_tensor_dict[
        fields.InputDataFields.groundtruth_boxes] = merged_boxes
    out_tensor_dict[
        fields.InputDataFields.groundtruth_classes] = merged_classes
    out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        merged_confidences)
  if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
    out_tensor_dict[
        fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
            out_tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]

  return out_tensor_dict
def transform_input_data(tensor_dict,
                         model_preprocess_fn,
                         image_resizer_fn,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False,
                         use_multiclass_scores=False,
                         use_bfloat16=False):
  """Applies all input-data transformations to `tensor_dict`.

  Transformations are applied in this order, as shown by the body below:
  1. NaN-box and unrecognized-class groundtruth filtering.
  2. Additional image channels (if present) concatenated onto the image.
  3. data_augmentation_fn (optional) applied to the whole tensor_dict.
  4. model_preprocess_fn applied to the (float-cast, batched) image;
     instance masks resized via image_resizer_fn.
  5. Classes converted to one-hot encodings (or replaced by multiclass
     scores when use_multiclass_scores is True).
  6. merge_multiple_boxes (optional): identical boxes merged into a single
     box with a k-hot class label.

  Args:
    tensor_dict: dictionary of input tensors keyed by fields.InputDataFields.
    model_preprocess_fn: model's preprocess function; called here with a 4-D
      float tensor and expected to return (preprocessed_image,
      true_image_shape).
    image_resizer_fn: resizer function; called here with (image, masks) and
      expected to return a 3-tuple whose second element is the resized masks.
    num_classes: number of classes to one-hot (or k-hot) encode labels with.
    data_augmentation_fn: (optional) augmentation function applied to the
      tensor dict.
    merge_multiple_boxes: (optional) whether to merge groundtruth boxes that
      are exactly the same.
    retain_original_image: (optional) whether to keep a resized uint8 copy of
      the original image in the output.
    use_multiclass_scores: whether to use multiclass scores as class targets
      instead of one-hot encoded groundtruth classes.
    use_bfloat16: (optional) whether to cast image/mask tensors to bfloat16.

  Returns:
    A dictionary keyed by fields.InputDataFields with the transformed tensors.
  """
  # Work on a shallow copy so the caller's dict is not mutated in place
  # (though the tensors themselves are shared).
  out_tensor_dict = tensor_dict.copy()
  if fields.InputDataFields.multiclass_scores in out_tensor_dict:
    out_tensor_dict[
        fields.InputDataFields.
        multiclass_scores] = _multiclass_scores_or_one_hot_labels(
            out_tensor_dict[fields.InputDataFields.multiclass_scores],
            out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
            out_tensor_dict[fields.InputDataFields.groundtruth_classes],
            num_classes)

  # Filter out invalid groundtruth before augmentation sees it.
  if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
    out_tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates(
        out_tensor_dict)
    out_tensor_dict = util_ops.filter_unrecognized_classes(out_tensor_dict)

  if retain_original_image:
    out_tensor_dict[fields.InputDataFields.original_image] = tf.cast(
        image_resizer_fn(out_tensor_dict[fields.InputDataFields.image],
                         None)[0], tf.uint8)

  # Merge any additional channels into the image along the channel axis.
  if fields.InputDataFields.image_additional_channels in out_tensor_dict:
    channels = out_tensor_dict[
        fields.InputDataFields.image_additional_channels]
    out_tensor_dict[fields.InputDataFields.image] = tf.concat(
        [out_tensor_dict[fields.InputDataFields.image], channels], axis=2)

  # Apply data augmentation ops.
  if data_augmentation_fn is not None:
    out_tensor_dict = data_augmentation_fn(out_tensor_dict)

  # Apply model preprocessing ops and resize instance masks.
  image = out_tensor_dict[fields.InputDataFields.image]
  preprocessed_resized_image, true_image_shape = model_preprocess_fn(
      tf.expand_dims(tf.cast(image, dtype=tf.float32), axis=0))
  if use_bfloat16:
    preprocessed_resized_image = tf.cast(preprocessed_resized_image,
                                         tf.bfloat16)
  # Drop the batch dimension added for model_preprocess_fn.
  out_tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      preprocessed_resized_image, axis=0)
  out_tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
      true_image_shape, axis=0)
  if fields.InputDataFields.groundtruth_instance_masks in out_tensor_dict:
    masks = out_tensor_dict[
        fields.InputDataFields.groundtruth_instance_masks]
    # Masks are resized against the pre-preprocessing image tensor.
    _, resized_masks, _ = image_resizer_fn(image, masks)
    if use_bfloat16:
      resized_masks = tf.cast(resized_masks, tf.bfloat16)
    out_tensor_dict[
        fields.InputDataFields.groundtruth_instance_masks] = resized_masks

  # Classes arrive 1-indexed; shift to zero-indexed before one-hot encoding.
  # This must happen before groundtruth_classes is overwritten below.
  label_offset = 1
  zero_indexed_groundtruth_classes = out_tensor_dict[
      fields.InputDataFields.groundtruth_classes] - label_offset
  if use_multiclass_scores:
    out_tensor_dict[
        fields.InputDataFields.groundtruth_classes] = out_tensor_dict[
            fields.InputDataFields.multiclass_scores]
  else:
    out_tensor_dict[
        fields.InputDataFields.groundtruth_classes] = tf.one_hot(
            zero_indexed_groundtruth_classes, num_classes)
  # multiclass_scores is consumed above (if used); drop it from the output.
  out_tensor_dict.pop(fields.InputDataFields.multiclass_scores, None)

  if fields.InputDataFields.groundtruth_confidences in out_tensor_dict:
    groundtruth_confidences = out_tensor_dict[
        fields.InputDataFields.groundtruth_confidences]
    # Map the confidences to the one-hot encoding of classes
    out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        tf.reshape(groundtruth_confidences, [-1, 1]) *
        out_tensor_dict[fields.InputDataFields.groundtruth_classes])
  else:
    # No confidences supplied: default to 1.0 per box, and mirror the
    # (one-hot or score) class tensor as the confidences output.
    groundtruth_confidences = tf.ones_like(
        zero_indexed_groundtruth_classes, dtype=tf.float32)
    out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        out_tensor_dict[fields.InputDataFields.groundtruth_classes])

  if merge_multiple_boxes:
    merged_boxes, merged_classes, merged_confidences, _ = (
        util_ops.merge_boxes_with_multiple_labels(
            out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
            zero_indexed_groundtruth_classes,
            groundtruth_confidences,
            num_classes))
    merged_classes = tf.cast(merged_classes, tf.float32)
    out_tensor_dict[
        fields.InputDataFields.groundtruth_boxes] = merged_boxes
    out_tensor_dict[
        fields.InputDataFields.groundtruth_classes] = merged_classes
    out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        merged_confidences)
  # Record the (possibly merged) box count for padding/batching downstream.
  if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
    out_tensor_dict[
        fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
            out_tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
  return out_tensor_dict