def test_to_absolute_coordinates_already_abolute(self):
    """Running the conversion on boxes outside [0, 1] must raise an op error.

    The input boxes are already expressed in pixel units for a 100x100
    image, so evaluating the converted boxes is expected to fail with an
    'assertion failed' op error.
    """
    image_batch = tf.ones((128, 100, 100, 3))
    pixel_boxes = tf.constant(
        [[0, 0, 100, 100],
         [25, 25, 75, 75]], tf.float32)
    source_boxlist = box_list.BoxList(pixel_boxes)
    height = tf.shape(image_batch)[1]
    width = tf.shape(image_batch)[2]
    converted = box_list_ops.to_absolute_coordinates(
        source_boxlist, height, width)
    converted_boxes = converted.get()
    with self.test_session() as sess:
        # The failure only surfaces when the graph is actually executed.
        with self.assertRaisesOpError('assertion failed'):
            sess.run(converted_boxes)
def test_to_absolute_coordinates(self):
    """Normalized boxes are scaled by the image height and width."""
    image_batch = tf.ones((128, 100, 100, 3))
    normalized_boxes = tf.constant(
        [[0, 0, 1, 1],
         [0.25, 0.25, 0.75, 0.75]], tf.float32)
    source_boxlist = box_list.BoxList(normalized_boxes)
    height = tf.shape(image_batch)[1]
    width = tf.shape(image_batch)[2]
    converted = box_list_ops.to_absolute_coordinates(
        source_boxlist, height, width)

    # Both spatial dimensions of the image are 100.
    expected_boxes = [[0, 0, 100, 100],
                      [25, 25, 75, 75]]
    with self.test_session() as sess:
        absolute_boxes = sess.run(converted.get())
    self.assertAllClose(absolute_boxes, expected_boxes)
def test_convert_to_absolute_and_back(self):
    """Absolute conversion followed by normalization returns the input."""
    # Sorting each row orders the four random values ascending; the last
    # row is pinned to the full-image box [0, 0, 1, 1].
    coordinates = np.sort(np.random.uniform(size=(100, 4)))
    coordinates[99, :] = [0, 0, 1, 1]

    image_batch = tf.ones((128, 202, 202, 3))
    round_trip = box_list.BoxList(tf.constant(coordinates, tf.float32))
    round_trip = box_list_ops.to_absolute_coordinates(
        round_trip, tf.shape(image_batch)[1], tf.shape(image_batch)[2])
    round_trip = box_list_ops.to_normalized_coordinates(
        round_trip, tf.shape(image_batch)[1], tf.shape(image_batch)[2])

    with self.test_session() as sess:
        out = sess.run(round_trip.get())
    self.assertAllClose(out, coordinates)
def result_dict_for_single_example(image,
                                   key,
                                   detections,
                                   groundtruth=None,
                                   class_agnostic=False,
                                   scale_to_absolute=False):
    """Merges all detection and groundtruth information for a single example.

    Note that evaluation tools require classes that are 1-indexed, and so this
    function performs the offset. If `class_agnostic` is True, all output
    classes have label 1.

    The `groundtruth` dictionary passed in is never modified; every derived
    value (uint8 masks, absolute boxes, class-agnostic labels) is written to
    the returned dictionary only.

    Args:
      image: A single 4D image tensor of shape [1, H, W, C].
      key: A single string tensor identifying the image.
      detections: A dictionary of detections, returned from
        DetectionModel.postprocess(). Only the first element along the
        leading dimension of each entry is used.
      groundtruth: (Optional) Dictionary of groundtruth items, with fields:
        'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
          normalized coordinates.
        'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
        'groundtruth_area': [num_boxes] float32 tensor of bbox area.
          (Optional)
        'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
        'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
        'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
        'groundtruth_instance_masks': 3D int64 tensor of instance masks
          (Optional).
      class_agnostic: Boolean indicating whether the detections are
        class-agnostic (i.e. binary). Default False.
      scale_to_absolute: Boolean indicating whether boxes and keypoints should
        be scaled to absolute coordinates. Note that for IoU based
        evaluations, it does not matter whether boxes are expressed in
        absolute or relative coordinates. Default False.

    Returns:
      A dictionary with:
      'original_image': The `image` tensor, passed through unchanged.
      'key': A string tensor with image identifier.
      'detection_boxes': [max_detections, 4] float32 tensor of boxes, in
        normalized or absolute coordinates, depending on the value of
        `scale_to_absolute`.
      'detection_scores': [max_detections] float32 tensor of scores.
      'detection_classes': [max_detections] int64 tensor of 1-indexed
        classes.
      'detection_masks': [num_detections, H, W] uint8 tensor of binarized
        masks (threshold 0.5), reframed to full image masks. Only present
        when `detections` contains masks. (Optional)
      'detection_keypoints': Keypoints of the first example, scaled by the
        image height and width when `scale_to_absolute` is True. Only present
        when `detections` contains keypoints. (Optional)
      'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
        normalized or absolute coordinates, depending on the value of
        `scale_to_absolute`. (Optional)
      'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
        (Optional)
      'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
      'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
      'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
      'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
      'groundtruth_instance_masks': 3D uint8 tensor of instance masks
        (Optional).
    """
    label_id_offset = 1  # Applying label id offset (b/63711816)

    input_data_fields = fields.InputDataFields()
    output_dict = {
        input_data_fields.original_image: image,
        input_data_fields.key: key,
    }

    detection_fields = fields.DetectionResultFields
    detection_boxes = detections[detection_fields.detection_boxes][0]
    output_dict[detection_fields.detection_boxes] = detection_boxes
    image_shape = tf.shape(image)
    if scale_to_absolute:
        absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
            box_list.BoxList(detection_boxes), image_shape[1], image_shape[2])
        output_dict[detection_fields.detection_boxes] = (
            absolute_detection_boxlist.get())

    detection_scores = detections[detection_fields.detection_scores][0]
    output_dict[detection_fields.detection_scores] = detection_scores

    if class_agnostic:
        detection_classes = tf.ones_like(detection_scores, dtype=tf.int64)
    else:
        detection_classes = (
            tf.to_int64(detections[detection_fields.detection_classes][0]) +
            label_id_offset)
    output_dict[detection_fields.detection_classes] = detection_classes

    if detection_fields.detection_masks in detections:
        detection_masks = detections[detection_fields.detection_masks][0]
        # TODO: This should be done in model's postprocess function ideally.
        num_detections = tf.to_int32(
            detections[detection_fields.num_detections][0])
        # Masks are reframed with the un-scaled `detection_boxes`, trimmed to
        # the number of real detections, regardless of `scale_to_absolute`.
        detection_boxes = tf.slice(
            detection_boxes, begin=[0, 0], size=[num_detections, -1])
        detection_masks = tf.slice(
            detection_masks, begin=[0, 0, 0], size=[num_detections, -1, -1])
        detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image_shape[1], image_shape[2])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        output_dict[detection_fields.detection_masks] = (
            detection_masks_reframed)

    if detection_fields.detection_keypoints in detections:
        detection_keypoints = detections[
            detection_fields.detection_keypoints][0]
        output_dict[detection_fields.detection_keypoints] = detection_keypoints
        if scale_to_absolute:
            absolute_detection_keypoints = keypoint_ops.scale(
                detection_keypoints, image_shape[1], image_shape[2])
            output_dict[detection_fields.detection_keypoints] = (
                absolute_detection_keypoints)

    if groundtruth:
        output_dict.update(groundtruth)
        masks_key = input_data_fields.groundtruth_instance_masks
        if masks_key in groundtruth:
            # Store the uint8 cast in the result only, so the caller's
            # `groundtruth` dictionary is left untouched.
            output_dict[masks_key] = tf.cast(groundtruth[masks_key], tf.uint8)
        if scale_to_absolute:
            groundtruth_boxes = groundtruth[
                input_data_fields.groundtruth_boxes]
            absolute_gt_boxlist = box_list_ops.to_absolute_coordinates(
                box_list.BoxList(groundtruth_boxes),
                image_shape[1], image_shape[2])
            output_dict[input_data_fields.groundtruth_boxes] = (
                absolute_gt_boxlist.get())
        # For class-agnostic models, groundtruth classes all become 1.
        if class_agnostic:
            groundtruth_classes = groundtruth[
                input_data_fields.groundtruth_classes]
            groundtruth_classes = tf.ones_like(
                groundtruth_classes, dtype=tf.int64)
            output_dict[input_data_fields.groundtruth_classes] = (
                groundtruth_classes)

    return output_dict
def _to_absolute_coordinates(normalized_boxes):
    """Converts a box tensor to absolute coordinates without range checks.

    `image_shape` is not a parameter: it is read from the surrounding scope,
    with index 1 passed as the height and index 2 as the width.

    Args:
      normalized_boxes: box tensor to wrap in a `BoxList` and convert.

    Returns:
      The box tensor of the converted `BoxList`.
    """
    boxlist = box_list.BoxList(normalized_boxes)
    absolute_boxlist = box_list_ops.to_absolute_coordinates(
        boxlist,
        image_shape[1],
        image_shape[2],
        check_range=False)
    return absolute_boxlist.get()
def _predict(self, prediction_dict, true_image_shapes):
    """Post-processes second-stage boxes and runs the transcription head.

    Args:
      prediction_dict: dictionary of prediction tensors. The keys
        'refined_box_encodings', 'class_predictions_with_background',
        'proposal_boxes', 'num_proposals' and 'rpn_features_to_crop' are
        read. The dictionary is updated in place with the post-processed
        detections.
      true_image_shapes: forwarded to the detection model's post-processing
        and groundtruth formatting; in training, entries [0, 0] and [0, 1]
        are used as the height and width for the absolute-coordinate
        conversion.

    Returns:
      In training mode, the result of a `tf.cond`: when no box was sampled,
      `[fail, ({}, None)]` where `fail` is a 0.0 constant wrapped in a
      `tf.Print`; otherwise
      `[self.loss(transcriptions_dict), (transcriptions_dict, eval_metric_ops)]`.
      Outside training, `[0.0 constant, self._predict_lstm(...)]`.
    """
    # Postprocess FasterRCNN stage 2
    detection_model = self.detection_model
    detections_dict = detection_model._postprocess_box_classifier(
        prediction_dict['refined_box_encodings'],
        prediction_dict['class_predictions_with_background'],
        prediction_dict['proposal_boxes'],
        prediction_dict['num_proposals'],
        true_image_shapes)
    prediction_dict.update(detections_dict)
    # Boxes and scores are taken from index 0 only; `num_detections` is kept
    # un-indexed.
    detection_boxes = detections_dict[
        fields.DetectionResultFields.detection_boxes][0]
    detection_scores = detections_dict[
        fields.DetectionResultFields.detection_scores][0]
    # Stays None outside training, where no groundtruth is matched.
    detection_transcriptions = None
    num_detections = tf.cast(
        detections_dict[fields.DetectionResultFields.num_detections], tf.int32)
    rpn_features_to_crop = prediction_dict['rpn_features_to_crop']
    # rpn_features_to_crop = tf.Print(rpn_features_to_crop, [tf.shape(rpn_features_to_crop)], message="The size of the Feature Map is", summarize=9999)
    if detection_model._is_training:
        gt_boxlists, gt_classes, _, gt_weights, gt_transcriptions = detection_model._format_groundtruth_data(
            true_image_shapes, stage='transcription')
        # gt_transcriptions = tf.Print(gt_transcriptions, [gt_transcriptions, tf.shape(gt_transcriptions)], message="CRNN received this transcr.", summarize=99999)
        detection_boxlist = box_list_ops.to_absolute_coordinates(
            box_list.BoxList(detection_boxes), true_image_shapes[0, 0],
            true_image_shapes[0, 1])
        detection_boxlist.add_field(fields.BoxListFields.scores,
                                    detection_scores)
        # Match detections against the groundtruth of the first example
        # (index 0 of each groundtruth list).
        (_, cls_weights, _, _, match) = self.target_assigner.assign(
            detection_boxlist, gt_boxlists[0], gt_classes[0],
            unmatched_class_label=tf.constant(
                [1] + detection_model._num_classes * [0], dtype=tf.float32),
            groundtruth_weights=gt_weights[0])
        # NOTE(review): the two '' arguments are presumably the values used
        # for unmatched and ignored detections - confirm against
        # `gather_based_on_match`.
        detection_transcriptions = match.gather_based_on_match(
            gt_transcriptions[0], '', '')
        # detection_transcriptions = tf.Print(detection_transcriptions, [detection_transcriptions], message="These are the matched GTs transcr.", summarize=99999)
        detection_boxlist.add_field(fields.BoxListFields.transcription,
                                    detection_transcriptions)
        positive_indicator = match.matched_column_indicator()
        # positive_indicator = tf.Print(positive_indicator, [positive_indicator], message="positive_indicator", summarize=99999)
        # A box is a sampling candidate when its index is below
        # `num_detections` and its classification weight is positive.
        valid_indicator = tf.logical_and(
            tf.range(detection_boxlist.num_boxes()) < num_detections,
            cls_weights > 0)
        sampled_indices = detection_model._second_stage_sampler.subsample(
            valid_indicator, self.batch_size, positive_indicator,
            stage="transcription")

        def compute_loss():
            # The names assigned below are local to this closure; they do not
            # rebind the outer `detection_boxes`, `detection_scores`, etc.
            sampled_boxlist = box_list_ops.boolean_mask(
                detection_boxlist, sampled_indices)
            sampled_padded_boxlist = box_list_ops.pad_or_clip_box_list(
                sampled_boxlist, num_boxes=self.batch_size)
            detection_boxes = sampled_padded_boxlist.get()
            detection_transcriptions = sampled_padded_boxlist.get_field(
                fields.BoxListFields.transcription)
            # detection_transcriptions = tf.Print(detection_transcriptions, [detection_transcriptions], message="These are the subsampled GTs transcr.", summarize=99999)
            detection_scores = sampled_padded_boxlist.get_field(
                fields.BoxListFields.scores)
            # Number of sampled boxes, capped at the batch size.
            num_detections = tf.minimum(sampled_boxlist.num_boxes(),
                                        self.batch_size)
            transcriptions_dict, eval_metric_ops = self._predict_lstm(
                rpn_features_to_crop, detection_boxes,
                detection_transcriptions, detection_scores, num_detections)
            return [
                self.loss(transcriptions_dict),
                (transcriptions_dict, eval_metric_ops)
            ]

        # Fallback branch value: a zero constant that prints a message when
        # evaluated.
        fail = tf.Print(tf.constant(0, dtype=tf.float32), [],
                        message="Not enough boxes to train CRNN")
        # The fallback branch is taken when the first dimension of
        # `sampled_indices` is 0.
        return tf.cond(tf.equal(tf.shape(sampled_indices)[0], 0),
                       lambda: [fail, ({}, None)], compute_loss)
    # return self._predict_lstm(rpn_features_to_crop, detection_boxes, detection_transcriptions,
    #                           detection_scores, num_detections)
    # Outside training the first element is a constant zero and the second is
    # the LSTM prediction on the post-processed detections.
    return [
        tf.constant(0, dtype=tf.float32),
        self._predict_lstm(rpn_features_to_crop, detection_boxes,
                           detection_transcriptions, detection_scores,
                           num_detections)
    ]
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                ignore_groundtruth=False,
                                preprocess_input_options=None):
    """Builds the dictionary of tensors evaluated for one example.

    One example is dequeued from a prefetch queue, its image is given a
    leading dimension and cast to float, optionally preprocessed, and then
    run through the model's preprocess / predict / postprocess steps.
    Detection boxes are converted to absolute coordinates and detection
    classes are shifted by a label offset of 1.

    Args:
      model: model to perform predictions with. Its `is_rbbox` attribute
        selects between the `rbox_list` and `box_list` helpers.
      create_input_dict_fn: function to create input tensor dictionaries.
      ignore_groundtruth: whether groundtruth should be ignored.
      preprocess_input_options: a list of tuples, where each tuple contains a
        preprocess input function and a dictionary containing arguments and
        their values (see preprocessor_input.py).

    Returns:
      tensor_dict: A tensor dictionary with evaluations.
    """
    image_field = fields.InputDataFields.image

    queue = prefetcher.prefetch(create_input_dict_fn(), capacity=500)
    input_dict = queue.dequeue()

    batched_image = tf.expand_dims(input_dict[image_field], 0)
    input_dict[image_field] = tf.to_float(batched_image)

    if preprocess_input_options:
        input_dict = preprocessor_input.preprocess(
            input_dict, preprocess_input_options)

    original_image = input_dict[image_field]
    detections = model.postprocess(
        model.predict(model.preprocess(original_image)))

    original_image_shape = tf.shape(original_image)
    if model.is_rbbox:
        detection_list = rbox_list.RBoxList(
            tf.squeeze(detections['detection_boxes'], axis=0))
        absolute_detection_boxlist = rbox_list_ops.to_absolute_coordinates(
            detection_list, original_image_shape[1], original_image_shape[2])
    else:
        detection_list = box_list.BoxList(
            tf.squeeze(detections['detection_boxes'], axis=0))
        absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
            detection_list, original_image_shape[1], original_image_shape[2])

    label_id_offset = 1
    tensor_dict = {}
    tensor_dict['original_image'] = original_image
    tensor_dict['image_id'] = input_dict[fields.InputDataFields.source_id]
    tensor_dict['filename'] = input_dict[fields.InputDataFields.filename]
    tensor_dict['sensor'] = input_dict[fields.InputDataFields.sensor]
    tensor_dict['detection_boxes'] = absolute_detection_boxlist.get()
    tensor_dict['detection_scores'] = tf.squeeze(
        detections['detection_scores'], axis=0)
    tensor_dict['detection_classes'] = (
        tf.squeeze(detections['detection_classes'], axis=0) + label_id_offset)

    if 'detection_masks' in detections:
        box_masks = tf.squeeze(detections['detection_masks'], axis=0)
        mask_boxes = tf.squeeze(detections['detection_boxes'], axis=0)
        # TODO: This should be done in model's postprocess function ideally.
        image_masks = ops.reframe_box_masks_to_image_masks(
            box_masks, mask_boxes,
            original_image_shape[1], original_image_shape[2])
        # Binarize at 0.5 and store as float.
        tensor_dict['detection_masks'] = tf.to_float(
            tf.greater(image_masks, 0.5))

    if ignore_groundtruth:
        return tensor_dict

    # Load groundtruth fields into tensor_dict.
    if model.is_rbbox:
        normalized_gt_boxlist = rbox_list.RBoxList(
            input_dict[fields.InputDataFields.groundtruth_rboxes])
        gt_boxlist = rbox_list_ops.scale(
            normalized_gt_boxlist,
            tf.shape(original_image)[1],
            tf.shape(original_image)[2])
    else:
        normalized_gt_boxlist = box_list.BoxList(
            input_dict[fields.InputDataFields.groundtruth_boxes])
        gt_boxlist = box_list_ops.scale(
            normalized_gt_boxlist,
            tf.shape(original_image)[1],
            tf.shape(original_image)[2])

    tensor_dict['groundtruth_boxes'] = gt_boxlist.get()
    tensor_dict['groundtruth_classes'] = input_dict[
        fields.InputDataFields.groundtruth_classes]
    tensor_dict['area'] = input_dict[fields.InputDataFields.groundtruth_area]
    tensor_dict['is_crowd'] = input_dict[
        fields.InputDataFields.groundtruth_is_crowd]
    tensor_dict['difficult'] = input_dict[
        fields.InputDataFields.groundtruth_difficult]
    # Groundtruth masks are only exported when detection masks were produced.
    if 'detection_masks' in tensor_dict:
        tensor_dict['groundtruth_instance_masks'] = input_dict[
            fields.InputDataFields.groundtruth_instance_masks]
    return tensor_dict