def _extract_prediction_tensors(model, create_input_dict_fn, image_root,
                                ignore_groundtruth=False):
    """Builds the evaluation tensors for a queue-fed detection model.

    Inputs are read from a queue built by `_create_input_queue` with a batch
    size of one, the groundtruth is handed to the model, and both the
    predicted and the groundtruth boxes are converted to absolute
    coordinates.

    Args:
      model: model to perform predictions with.
      create_input_dict_fn: function to create input tensor dictionaries.
      image_root: forwarded to `_create_input_queue` as `image_path`.
      ignore_groundtruth: whether groundtruth should be ignored. This
        implementation never reads the flag; groundtruth is always provided
        to the model and returned.

    Returns:
      tensor_dict: A tensor dictionary with evaluations. Keys are
        'original_image', 'detection_boxes', 'groundtruth_boxes',
        'detection_scores' and 'detection_classes'.
    """
    queue = _create_input_queue(
        batch_size_per_clone=1,
        create_tensor_dict_fn=create_input_dict_fn,
        detection_model=model,
        batch_queue_capacity=10,
        num_batch_queue_threads=8,
        prefetch_queue_capacity=10,
        image_path=image_root)
    inputs = _get_inputs(queue)
    (images, groundtruth_boxes, groundtruth_classes, original_image) = inputs

    model.provide_groundtruth(groundtruth_boxes, groundtruth_classes, None)
    detections = model.postprocess(model.predict(images))

    # NOTE(review): height/width are read from axes 2 and 3, so
    # original_image presumably carries an extra leading axis -- confirm.
    image_dims = tf.shape(original_image)
    detected = tf.squeeze(detections['detection_boxes'], axis=0)
    detected_boxlist = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(detected), image_dims[2], image_dims[3])
    annotated = tf.squeeze(groundtruth_boxes, axis=0)
    annotated_boxlist = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(annotated), image_dims[2], image_dims[3])

    # Detection classes are shifted by one before being reported.
    label_id_offset = 1
    tensor_dict = {'original_image': original_image}
    tensor_dict['detection_boxes'] = detected_boxlist.get()
    tensor_dict['groundtruth_boxes'] = annotated_boxlist.get()
    tensor_dict['detection_scores'] = tf.squeeze(
        detections['detection_scores'], axis=0)
    tensor_dict['detection_classes'] = (
        tf.squeeze(detections['detection_classes'], axis=0) + label_id_offset)
    return tensor_dict
def _get_instance_embeddings(self, boxes, instance_embedding):
    """Looks up the embedding vector located at the center of every box.

    Args:
      boxes: A [num_instances, 4] float tensor holding bounding boxes. The
        coordinates are in normalized input space.
      instance_embedding: A [height, width, embedding_size] float tensor
        containing the instance embeddings.

    Returns:
      A [num_instances, embedding_size] shaped float tensor containing the
      center embedding for each instance.
    """
    normalized_boxlist = box_list.BoxList(boxes)
    embedding_height = tf.shape(instance_embedding)[0]
    embedding_width = tf.shape(instance_embedding)[1]

    # Express the boxes in embedding-map pixels; the range check of the
    # conversion is switched off via check_range=False.
    pixel_boxlist = box_list_ops.to_absolute_coordinates(
        normalized_boxlist, embedding_height, embedding_width,
        check_range=False)

    y_center, x_center, _, _ = pixel_boxlist.get_center_coordinates_and_sizes()
    center_yx = tf.stack([y_center, x_center], axis=1)
    # Integer (y, x) pairs are used as indices into the embedding map.
    center_indices = tf.cast(center_yx, tf.int32)
    return tf.gather_nd(instance_embedding, center_indices)
def build_test_graph(model, model_scope, reuse=None, weights_dict=None):
    """Builds an inference graph fed by an initial image and a current image.

    Two uint8 placeholders are created: a [128, 128, 3] initial image and a
    [300, 300, 3] current image. A single all-zero groundtruth box with class
    one is provided to the model before prediction.

    Args:
      model: model exposing `preprocess`, `provide_groundtruth`, `predict`
        and `postprocess`.
      model_scope: variable scope under which `model.predict` is called.
      reuse: forwarded to the variable scope and to `model.predict`.
      weights_dict: not read by this function.

    Returns:
      A tuple (absolute detection boxes, detections['detection_scores'],
      current-image placeholder, initial-image placeholder).
    """
    zero_box = tf.constant(np.zeros((1, 4)), dtype=tf.float32)
    init_image_ph = tf.placeholder(dtype=tf.uint8, shape=[128, 128, 3])
    cur_image_ph = tf.placeholder(dtype=tf.uint8, shape=[300, 300, 3])

    gt_box = tf.reshape(zero_box, shape=[1, 1, 4])
    gt_classes = tf.ones(dtype=tf.float32, shape=[1, 1, 1])

    # The initial image gets two leading singleton axes before preprocessing.
    init_float = tf.to_float(init_image_ph)
    init_float = tf.expand_dims(init_float, axis=0)
    init_float = tf.expand_dims(init_float, axis=0)
    init_preprocessed = model.preprocess(init_float, [128, 128])

    # The current image is batched, preprocessed, then given one more
    # leading axis.
    cur_batched = tf.expand_dims(cur_image_ph, axis=0)
    cur_float = tf.to_float(cur_batched)
    cur_preprocessed = model.preprocess(cur_float)
    cur_preprocessed = tf.expand_dims(cur_preprocessed, axis=0)

    model.provide_groundtruth(gt_box, gt_classes, None)
    with tf.variable_scope(model_scope, reuse=reuse):
        prediction_dict = model.predict(
            init_preprocessed, cur_preprocessed, istraining=False,
            reuse=reuse)

    detections = model.postprocess(prediction_dict)
    # Height and width sit on axes 2 and 3 because of the extra leading axis
    # added to the preprocessed current image above.
    image_dims = tf.shape(cur_preprocessed)
    squeezed_boxes = tf.squeeze(detections['detection_boxes'], axis=0)
    absolute_boxlist = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(squeezed_boxes), image_dims[2], image_dims[3])

    return (absolute_boxlist.get(), detections['detection_scores'],
            cur_image_ph, init_image_ph)
def _absolute_boxes(normalized_boxes):
    """Converts a stack of normalized box sets to absolute coordinates.

    `normalized_boxes` is split into `k_shot` pieces along its first axis;
    each piece is squeezed, scaled by the height and width taken from the
    enclosing `original_image_shape`, and the results are stacked again.
    """
    absolute_boxlists = []
    for piece in tf.split(normalized_boxes, k_shot):
        piece_boxlist = box_list.BoxList(tf.squeeze(piece, axis=0))
        absolute_boxlists.append(
            box_list_ops.to_absolute_coordinates(
                piece_boxlist, original_image_shape[1],
                original_image_shape[2]))
    return tf.stack([boxlist.get() for boxlist in absolute_boxlists])
def graph_fn():
    """Converts boxes that are already absolute, using a 100x100 image."""
    boxes = tf.constant([[0, 0, 100, 100], [25, 25, 75, 75]], tf.float32)
    img = tf.ones((128, 100, 100, 3))
    source_boxlist = box_list.BoxList(boxes)
    height = tf.shape(img)[1]
    width = tf.shape(img)[2]
    converted = box_list_ops.to_absolute_coordinates(
        source_boxlist, height, width)
    return converted.get()
def graph_fn():
    """Round-trips the enclosing `coordinates` through absolute space."""
    img = tf.ones((128, 202, 202, 3))
    round_trip = box_list.BoxList(tf.constant(coordinates, tf.float32))
    # Normalized -> absolute.
    height = tf.shape(img)[1]
    width = tf.shape(img)[2]
    round_trip = box_list_ops.to_absolute_coordinates(
        round_trip, height, width)
    # Absolute -> normalized again.
    height = tf.shape(img)[1]
    width = tf.shape(img)[2]
    round_trip = box_list_ops.to_normalized_coordinates(
        round_trip, height, width)
    return round_trip.get()
def graph_fn():
    """Converts boxes reaching 1.2 with a maximum normalized value of 1.1."""
    boxes = tf.constant(
        [[0, 0, 1.2, 1.2], [0.25, 0.25, 0.75, 0.75]], tf.float32)
    img = tf.ones((128, 100, 100, 3))
    source_boxlist = box_list.BoxList(boxes)
    height = tf.shape(img)[1]
    width = tf.shape(img)[2]
    converted = box_list_ops.to_absolute_coordinates(
        source_boxlist, height, width, maximum_normalized_coordinate=1.1)
    return converted.get()
def test_to_absolute_coordinates_already_abolute(self):
    """Expects 'assertion failed' when the boxes are already absolute."""
    boxes = tf.constant([[0, 0, 100, 100], [25, 25, 75, 75]], tf.float32)
    img = tf.ones((128, 100, 100, 3))
    source_boxlist = box_list.BoxList(boxes)
    height = tf.shape(img)[1]
    width = tf.shape(img)[2]
    converted = box_list_ops.to_absolute_coordinates(
        source_boxlist, height, width)
    # The failure only surfaces when the op is actually run.
    with self.test_session() as sess:
        with self.assertRaisesOpError('assertion failed'):
            result_tensor = converted.get()
            sess.run(result_tensor)
def test_to_absolute_coordinates_already_abolute(self):
    """Running the conversion on absolute boxes must raise an op error."""
    absolute_input = tf.constant(
        [[0, 0, 100, 100], [25, 25, 75, 75]], tf.float32)
    img = tf.ones((128, 100, 100, 3))
    input_boxlist = box_list.BoxList(absolute_input)
    img_height = tf.shape(img)[1]
    img_width = tf.shape(img)[2]
    output_boxlist = box_list_ops.to_absolute_coordinates(
        input_boxlist, img_height, img_width)
    with self.test_session() as sess:
        # The error message is matched against 'assertion failed'.
        with self.assertRaisesOpError('assertion failed'):
            sess.run(output_boxlist.get())
def test_to_absolute_coordinates_maximum_coordinate_check(self):
    """Expects 'assertion failed' for a 1.2 coordinate with a 1.1 maximum."""
    boxes = tf.constant(
        [[0, 0, 1.2, 1.2], [0.25, 0.25, 0.75, 0.75]], tf.float32)
    img = tf.ones((128, 100, 100, 3))
    source_boxlist = box_list.BoxList(boxes)
    height = tf.shape(img)[1]
    width = tf.shape(img)[2]
    converted = box_list_ops.to_absolute_coordinates(
        source_boxlist, height, width, maximum_normalized_coordinate=1.1)
    with self.test_session() as sess:
        with self.assertRaisesOpError('assertion failed'):
            result_tensor = converted.get()
            sess.run(result_tensor)
def test_to_absolute_coordinates(self):
    """Normalized boxes on a 100x100 image scale to pixel coordinates."""
    normalized = tf.constant(
        [[0, 0, 1, 1], [0.25, 0.25, 0.75, 0.75]], tf.float32)
    img = tf.ones((128, 100, 100, 3))
    source_boxlist = box_list.BoxList(normalized)
    height = tf.shape(img)[1]
    width = tf.shape(img)[2]
    converted = box_list_ops.to_absolute_coordinates(
        source_boxlist, height, width)
    want = [[0, 0, 100, 100], [25, 25, 75, 75]]
    with self.test_session() as sess:
        got = sess.run(converted.get())
        self.assertAllClose(got, want)
def test_to_absolute_coordinates(self):
    """Checks the absolute boxes produced for a 100x100 image."""
    unit_boxes = tf.constant(
        [[0, 0, 1, 1], [0.25, 0.25, 0.75, 0.75]], tf.float32)
    img = tf.ones((128, 100, 100, 3))
    input_boxlist = box_list.BoxList(unit_boxes)
    img_height = tf.shape(img)[1]
    img_width = tf.shape(img)[2]
    output_boxlist = box_list_ops.to_absolute_coordinates(
        input_boxlist, img_height, img_width)
    expected_boxes = [[0, 0, 100, 100], [25, 25, 75, 75]]
    with self.test_session() as sess:
        actual_boxes = sess.run(output_boxlist.get())
        self.assertAllClose(actual_boxes, expected_boxes)
def test_to_absolute_coordinates_maximum_coordinate_check(self):
    """A box reaching 1.2 must fail when the allowed maximum is 1.1."""
    oversized = tf.constant(
        [[0, 0, 1.2, 1.2], [0.25, 0.25, 0.75, 0.75]], tf.float32)
    img = tf.ones((128, 100, 100, 3))
    input_boxlist = box_list.BoxList(oversized)
    img_height = tf.shape(img)[1]
    img_width = tf.shape(img)[2]
    output_boxlist = box_list_ops.to_absolute_coordinates(
        input_boxlist, img_height, img_width,
        maximum_normalized_coordinate=1.1)
    with self.test_session() as sess:
        # The error message is matched against 'assertion failed'.
        with self.assertRaisesOpError('assertion failed'):
            sess.run(output_boxlist.get())
def transform_boxes(elems):
    """Maps one image's boxes into absolute feature-map coordinates.

    Args:
      elems: a pair (boxes_per_image, true_image_shape).

    Returns:
      The transformed box tensor.
    """
    image_boxes, true_shape = elems
    boxlist = box_list.BoxList(image_boxes)

    # First transform boxes from image space to resized image space since
    # the resized images may contain padding.
    y_ratio = true_shape[0] / resized_image_height
    x_ratio = true_shape[1] / resized_image_width
    boxlist = box_list_ops.scale(boxlist, y_ratio, x_ratio)

    # Then transform boxes from resized image space (normalized) to the
    # feature map space (absolute).
    boxlist = box_list_ops.to_absolute_coordinates(
        boxlist, height, width, check_range=False)
    return boxlist.get()
def test_convert_to_absolute_and_back(self):
    """Normalized -> absolute -> normalized must reproduce the input."""
    # Sorting each row gives ymin <= xmin <= ymax <= xmax; the last row is
    # pinned to the full unit box.
    expected = np.random.uniform(size=(100, 4))
    expected = np.sort(expected)
    expected[99, :] = [0, 0, 1, 1]

    img = tf.ones((128, 202, 202, 3))
    round_trip = box_list.BoxList(tf.constant(expected, tf.float32))
    height = tf.shape(img)[1]
    width = tf.shape(img)[2]
    round_trip = box_list_ops.to_absolute_coordinates(
        round_trip, height, width)
    height = tf.shape(img)[1]
    width = tf.shape(img)[2]
    round_trip = box_list_ops.to_normalized_coordinates(
        round_trip, height, width)

    with self.test_session() as sess:
        actual = sess.run(round_trip.get())
        self.assertAllClose(actual, expected)
def test_convert_to_absolute_and_back(self):
    """Converting to absolute coordinates and back is the identity."""
    reference = np.random.uniform(size=(100, 4))
    # Row-wise sort, then overwrite the final row with the unit box.
    reference = np.sort(reference)
    reference[99, :] = [0, 0, 1, 1]

    img = tf.ones((128, 202, 202, 3))
    reference_tensor = tf.constant(reference, tf.float32)
    boxes_under_test = box_list.BoxList(reference_tensor)
    abs_height = tf.shape(img)[1]
    abs_width = tf.shape(img)[2]
    boxes_under_test = box_list_ops.to_absolute_coordinates(
        boxes_under_test, abs_height, abs_width)
    norm_height = tf.shape(img)[1]
    norm_width = tf.shape(img)[2]
    boxes_under_test = box_list_ops.to_normalized_coordinates(
        boxes_under_test, norm_height, norm_width)

    with self.test_session() as sess:
        recovered = sess.run(boxes_under_test.get())
        self.assertAllClose(recovered, reference)
def fill_boxes(boxes, height, width):
    """Fills the area included in the box.

    Args:
      boxes: rank-2 box tensor with four coordinates per row, ordered
        (ymin, xmin, ymax, xmax); converted to absolute coordinates here.
      height: height of the output grid.
      width: width of the output grid.

    Returns:
      A float32 tensor that is 1.0 at every grid cell lying inside the
      corresponding box (bounds inclusive) and 0.0 elsewhere, with one
      [height, width] grid per box.
    """
    absolute = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(boxes), height, width)
    corners = absolute.get()
    # Two singleton axes let every coordinate broadcast against the grid.
    expanded_corners = corners[:, tf.newaxis, tf.newaxis, :]
    ymin, xmin, ymax, xmax = tf.unstack(expanded_corners, 4, axis=3)

    rows, cols = tf.meshgrid(tf.range(height), tf.range(width), indexing='ij')
    rows = tf.cast(rows, tf.float32)
    cols = tf.cast(cols, tf.float32)
    rows = rows[tf.newaxis, :, :]
    cols = cols[tf.newaxis, :, :]

    within_rows = tf.logical_and(rows >= ymin, rows <= ymax)
    within_cols = tf.logical_and(cols >= xmin, cols <= xmax)
    inside = tf.logical_and(within_rows, within_cols)
    return tf.cast(inside, tf.float32)
def _format_groundtruth_data(self, image_shape, to_absolute=True):
    """Helper function for preparing groundtruth data for target assignment.

    Groundtruth boxes are read from
    `self.groundtruth_lists(fields.BoxListFields.boxes)` and classes from
    `self.groundtruth_lists(fields.BoxListFields.classes)`. The class tensors
    are only cast to float; no background column is added.

    Args:
      image_shape: A 1-D int32 tensor of shape [4] representing the shape of
        the input image batch. Entries 1 and 2 are used as height and width.
      to_absolute: when True, each box tensor is wrapped in a BoxList and
        converted to absolute coordinates. When False, the raw box tensors
        are concatenated along axis 0 into a single tensor instead.

    Returns:
      groundtruth_boxlists: A list of BoxLists containing (absolute)
        coordinates of the groundtruth boxes, or a single concatenated box
        tensor when `to_absolute` is False.
      groundtruth_classes_with_background_list: A list of float class
        tensors, one per entry of the groundtruth class list.
    """
    box_tensors = self.groundtruth_lists(fields.BoxListFields.boxes)
    if not to_absolute:
        groundtruth_boxlists = tf.concat(box_tensors, 0)
    else:
        groundtruth_boxlists = []
        for box_tensor in box_tensors:
            groundtruth_boxlists.append(
                box_list_ops.to_absolute_coordinates(
                    box_list.BoxList(box_tensor), image_shape[1],
                    image_shape[2]))

    groundtruth_classes_with_background_list = []
    for class_tensor in self.groundtruth_lists(fields.BoxListFields.classes):
        groundtruth_classes_with_background_list.append(
            tf.to_float(class_tensor))

    return groundtruth_boxlists, groundtruth_classes_with_background_list
def _extract_prediction_tensors(model, create_input_dict_fn,
                                ignore_groundtruth=False,
                                provide_groundtruth_to_model=False,
                                calc_loss=False):
    """Restores the model in a tensorflow session.

    Args:
      model: model to perform predictions with. Its `_mtl` attribute selects
        the optional window / edgemask / refine / closeness steps.
      create_input_dict_fn: function to create input tensor dictionaries.
      ignore_groundtruth: whether groundtruth should be ignored.
      provide_groundtruth_to_model: whether to use model.provide_groundtruth().
        Forced to True when `calc_loss`, `mtl.window` or `mtl.edgemask` is
        set.
      calc_loss: whether the model losses are added to the result under
        'Loss/<name>' keys.

    Returns:
      tensor_dict: A tensor dictionary with evaluations.
    """
    mtl = model._mtl

    input_dict = create_input_dict_fn()
    prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
    input_dict = prefetch_queue.dequeue()

    if calc_loss or mtl.window or mtl.edgemask:
        provide_groundtruth_to_model = True

    # Get groundtruth information
    if provide_groundtruth_to_model:
        (_, groundtruth_boxes_list, groundtruth_ignore_list,
         groundtruth_classes_list, groundtruth_masks_list, _,
         window_boxes_list, window_classes_list, groundtruth_closeness_list,
         groundtruth_edgemask_list) = _get_inputs(
             [input_dict], model.num_classes, with_filename=False)

        # Masks are only passed on when every example has one.
        if any(mask is None for mask in groundtruth_masks_list):
            groundtruth_masks_list = None
        model.provide_groundtruth(groundtruth_boxes_list,
                                  groundtruth_classes_list,
                                  groundtruth_closeness_list,
                                  groundtruth_ignore_list,
                                  groundtruth_masks_list)
        model.provide_window(window_boxes_list, window_classes_list)
        model.provide_edgemask(groundtruth_edgemask_list)

    original_image = tf.expand_dims(
        input_dict[fields.InputDataFields.image], 0)
    preprocessed_image = model.preprocess(tf.to_float(original_image))
    prediction_dict = model.predict(preprocessed_image)

    if mtl.window:
        prediction_dict = model.predict_with_window(prediction_dict)
    if mtl.edgemask:
        prediction_dict = model.predict_edgemask(prediction_dict)
    if mtl.refine:
        prediction_dict = model.predict_with_mtl_results(prediction_dict)

    detections = model.postprocess(prediction_dict)

    original_image_shape = tf.shape(original_image)
    absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
        original_image_shape[1], original_image_shape[2])
    # Detection classes are shifted by one before being reported.
    label_id_offset = 1
    tensor_dict = {
        'original_image': original_image,
        'image_id': input_dict[fields.InputDataFields.source_id],
        'detection_boxes': absolute_detection_boxlist.get(),
        'detection_scores': tf.squeeze(detections['detection_scores'],
                                       axis=0),
        'detection_classes': (
            tf.squeeze(detections['detection_classes'], axis=0) +
            label_id_offset),
    }

    if 'detection_thresholds' in detections:
        tensor_dict['detection_thresholds'] = tf.squeeze(
            detections['detection_thresholds'], axis=0)

    if 'detection_masks' in detections:
        detection_masks = tf.squeeze(detections['detection_masks'], axis=0)
        detection_boxes = tf.squeeze(detections['detection_boxes'], axis=0)
        # TODO: This should be done in model's postprocess function ideally.
        detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes,
            original_image_shape[1], original_image_shape[2])
        # Binarize the reframed masks at 0.5.
        detection_masks_reframed = tf.to_float(
            tf.greater(detection_masks_reframed, 0.5))
        tensor_dict['detection_masks'] = detection_masks_reframed

    # load groundtruth fields into tensor_dict
    if not ignore_groundtruth:
        normalized_gt_boxlist = box_list.BoxList(
            input_dict[fields.InputDataFields.groundtruth_boxes])
        gt_boxlist = box_list_ops.scale(normalized_gt_boxlist,
                                        tf.shape(original_image)[1],
                                        tf.shape(original_image)[2])
        groundtruth_boxes = gt_boxlist.get()
        groundtruth_classes = input_dict[
            fields.InputDataFields.groundtruth_classes]
        tensor_dict['groundtruth_boxes'] = groundtruth_boxes
        tensor_dict['groundtruth_classes'] = groundtruth_classes
        tensor_dict['area'] = input_dict[
            fields.InputDataFields.groundtruth_area]
        tensor_dict['difficult'] = input_dict[
            fields.InputDataFields.groundtruth_difficult]
        if 'detection_masks' in tensor_dict:
            tensor_dict['groundtruth_instance_masks'] = input_dict[
                fields.InputDataFields.groundtruth_instance_masks]

        # Subset annotations
        if fields.InputDataFields.groundtruth_subset in input_dict:
            tensor_dict['groundtruth_subset'] = input_dict[
                fields.InputDataFields.groundtruth_subset]

    # NOTE(review): the loss and mtl sections below are placed at function
    # level (not under `if not ignore_groundtruth`) -- confirm against the
    # intended nesting.
    if calc_loss:
        losses_dict = model.loss(prediction_dict)
        # .items() works on both Python 2 and Python 3 dictionaries.
        for loss_name, loss_tensor in losses_dict.items():
            loss_tensor = tf.check_numerics(loss_tensor,
                                            '%s is inf or nan.' % loss_name,
                                            name='Loss/' + loss_name)
            tensor_dict['Loss/' + loss_name] = loss_tensor

    # mtl groundtruth
    if mtl.window:
        tensor_dict['window_classes_gt'] = input_dict[
            fields.InputDataFields.window_classes]
        tensor_dict['window_classes_dt'] = prediction_dict[
            'window_class_predictions']
    if mtl.closeness:
        tensor_dict['closeness_gt'] = input_dict[
            fields.InputDataFields.groundtruth_closeness]
        tensor_dict['closeness_dt'] = prediction_dict['closeness_predictions']
    if mtl.edgemask:
        tensor_dict['edgemask_gt'] = input_dict[
            fields.InputDataFields.groundtruth_edgemask_masks]
        tensor_dict['edgemask_dt'] = prediction_dict['edgemask_predictions']

    return tensor_dict
def _format_groundtruth_data(self, image_shape):
    """Returns the groundtruth boxes as BoxLists in absolute coordinates.

    Args:
      image_shape: shape of the image batch; entries 1 and 2 are used as
        height and width.

    Returns:
      A list with one absolute-coordinate BoxList per groundtruth box tensor.
      The conversion is done with check_range=False.
    """
    absolute_boxlists = []
    for box_tensor in self.groundtruth_lists(fields.BoxListFields.boxes):
        boxlist = box_list.BoxList(box_tensor)
        absolute_boxlists.append(
            box_list_ops.to_absolute_coordinates(
                boxlist, image_shape[1], image_shape[2], check_range=False))
    return absolute_boxlists
def _create_losses(input_queue, create_model_fn):
    """Creates loss function for a DetectionModel.

    Every loss returned by the model is registered with `tf.losses` and
    written as a scalar summary. Character and sequence accuracy summaries
    are added when the prediction contains transcriptions. Intermediate
    tensors are printed to stdout for debugging.

    Args:
      input_queue: BatchQueue object holding enqueued tensor_dicts.
      create_model_fn: A function to create the DetectionModel.
    """
    detection_model = create_model_fn()
    (original_images, filenames, groundtruth_boxes_list,
     groundtruth_classes_list, groundtruth_transcriptions_list,
     groundtruth_masks_list) = _get_inputs(input_queue,
                                           detection_model.num_classes)
    images = [detection_model.preprocess(image) for image in original_images]
    images = tf.concat(images, 0)
    # Masks are only passed on when every example has one.
    if any(mask is None for mask in groundtruth_masks_list):
        groundtruth_masks_list = None

    tf.summary.image('InputImage', images, max_outputs=99999)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('')
    print('_create_losses')
    print(original_images)
    print(images)
    print(groundtruth_boxes_list)
    print(groundtruth_classes_list)
    print(groundtruth_transcriptions_list)
    sys.stdout.flush()

    detection_model.provide_groundtruth(
        groundtruth_boxes_list,
        groundtruth_classes_list,
        groundtruth_masks_list,
        groundtruth_transcriptions_list=groundtruth_transcriptions_list)
    prediction_dict = detection_model.predict(images)

    losses_dict = detection_model.loss(prediction_dict)
    # .items() works on both Python 2 and Python 3 dictionaries.
    for name, loss_tensor in losses_dict.items():
        tf.summary.scalar(name, loss_tensor)
        tf.losses.add_loss(loss_tensor)

    print(losses_dict)
    sys.stdout.flush()

    # Metrics for sequence accuracy
    if prediction_dict['transcriptions'] is not None:
        tf.summary.scalar(
            'CharAccuracy',
            metrics.char_accuracy(
                prediction_dict['transcriptions'],
                prediction_dict['transcriptions_groundtruth']))
        tf.summary.scalar(
            'SequenceAccuracy',
            metrics.sequence_accuracy(
                prediction_dict['transcriptions'],
                prediction_dict['transcriptions_groundtruth']))

    # NOTE(review): this return is taken to be unconditional, which leaves
    # everything below unreachable -- confirm the intended nesting.
    return

    # All the rest is for debugging and testing during training purpose.

    # Metrics for detection
    detections = detection_model.postprocess(prediction_dict)

    original_images = original_images[0]
    filenames = filenames[0]

    original_image_shape = tf.shape(original_images)
    absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
        original_image_shape[1], original_image_shape[2])
    label_id_offset = 1

    det_boxes = absolute_detection_boxlist.get()
    det_scores = tf.squeeze(detections['detection_scores'], axis=0)
    det_classes = tf.ones_like(det_scores)
    det_transcriptions = tf.squeeze(detections['detection_transcriptions'],
                                    axis=0)

    print('')
    print('Metrics printing')
    print(groundtruth_boxes_list)
    print(groundtruth_classes_list)
    print(groundtruth_transcriptions_list)

    normalized_gt_boxlist = box_list.BoxList(groundtruth_boxes_list[0])
    gt_boxlist = box_list_ops.scale(normalized_gt_boxlist,
                                    original_image_shape[1],
                                    original_image_shape[2])

    gt_boxes = gt_boxlist.get()
    gt_classes = groundtruth_classes_list[0]
    gt_transcriptions = groundtruth_transcriptions_list[0]

    print(original_images)
    print(filenames)
    print(det_boxes)
    print(det_scores)
    print(det_classes)
    print(det_transcriptions)
    print(gt_boxes)
    print(gt_classes)
    print(gt_transcriptions)
    sys.stdout.flush()

    mAP = tf.py_func(eval_wrapper,
                     [original_images, filenames, det_boxes, det_scores,
                      det_classes, det_transcriptions, gt_boxes, gt_classes,
                      gt_transcriptions, tf.train.get_global_step()],
                     tf.float64, stateful=False)
    tf.summary.scalar('mAP', mAP)
def _scale_box_to_absolute(args):
    """Converts one (boxes, height, width) triple to absolute coordinates.

    Args:
      args: a 3-element sequence (boxes, height, width).

    Returns:
      The box tensor in absolute coordinates.
    """
    normalized_boxes, image_height, image_width = args
    boxlist = box_list.BoxList(normalized_boxes)
    absolute_boxlist = box_list_ops.to_absolute_coordinates(
        boxlist, image_height, image_width)
    return absolute_boxlist.get()
def _scale_box_to_absolute(args):
    """Converts one (boxes, image_shape) pair to absolute coordinates.

    Args:
      args: a 2-element sequence (boxes, image_shape); entries 0 and 1 of
        `image_shape` are used as height and width.

    Returns:
      The box tensor in absolute coordinates.
    """
    normalized_boxes, shape = args
    boxlist = box_list.BoxList(normalized_boxes)
    absolute_boxlist = box_list_ops.to_absolute_coordinates(
        boxlist, shape[0], shape[1])
    return absolute_boxlist.get()
def _scale_box_to_absolute(args):
    """Scales the boxes in `args` by the image height and width.

    Args:
      args: a 2-element sequence (boxes, image_shape); `image_shape[0]` is
        passed as the height and `image_shape[1]` as the width.

    Returns:
      The box tensor in absolute coordinates.
    """
    box_tensor, dims = args
    wrapped = box_list.BoxList(box_tensor)
    img_height = dims[0]
    img_width = dims[1]
    scaled = box_list_ops.to_absolute_coordinates(
        wrapped, img_height, img_width)
    return scaled.get()
def _extract_prediction_tensors(model, create_input_dict_fn,
                                ignore_groundtruth=False):
    """Restores the model in a tensorflow session.

    When the input dictionary provides a next image, it is concatenated to
    the current image along the channel axis (6 channels). When both depth
    maps are available as well, 3D coordinates of both frames are appended
    (12 channels).

    Args:
      model: model to perform predictions with.
      create_input_dict_fn: function to create input tensor dictionaries.
      ignore_groundtruth: whether groundtruth should be ignored.

    Returns:
      tensor_dict: A tensor dictionary with evaluations.
    """
    input_dict = create_input_dict_fn()
    prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)  # TODO
    input_dict = prefetch_queue.dequeue()
    original_image = tf.expand_dims(
        input_dict[fields.InputDataFields.image], 0)

    next_image = input_dict.get(fields.InputDataFields.next_image)
    image_input = tf.to_float(original_image)

    # NOTE(review): the depth handling is nested under the next-image branch
    # because `depth` / `next_depth` are only defined there and the 6-channel
    # shape only holds after the concatenation -- confirm.
    if next_image is not None:
        next_image = tf.to_float(next_image)
        image_input = tf.concat(
            [image_input, tf.expand_dims(next_image, 0)], 3)
        depth = input_dict.get(fields.InputDataFields.groundtruth_depth)
        next_depth = input_dict.get(
            fields.InputDataFields.groundtruth_next_depth)
        image_input.set_shape([1, None, None, 6])
        if depth is not None and next_depth is not None:
            camera_intrinsics = input_dict[
                fields.InputDataFields.camera_intrinsics]
            coords = motion_util.get_3D_coords(tf.expand_dims(depth, 0),
                                               camera_intrinsics)
            next_coords = motion_util.get_3D_coords(
                tf.expand_dims(next_depth, 0), camera_intrinsics)
            image_input = tf.concat([image_input, coords, next_coords], 3)
            image_input.set_shape([1, None, None, 12])

    preprocessed_image = model.preprocess(image_input)
    prediction_dict = model.predict(preprocessed_image)
    detections = model.postprocess(prediction_dict)

    original_image_shape = tf.shape(original_image)
    absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
        original_image_shape[1], original_image_shape[2])
    # Detection classes are shifted by one before being reported.
    label_id_offset = 1
    tensor_dict = {
        'original_image': original_image,
        'image_id': input_dict[fields.InputDataFields.source_id],
        'detection_boxes': absolute_detection_boxlist.get(),
        'detection_scores': tf.squeeze(detections['detection_scores'],
                                       axis=0),
        'detection_classes': (
            tf.squeeze(detections['detection_classes'], axis=0) +
            label_id_offset),
    }

    if 'detection_masks' in detections:
        detection_masks = tf.squeeze(detections['detection_masks'], axis=0)
        detection_boxes = tf.squeeze(detections['detection_boxes'], axis=0)
        # TODO: This should be done in model's postprocess function ideally.
        detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes,
            original_image_shape[1], original_image_shape[2])
        # Binarize the reframed masks at 0.5.
        detection_masks_reframed = tf.to_float(
            tf.greater(detection_masks_reframed, 0.5))
        tensor_dict['detection_masks'] = detection_masks_reframed

    if 'detection_motions' in detections:
        detection_motions = tf.squeeze(detections['detection_motions'],
                                       axis=0)
        detection_motions_with_matrices = (
            motion_util.postprocess_detection_motions(detection_motions,
                                                      keep_logits=False))
        tensor_dict['detection_motions'] = detection_motions_with_matrices

    if 'camera_motion' in detections:
        camera_motion_with_matrices = tf.squeeze(
            motion_util.postprocess_camera_motion(
                detections['camera_motion']),
            axis=0)
        tensor_dict['camera_motion'] = camera_motion_with_matrices
        tensor_dict['groundtruth_camera_motion'] = input_dict[
            fields.InputDataFields.groundtruth_camera_motion]

    # load groundtruth fields into tensor_dict
    # NOTE(review): every remaining field is treated as part of this
    # groundtruth section -- confirm the intended nesting.
    if not ignore_groundtruth:
        normalized_gt_boxlist = box_list.BoxList(
            input_dict[fields.InputDataFields.groundtruth_boxes])
        gt_boxlist = box_list_ops.scale(normalized_gt_boxlist,
                                        tf.shape(original_image)[1],
                                        tf.shape(original_image)[2])
        groundtruth_boxes = gt_boxlist.get()
        groundtruth_classes = input_dict[
            fields.InputDataFields.groundtruth_classes]
        tensor_dict['groundtruth_boxes'] = groundtruth_boxes
        tensor_dict['groundtruth_classes'] = groundtruth_classes
        tensor_dict['area'] = input_dict[
            fields.InputDataFields.groundtruth_area]
        tensor_dict['is_crowd'] = input_dict[
            fields.InputDataFields.groundtruth_is_crowd]
        tensor_dict['difficult'] = input_dict[
            fields.InputDataFields.groundtruth_difficult]
        if 'detection_masks' in tensor_dict:
            tensor_dict['groundtruth_instance_masks'] = input_dict[
                fields.InputDataFields.groundtruth_instance_masks]
        if 'detection_motions' in tensor_dict:
            tensor_dict['groundtruth_camera_motion'] = input_dict[
                fields.InputDataFields.groundtruth_camera_motion]
            tensor_dict['groundtruth_instance_motions'] = input_dict[
                fields.InputDataFields.groundtruth_instance_motions]
            tensor_dict['camera_intrinsics'] = input_dict[
                fields.InputDataFields.camera_intrinsics]
        if fields.InputDataFields.groundtruth_flow in input_dict:
            tensor_dict['groundtruth_flow'] = input_dict[
                fields.InputDataFields.groundtruth_flow]
        # The groundtruth depth is stored under 'depth' unless that key is
        # already taken.
        if 'depth' not in tensor_dict:
            tensor_dict['depth'] = input_dict[
                fields.InputDataFields.groundtruth_depth]
        else:
            tensor_dict['groundtruth_depth'] = input_dict[
                fields.InputDataFields.groundtruth_depth]

    return tensor_dict
def _extract_prediction_tensors(model, create_input_dict_fn,
                                ignore_groundtruth=False):
    """Builds the tensors needed to evaluate a single prefetched example.

    Args:
      model: model to perform predictions with.
      create_input_dict_fn: function to create input tensor dictionaries.
      ignore_groundtruth: whether groundtruth should be ignored.

    Returns:
      tensor_dict: A tensor dictionary with evaluations.
    """
    input_fields = fields.InputDataFields

    example = create_input_dict_fn()
    example = prefetcher.prefetch(example, capacity=500).dequeue()

    # The single image gets a leading batch axis.
    original_image = tf.expand_dims(example[input_fields.image], 0)
    model_input = model.preprocess(tf.to_float(original_image))
    detections = model.postprocess(model.predict(model_input))

    image_dims = tf.shape(original_image)
    squeezed_boxes = tf.squeeze(detections['detection_boxes'], axis=0)
    absolute_boxlist = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(squeezed_boxes), image_dims[1], image_dims[2])

    # Detection classes are shifted by one before being reported.
    label_id_offset = 1
    tensor_dict = {'original_image': original_image}
    tensor_dict['image_id'] = example[input_fields.source_id]
    tensor_dict['detection_boxes'] = absolute_boxlist.get()
    tensor_dict['detection_scores'] = tf.squeeze(
        detections['detection_scores'], axis=0)
    tensor_dict['detection_classes'] = (
        tf.squeeze(detections['detection_classes'], axis=0) + label_id_offset)

    if 'detection_masks' in detections:
        box_masks = tf.squeeze(detections['detection_masks'], axis=0)
        mask_boxes = tf.squeeze(detections['detection_boxes'], axis=0)
        # TODO: This should be done in model's postprocess function ideally.
        image_masks = ops.reframe_box_masks_to_image_masks(
            box_masks, mask_boxes, image_dims[1], image_dims[2])
        # Binarize the reframed masks at 0.5.
        tensor_dict['detection_masks'] = tf.to_float(
            tf.greater(image_masks, 0.5))

    if ignore_groundtruth:
        return tensor_dict

    # Load groundtruth fields into tensor_dict.
    normalized_gt = box_list.BoxList(example[input_fields.groundtruth_boxes])
    scaled_gt = box_list_ops.scale(normalized_gt,
                                   tf.shape(original_image)[1],
                                   tf.shape(original_image)[2])
    gt_boxes = scaled_gt.get()
    gt_classes = example[input_fields.groundtruth_classes]
    tensor_dict['groundtruth_boxes'] = gt_boxes
    tensor_dict['groundtruth_classes'] = gt_classes
    tensor_dict['area'] = example[input_fields.groundtruth_area]
    tensor_dict['is_crowd'] = example[input_fields.groundtruth_is_crowd]
    tensor_dict['difficult'] = example[input_fields.groundtruth_difficult]
    if 'detection_masks' in tensor_dict:
        tensor_dict['groundtruth_instance_masks'] = example[
            input_fields.groundtruth_instance_masks]
    return tensor_dict
def _to_absolute_coordinates(normalized_boxes):
    """Scales `normalized_boxes` by the enclosing image height and width.

    Height and width are entries 1 and 2 of the enclosing `image_shape`; the
    conversion is done with check_range=False.
    """
    boxlist = box_list.BoxList(normalized_boxes)
    absolute_boxlist = box_list_ops.to_absolute_coordinates(
        boxlist, image_shape[1], image_shape[2], check_range=False)
    return absolute_boxlist.get()
def result_dict_for_single_example(image, key, detections, groundtruth=None,
                                   class_agnostic=False,
                                   scale_to_absolute=False):
    """Merges all detection and groundtruth information for a single example.

    Note that evaluation tools require classes that are 1-indexed, and so
    this function performs the offset. If `class_agnostic` is True, all
    output classes have label 1.

    When `groundtruth` contains instance masks, the entry is replaced in the
    caller's dictionary by its uint8 cast before being merged into the
    result.

    Args:
      image: A single 4D uint8 image tensor of shape [1, H, W, C].
      key: A single string tensor identifying the image.
      detections: A dictionary of detections, returned from
        DetectionModel.postprocess().
      groundtruth: (Optional) Dictionary of groundtruth items, with fields:
        'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
          normalized coordinates.
        'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
        'groundtruth_area': [num_boxes] float32 tensor of bbox area.
          (Optional)
        'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
        'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
        'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
        'groundtruth_instance_masks': 3D int64 tensor of instance masks
          (Optional).
      class_agnostic: Boolean indicating whether the detections are
        class-agnostic (i.e. binary). Default False.
      scale_to_absolute: Boolean indicating whether boxes and keypoints
        should be scaled to absolute coordinates. Note that for IoU based
        evaluations, it does not matter whether boxes are expressed in
        absolute or relative coordinates. Default False.

    Returns:
      A dictionary with:
      'original_image': A [1, H, W, C] uint8 image tensor.
      'key': A string tensor with image identifier.
      'detection_boxes': [max_detections, 4] float32 tensor of boxes, in
        normalized or absolute coordinates, depending on the value of
        `scale_to_absolute`.
      'detection_scores': [max_detections] float32 tensor of scores.
      'detection_classes': [max_detections] int64 tensor of 1-indexed
        classes.
      'detection_masks': [max_detections, H, W] tensor of binarized masks
        (cast to uint8), reframed to full image masks.
      'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
        normalized or absolute coordinates, depending on the value of
        `scale_to_absolute`. (Optional)
      'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
        (Optional)
      'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
      'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
      'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
      'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
      'groundtruth_instance_masks': 3D tensor of instance masks, cast to
        uint8 (Optional).
    """
    label_id_offset = 1  # Applying label id offset (b/63711816)

    input_data_fields = fields.InputDataFields
    output_dict = {
        input_data_fields.original_image: image,
        input_data_fields.key: key,
    }

    detection_fields = fields.DetectionResultFields
    boxes = detections[detection_fields.detection_boxes][0]
    image_shape = tf.shape(image)
    scores = detections[detection_fields.detection_scores][0]

    if class_agnostic:
        classes = tf.ones_like(scores, dtype=tf.int64)
    else:
        raw_classes = detections[detection_fields.detection_classes][0]
        classes = tf.to_int64(raw_classes) + label_id_offset

    # Keep only the first `num_detections` entries of every detection field.
    num_detections = tf.to_int32(
        detections[detection_fields.num_detections][0])
    boxes = tf.slice(boxes, begin=[0, 0], size=[num_detections, -1])
    classes = tf.slice(classes, begin=[0], size=[num_detections])
    scores = tf.slice(scores, begin=[0], size=[num_detections])

    if scale_to_absolute:
        absolute_boxlist = box_list_ops.to_absolute_coordinates(
            box_list.BoxList(boxes), image_shape[1], image_shape[2])
        output_dict[detection_fields.detection_boxes] = absolute_boxlist.get()
    else:
        output_dict[detection_fields.detection_boxes] = boxes
    output_dict[detection_fields.detection_classes] = classes
    output_dict[detection_fields.detection_scores] = scores

    if detection_fields.detection_masks in detections:
        box_masks = detections[detection_fields.detection_masks][0]
        # TODO(rathodv): This should be done in model's postprocess
        # function ideally.
        box_masks = tf.slice(
            box_masks, begin=[0, 0, 0], size=[num_detections, -1, -1])
        image_masks = ops.reframe_box_masks_to_image_masks(
            box_masks, boxes, image_shape[1], image_shape[2])
        # Binarize the reframed masks at 0.5.
        output_dict[detection_fields.detection_masks] = tf.cast(
            tf.greater(image_masks, 0.5), tf.uint8)

    if detection_fields.detection_keypoints in detections:
        keypoints = detections[detection_fields.detection_keypoints][0]
        if scale_to_absolute:
            keypoints = keypoint_ops.scale(
                keypoints, image_shape[1], image_shape[2])
        output_dict[detection_fields.detection_keypoints] = keypoints

    if not groundtruth:
        return output_dict

    masks_key = input_data_fields.groundtruth_instance_masks
    if masks_key in groundtruth:
        groundtruth[masks_key] = tf.cast(groundtruth[masks_key], tf.uint8)
    output_dict.update(groundtruth)

    if scale_to_absolute:
        normalized_gt_boxes = groundtruth[input_data_fields.groundtruth_boxes]
        absolute_gt_boxlist = box_list_ops.to_absolute_coordinates(
            box_list.BoxList(normalized_gt_boxes),
            image_shape[1], image_shape[2])
        output_dict[input_data_fields.groundtruth_boxes] = (
            absolute_gt_boxlist.get())

    # For class-agnostic models, groundtruth classes all become 1.
    if class_agnostic:
        gt_classes = groundtruth[input_data_fields.groundtruth_classes]
        output_dict[input_data_fields.groundtruth_classes] = tf.ones_like(
            gt_classes, dtype=tf.int64)

    return output_dict
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                ignore_groundtruth=False):
  """Builds the tensors needed to evaluate one prefetched example.

  One input dictionary is created with `create_input_dict_fn`, routed through
  a prefetch queue (capacity 500) and dequeued. Its image gets a leading
  dimension of size 1 and is run through the model's preprocess, predict and
  postprocess steps. Detection boxes are converted to absolute coordinates
  using dimensions 1 and 2 of the expanded image's shape (presumably height
  and width -- confirm against the input pipeline), and detection classes are
  shifted by a label id offset of 1.

  Args:
    model: model to perform predictions with; must provide `preprocess`,
      `predict` and `postprocess`.
    create_input_dict_fn: function to create input tensor dictionaries.
    ignore_groundtruth: whether groundtruth should be ignored. When False,
      groundtruth entries are read from the input dictionary and added to the
      result.

  Returns:
    tensor_dict: A tensor dictionary with evaluations. It always holds
      'original_image', 'image_id', 'detection_boxes', 'detection_scores' and
      'detection_classes'; 'detection_masks' is added when the model's
      detections contain masks; 'groundtruth_boxes', 'groundtruth_classes',
      'area', 'is_crowd' and 'difficult' (plus 'groundtruth_instance_masks'
      when detection masks are present) are added unless `ignore_groundtruth`
      is set.
  """
  input_fields = fields.InputDataFields

  # Pull a single example through the prefetch queue.
  queue = prefetcher.prefetch(create_input_dict_fn(), capacity=500)
  example = queue.dequeue()

  # Add a leading dimension of size 1 and run the full detection pipeline.
  original_image = tf.expand_dims(example[input_fields.image], 0)
  model_input = model.preprocess(tf.to_float(original_image))
  detections = model.postprocess(model.predict(model_input))

  image_shape = tf.shape(original_image)
  squeezed_boxes = tf.squeeze(detections['detection_boxes'], axis=0)
  detection_boxlist = box_list.BoxList(squeezed_boxes)
  absolute_boxlist = box_list_ops.to_absolute_coordinates(
      detection_boxlist, image_shape[1], image_shape[2])

  label_id_offset = 1
  result = {'original_image': original_image}
  result['image_id'] = example[input_fields.source_id]
  result['detection_boxes'] = absolute_boxlist.get()
  result['detection_scores'] = tf.squeeze(
      detections['detection_scores'], axis=0)
  result['detection_classes'] = (
      tf.squeeze(detections['detection_classes'], axis=0) + label_id_offset)

  if 'detection_masks' in detections:
    box_masks = tf.squeeze(detections['detection_masks'], axis=0)
    mask_boxes = tf.squeeze(detections['detection_boxes'], axis=0)
    # TODO: This should be done in model's postprocess function ideally.
    image_masks = ops.reframe_box_masks_to_image_masks(
        box_masks, mask_boxes, image_shape[1], image_shape[2])
    # Binarize the reframed masks at 0.5 and store them as floats.
    result['detection_masks'] = tf.to_float(tf.greater(image_masks, 0.5))

  if ignore_groundtruth:
    return result

  # Load groundtruth fields into the result.
  normalized_gt_boxlist = box_list.BoxList(
      example[input_fields.groundtruth_boxes])
  scaled_gt_boxlist = box_list_ops.scale(
      normalized_gt_boxlist,
      tf.shape(original_image)[1],
      tf.shape(original_image)[2])
  gt_boxes = scaled_gt_boxlist.get()
  gt_classes = example[input_fields.groundtruth_classes]
  result['groundtruth_boxes'] = gt_boxes
  result['groundtruth_classes'] = gt_classes

  passthrough_fields = (
      ('area', input_fields.groundtruth_area),
      ('is_crowd', input_fields.groundtruth_is_crowd),
      ('difficult', input_fields.groundtruth_difficult),
  )
  for output_key, input_key in passthrough_fields:
    result[output_key] = example[input_key]

  # Groundtruth masks are only exported alongside detection masks.
  if 'detection_masks' in result:
    result['groundtruth_instance_masks'] = example[
        input_fields.groundtruth_instance_masks]
  return result
def _to_absolute_coordinates(normalized_boxes):
  """Converts normalized boxes to absolute coordinates.

  Relies on an `image_shape` name that is not defined in this function and
  must be available from the surrounding scope; its entries 1 and 2 are used
  as the target dimensions (presumably height and width -- confirm against
  the code that defines `image_shape`).

  Args:
    normalized_boxes: box tensor to wrap in a `BoxList` and convert.

  Returns:
    The converted box tensor, obtained from the resulting `BoxList` via
    `get()`.
  """
  normalized_boxlist = box_list.BoxList(normalized_boxes)
  # check_range=False is passed so the conversion skips its range check.
  absolute_boxlist = box_list_ops.to_absolute_coordinates(
      normalized_boxlist,
      image_shape[1],
      image_shape[2],
      check_range=False)
  return absolute_boxlist.get()
def result_dict_for_single_example(image,
                                   key,
                                   detections,
                                   groundtruth=None,
                                   class_agnostic=False,
                                   scale_to_absolute=False):
  """Merges all detection and groundtruth information for a single example.

  Note that evaluation tools require classes that are 1-indexed, and so this
  function performs the offset. If `class_agnostic` is True, all output classes
  have label 1.

  The `groundtruth` dictionary passed in by the caller is never modified; all
  derived tensors are written to the returned dictionary only.

  Args:
    image: A single 4D uint8 image tensor of shape [1, H, W, C].
    key: A single string tensor identifying the image.
    detections: A dictionary of detections, returned from
      DetectionModel.postprocess(). Must contain detection boxes, detection
      scores and the number of detections, plus detection classes unless
      `class_agnostic` is True; detection masks and keypoints are optional.
    groundtruth: (Optional) Dictionary of groundtruth items, with fields:
      'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
        normalized coordinates.
      'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
      'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
      'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
      'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
      'groundtruth_instance_masks': 3D int64 tensor of instance masks
        (Optional).
    class_agnostic: Boolean indicating whether the detections are
      class-agnostic (i.e. binary). Default False.
    scale_to_absolute: Boolean indicating whether boxes and keypoints should be
      scaled to absolute coordinates. Note that for IoU based evaluations, it
      does not matter whether boxes are expressed in absolute or relative
      coordinates. Default False.

  Returns:
    A dictionary with:
    'original_image': A [1, H, W, C] uint8 image tensor.
    'key': A string tensor with image identifier.
    'detection_boxes': [num_detections, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`.
    'detection_scores': [num_detections] float32 tensor of scores.
    'detection_classes': [num_detections] int64 tensor of 1-indexed classes.
    'detection_masks': [num_detections, H, W] uint8 tensor of binarized
      masks, reframed to full image masks. (Optional)
    'detection_keypoints': keypoints taken from `detections` for this example
      (not trimmed to `num_detections`), scaled to absolute coordinates when
      `scale_to_absolute` is True. (Optional)
    'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`. (Optional)
    'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      (Optional)
    'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
    'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
    'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
    'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
    'groundtruth_instance_masks': 3D uint8 tensor of instance masks
      (Optional).
  """
  label_id_offset = 1  # Applying label id offset (b/63711816)

  input_data_fields = fields.InputDataFields
  output_dict = {
      input_data_fields.original_image: image,
      input_data_fields.key: key,
  }

  detection_fields = fields.DetectionResultFields
  # Index 0 selects the single example from the batched detection tensors.
  detection_boxes = detections[detection_fields.detection_boxes][0]
  image_shape = tf.shape(image)
  detection_scores = detections[detection_fields.detection_scores][0]

  if class_agnostic:
    detection_classes = tf.ones_like(detection_scores, dtype=tf.int64)
  else:
    detection_classes = (
        tf.cast(detections[detection_fields.detection_classes][0], tf.int64) +
        label_id_offset)

  # Keep only the first `num_detections` entries of each detection tensor.
  num_detections = tf.cast(
      detections[detection_fields.num_detections][0], tf.int32)
  detection_boxes = tf.slice(
      detection_boxes, begin=[0, 0], size=[num_detections, -1])
  detection_classes = tf.slice(
      detection_classes, begin=[0], size=[num_detections])
  detection_scores = tf.slice(
      detection_scores, begin=[0], size=[num_detections])

  if scale_to_absolute:
    absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(detection_boxes), image_shape[1], image_shape[2])
    output_dict[detection_fields.detection_boxes] = (
        absolute_detection_boxlist.get())
  else:
    output_dict[detection_fields.detection_boxes] = detection_boxes
  output_dict[detection_fields.detection_classes] = detection_classes
  output_dict[detection_fields.detection_scores] = detection_scores

  if detection_fields.detection_masks in detections:
    detection_masks = detections[detection_fields.detection_masks][0]
    # TODO(rathodv): This should be done in model's postprocess
    # function ideally.
    detection_masks = tf.slice(
        detection_masks, begin=[0, 0, 0], size=[num_detections, -1, -1])
    # Reframing uses the sliced (not the absolute-scaled) detection boxes.
    detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
        detection_masks, detection_boxes, image_shape[1], image_shape[2])
    # Binarize at 0.5 and store as uint8.
    detection_masks_reframed = tf.cast(
        tf.greater(detection_masks_reframed, 0.5), tf.uint8)
    output_dict[detection_fields.detection_masks] = detection_masks_reframed

  if detection_fields.detection_keypoints in detections:
    # NOTE: unlike boxes, classes, scores and masks, keypoints are not
    # trimmed to `num_detections` here.
    detection_keypoints = detections[detection_fields.detection_keypoints][0]
    output_dict[detection_fields.detection_keypoints] = detection_keypoints
    if scale_to_absolute:
      absolute_detection_keypoints = keypoint_ops.scale(
          detection_keypoints, image_shape[1], image_shape[2])
      output_dict[detection_fields.detection_keypoints] = (
          absolute_detection_keypoints)

  if groundtruth:
    output_dict.update(groundtruth)

    # Write the uint8 masks into the output only, so the caller's
    # `groundtruth` dictionary is left untouched.
    if input_data_fields.groundtruth_instance_masks in groundtruth:
      output_dict[input_data_fields.groundtruth_instance_masks] = tf.cast(
          groundtruth[input_data_fields.groundtruth_instance_masks], tf.uint8)

    if scale_to_absolute:
      groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes]
      absolute_gt_boxlist = box_list_ops.to_absolute_coordinates(
          box_list.BoxList(groundtruth_boxes), image_shape[1], image_shape[2])
      output_dict[input_data_fields.groundtruth_boxes] = (
          absolute_gt_boxlist.get())

    # For class-agnostic models, groundtruth classes all become 1.
    if class_agnostic:
      groundtruth_classes = groundtruth[input_data_fields.groundtruth_classes]
      groundtruth_classes = tf.ones_like(groundtruth_classes, dtype=tf.int64)
      output_dict[input_data_fields.groundtruth_classes] = groundtruth_classes

  return output_dict