Example #1
  def test_to_absolute_coordinates_already_absolute(self):
    coordinates = tf.constant([[0, 0, 100, 100],
                               [25, 25, 75, 75]], tf.float32)
    img = tf.ones((128, 100, 100, 3))
    boxlist = box_list.BoxList(coordinates)
    absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
                                                            tf.shape(img)[1],
                                                            tf.shape(img)[2])

    with self.test_session() as sess:
      with self.assertRaisesOpError('assertion failed'):
        sess.run(absolute_boxlist.get())
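What the test above exercises: `to_absolute_coordinates` asserts that its input looks normalized (coordinates roughly in [0, 1]), so boxes that are already in pixel units trip the assertion at session run time. A minimal sketch of the distinction, assuming the same `box_list` / `box_list_ops` API used throughout these examples; note that `check_range=False` (see Example #5) only skips the assertion, the boxes are still multiplied by height and width:

# Minimal sketch, assuming the box_list / box_list_ops API shown above.
normalized = tf.constant([[0.25, 0.25, 0.75, 0.75]], tf.float32)
already_absolute = tf.constant([[25, 25, 75, 75]], tf.float32)

# Fine: the input is normalized, the output is in pixel units.
ok = box_list_ops.to_absolute_coordinates(
    box_list.BoxList(normalized), height=100, width=100)

# Raises 'assertion failed' when evaluated: a max coordinate of 75 is
# far outside the normalized range, which is what the test checks.
fails = box_list_ops.to_absolute_coordinates(
    box_list.BoxList(already_absolute), height=100, width=100)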
Example #2
    def test_to_absolute_coordinates(self):
        coordinates = tf.constant([[0, 0, 1, 1], [0.25, 0.25, 0.75, 0.75]],
                                  tf.float32)
        img = tf.ones((128, 100, 100, 3))
        boxlist = box_list.BoxList(coordinates)
        absolute_boxlist = box_list_ops.to_absolute_coordinates(
            boxlist,
            tf.shape(img)[1],
            tf.shape(img)[2])
        expected_boxes = [[0, 0, 100, 100], [25, 25, 75, 75]]

        with self.test_session() as sess:
            absolute_boxes = sess.run(absolute_boxlist.get())
            self.assertAllClose(absolute_boxes, expected_boxes)
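The conversion itself is an element-wise scale: each `[ymin, xmin, ymax, xmax]` row is multiplied by `[height, width, height, width]`. A plain-NumPy sketch of the arithmetic behind `expected_boxes` (an illustration, not the library implementation):

import numpy as np

normalized = np.array([[0., 0., 1., 1.],
                       [0.25, 0.25, 0.75, 0.75]], dtype=np.float32)
height, width = 100.0, 100.0
absolute = normalized * np.array([height, width, height, width], np.float32)
# -> [[0, 0, 100, 100], [25, 25, 75, 75]], matching expected_boxes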
Example #3
  def test_convert_to_absolute_and_back(self):
    coordinates = np.random.uniform(size=(100, 4))
    coordinates = np.sort(coordinates)
    coordinates[99, :] = [0, 0, 1, 1]
    img = tf.ones((128, 202, 202, 3))

    boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
    boxlist = box_list_ops.to_absolute_coordinates(boxlist,
                                                   tf.shape(img)[1],
                                                   tf.shape(img)[2])
    boxlist = box_list_ops.to_normalized_coordinates(boxlist,
                                                     tf.shape(img)[1],
                                                     tf.shape(img)[2])

    with self.test_session() as sess:
      out = sess.run(boxlist.get())
      self.assertAllClose(out, coordinates)
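Two details make this round-trip test robust: `np.sort` along the last axis yields rows with `ymin <= ymax` and `xmin <= xmax`, and pinning row 99 to the full-image box `[0, 0, 1, 1]` fixes the coordinate maximum at exactly 1, which keeps the range checks in both conversions satisfied regardless of the random draw. The property under test reduces to scale-then-unscale being the identity up to float precision; a plain-NumPy sketch:

import numpy as np

# Scaling by [h, w, h, w] and dividing back is the identity up to
# rounding, which is what assertAllClose verifies above.
coords = np.sort(np.random.uniform(size=(100, 4)), axis=-1).astype(np.float32)
scale = np.array([202., 202., 202., 202.], dtype=np.float32)
assert np.allclose((coords * scale) / scale, coords, atol=1e-6)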
Example #4
def result_dict_for_single_example(image,
                                   key,
                                   detections,
                                   groundtruth=None,
                                   class_agnostic=False,
                                   scale_to_absolute=False):
    """Merges all detection and groundtruth information for a single example.

  Note that evaluation tools require classes that are 1-indexed, and so this
  function performs the offset. If `class_agnostic` is True, all output classes
  have label 1.

  Args:
    image: A single 4D image tensor of shape [1, H, W, C].
    key: A single string tensor identifying the image.
    detections: A dictionary of detections, returned from
      DetectionModel.postprocess().
    groundtruth: (Optional) Dictionary of groundtruth items, with fields:
      'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
        normalized coordinates.
      'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
      'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
      'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
      'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
      'groundtruth_instance_masks': 3D int64 tensor of instance masks
        (Optional).
    class_agnostic: Boolean indicating whether the detections are class-agnostic
      (i.e. binary). Default False.
    scale_to_absolute: Boolean indicating whether boxes and keypoints should be
      scaled to absolute coordinates. Note that for IoU based evaluations, it
      does not matter whether boxes are expressed in absolute or relative
      coordinates. Default False.

  Returns:
    A dictionary with:
    'original_image': A [1, H, W, C] uint8 image tensor.
    'key': A string tensor with image identifier.
    'detection_boxes': [max_detections, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`.
    'detection_scores': [max_detections] float32 tensor of scores.
    'detection_classes': [max_detections] int64 tensor of 1-indexed classes.
    'detection_masks': [max_detections, H, W] float32 tensor of binarized
      masks, reframed to full image masks.
    'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`. (Optional)
    'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      (Optional)
    'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
    'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
    'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
    'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
    'groundtruth_instance_masks': 3D int64 tensor of instance masks
      (Optional).

  """
    label_id_offset = 1  # Applying label id offset (b/63711816)

    input_data_fields = fields.InputDataFields()
    output_dict = {
        input_data_fields.original_image: image,
        input_data_fields.key: key,
    }

    detection_fields = fields.DetectionResultFields
    detection_boxes = detections[detection_fields.detection_boxes][0]
    output_dict[detection_fields.detection_boxes] = detection_boxes
    image_shape = tf.shape(image)
    if scale_to_absolute:
        absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
            box_list.BoxList(detection_boxes), image_shape[1], image_shape[2])
        output_dict[detection_fields.detection_boxes] = (
            absolute_detection_boxlist.get())
    detection_scores = detections[detection_fields.detection_scores][0]
    output_dict[detection_fields.detection_scores] = detection_scores

    if class_agnostic:
        detection_classes = tf.ones_like(detection_scores, dtype=tf.int64)
    else:
        detection_classes = (
            tf.to_int64(detections[detection_fields.detection_classes][0]) +
            label_id_offset)
    output_dict[detection_fields.detection_classes] = detection_classes

    if detection_fields.detection_masks in detections:
        detection_masks = detections[detection_fields.detection_masks][0]
        # TODO: Ideally this should be done in the model's
        # postprocess function.
        num_detections = tf.to_int32(
            detections[detection_fields.num_detections][0])
        detection_boxes = tf.slice(detection_boxes,
                                   begin=[0, 0],
                                   size=[num_detections, -1])
        detection_masks = tf.slice(detection_masks,
                                   begin=[0, 0, 0],
                                   size=[num_detections, -1, -1])
        detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image_shape[1], image_shape[2])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        output_dict[
            detection_fields.detection_masks] = detection_masks_reframed
    if detection_fields.detection_keypoints in detections:
        detection_keypoints = detections[
            detection_fields.detection_keypoints][0]
        output_dict[detection_fields.detection_keypoints] = detection_keypoints
        if scale_to_absolute:
            absolute_detection_keypoints = keypoint_ops.scale(
                detection_keypoints, image_shape[1], image_shape[2])
            output_dict[detection_fields.detection_keypoints] = (
                absolute_detection_keypoints)

    if groundtruth:
        if input_data_fields.groundtruth_instance_masks in groundtruth:
            groundtruth[
                input_data_fields.groundtruth_instance_masks] = tf.cast(
                    groundtruth[input_data_fields.groundtruth_instance_masks],
                    tf.uint8)
        output_dict.update(groundtruth)
        if scale_to_absolute:
            groundtruth_boxes = groundtruth[
                input_data_fields.groundtruth_boxes]
            absolute_gt_boxlist = box_list_ops.to_absolute_coordinates(
                box_list.BoxList(groundtruth_boxes), image_shape[1],
                image_shape[2])
            output_dict[input_data_fields.groundtruth_boxes] = (
                absolute_gt_boxlist.get())
        # For class-agnostic models, groundtruth classes all become 1.
        if class_agnostic:
            groundtruth_classes = groundtruth[
                input_data_fields.groundtruth_classes]
            groundtruth_classes = tf.ones_like(groundtruth_classes,
                                               dtype=tf.int64)
            output_dict[
                input_data_fields.groundtruth_classes] = groundtruth_classes

    return output_dict
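A hedged usage sketch for the function above; `model`, `image_tensor`, and `'image_0001'` are placeholders, and the `preprocess`/`predict`/`postprocess` chain mirrors Example #7 below rather than any particular model class:

# Hypothetical call site; `model` and `image_tensor` are placeholders.
detections = model.postprocess(model.predict(model.preprocess(image_tensor)))
eval_dict = result_dict_for_single_example(
    image=image_tensor,             # [1, H, W, C]
    key=tf.constant('image_0001'),  # any unique identifier
    detections=detections,
    class_agnostic=False,
    scale_to_absolute=True)         # boxes come back in pixel units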
Example #5
def _to_absolute_coordinates(normalized_boxes):
    return box_list_ops.to_absolute_coordinates(
        box_list.BoxList(normalized_boxes),
        image_shape[1],
        image_shape[2],
        check_range=False).get()
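Here `check_range=False` skips the normalized-range assertion that Example #1 shows failing; the caller takes responsibility for the input really being normalized. Since the helper closes over `image_shape`, a plausible surrounding use (an assumption, not shown in the source) is mapping it over a batch of per-image boxes:

# Hypothetical: batched_boxes is [batch, num_boxes, 4] in normalized
# coordinates; the helper converts each image's boxes to pixel units.
absolute_boxes = tf.map_fn(
    _to_absolute_coordinates, batched_boxes, dtype=tf.float32)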
Example #6
    def _predict(self, prediction_dict, true_image_shapes):
        # Postprocess FasterRCNN stage 2
        detection_model = self.detection_model
        detections_dict = detection_model._postprocess_box_classifier(
            prediction_dict['refined_box_encodings'],
            prediction_dict['class_predictions_with_background'],
            prediction_dict['proposal_boxes'],
            prediction_dict['num_proposals'], true_image_shapes)
        prediction_dict.update(detections_dict)
        detection_boxes = detections_dict[
            fields.DetectionResultFields.detection_boxes][0]
        detection_scores = detections_dict[
            fields.DetectionResultFields.detection_scores][0]
        detection_transcriptions = None

        num_detections = tf.cast(
            detections_dict[fields.DetectionResultFields.num_detections],
            tf.int32)
        rpn_features_to_crop = prediction_dict['rpn_features_to_crop']
        # rpn_features_to_crop = tf.Print(rpn_features_to_crop, [tf.shape(rpn_features_to_crop)], message="The size of the Feature Map is", summarize=9999)

        if detection_model._is_training:
            gt_boxlists, gt_classes, _, gt_weights, gt_transcriptions = detection_model._format_groundtruth_data(
                true_image_shapes, stage='transcription')

            # gt_transcriptions = tf.Print(gt_transcriptions, [gt_transcriptions, tf.shape(gt_transcriptions)], message="CRNN received this transcr.", summarize=99999)

            detection_boxlist = box_list_ops.to_absolute_coordinates(
                box_list.BoxList(detection_boxes), true_image_shapes[0, 0],
                true_image_shapes[0, 1])
            detection_boxlist.add_field(fields.BoxListFields.scores,
                                        detection_scores)

            (_, cls_weights, _, _, match) = self.target_assigner.assign(
                detection_boxlist,
                gt_boxlists[0],
                gt_classes[0],
                unmatched_class_label=tf.constant(
                    [1] + detection_model._num_classes * [0],
                    dtype=tf.float32),
                groundtruth_weights=gt_weights[0])

            detection_transcriptions = match.gather_based_on_match(
                gt_transcriptions[0], '', '')
            # detection_transcriptions = tf.Print(detection_transcriptions, [detection_transcriptions], message="These are the matched GTs transcr.", summarize=99999)
            detection_boxlist.add_field(fields.BoxListFields.transcription,
                                        detection_transcriptions)

            positive_indicator = match.matched_column_indicator()
            # positive_indicator = tf.Print(positive_indicator, [positive_indicator], message="positive_indicator", summarize=99999)
            valid_indicator = tf.logical_and(
                tf.range(detection_boxlist.num_boxes()) < num_detections,
                cls_weights > 0)
            sampled_indices = detection_model._second_stage_sampler.subsample(
                valid_indicator,
                self.batch_size,
                positive_indicator,
                stage="transcription")

            def compute_loss():
                sampled_boxlist = box_list_ops.boolean_mask(
                    detection_boxlist, sampled_indices)

                sampled_padded_boxlist = box_list_ops.pad_or_clip_box_list(
                    sampled_boxlist, num_boxes=self.batch_size)
                detection_boxes = sampled_padded_boxlist.get()
                detection_transcriptions = sampled_padded_boxlist.get_field(
                    fields.BoxListFields.transcription)
                # detection_transcriptions = tf.Print(detection_transcriptions, [detection_transcriptions], message="These are the subsampled GTs transcr.", summarize=99999)
                detection_scores = sampled_padded_boxlist.get_field(
                    fields.BoxListFields.scores)
                num_detections = tf.minimum(sampled_boxlist.num_boxes(),
                                            self.batch_size)
                transcriptions_dict, eval_metric_ops = self._predict_lstm(
                    rpn_features_to_crop, detection_boxes,
                    detection_transcriptions, detection_scores, num_detections)
                return [
                    self.loss(transcriptions_dict),
                    (transcriptions_dict, eval_metric_ops)
                ]

            fail = tf.Print(tf.constant(0, dtype=tf.float32), [],
                            message="Not enough boxes to train CRNN")
            return tf.cond(tf.equal(tf.shape(sampled_indices)[0], 0),
                           lambda: [fail, ({}, None)], compute_loss)

        # return self._predict_lstm(rpn_features_to_crop, detection_boxes, detection_transcriptions,
        #             detection_scores, num_detections)
        return [
            tf.constant(0, dtype=tf.float32),
            self._predict_lstm(rpn_features_to_crop, detection_boxes,
                               detection_transcriptions, detection_scores,
                               num_detections)
        ]
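The `tf.cond` near the end guards against the sampler returning no boxes: both branches must produce the same structure, so the failure branch pairs a zero loss (wrapped in `tf.Print` to make the condition visible at run time) with an empty transcriptions dict. A stripped-down sketch of the same guard pattern, with placeholder names:

# Sketch of the guard above; compute_loss_fn is assumed to return a
# scalar float32 loss matching the structure of the empty branch.
def _loss_or_zero(sampled_indices, compute_loss_fn):
    no_samples = tf.equal(tf.shape(sampled_indices)[0], 0)
    return tf.cond(no_samples,
                   lambda: tf.constant(0.0, tf.float32),
                   compute_loss_fn)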
Example #7
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                ignore_groundtruth=False,
                                preprocess_input_options=None):
    """Restores the model in a tensorflow session.

    Args:
      model: model to perform predictions with.
      create_input_dict_fn: function to create input tensor dictionaries.
      ignore_groundtruth: whether groundtruth should be ignored.
      preprocess_input_options: a list of tuples, where each tuple contains a
        preprocess input function and a dictionary containing arguments and their
        values (see preprocessor_input.py).

    Returns:
      tensor_dict: A dictionary holding detection tensors and, unless
        ignore_groundtruth is True, the corresponding groundtruth tensors.
    """
    input_dict = create_input_dict_fn()
    prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
    input_dict = prefetch_queue.dequeue()
    images = tf.expand_dims(input_dict[fields.InputDataFields.image], 0)
    float_images = tf.to_float(images)
    input_dict[fields.InputDataFields.image] = float_images

    if preprocess_input_options:
        input_dict = preprocessor_input.preprocess(input_dict, preprocess_input_options)

    original_image = input_dict[fields.InputDataFields.image]
    preprocessed_image = model.preprocess(original_image)

    prediction_dict = model.predict(preprocessed_image)

    detections = model.postprocess(prediction_dict)

    original_image_shape = tf.shape(original_image)
    if model.is_rbbox:
        absolute_detection_boxlist = rbox_list_ops.to_absolute_coordinates(
            rbox_list.RBoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
            original_image_shape[1], original_image_shape[2])
    else:
        absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
            box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
            original_image_shape[1], original_image_shape[2])
    label_id_offset = 1
    tensor_dict = {
        'original_image': original_image,
        'image_id': input_dict[fields.InputDataFields.source_id],
        'filename': input_dict[fields.InputDataFields.filename],
        'sensor': input_dict[fields.InputDataFields.sensor],
        'detection_boxes': absolute_detection_boxlist.get(),
        'detection_scores': tf.squeeze(detections['detection_scores'], axis=0),
        'detection_classes': (
            tf.squeeze(detections['detection_classes'], axis=0) +
            label_id_offset),
    }
    if 'detection_masks' in detections:
        detection_masks = tf.squeeze(detections['detection_masks'],
                                     axis=0)
        detection_boxes = tf.squeeze(detections['detection_boxes'],
                                     axis=0)
        # TODO: Ideally this should be done in the model's postprocess function.
        detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
            detection_masks,
            detection_boxes,
            original_image_shape[1], original_image_shape[2])
        detection_masks_reframed = tf.to_float(tf.greater(detection_masks_reframed,
                                                          0.5))

        tensor_dict['detection_masks'] = detection_masks_reframed
    # load groundtruth fields into tensor_dict
    if not ignore_groundtruth:
        if model.is_rbbox:
            normalized_gt_boxlist = rbox_list.RBoxList(input_dict[fields.InputDataFields.groundtruth_rboxes])
            gt_boxlist = rbox_list_ops.scale(normalized_gt_boxlist,
                                             tf.shape(original_image)[1],
                                             tf.shape(original_image)[2])
        else:
            normalized_gt_boxlist = box_list.BoxList(input_dict[fields.InputDataFields.groundtruth_boxes])
            gt_boxlist = box_list_ops.scale(normalized_gt_boxlist,
                                            tf.shape(original_image)[1],
                                            tf.shape(original_image)[2])
        groundtruth_boxes = gt_boxlist.get()
        groundtruth_classes = input_dict[fields.InputDataFields.groundtruth_classes]
        tensor_dict['groundtruth_boxes'] = groundtruth_boxes
        tensor_dict['groundtruth_classes'] = groundtruth_classes
        tensor_dict['area'] = input_dict[fields.InputDataFields.groundtruth_area]
        tensor_dict['is_crowd'] = input_dict[fields.InputDataFields.groundtruth_is_crowd]
        tensor_dict['difficult'] = input_dict[fields.InputDataFields.groundtruth_difficult]
        if 'detection_masks' in tensor_dict:
            tensor_dict['groundtruth_instance_masks'] = input_dict[
                fields.InputDataFields.groundtruth_instance_masks]
    return tensor_dict
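A sketch of how the returned `tensor_dict` might be consumed; `checkpoint_path` and the queue-runner startup are assumptions (the latter suggested by the `prefetcher` usage above), not part of the source:

# Hypothetical evaluation snippet for the tensor_dict built above.
tensor_dict = _extract_prediction_tensors(model, create_input_dict_fn)
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, checkpoint_path)  # placeholder path
    tf.train.start_queue_runners(sess)    # feeds the prefetch queue
    result = sess.run(tensor_dict)        # one evaluated example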