Example #1
0
  def accuracy_function(self, logits, labels, data_type):
    """Returns the ops to measure the mean precision of the model.

    Args:
      logits: model output; split along axis 2 into 4 box-location values and
        `self.label_num` class scores for every SSD box.
      labels: evaluation labels; split along axis 1 into
        `ssd_constants.MAX_NUM_EVAL_BOXES` rows of ground truth (4 box values
        and 1 class each) followed by one extra row that packs the source id
        (1 value), the raw shape (3 values) and 1 trailing value that is
        discarded.
      data_type: not used by this method.

    Returns:
      A dict whose keys are `constants.UNREDUCED_ACCURACY_OP_PREFIX` followed
      by `ssd_constants.PRED_BOXES`, `PRED_SCORES`, `SOURCE_ID` and
      `RAW_SHAPE`, mapped respectively to the decoded predicted boxes, the
      softmax class scores, the source ids and the raw shapes.

    Raises:
      ImportError: if `ssd_dataloader` or the required `object_detection`
        modules cannot be imported.
    """
    try:
      import ssd_dataloader  # pylint: disable=g-import-not-at-top
      from object_detection.box_coders import faster_rcnn_box_coder  # pylint: disable=g-import-not-at-top
      from object_detection.core import box_coder  # pylint: disable=g-import-not-at-top
      from object_detection.core import box_list  # pylint: disable=g-import-not-at-top
    except ImportError:
      # Adjacent literals are concatenated verbatim, so each fragment must carry
      # its own trailing space; otherwise words and the final URL run together.
      raise ImportError('To use the COCO dataset, you must clone the '
                        'repo https://github.com/tensorflow/models and add '
                        'tensorflow/models and tensorflow/models/research to '
                        'the PYTHONPATH, and compile the protobufs by '
                        'following https://github.com/tensorflow/models/blob/'
                        'master/research/object_detection/g3doc/installation.md'
                        '#protobuf-compilation ; To evaluate using COCO '
                        'metric, download and install Python COCO API from '
                        'https://github.com/cocodataset/cocoapi')

    # Unpack model output back to locations and confidence scores of predictions
    # pred_locs: relative locations (coordinates) of objects in all SSD boxes
    # shape: [batch_size, NUM_SSD_BOXES, 4]
    # pred_labels: confidence scores of objects being of all categories
    # shape: [batch_size, NUM_SSD_BOXES, label_num]
    pred_locs, pred_labels = tf.split(logits, [4, self.label_num], 2)

    # Decode the predicted offsets against the default SSD boxes ('ltrb' form).
    ssd_box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=ssd_constants.BOX_CODER_SCALES)
    anchors = box_list.BoxList(
        tf.convert_to_tensor(ssd_dataloader.DefaultBoxes()('ltrb')))
    pred_boxes = box_coder.batch_decode(
        encoded_boxes=pred_locs, box_coder=ssd_box_coder, anchors=anchors)

    pred_scores = tf.nn.softmax(pred_labels, axis=2)

    # The last row along axis 1 of `labels` carries per-example metadata
    # instead of a ground-truth box.
    boxes_classes, id_shape = tf.split(
        labels, [ssd_constants.MAX_NUM_EVAL_BOXES, 1], 1)
    # TODO(haoyuzhang): maybe use these values for visualization.
    gt_boxes, gt_classes = tf.split(boxes_classes, [4, 1], 2)  # pylint: disable=unused-variable
    id_shape = tf.squeeze(id_shape, 1)
    source_id, raw_shape, _ = tf.split(id_shape, [1, 3, 1], 1)
    source_id = tf.squeeze(source_id, 1)

    return {
        (constants.UNREDUCED_ACCURACY_OP_PREFIX +
         ssd_constants.PRED_BOXES): pred_boxes,
        (constants.UNREDUCED_ACCURACY_OP_PREFIX +
         ssd_constants.PRED_SCORES): pred_scores,
        # TODO(haoyuzhang): maybe use these values for visualization.
        # constants.UNREDUCED_ACCURACY_OP_PREFIX+'gt_boxes': gt_boxes,
        # constants.UNREDUCED_ACCURACY_OP_PREFIX+'gt_classes': gt_classes,
        (constants.UNREDUCED_ACCURACY_OP_PREFIX +
         ssd_constants.SOURCE_ID): source_id,
        (constants.UNREDUCED_ACCURACY_OP_PREFIX +
         ssd_constants.RAW_SHAPE): raw_shape
    }
    def postprocess(self, prediction_dict):
        """Turns raw prediction tensors into final detections.

        The box encodings are decoded against `self.anchors`, the background
        class (index 0 on the last axis) is sliced off the class predictions,
        the remaining scores are passed through `self._score_conversion_fn`,
        and both are handed to `self._non_max_suppression_fn` together with a
        [0, 0, 1, 1] clip window.

        See base class for output format conventions.  Scores are logits by
        default; if a score_conversion_fn is used they are remapped and may
        therefore have a different interpretation.

        Args:
          prediction_dict: a dictionary holding prediction tensors with
            1) box_encodings: float tensor of shape [batch_size, num_anchors,
              box_code_dimension] containing predicted boxes.
            2) class_predictions_with_background: float tensor of shape
              [batch_size, num_anchors, num_classes+1] containing class
              predictions (logits) for each of the anchors.  Note that this
              tensor *includes* background class predictions.

        Returns:
          detections: a dictionary containing the following fields
            detection_boxes: [batch, max_detection, 4]
            detection_scores: [batch, max_detections]
            detection_classes: [batch, max_detections]
            num_detections: [batch]

        Raises:
          ValueError: if prediction_dict does not contain `box_encodings` or
            `class_predictions_with_background` fields.
        """
        required_keys = ('box_encodings', 'class_predictions_with_background')
        if not all(key in prediction_dict for key in required_keys):
            raise ValueError(
                'prediction_dict does not contain expected entries.')

        with tf.name_scope('Postprocessor'):
            encodings = prediction_dict['box_encodings']
            logits_with_background = prediction_dict[
                'class_predictions_with_background']

            # Insert a new axis at position 2 before handing boxes to NMS.
            boxes = tf.expand_dims(
                bcoder.batch_decode(encodings, self._box_coder, self.anchors),
                axis=2)

            # Drop column 0 (background) and keep everything after it.
            foreground_logits = tf.slice(
                logits_with_background, [0, 0, 1], [-1, -1, -1])
            scores = self._score_conversion_fn(foreground_logits)

            window = tf.constant([0, 0, 1, 1], tf.float32)
            return self._non_max_suppression_fn(
                boxes, scores, clip_window=window)
Example #3
0
    def graph_fn():
      """Encodes `expected_boxes` with a mock coder and batch-decodes them.

      `expected_boxes` is taken from the enclosing scope.  Each of its entries
      is encoded against the same two mock anchors, the encodings are stacked
      into one batch tensor, and the batch-decoded result is returned.
      """
      anchors = box_list.BoxList(
          tf.constant([[0, 0.1, 0.2, 0.3], [0.2, 0.4, 0.4, 0.6]], tf.float32))
      coder = MockBoxCoder()

      per_image_codes = []
      for boxes in expected_boxes:
        image_boxes = box_list.BoxList(tf.constant(boxes))
        per_image_codes.append(coder.encode(image_boxes, anchors))

      return box_coder.batch_decode(tf.stack(per_image_codes), coder, anchors)
  def postprocess(self, prediction_dict):
    """Turns raw prediction tensors into final detections.

    The box encodings are decoded against `self.anchors`, the background class
    (index 0 on the last axis) is sliced off the class predictions, the
    remaining scores are passed through `self._score_conversion_fn`, and both
    are handed to `self._non_max_suppression_fn` together with a [0, 0, 1, 1]
    clip window.

    See base class for output format conventions.  Scores are logits by
    default; if a score_conversion_fn is used they are remapped and may
    therefore have a different interpretation.

    Args:
      prediction_dict: a dictionary holding prediction tensors with
        1) box_encodings: float tensor of shape [batch_size, num_anchors,
          box_code_dimension] containing predicted boxes.
        2) class_predictions_with_background: float tensor of shape
          [batch_size, num_anchors, num_classes+1] containing class predictions
          (logits) for each of the anchors.  Note that this tensor *includes*
          background class predictions.

    Returns:
      detections: a dictionary containing the following fields
        detection_boxes: [batch, max_detection, 4]
        detection_scores: [batch, max_detections]
        detection_classes: [batch, max_detections]
        num_detections: [batch]

    Raises:
      ValueError: if prediction_dict does not contain `box_encodings` or
        `class_predictions_with_background` fields.
    """
    if not ('box_encodings' in prediction_dict and
            'class_predictions_with_background' in prediction_dict):
      raise ValueError('prediction_dict does not contain expected entries.')

    with tf.name_scope('Postprocessor'):
      encodings = prediction_dict['box_encodings']
      logits_with_background = prediction_dict[
          'class_predictions_with_background']

      decoded = bcoder.batch_decode(encodings, self._box_coder, self.anchors)
      # Insert a new axis at position 2 before handing boxes to NMS.
      boxes = tf.expand_dims(decoded, axis=2)

      # Start at index 1 of the last axis to skip the background column.
      slice_begin = [0, 0, 1]
      slice_size = [-1, -1, -1]
      foreground_logits = tf.slice(
          logits_with_background, slice_begin, slice_size)
      scores = self._score_conversion_fn(foreground_logits)

      window = tf.constant([0, 0, 1, 1], tf.float32)
      nms_result = self._non_max_suppression_fn(
          boxes, scores, clip_window=window)
    return nms_result
    def _apply_hard_mining(self, location_losses, cls_losses, prediction_dict,
                           match_list):
        """Runs the hard example miner over anchorwise losses.

        Builds one BoxList per image from the decoded box predictions, attaches
        the non-background class predictions as its 'scores' field, and
        forwards everything to `self._hard_example_miner`.

        Args:
          location_losses: Float tensor of shape [batch_size, num_anchors]
            representing anchorwise location losses.
          cls_losses: Float tensor of shape [batch_size, num_anchors]
            representing anchorwise classification losses.
          prediction_dict: a dictionary holding prediction tensors with
            1) box_encodings: float tensor of shape [batch_size, num_anchors,
              box_code_dimension] containing predicted boxes.
            2) class_predictions_with_background: float tensor of shape
              [batch_size, num_anchors, num_classes+1] containing class
              predictions (logits) for each of the anchors.  Note that this
              tensor *includes* background class predictions.
          match_list: a list of matcher.Match objects encoding the match
            between anchors and groundtruth boxes for each image of the batch,
            with rows of the Match objects corresponding to groundtruth boxes
            and columns corresponding to anchors.

        Returns:
          mined_location_loss: a float scalar with sum of localization losses
            from selected hard examples.
          mined_cls_loss: a float scalar with sum of classification losses
            from selected hard examples.
        """
        foreground_shape = [
            -1, self.anchors.num_boxes_static(), self.num_classes
        ]
        # Skip column 0 (background); the reshape re-applies the same shape
        # to the sliced tensor.
        foreground_scores = tf.slice(
            prediction_dict['class_predictions_with_background'], [0, 0, 1],
            foreground_shape)
        foreground_scores = tf.reshape(foreground_scores, foreground_shape)

        batch_boxes = bcoder.batch_decode(
            prediction_dict['box_encodings'], self._box_coder, self.anchors)
        per_image_boxes = tf.unstack(batch_boxes)
        per_image_scores = tf.unstack(foreground_scores)

        def _as_scored_boxlist(box_tensor, score_tensor):
            # Wrap one image's boxes and attach its scores.
            scored = box_list.BoxList(box_tensor)
            scored.add_field('scores', score_tensor)
            return scored

        decoded_boxlist_list = [
            _as_scored_boxlist(box_tensor, score_tensor)
            for box_tensor, score_tensor in zip(per_image_boxes,
                                                per_image_scores)
        ]
        return self._hard_example_miner(
            location_losses=location_losses,
            cls_losses=cls_losses,
            decoded_boxlist_list=decoded_boxlist_list,
            match_list=match_list)
Example #6
0
 def batch_decode(self, batch_codes):
     """
     Decode one batch of codes per anchor set and concatenate the results.

     :param batch_codes: list of batched codes [batched_a0_code, batched_a1_code, ...],
                         one entry per element of ``self.anchors``;
                         each tensor shape must be [batch_size, H*W, 4]
     :return: decoded boxes of all anchor sets, concatenated along axis 1
     :raises ValueError: if ``batch_codes`` and ``self.anchors`` differ in length
     """
     # Explicit check instead of `assert`: asserts are stripped under `python -O`,
     # which would let a length mismatch silently pair codes with the wrong anchors.
     if len(batch_codes) != len(self.anchors):
         raise ValueError(
             'batch_codes has %d entries but self.anchors has %d' %
             (len(batch_codes), len(self.anchors)))
     batch_boxes = [
         bcoder.batch_decode(batch_code, self._box_coder, anchor)
         for batch_code, anchor in zip(batch_codes, self.anchors)
     ]
     return tf.concat(batch_boxes, axis=1)
  def _apply_hard_mining(self, location_losses, cls_losses, prediction_dict,
                         match_list):
    """Runs the hard example miner over anchorwise losses.

    Builds one BoxList per image from the decoded box predictions, attaches the
    non-background class predictions as its 'scores' field, and forwards
    everything to `self._hard_example_miner`.

    Args:
      location_losses: Float tensor of shape [batch_size, num_anchors]
        representing anchorwise location losses.
      cls_losses: Float tensor of shape [batch_size, num_anchors]
        representing anchorwise classification losses.
      prediction_dict: a dictionary holding prediction tensors with
        1) box_encodings: float tensor of shape [batch_size, num_anchors,
          box_code_dimension] containing predicted boxes.
        2) class_predictions_with_background: float tensor of shape
          [batch_size, num_anchors, num_classes+1] containing class predictions
          (logits) for each of the anchors.  Note that this tensor *includes*
          background class predictions.
      match_list: a list of matcher.Match objects encoding the match between
        anchors and groundtruth boxes for each image of the batch,
        with rows of the Match objects corresponding to groundtruth boxes
        and columns corresponding to anchors.

    Returns:
      mined_location_loss: a float scalar with sum of localization losses from
        selected hard examples.
      mined_cls_loss: a float scalar with sum of classification losses from
        selected hard examples.
    """
    num_anchors = self.anchors.num_boxes_static()
    foreground_shape = [-1, num_anchors, self.num_classes]

    # Skip column 0 (background); the reshape re-applies the same shape to the
    # sliced tensor.
    logits_with_background = prediction_dict[
        'class_predictions_with_background']
    foreground_scores = tf.slice(logits_with_background, [0, 0, 1],
                                 foreground_shape)
    foreground_scores = tf.reshape(foreground_scores, foreground_shape)

    batch_boxes = bcoder.batch_decode(prediction_dict['box_encodings'],
                                      self._box_coder, self.anchors)
    per_image_boxes = tf.unstack(batch_boxes)
    per_image_scores = tf.unstack(foreground_scores)

    scored_boxlists = []
    for image_boxes, image_scores in zip(per_image_boxes, per_image_scores):
      scored = box_list.BoxList(image_boxes)
      scored.add_field('scores', image_scores)
      scored_boxlists.append(scored)

    return self._hard_example_miner(
        location_losses=location_losses,
        cls_losses=cls_losses,
        decoded_boxlist_list=scored_boxlists,
        match_list=match_list)
Example #8
0
  def test_batch_decode(self):
    """Checks that batch_decode recovers boxes encoded by MockBoxCoder.

    Two images with two boxes each are encoded against the same pair of mock
    anchors, stacked into one batch, decoded with `box_coder.batch_decode`,
    and compared with the original boxes.
    """
    anchors = box_list.BoxList(
        tf.constant([[0, 0.1, 0.2, 0.3], [0.2, 0.4, 0.4, 0.6]], tf.float32))
    coder = MockBoxCoder()

    expected_boxes = [
        [[0.0, 0.1, 0.5, 0.6], [0.5, 0.6, 0.7, 0.8]],
        [[0.1, 0.2, 0.3, 0.4], [0.7, 0.8, 0.9, 1.0]],
    ]

    per_image_codes = []
    for boxes in expected_boxes:
      image_boxes = box_list.BoxList(tf.constant(boxes))
      per_image_codes.append(coder.encode(image_boxes, anchors))
    decoded = box_coder.batch_decode(
        tf.stack(per_image_codes), coder, anchors)

    with self.test_session() as sess:
      self.assertAllClose(expected_boxes, sess.run(decoded))
Example #9
0
 def batch_decode(self, batch_code, batch_score, max_out, thres, nms_thres=0.4):
     """
     Decode a batch of box codes against ``self.anchor`` and run batched NMS.

     :param batch_code: rank-4 tensor of box codes (rank is checked when the graph runs);
                        it is flattened to [shape[0], -1, shape[3]] before decoding,
                        using the tensor's static shape
     :param batch_score: rank-3 tensor of scores (rank is checked when the graph runs);
                         it is flattened to [shape[0], -1]
     :param max_out: max output, forwarded to ``tf_ops.nms_batch`` as ``max_output_size``
     :param thres: score threshold, forwarded to ``tf_ops.nms_batch`` as ``score_thres``
     :param nms_thres: forwarded to ``tf_ops.nms_batch`` as ``nms_thres``; defaults to
                       0.4, the value that used to be hard-coded here
     :return: tuple ``(batch_boxes, batch_scores)`` as returned by
              ``tf_ops.nms_batch`` called with ``pad=True``
     """
     with tf.name_scope('batch_decode'):
         anchor = self.anchor
         code_rank_assert = tf.assert_equal(tf.rank(batch_code), 4)
         score_rank_assert = tf.assert_equal(tf.rank(batch_score), 3)
         # Ops created below only run after both rank checks have passed.
         with tf.control_dependencies([code_rank_assert, score_rank_assert]):
             c_shape = batch_code.shape
             s_shape = batch_score.shape
             # Collapse the middle axes so codes and scores line up one row per box.
             batch_code = tf.reshape(batch_code, [c_shape[0], -1, c_shape[3]])
             batch_score = tf.reshape(batch_score, [s_shape[0], -1])
             batch_boxes = bcoder.batch_decode(batch_code, self._box_coder, anchor)
             batch_boxes, batch_scores = tf_ops.nms_batch(batch_boxes, batch_score,
                                                         max_output_size=max_out,
                                                         nms_thres=nms_thres,
                                                         score_thres=thres, pad=True)
     return batch_boxes, batch_scores