def accuracy_function(self, logits, labels, data_type):
  """Returns the ops to measure the mean precision of the model.

  Args:
    logits: model output. It is split along axis 2 into 4 box-location
      values and `self.label_num` class scores per SSD box.
    labels: packed evaluation labels. They are split along axis 1 into
      `ssd_constants.MAX_NUM_EVAL_BOXES` ground-truth rows followed by one
      row that carries the source id and the raw image shape.
    data_type: not used by this method.

  Returns:
    A dict whose keys are `constants.UNREDUCED_ACCURACY_OP_PREFIX`
    concatenated with `ssd_constants.PRED_BOXES`, `PRED_SCORES`,
    `SOURCE_ID` and `RAW_SHAPE`, mapped to the decoded predicted boxes, the
    softmax class scores, the source ids and the raw shapes respectively.

  Raises:
    ImportError: if `ssd_dataloader` or any of the `object_detection`
      modules cannot be imported.
  """
  try:
    import ssd_dataloader  # pylint: disable=g-import-not-at-top
    from object_detection.box_coders import faster_rcnn_box_coder  # pylint: disable=g-import-not-at-top
    from object_detection.core import box_coder  # pylint: disable=g-import-not-at-top
    from object_detection.core import box_list  # pylint: disable=g-import-not-at-top
  except ImportError:
    # Adjacent literals are concatenated verbatim, so every fragment that
    # ends a word needs its own trailing space. The URL fragments are the
    # only ones that are meant to join without one.
    raise ImportError('To use the COCO dataset, you must clone the '
                      'repo https://github.com/tensorflow/models and add '
                      'tensorflow/models and tensorflow/models/research to '
                      'the PYTHONPATH, and compile the protobufs by '
                      'following https://github.com/tensorflow/models/blob/'
                      'master/research/object_detection/g3doc/installation.md'
                      '#protobuf-compilation ; To evaluate using COCO '
                      'metric, download and install Python COCO API from '
                      'https://github.com/cocodataset/cocoapi')

  # Unpack model output back to locations and confidence scores of predictions
  # pred_locs: relative locations (coordinates) of objects in all SSD boxes
  # shape: [batch_size, NUM_SSD_BOXES, 4]
  # pred_labels: confidence scores of objects being of all categories
  # shape: [batch_size, NUM_SSD_BOXES, label_num]
  pred_locs, pred_labels = tf.split(logits, [4, self.label_num], 2)

  ssd_box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
      scale_factors=ssd_constants.BOX_CODER_SCALES)
  anchors = box_list.BoxList(
      tf.convert_to_tensor(ssd_dataloader.DefaultBoxes()('ltrb')))
  pred_boxes = box_coder.batch_decode(
      encoded_boxes=pred_locs, box_coder=ssd_box_coder, anchors=anchors)

  pred_scores = tf.nn.softmax(pred_labels, axis=2)

  # The last row along axis 1 packs the source id and raw shape; the rows
  # before it hold the ground-truth boxes and classes.
  boxes_classes, id_shape = tf.split(
      labels, [ssd_constants.MAX_NUM_EVAL_BOXES, 1], 1)
  # TODO(haoyuzhang): maybe use these values for visualization.
  gt_boxes, gt_classes = tf.split(boxes_classes, [4, 1], 2)  # pylint: disable=unused-variable
  id_shape = tf.squeeze(id_shape, 1)
  source_id, raw_shape, _ = tf.split(id_shape, [1, 3, 1], 1)
  source_id = tf.squeeze(source_id, 1)

  return {
      (constants.UNREDUCED_ACCURACY_OP_PREFIX +
       ssd_constants.PRED_BOXES): pred_boxes,
      (constants.UNREDUCED_ACCURACY_OP_PREFIX +
       ssd_constants.PRED_SCORES): pred_scores,
      # TODO(haoyuzhang): maybe use these values for visualization.
      # constants.UNREDUCED_ACCURACY_OP_PREFIX+'gt_boxes': gt_boxes,
      # constants.UNREDUCED_ACCURACY_OP_PREFIX+'gt_classes': gt_classes,
      (constants.UNREDUCED_ACCURACY_OP_PREFIX +
       ssd_constants.SOURCE_ID): source_id,
      (constants.UNREDUCED_ACCURACY_OP_PREFIX +
       ssd_constants.RAW_SHAPE): raw_shape
  }
def postprocess(self, prediction_dict):
  """Turns raw box and class predictions into final detections.

  The box encodings are decoded against `self.anchors` with
  `self._box_coder`, the background column (index 0 of the last axis) is
  sliced off the class predictions, the remaining predictions go through
  `self._score_conversion_fn`, and the result is handed to
  `self._non_max_suppression_fn` with a [0, 0, 1, 1] clip window. See base
  class for output format conventions.

  By default scores are to be interpreted as logits; when a
  score_conversion_fn is used they are remapped and may have a different
  interpretation.

  Args:
    prediction_dict: a dictionary holding prediction tensors with
      1) box_encodings: float tensor of shape [batch_size, num_anchors,
        box_code_dimension] containing predicted boxes.
      2) class_predictions_with_background: float tensor of shape
        [batch_size, num_anchors, num_classes+1] containing class
        predictions (logits) for each of the anchors. This tensor
        *includes* background class predictions.

  Returns:
    detections: a dictionary containing the following fields
      detection_boxes: [batch, max_detection, 4]
      detection_scores: [batch, max_detections]
      detection_classes: [batch, max_detections]
      num_detections: [batch]

  Raises:
    ValueError: if prediction_dict does not contain `box_encodings` or
      `class_predictions_with_background` fields.
  """
  required_keys = ('box_encodings', 'class_predictions_with_background')
  if not all(key in prediction_dict for key in required_keys):
    raise ValueError('prediction_dict does not contain expected entries.')

  with tf.name_scope('Postprocessor'):
    encoded = prediction_dict['box_encodings']
    scores_with_background = prediction_dict[
        'class_predictions_with_background']
    # Insert a singleton axis at position 2 before passing boxes to NMS.
    decoded = tf.expand_dims(
        bcoder.batch_decode(encoded, self._box_coder, self.anchors), axis=2)
    # Start at index 1 on the last axis to drop the background column.
    foreground = tf.slice(scores_with_background, [0, 0, 1], [-1, -1, -1])
    scores = self._score_conversion_fn(foreground)
    unit_window = tf.constant([0, 0, 1, 1], tf.float32)
    return self._non_max_suppression_fn(
        decoded, scores, clip_window=unit_window)
def graph_fn():
  """Encodes `expected_boxes` with a mock coder and batch-decodes them.

  `expected_boxes` is a free variable read from the enclosing scope; each of
  its entries is encoded against the same two mock anchors, the encodings
  are stacked, and the stack is passed to `box_coder.batch_decode`.

  Returns:
    The tensor produced by `box_coder.batch_decode`.
  """
  anchors = box_list.BoxList(
      tf.constant([[0, 0.1, 0.2, 0.3], [0.2, 0.4, 0.4, 0.6]], tf.float32))
  coder = MockBoxCoder()

  per_image_codes = []
  for image_boxes in expected_boxes:
    image_boxlist = box_list.BoxList(tf.constant(image_boxes))
    per_image_codes.append(coder.encode(image_boxlist, anchors))

  stacked_codes = tf.stack(per_image_codes)
  return box_coder.batch_decode(stacked_codes, coder, anchors)
def postprocess(self, prediction_dict):
  """Produces final detections from a dictionary of raw predictions.

  Steps, in order: decode `box_encodings` with `self._box_coder` and
  `self.anchors`; remove the background entry (index 0 on the last axis) from
  `class_predictions_with_background`; convert what is left with
  `self._score_conversion_fn`; run `self._non_max_suppression_fn` with the
  clip window [0, 0, 1, 1]. See base class for output format conventions.

  Scores are logits by default. If a score_conversion_fn is used they are
  remapped and may therefore have a different interpretation.

  Args:
    prediction_dict: a dictionary holding prediction tensors with
      1) box_encodings: float tensor of shape [batch_size, num_anchors,
        box_code_dimension] containing predicted boxes.
      2) class_predictions_with_background: float tensor of shape
        [batch_size, num_anchors, num_classes+1] containing class
        predictions (logits) for each of the anchors, background class
        *included*.

  Returns:
    detections: a dictionary containing the following fields
      detection_boxes: [batch, max_detection, 4]
      detection_scores: [batch, max_detections]
      detection_classes: [batch, max_detections]
      num_detections: [batch]

  Raises:
    ValueError: if prediction_dict does not contain `box_encodings` or
      `class_predictions_with_background` fields.
  """
  for required in ('box_encodings', 'class_predictions_with_background'):
    if required not in prediction_dict:
      raise ValueError('prediction_dict does not contain expected entries.')

  with tf.name_scope('Postprocessor'):
    raw_boxes = prediction_dict['box_encodings']
    raw_scores = prediction_dict['class_predictions_with_background']

    boxes = bcoder.batch_decode(raw_boxes, self._box_coder, self.anchors)
    boxes = tf.expand_dims(boxes, axis=2)

    # Begin offset 1 on the last axis skips the background class.
    scores = self._score_conversion_fn(
        tf.slice(raw_scores, [0, 0, 1], [-1, -1, -1]))

    window = tf.constant([0, 0, 1, 1], tf.float32)
    result = self._non_max_suppression_fn(boxes, scores, clip_window=window)
  return result
def _apply_hard_mining(self, location_losses, cls_losses, prediction_dict,
                       match_list):
  """Runs the hard example miner over anchorwise losses.

  For every image in the batch a BoxList is built from the decoded box
  predictions, with the non-background class predictions attached as its
  'scores' field. Those BoxLists, the losses and the matches are forwarded
  to `self._hard_example_miner`.

  Args:
    location_losses: Float tensor of shape [batch_size, num_anchors]
      representing anchorwise location losses.
    cls_losses: Float tensor of shape [batch_size, num_anchors]
      representing anchorwise classification losses.
    prediction_dict: a dictionary holding prediction tensors with
      1) box_encodings: float tensor of shape [batch_size, num_anchors,
        box_code_dimension] containing predicted boxes.
      2) class_predictions_with_background: float tensor of shape
        [batch_size, num_anchors, num_classes+1] containing class
        predictions (logits) for each of the anchors. This tensor
        *includes* background class predictions.
    match_list: a list of matcher.Match objects encoding the match between
      anchors and groundtruth boxes for each image of the batch, with rows
      of the Match objects corresponding to groundtruth boxes and columns
      corresponding to anchors.

  Returns:
    Whatever `self._hard_example_miner` returns, documented upstream as:
      mined_location_loss: a float scalar with sum of localization losses
        from selected hard examples.
      mined_cls_loss: a float scalar with sum of classification losses from
        selected hard examples.
  """
  def _scored_boxlist(boxes, scores):
    # Pair one image's decoded boxes with its per-class scores.
    boxlist = box_list.BoxList(boxes)
    boxlist.add_field('scores', scores)
    return boxlist

  # The same shape serves as the slice size and the reshape target.
  foreground_shape = [
      -1, self.anchors.num_boxes_static(), self.num_classes]
  # Offset 1 on the last axis skips the background column.
  foreground_scores = tf.slice(
      prediction_dict['class_predictions_with_background'],
      [0, 0, 1], foreground_shape)
  foreground_scores = tf.reshape(foreground_scores, foreground_shape)

  boxes_per_image = tf.unstack(
      bcoder.batch_decode(prediction_dict['box_encodings'],
                          self._box_coder, self.anchors))
  scores_per_image = tf.unstack(foreground_scores)

  per_image_boxlists = [
      _scored_boxlist(boxes, scores)
      for boxes, scores in zip(boxes_per_image, scores_per_image)]
  return self._hard_example_miner(
      location_losses=location_losses,
      cls_losses=cls_losses,
      decoded_boxlist_list=per_image_boxlists,
      match_list=match_list)
def batch_decode(self, batch_codes):
    """Decode each entry of ``batch_codes`` and join the results on axis 1.

    Entry ``i`` of ``batch_codes`` is decoded with ``self._box_coder`` against
    ``self.anchors[i]``; the decoded tensors are then concatenated along
    axis 1.

    :param batch_codes: list of batched codes
        [batched_a0_code, batched_a1_code, ...], one per element of
        ``self.anchors``; each tensor shape must be [batch_size, H*W, 4]
    :return: the decoded boxes of all entries concatenated along axis 1
        (documented upstream as [batchsize, H*W, 4])
    """
    assert len(batch_codes) == len(self.anchors)

    decoded_per_anchor = []
    for index, codes in enumerate(batch_codes):
        anchor = self.anchors[index]
        decoded_per_anchor.append(
            bcoder.batch_decode(codes, self._box_coder, anchor))
    return tf.concat(decoded_per_anchor, axis=1)
def _apply_hard_mining(self, location_losses, cls_losses, prediction_dict,
                       match_list):
  """Applies hard example mining to anchorwise losses.

  Decoded boxes and non-background class predictions are unstacked into one
  entry per image, wrapped in BoxLists carrying a 'scores' field, and passed
  with the losses and matches to `self._hard_example_miner`.

  Args:
    location_losses: Float tensor of shape [batch_size, num_anchors]
      representing anchorwise location losses.
    cls_losses: Float tensor of shape [batch_size, num_anchors]
      representing anchorwise classification losses.
    prediction_dict: a dictionary holding prediction tensors with
      1) box_encodings: float tensor of shape [batch_size, num_anchors,
        box_code_dimension] containing predicted boxes.
      2) class_predictions_with_background: float tensor of shape
        [batch_size, num_anchors, num_classes+1] containing class
        predictions (logits) for each of the anchors, background class
        *included*.
    match_list: a list of matcher.Match objects encoding the match between
      anchors and groundtruth boxes for each image of the batch, with rows
      of the Match objects corresponding to groundtruth boxes and columns
      corresponding to anchors.

  Returns:
    The value returned by `self._hard_example_miner`, documented upstream as:
      mined_location_loss: a float scalar with sum of localization losses
        from selected hard examples.
      mined_cls_loss: a float scalar with sum of classification losses from
        selected hard examples.
  """
  num_anchors = self.anchors.num_boxes_static()
  target_shape = [-1, num_anchors, self.num_classes]

  # Slice from index 1 on the last axis so the background class is excluded;
  # target_shape doubles as the slice size and the reshape target.
  with_background = prediction_dict['class_predictions_with_background']
  without_background = tf.slice(with_background, [0, 0, 1], target_shape)
  without_background = tf.reshape(without_background, target_shape)

  decoded = bcoder.batch_decode(prediction_dict['box_encodings'],
                                self._box_coder, self.anchors)
  image_boxes = tf.unstack(decoded)
  image_scores = tf.unstack(without_background)

  boxlists = []
  for boxes, scores in zip(image_boxes, image_scores):
    scored = box_list.BoxList(boxes)
    scored.add_field('scores', scores)
    boxlists.append(scored)

  return self._hard_example_miner(
      location_losses=location_losses,
      cls_losses=cls_losses,
      decoded_boxlist_list=boxlists,
      match_list=match_list)
def test_batch_decode(self):
  """Checks that batch_decode recovers boxes encoded by MockBoxCoder.

  Two images with two boxes each are encoded against the same two mock
  anchors, stacked, batch-decoded, and compared with the original boxes.
  """
  anchors = box_list.BoxList(
      tf.constant([[0, 0.1, 0.2, 0.3], [0.2, 0.4, 0.4, 0.6]], tf.float32))
  coder = MockBoxCoder()

  expected_boxes = [[[0.0, 0.1, 0.5, 0.6], [0.5, 0.6, 0.7, 0.8]],
                    [[0.1, 0.2, 0.3, 0.4], [0.7, 0.8, 0.9, 1.0]]]

  encoded_per_image = []
  for image_boxes in expected_boxes:
    image_boxlist = box_list.BoxList(tf.constant(image_boxes))
    encoded_per_image.append(coder.encode(image_boxlist, anchors))

  decoded_boxes = box_coder.batch_decode(
      tf.stack(encoded_per_image), coder, anchors)

  with self.test_session() as sess:
    self.assertAllClose(expected_boxes, sess.run(decoded_boxes))
def batch_decode(self, batch_code, batch_score, max_out, thres,
                 nms_thres=0.4):
    """Decode batched box codes and filter them with batched NMS.

    ``batch_code`` and ``batch_score`` are flattened to
    [batch, -1, last_dim] and [batch, -1] respectively, the codes are decoded
    against ``self.anchor`` with ``self._box_coder``, and the decoded boxes
    and scores are passed to ``tf_ops.nms_batch`` with ``pad=True``.

    :param batch_code: batched box codes; asserted at graph run time to have
        rank 4, and its static shape is read for the reshape
    :param batch_score: batched scores; asserted at graph run time to have
        rank 3, and its static shape is read for the reshape
    :param max_out: max output, forwarded as ``max_output_size``
    :param thres: score threshold, forwarded as ``score_thres``
    :param nms_thres: forwarded to ``tf_ops.nms_batch`` as ``nms_thres``;
        defaults to 0.4, the value that used to be hard-coded
    :return: tuple ``(batch_boxes, batch_scores)`` as returned by
        ``tf_ops.nms_batch`` (boxes documented upstream as [batchsize, *, 4])
    """
    with tf.name_scope('batch_decode'):
        anchor = self.anchor
        code_rank_assert = tf.assert_equal(tf.rank(batch_code), 4)
        score_rank_assert = tf.assert_equal(tf.rank(batch_score), 3)
        # Create the reshapes under the rank assertions so that everything
        # downstream depends on them having passed.
        with tf.control_dependencies([code_rank_assert, score_rank_assert]):
            c_shape = batch_code.shape
            s_shape = batch_score.shape
            batch_code = tf.reshape(
                batch_code, [c_shape[0], -1, c_shape[3]])
            batch_score = tf.reshape(batch_score, [s_shape[0], -1])
        batch_boxes = bcoder.batch_decode(
            batch_code, self._box_coder, anchor)
        batch_boxes, batch_scores = tf_ops.nms_batch(
            batch_boxes,
            batch_score,
            max_output_size=max_out,
            nms_thres=nms_thres,
            score_thres=thres,
            pad=True)
    return batch_boxes, batch_scores