def accuracy_function(self, logits, labels, data_type):
  """Returns the ops to measure the mean precision of the model."""
  try:
    import ssd_dataloader  # pylint: disable=g-import-not-at-top
    from object_detection.box_coders import faster_rcnn_box_coder  # pylint: disable=g-import-not-at-top
    from object_detection.core import box_coder  # pylint: disable=g-import-not-at-top
    from object_detection.core import box_list  # pylint: disable=g-import-not-at-top
  except ImportError:
    raise ImportError('To use the COCO dataset, you must clone the '
                      'repo https://github.com/tensorflow/models and add '
                      'tensorflow/models and tensorflow/models/research to '
                      'the PYTHONPATH, and compile the protobufs by '
                      'following https://github.com/tensorflow/models/blob/'
                      'master/research/object_detection/g3doc/installation.md'
                      '#protobuf-compilation. To evaluate using the COCO '
                      'metric, download and install the Python COCO API from '
                      'https://github.com/cocodataset/cocoapi')

  # Unpack the model output back to locations and confidence scores of
  # predictions.
  # pred_locs: relative locations (coordinates) of objects in all SSD boxes
  #            shape: [batch_size, NUM_SSD_BOXES, 4]
  # pred_labels: confidence scores of objects being of all categories
  #              shape: [batch_size, NUM_SSD_BOXES, label_num]
  pred_locs, pred_labels = tf.split(logits, [4, self.label_num], 2)

  # Decode the predicted offsets against the default (anchor) boxes to get
  # absolute box coordinates, and turn class logits into probabilities.
  ssd_box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
      scale_factors=ssd_constants.BOX_CODER_SCALES)
  anchors = box_list.BoxList(
      tf.convert_to_tensor(ssd_dataloader.DefaultBoxes()('ltrb')))
  pred_boxes = box_coder.batch_decode(
      encoded_boxes=pred_locs, box_coder=ssd_box_coder, anchors=anchors)
  pred_scores = tf.nn.softmax(pred_labels, axis=2)

  # Unpack the ground truth labels into per-image boxes/classes and image
  # metadata (source_id and raw shape).
  boxes_classes, id_shape = tf.split(
      labels, [ssd_constants.MAX_NUM_EVAL_BOXES, 1], 1)
  # TODO(haoyuzhang): maybe use these values for visualization.
  gt_boxes, gt_classes = tf.split(boxes_classes, [4, 1], 2)  # pylint: disable=unused-variable
  id_shape = tf.squeeze(id_shape, 1)
  source_id, raw_shape, _ = tf.split(id_shape, [1, 3, 1], 1)
  source_id = tf.squeeze(source_id, 1)

  return {
      (constants.UNREDUCED_ACCURACY_OP_PREFIX +
       ssd_constants.PRED_BOXES): pred_boxes,
      (constants.UNREDUCED_ACCURACY_OP_PREFIX +
       ssd_constants.PRED_SCORES): pred_scores,
      # TODO(haoyuzhang): maybe use these values for visualization.
      # constants.UNREDUCED_ACCURACY_OP_PREFIX+'gt_boxes': gt_boxes,
      # constants.UNREDUCED_ACCURACY_OP_PREFIX+'gt_classes': gt_classes,
      (constants.UNREDUCED_ACCURACY_OP_PREFIX +
       ssd_constants.SOURCE_ID): source_id,
      (constants.UNREDUCED_ACCURACY_OP_PREFIX +
       ssd_constants.RAW_SHAPE): raw_shape,
  }
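
# For reference: box_coder.batch_decode above inverts the anchor-relative
# encoding that FasterRcnnBoxCoder applies at training time. Below is a
# minimal, self-contained NumPy sketch of that per-box decode math. The
# standalone decode_box helper and its names are illustrative only, not part
# of this model; scale_factors plays the role of
# ssd_constants.BOX_CODER_SCALES.
import numpy as np


def decode_box(rel_codes, anchor, scale_factors):
  """Decodes one [ty, tx, th, tw] prediction against one anchor box.

  Args:
    rel_codes: four encoded offsets [ty, tx, th, tw].
    anchor: anchor box in center-size form [ycenter, xcenter, h, w].
    scale_factors: the four scales used at encode time.

  Returns:
    The decoded box as [ymin, xmin, ymax, xmax].
  """
  ty, tx, th, tw = np.asarray(rel_codes) / np.asarray(scale_factors)
  ya, xa, ha, wa = anchor
  h = np.exp(th) * ha     # undo the log-space height encoding
  w = np.exp(tw) * wa     # undo the log-space width encoding
  ycenter = ty * ha + ya  # undo the anchor-relative center offset
  xcenter = tx * wa + xa
  return np.array([ycenter - h / 2., xcenter - w / 2.,
                   ycenter + h / 2., xcenter + w / 2.])


# Sanity check: an all-zero prediction decodes back to its own anchor.
assert np.allclose(
    decode_box(np.zeros(4), [0.5, 0.5, 0.2, 0.2], [10., 10., 5., 5.]),
    [0.4, 0.4, 0.6, 0.6])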
def loss_function(self, build_network_result, labels):
  logits = build_network_result.logits
  # Unpack the model output back to locations and confidence scores of
  # predictions.
  # Shape of pred_loc: [batch_size, 4, NUM_SSD_BOXES]
  # Shape of pred_label: [batch_size, label_num, NUM_SSD_BOXES]
  pred_loc, pred_label = tf.split(logits, [4, self.label_num], 1)

  # Unpack the ground truth labels into the number of boxes, locations, and
  # classes.
  # Initial shape of labels: [batch_size, NUM_SSD_BOXES + 1, 5]
  # Shape of labels after the split: [batch_size, NUM_SSD_BOXES, 5]
  # Shape of num_gt: [batch_size, 1, 5] -- 5 identical copies
  labels, num_gt = tf.split(labels, [ssd_constants.NUM_SSD_BOXES, 1], 1)
  # Shape of num_gt: [batch_size]
  num_gt = tf.squeeze(tf.cast(num_gt[:, :, 0], tf.int32))

  # Shape of gt_loc: [batch_size, NUM_SSD_BOXES, 4]
  # Shape of gt_label: [batch_size, NUM_SSD_BOXES, 1]
  gt_loc, gt_label = tf.split(labels, [4, 1], 2)
  gt_label = tf.cast(gt_label, tf.int32)

  # Per-box classification loss; the trailing singleton dimension of gt_label
  # is squeezed away inside sparse_softmax_cross_entropy.
  cross_entropy = tf.losses.sparse_softmax_cross_entropy(
      gt_label, tf.transpose(pred_label, [0, 2, 1]),
      reduction=tf.losses.Reduction.NONE)

  default_boxes = tf.tile(
      tf.convert_to_tensor(
          ssd_dataloader.DefaultBoxes()('xywh'))[tf.newaxis, :, :],
      [gt_loc.get_shape()[0], 1, 1])

  # To performance people: MLPerf uses this transposed convention.
  # I (taylorrobie) have matched it to make it easier to compare to the
  # reference. If this hurts performance, feel free to adjust accordingly.
  gt_label = tf.squeeze(gt_label)
  # pred_loc, pred_label, gt_loc, default_boxes = [
  #     tf.transpose(i, (0, 2, 1)) for i in
  #     [pred_loc, pred_label, gt_loc, default_boxes]
  # ]

  # Shape of gt_loc: [batch_size, 4, NUM_SSD_BOXES]
  gt_loc = tf.transpose(gt_loc, [0, 2, 1])
  # Shape of default_boxes: [batch_size, 4, NUM_SSD_BOXES]
  default_boxes = tf.transpose(default_boxes, [0, 2, 1])

  # Positive boxes are those matched to a ground truth object (class > 0).
  mask = tf.greater(gt_label, 0)
  float_mask = tf.cast(mask, tf.float32)

  # Encode the ground truth boxes relative to the default boxes, mirroring
  # the encoding the model's predictions are trained against.
  gt_location_vectors = tf.concat([
      (ssd_constants.SCALE_XY *
       (gt_loc[:, :2, :] - default_boxes[:, :2, :]) /
       default_boxes[:, 2:, :]),
      # The gt_loc height and width have already had the log taken.
      # See FasterRcnnBoxCoder for more details.
      (ssd_constants.SCALE_HW *
       (gt_loc[:, 2:, :] - tf.log(default_boxes[:, 2:, :])))
  ], axis=1)

  # Localization loss (smooth L1 / Huber), counted only for positive boxes.
  smooth_l1 = tf.reduce_sum(tf.losses.huber_loss(
      gt_location_vectors, pred_loc,
      reduction=tf.losses.Reduction.NONE), axis=1)
  smooth_l1 = tf.multiply(smooth_l1, float_mask)
  box_loss = tf.reduce_sum(smooth_l1, axis=1)

  # Hard example mining: per image, keep only the NEGS_PER_POSITIVE hardest
  # (highest-loss) negative boxes per positive box. The double argsort turns
  # each box's loss into its rank; see the sketch after this function.
  neg_masked_cross_entropy = cross_entropy * (1 - float_mask)
  relative_position = tf.contrib.framework.argsort(
      tf.contrib.framework.argsort(
          neg_masked_cross_entropy, direction='DESCENDING'))
  num_neg_boxes = num_gt * ssd_constants.NEGS_PER_POSITIVE
  top_k_neg_mask = tf.cast(
      tf.less(
          relative_position,
          tf.tile(num_neg_boxes[:, tf.newaxis],
                  (1, ssd_constants.NUM_SSD_BOXES))),
      tf.float32)
  class_loss = tf.reduce_sum(
      tf.multiply(cross_entropy, float_mask + top_k_neg_mask), axis=1)

  # Normalize both losses by the number of ground truth boxes per image.
  class_loss = tf.reduce_mean(class_loss / tf.cast(num_gt, tf.float32))
  box_loss = tf.reduce_mean(box_loss / tf.cast(num_gt, tf.float32))

  return class_loss + box_loss
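
# A minimal NumPy sketch of the double-argsort rank trick used for hard
# example mining above (the values and names here are illustrative only).
# The first argsort orders boxes by descending loss; the second converts
# that ordering into the rank of each box, so rank 0 is the hardest
# negative. NumPy's argsort has no direction argument, so descending order
# is emulated by negating the losses.
import numpy as np

losses = np.array([0.1, 0.9, 0.5, 0.7])
order = np.argsort(-losses)  # indices sorted by descending loss: [1, 3, 2, 0]
ranks = np.argsort(order)    # rank of each box within its image: [3, 0, 2, 1]
assert list(ranks) == [3, 0, 2, 1]

# Keeping boxes with rank < k selects the k hardest negatives, which is what
# tf.less(relative_position, num_neg_boxes) does per image above.
k = 2
hard_negative_mask = ranks < k
assert list(hard_negative_mask) == [False, True, False, True]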