def decode_box(self, prior_bboxes, prior_variances):
    """Decode ``self.mbox_loc`` against the prior boxes into corner boxes.

    The four channels of ``self.mbox_loc`` are used as variance-scaled
    offsets: the first two shift the prior centre (scaled by the prior's
    width/height) and the last two scale the prior's width/height through
    ``util.safe_exp``. The decoded boxes are converted with
    ``util.bbox_transform``, multiplied by ``mc.IMAGE_WIDTH`` /
    ``mc.IMAGE_HEIGHT``, stacked along axis 2 into ``self.decode_boxes`` and
    registered through ``self._add_act``.

    Args:
      prior_bboxes: prior boxes; reshaped to ``[-1, 4]`` and read as
        ``(xmin, ymin, xmax, ymax)`` columns.
      prior_variances: variances; reshaped to ``[-1, 4]``, one column per
        regressed quantity.
    """
    mc = self.mc
    print('self.mbox_loc:', self.mbox_loc)

    # One row of four regression outputs per prior, per batch element.
    loc = tf.reshape(self.mbox_loc, [mc.BATCH_SIZE, -1, 4])
    d_cx, d_cy, d_w, d_h = tf.unstack(loc, axis=2)

    priors = tf.reshape(prior_bboxes, [-1, 4])
    variances = tf.reshape(prior_variances, [-1, 4])

    # Corner-format priors -> size and centre.
    prior_xmin, prior_ymin = priors[:, 0], priors[:, 1]
    prior_xmax, prior_ymax = priors[:, 2], priors[:, 3]
    prior_w = prior_xmax - prior_xmin
    prior_h = prior_ymax - prior_ymin
    prior_cx = (prior_xmin + prior_xmax) / 2.
    prior_cy = (prior_ymin + prior_ymax) / 2.

    # Centre offsets are linear in the prior size; sizes are exponential.
    cx = tf.identity(variances[:, 0] * d_cx * prior_w + prior_cx)
    cy = tf.identity(variances[:, 1] * d_cy * prior_h + prior_cy)
    width = tf.identity(
        util.safe_exp(variances[:, 2] * d_w, mc.EXP_THRESH) * prior_w)
    height = tf.identity(
        util.safe_exp(variances[:, 3] * d_h, mc.EXP_THRESH) * prior_h)

    xmins, ymins, xmaxs, ymaxs = util.bbox_transform(
        [cx, cy, width, height])

    # NOTE: the corners are not clamped to [0, 1] before scaling; the clamp
    # that used to sit here is disabled.
    # NOTE(review): scaling by the image size presumes normalised
    # coordinates - confirm against the prior-box generator.
    xmins = xmins * mc.IMAGE_WIDTH
    xmaxs = xmaxs * mc.IMAGE_WIDTH
    ymins = ymins * mc.IMAGE_HEIGHT
    ymaxs = ymaxs * mc.IMAGE_HEIGHT

    self.decode_boxes = tf.stack([xmins, ymins, xmaxs, ymaxs], axis=2)
    self._add_act(self.decode_boxes, 'decode_boxes')
def _add_interpretation_graph(self):
    """Build the ops that turn ``self.preds`` into detections.

    Splits the last axis of ``self.preds`` into class scores, confidence
    scores and box deltas, decodes the deltas against ``mc.ANCHOR_BOX``,
    clips the boxes to the image, updates ``self.ious`` with the masked IOU
    against ``self.box_input`` and derives the per-anchor score and class.

    Sets ``pred_class_probs``, ``pred_conf``, ``pred_box_delta``,
    ``num_objects``, ``det_boxes``, ``ious``, ``det_probs`` and
    ``det_class`` on ``self``.
    """
    mc = self.mc
    summarize = self._activation_summary

    with tf.variable_scope('interpret_output'):
        preds = self.preds

        # Channel layout along the last axis:
        # [class scores | confidence scores | box deltas].
        conf_start = mc.ANCHOR_PER_GRID * mc.CLASSES
        delta_start = mc.ANCHOR_PER_GRID + conf_start

        # probability
        class_scores = tf.reshape(
            preds[:, :, :, :conf_start], [-1, mc.CLASSES])
        self.pred_class_probs = tf.reshape(
            tf.nn.softmax(class_scores),
            [mc.BATCH_SIZE, mc.ANCHORS, mc.CLASSES],
            name='pred_class_probs')

        # confidence
        conf_scores = tf.reshape(
            preds[:, :, :, conf_start:delta_start],
            [mc.BATCH_SIZE, mc.ANCHORS])
        self.pred_conf = tf.sigmoid(
            conf_scores, name='pred_confidence_score')

        # bbox_delta
        self.pred_box_delta = tf.reshape(
            preds[:, :, :, delta_start:],
            [mc.BATCH_SIZE, mc.ANCHORS, 4],
            name='bbox_delta')

        # Number of objects; used to normalize bbox and classification loss.
        self.num_objects = tf.reduce_sum(self.input_mask, name='num_objects')

        with tf.variable_scope('bbox'):
            with tf.variable_scope('stretching'):
                delta_x, delta_y, delta_w, delta_h = tf.unstack(
                    self.pred_box_delta, axis=2)

                anchors = mc.ANCHOR_BOX
                anchor_x = anchors[:, 0]
                anchor_y = anchors[:, 1]
                anchor_w = anchors[:, 2]
                anchor_h = anchors[:, 3]

                # Centres move linearly with the anchor size; sizes scale
                # exponentially (bounded by safe_exp).
                cx = tf.identity(
                    anchor_x + delta_x * anchor_w, name='bbox_cx')
                cy = tf.identity(
                    anchor_y + delta_y * anchor_h, name='bbox_cy')
                width = tf.identity(
                    anchor_w * util.safe_exp(delta_w, mc.EXP_THRESH),
                    name='bbox_width')
                height = tf.identity(
                    anchor_h * util.safe_exp(delta_h, mc.EXP_THRESH),
                    name='bbox_height')

                for tensor, tag in (
                        (delta_x, 'delta_x'),
                        (delta_y, 'delta_y'),
                        (delta_w, 'delta_w'),
                        (delta_h, 'delta_h'),
                        (cx, 'bbox_cx'),
                        (cy, 'bbox_cy'),
                        (width, 'bbox_width'),
                        (height, 'bbox_height')):
                    summarize(tensor, tag)

            with tf.variable_scope('trimming'):
                xmins, ymins, xmaxs, ymaxs = util.bbox_transform(
                    [cx, cy, width, height])

                # The max x position is mc.IMAGE_WIDTH - 1 since we use
                # zero-based pixels. Same for y.
                last_col = mc.IMAGE_WIDTH - 1.0
                last_row = mc.IMAGE_HEIGHT - 1.0

                xmins = tf.minimum(
                    tf.maximum(0.0, xmins), last_col, name='bbox_xmin')
                summarize(xmins, 'box_xmin')

                ymins = tf.minimum(
                    tf.maximum(0.0, ymins), last_row, name='bbox_ymin')
                summarize(ymins, 'box_ymin')

                xmaxs = tf.maximum(
                    tf.minimum(last_col, xmaxs), 0.0, name='bbox_xmax')
                summarize(xmaxs, 'box_xmax')

                ymaxs = tf.maximum(
                    tf.minimum(last_row, ymaxs), 0.0, name='bbox_ymax')
                summarize(ymaxs, 'box_ymax')

                clipped = util.bbox_transform_inv(
                    [xmins, ymins, xmaxs, ymaxs])
                self.det_boxes = tf.transpose(
                    tf.stack(clipped), (1, 2, 0), name='bbox')

        with tf.variable_scope('IOU'):
            def _tensor_iou(box1, box2):
                """Masked IOU of two boxes given as [xmin, ymin, xmax, ymax]."""
                with tf.variable_scope('intersection'):
                    left = tf.maximum(box1[0], box2[0], name='xmin')
                    top = tf.maximum(box1[1], box2[1], name='ymin')
                    right = tf.minimum(box1[2], box2[2], name='xmax')
                    bottom = tf.minimum(box1[3], box2[3], name='ymax')

                    inter_w = tf.maximum(0.0, right - left, name='inter_w')
                    inter_h = tf.maximum(0.0, bottom - top, name='inter_h')
                    intersection = tf.multiply(
                        inter_w, inter_h, name='intersection')

                with tf.variable_scope('union'):
                    w1 = tf.subtract(box1[2], box1[0], name='w1')
                    h1 = tf.subtract(box1[3], box1[1], name='h1')
                    w2 = tf.subtract(box2[2], box2[0], name='w2')
                    h2 = tf.subtract(box2[3], box2[1], name='h2')

                    union = w1 * h1 + w2 * h2 - intersection

                # EPSILON keeps the division finite for empty boxes; the
                # mask multiplies the result by input_mask per anchor.
                ratio = intersection / (union + mc.EPSILON)
                mask = tf.reshape(
                    self.input_mask, [mc.BATCH_SIZE, mc.ANCHORS])
                return ratio * mask

            detected = util.bbox_transform(
                tf.unstack(self.det_boxes, axis=2))
            labelled = util.bbox_transform(
                tf.unstack(self.box_input, axis=2))
            self.ious = self.ious.assign(_tensor_iou(detected, labelled))
            summarize(self.ious, 'conf_score')

        with tf.variable_scope('probability'):
            summarize(self.pred_class_probs, 'class_probs')

            # Class probability weighted by the anchor's confidence.
            confidence = tf.reshape(
                self.pred_conf, [mc.BATCH_SIZE, mc.ANCHORS, 1])
            probs = tf.multiply(
                self.pred_class_probs, confidence, name='final_class_prob')
            summarize(probs, 'final_class_prob')

            self.det_probs = tf.reduce_max(probs, 2, name='score')
            self.det_class = tf.argmax(probs, 2, name='class_idx')
def _define_bbox(pred_bbox_delta, ANCHOR_BOX):
    """Decode box deltas against anchors and clip the result to the image.

    NOTE(review): the body reads ``mc`` and ``self`` although neither is a
    parameter; presumably this function is defined inside a method that
    provides both - confirm.

    Args:
      pred_bbox_delta: tensor unstacked along axis 2 into four deltas
        (x, y, w, h).
      ANCHOR_BOX: anchors indexed as ``[:, 0..3]`` for x, y, w, h.

    Returns:
      The output of ``util.bbox_transform_inv`` on the clipped corners,
      stacked and transposed with permutation ``(1, 2, 0)``.
    """
    summarize = self._activation_summary

    delta_x, delta_y, delta_w, delta_h = tf.unstack(pred_bbox_delta, axis=2)

    anchor_x = ANCHOR_BOX[:, 0]
    anchor_y = ANCHOR_BOX[:, 1]
    anchor_w = ANCHOR_BOX[:, 2]
    anchor_h = ANCHOR_BOX[:, 3]

    # Centres move linearly with the anchor size; sizes scale exponentially
    # (bounded by safe_exp).
    cx = tf.identity(anchor_x + delta_x * anchor_w, name='bbox_cx')
    cy = tf.identity(anchor_y + delta_y * anchor_h, name='bbox_cy')
    width = tf.identity(
        anchor_w * util.safe_exp(delta_w, mc.EXP_THRESH), name='bbox_width')
    height = tf.identity(
        anchor_h * util.safe_exp(delta_h, mc.EXP_THRESH), name='bbox_height')

    for tensor, tag in (
            (delta_x, 'delta_x'),
            (delta_y, 'delta_y'),
            (delta_w, 'delta_w'),
            (delta_h, 'delta_h'),
            (cx, 'bbox_cx'),
            (cy, 'bbox_cy'),
            (width, 'bbox_width'),
            (height, 'bbox_height')):
        summarize(tensor, tag)

    with tf.variable_scope('trimming'):
        xmins, ymins, xmaxs, ymaxs = util.bbox_transform(
            [cx, cy, width, height])

        # The max x position is mc.IMAGE_WIDTH - 1 since we use zero-based
        # pixels. Same for y.
        last_col = mc.IMAGE_WIDTH - 1.0
        last_row = mc.IMAGE_HEIGHT - 1.0

        xmins = tf.minimum(
            tf.maximum(0.0, xmins), last_col, name='bbox_xmin')
        summarize(xmins, 'box_xmin')

        ymins = tf.minimum(
            tf.maximum(0.0, ymins), last_row, name='bbox_ymin')
        summarize(ymins, 'box_ymin')

        xmaxs = tf.maximum(
            tf.minimum(last_col, xmaxs), 0.0, name='bbox_xmax')
        summarize(xmaxs, 'box_xmax')

        ymaxs = tf.maximum(
            tf.minimum(last_row, ymaxs), 0.0, name='bbox_ymax')
        summarize(ymaxs, 'box_ymax')

        clipped = util.bbox_transform_inv([xmins, ymins, xmaxs, ymaxs])
        det_boxes = tf.transpose(tf.stack(clipped), (1, 2, 0), name='bbox')

    return det_boxes
def _add_interpretation_graph(self):
    """Build the ops that turn ``self.preds`` into detections.

    Splits the last axis of ``self.preds`` into class scores, confidence
    scores and box deltas, decodes the deltas against ``mc.ANCHOR_BOX`` and
    clips the corners to the image. IOUs are computed only for the anchors
    selected by ``self.paired_aidx_values`` against the ``.values`` of the
    ``self.box_input`` entries, then scattered into a
    ``[BATCH_SIZE, ANCHORS]`` tensor.

    Sets ``pred_class_probs``, ``pred_conf``, ``pred_box_delta``,
    ``num_objects``, ``det_boxes`` (a dict of corner tensors), ``_ious``,
    ``det_probs``, ``det_class`` and the four outputs of
    ``self.filter_prediction()`` on ``self``.
    """
    mc = self.mc
    summarize = self._activation_summary
    paired_idx = self.paired_aidx_values

    with tf.variable_scope('interpret_output'):
        preds = self.preds

        # Channel layout along the last axis:
        # [class scores | confidence scores | box deltas].
        conf_start = mc.ANCHOR_PER_GRID * mc.CLASSES
        delta_start = mc.ANCHOR_PER_GRID + conf_start

        # probability
        class_scores = tf.reshape(
            preds[:, :, :, :conf_start], [-1, mc.CLASSES])
        self.pred_class_probs = tf.reshape(
            tf.nn.softmax(class_scores),
            [mc.BATCH_SIZE, mc.ANCHORS, mc.CLASSES],
            name='pred_class_probs')

        # confidence
        conf_scores = tf.reshape(
            preds[:, :, :, conf_start:delta_start],
            [mc.BATCH_SIZE, mc.ANCHORS])
        self.pred_conf = tf.sigmoid(
            conf_scores, name='pred_confidence_score')

        # bbox_delta
        self.pred_box_delta = tf.reshape(
            preds[:, :, :, delta_start:],
            [mc.BATCH_SIZE, mc.ANCHORS, 4],
            name='bbox_delta')

        # Number of objects, used to normalize bbox and classification
        # loss: the count of xmin values stored in box_input.
        gt_xmin_values = self.box_input["image/object/bbox/xmin"].values
        self.num_objects = tf.cast(tf.size(gt_xmin_values), tf.float32)

        with tf.variable_scope('bbox'):
            with tf.variable_scope('stretching'):
                delta_x, delta_y, delta_w, delta_h = tf.unstack(
                    self.pred_box_delta, axis=2)

                anchors = mc.ANCHOR_BOX
                anchor_x = anchors[:, 0]
                anchor_y = anchors[:, 1]
                anchor_w = anchors[:, 2]
                anchor_h = anchors[:, 3]

                cx = tf.identity(
                    anchor_x + delta_x * anchor_w, name='bbox_cx')
                cy = tf.identity(
                    anchor_y + delta_y * anchor_h, name='bbox_cy')
                width = tf.identity(
                    anchor_w * util.safe_exp(delta_w, mc.EXP_THRESH),
                    name='bbox_width')
                height = tf.identity(
                    anchor_h * util.safe_exp(delta_h, mc.EXP_THRESH),
                    name='bbox_height')

                for tensor, tag in (
                        (delta_x, 'delta_x'),
                        (delta_y, 'delta_y'),
                        (delta_w, 'delta_w'),
                        (delta_h, 'delta_h'),
                        (cx, 'bbox_cx'),
                        (cy, 'bbox_cy'),
                        (width, 'bbox_width'),
                        (height, 'bbox_height')):
                    summarize(tensor, tag)

            with tf.variable_scope('trimming'):
                xmins, ymins, xmaxs, ymaxs = util.bbox_transform(
                    [cx, cy, width, height])

                # The max x position is mc.IMAGE_WIDTH - 1 since we use
                # zero-based pixels. Same for y.
                # Each clipped tensor has shape [mc.BATCH_SIZE, mc.ANCHORS].
                last_col = mc.IMAGE_WIDTH - 1.0
                last_row = mc.IMAGE_HEIGHT - 1.0

                xmins = tf.minimum(
                    tf.maximum(0.0, xmins), last_col, name='bbox_xmin')
                summarize(xmins, 'box_xmin')

                ymins = tf.minimum(
                    tf.maximum(0.0, ymins), last_row, name='bbox_ymin')
                summarize(ymins, 'box_ymin')

                xmaxs = tf.maximum(
                    tf.minimum(last_col, xmaxs), 0.0, name='bbox_xmax')
                summarize(xmaxs, 'box_xmax')

                ymaxs = tf.maximum(
                    tf.minimum(last_row, ymaxs), 0.0, name='bbox_ymax')
                summarize(ymaxs, 'box_ymax')

                self.det_boxes = {
                    "xmins": xmins,
                    "ymins": ymins,
                    "xmaxs": xmaxs,
                    "ymaxs": ymaxs,
                }

        with tf.name_scope('IOU'):
            def _tensor_iou(box1, box2):
                """IOU of two boxes given as dicts keyed xmin/ymin/xmax/ymax."""
                with tf.name_scope('intersection'):
                    left = tf.maximum(box1["xmin"], box2["xmin"], name='xmin')
                    top = tf.maximum(box1["ymin"], box2["ymin"], name='ymin')
                    right = tf.minimum(
                        box1["xmax"], box2["xmax"], name='xmax')
                    bottom = tf.minimum(
                        box1["ymax"], box2["ymax"], name='ymax')

                    inter_w = tf.maximum(0.0, right - left, name='inter_w')
                    inter_h = tf.maximum(0.0, bottom - top, name='inter_h')
                    intersection = tf.multiply(
                        inter_w, inter_h, name='intersection')

                with tf.name_scope('union'):
                    w1 = tf.subtract(box1["xmax"], box1["xmin"], name='w1')
                    h1 = tf.subtract(box1["ymax"], box1["ymin"], name='h1')
                    w2 = tf.subtract(box2["xmax"], box2["xmin"], name='w2')
                    h2 = tf.subtract(box2["ymax"], box2["ymin"], name='h2')

                    union = tf.cast(
                        w1 * h1 + w2 * h2 - intersection, dtype=tf.float32)

                # EPSILON keeps the division finite when the union is zero.
                numerator = tf.cast(intersection, dtype=tf.float32)
                denominator = union + tf.constant(
                    mc.EPSILON, dtype=tf.float32)
                return tf.truediv(numerator, denominator)

            # predicted boxes: only the anchors listed in paired_aidx_values
            responsible_boxes = {
                "xmin": tf.cast(tf.gather_nd(xmins, paired_idx), tf.float32),
                "ymin": tf.cast(tf.gather_nd(ymins, paired_idx), tf.float32),
                "xmax": tf.cast(tf.gather_nd(xmaxs, paired_idx), tf.float32),
                "ymax": tf.cast(tf.gather_nd(ymaxs, paired_idx), tf.float32),
            }
            # input boxes
            input_boxes = {
                "xmin": tf.cast(
                    self.box_input["image/object/bbox/xmin"].values,
                    tf.float32),
                "ymin": tf.cast(
                    self.box_input["image/object/bbox/ymin"].values,
                    tf.float32),
                "xmax": tf.cast(
                    self.box_input["image/object/bbox/xmax"].values,
                    tf.float32),
                "ymax": tf.cast(
                    self.box_input["image/object/bbox/ymax"].values,
                    tf.float32),
            }
            mini_ious_values = _tensor_iou(responsible_boxes, input_boxes)

            # After computing the IOUs of the responsible boxes, place them
            # on a plane covering all anchors, responsible or not.
            self._ious = tf.scatter_nd(
                paired_idx, mini_ious_values, [mc.BATCH_SIZE, mc.ANCHORS])
            summarize(self._ious, 'conf_score')

        with tf.variable_scope('probability'):
            summarize(self.pred_class_probs, 'class_probs')

            # Class probability weighted by the anchor's confidence.
            confidence = tf.reshape(
                self.pred_conf, [mc.BATCH_SIZE, mc.ANCHORS, 1])
            probs = tf.multiply(
                self.pred_class_probs, confidence, name='final_class_prob')
            summarize(probs, 'final_class_prob')

            self.det_probs = tf.reduce_max(probs, axis=2, name='score')
            self.det_class = tf.argmax(probs, axis=2, name='class_idx')
            summarize(
                tf.gather_nd(self.det_class, paired_idx), 'detected_classes')

        # get prediction boxes
        # NOTE(review): assumed to run inside 'interpret_output' but outside
        # 'probability' - confirm the intended scope.
        (self.prediction_boxes,
         self.score,
         self.cls_idx_per_img,
         self.filter_summaries) = self.filter_prediction()
def _add_interpretation_graph(self):
    """Build the ops that turn ``self.preds`` into detections.

    The last axis of ``self.preds`` is split into class scores followed by
    box deltas (this variant has no confidence channel). Class scores go
    through a sigmoid when ``mc.CLASSES == 1`` and a softmax otherwise. The
    deltas are decoded against ``mc.ANCHOR_BOX`` and clipped to the image.

    Sets ``pred_class_probs``, ``pred_box_delta``, ``det_boxes``,
    ``det_probs`` and ``det_class`` on ``self``.
    """
    mc = self.mc
    summarize = self._activation_summary

    with tf.variable_scope('interpret_output'):
        preds = self.preds

        # probability
        num_class_probs = mc.ANCHOR_PER_GRID * mc.CLASSES
        # A softmax over a single class would always give 1, so the
        # one-class case uses a sigmoid instead.
        to_probability = tf.sigmoid if mc.CLASSES == 1 else tf.nn.softmax
        class_scores = tf.reshape(
            preds[:, :, :, :num_class_probs], [-1, mc.CLASSES])
        self.pred_class_probs = tf.reshape(
            to_probability(class_scores),
            [mc.BATCH_SIZE, mc.ANCHORS, mc.CLASSES],
            name='pred_class_probs')

        # bbox_delta
        self.pred_box_delta = tf.reshape(
            preds[:, :, :, num_class_probs:],
            [mc.BATCH_SIZE, mc.ANCHORS, 4],
            name='bbox_delta')

        with tf.variable_scope('bbox'):
            with tf.variable_scope('stretching'):
                delta_x, delta_y, delta_w, delta_h = tf.unstack(
                    self.pred_box_delta, axis=2)

                anchors = mc.ANCHOR_BOX
                anchor_x = anchors[:, 0]
                anchor_y = anchors[:, 1]
                anchor_w = anchors[:, 2]
                anchor_h = anchors[:, 3]

                # Centres move linearly with the anchor size; sizes scale
                # exponentially (bounded by safe_exp).
                cx = tf.identity(
                    anchor_x + delta_x * anchor_w, name='bbox_cx')
                cy = tf.identity(
                    anchor_y + delta_y * anchor_h, name='bbox_cy')
                width = tf.identity(
                    anchor_w * util.safe_exp(delta_w, mc.EXP_THRESH),
                    name='bbox_width')
                height = tf.identity(
                    anchor_h * util.safe_exp(delta_h, mc.EXP_THRESH),
                    name='bbox_height')

                for tensor, tag in (
                        (delta_x, 'delta_x'),
                        (delta_y, 'delta_y'),
                        (delta_w, 'delta_w'),
                        (delta_h, 'delta_h'),
                        (cx, 'bbox_cx'),
                        (cy, 'bbox_cy'),
                        (width, 'bbox_width'),
                        (height, 'bbox_height')):
                    summarize(tensor, tag)

            with tf.variable_scope('trimming'):
                xmins, ymins, xmaxs, ymaxs = util.bbox_transform(
                    [cx, cy, width, height])

                # The max x position is mc.IMAGE_WIDTH - 1 since we use
                # zero-based pixels. Same for y.
                last_col = mc.IMAGE_WIDTH - 1.0
                last_row = mc.IMAGE_HEIGHT - 1.0

                xmins = tf.minimum(
                    tf.maximum(0.0, xmins), last_col, name='bbox_xmin')
                summarize(xmins, 'box_xmin')

                ymins = tf.minimum(
                    tf.maximum(0.0, ymins), last_row, name='bbox_ymin')
                summarize(ymins, 'box_ymin')

                xmaxs = tf.maximum(
                    tf.minimum(last_col, xmaxs), 0.0, name='bbox_xmax')
                summarize(xmaxs, 'box_xmax')

                ymaxs = tf.maximum(
                    tf.minimum(last_row, ymaxs), 0.0, name='bbox_ymax')
                summarize(ymaxs, 'box_ymax')

                clipped = util.bbox_transform_inv(
                    [xmins, ymins, xmaxs, ymaxs])
                self.det_boxes = tf.transpose(
                    tf.stack(clipped), (1, 2, 0), name='bbox')

        with tf.variable_scope('probability'):
            summarize(self.pred_class_probs, 'class_probs')

            # No confidence channel here: the class probabilities are the
            # final scores.
            probs = self.pred_class_probs
            self.det_probs = tf.reduce_max(probs, 2, name='score')
            self.det_class = tf.argmax(probs, 2, name='class_idx')
def _add_interpretation_graph(self):
    """Interpret NN output.

    Splits the last axis of ``self.preds`` into class scores, confidence
    scores and ``self.num_mask_params`` box parameters per anchor, decodes
    the box parameters against ``mc.ANCHOR_BOX`` according to
    ``mc.ENCODING_TYPE`` ('normal', 'asymmetric_linear' or
    'asymmetric_log'), optionally decodes four extra offsets when
    ``mc.EIGHT_POINT_REGRESSION`` is set, clips the corners to the image,
    updates ``self.ious`` with the masked IOU against ``self.box_input`` and
    derives the per-anchor score and class.

    Sets ``pred_class_probs``, ``pred_conf``, ``pred_box_delta``,
    ``num_objects``, ``det_boxes_uncropped``, ``det_boxes``, ``ious``,
    ``det_probs`` and ``det_class`` on ``self``.
    """
    mc = self.mc
    with tf.variable_scope('interpret_output') as scope:
        preds = self.preds

        # probability
        # The first ANCHOR_PER_GRID * CLASSES channels are class scores,
        # soft-maxed per anchor.
        num_class_probs = mc.ANCHOR_PER_GRID * mc.CLASSES
        self.pred_class_probs = tf.reshape(
            tf.nn.softmax(
                tf.reshape(preds[:, :, :, :num_class_probs], [-1, mc.CLASSES])),
            [mc.BATCH_SIZE, mc.ANCHORS, mc.CLASSES],
            name='pred_class_probs')

        # confidence
        # The next ANCHOR_PER_GRID channels are confidence scores.
        num_confidence_scores = mc.ANCHOR_PER_GRID + num_class_probs
        self.pred_conf = tf.sigmoid(tf.reshape(
            preds[:, :, :, num_class_probs:num_confidence_scores],
            [mc.BATCH_SIZE, mc.ANCHORS]), name='pred_confidence_score')

        # bbox_delta
        # All remaining channels are box parameters, num_mask_params per
        # anchor.
        self.pred_box_delta = tf.reshape(
            preds[:, :, :, num_confidence_scores:],
            [mc.BATCH_SIZE, mc.ANCHORS, self.num_mask_params],
            name='bbox_delta')

        # number of object. Used to normalize bbox and classification loss
        self.num_objects = tf.reduce_sum(self.input_mask, name='num_objects')

        with tf.variable_scope('bbox') as scope:
            with tf.variable_scope('stretching'):
                # Unpack the deltas. The variable names depend on the
                # encoding: centre/size deltas for 'normal', corner deltas
                # otherwise; eight-point mode adds four offset deltas.
                if self.mc.EIGHT_POINT_REGRESSION:
                    if mc.ENCODING_TYPE == 'normal':
                        delta_x, delta_y, delta_w, delta_h, \
                            delta_of1, delta_of2, delta_of3, delta_of4 = tf.unstack(
                                self.pred_box_delta, axis=2)
                    else:
                        delta_xmin, delta_ymin, delta_xmax, delta_ymax, \
                            delta_of1, delta_of2, delta_of3, delta_of4 = tf.unstack(
                                self.pred_box_delta, axis=2)
                else:
                    if mc.ENCODING_TYPE == 'normal':
                        delta_x, delta_y, delta_w, delta_h = tf.unstack(
                            self.pred_box_delta, axis=2)
                    else:
                        delta_xmin, delta_ymin, delta_xmax, delta_ymax = tf.unstack(
                            self.pred_box_delta, axis=2)

                anchor_x = mc.ANCHOR_BOX[:, 0]
                anchor_y = mc.ANCHOR_BOX[:, 1]
                anchor_w = mc.ANCHOR_BOX[:, 2]
                anchor_h = mc.ANCHOR_BOX[:, 3]

                if mc.ENCODING_TYPE == 'asymmetric_linear':
                    # Each corner is the anchor's corner shifted by a delta
                    # scaled with the anchor size.
                    # NOTE(review): the transpose presumably turns the
                    # anchor array into four per-coordinate rows for
                    # bbox_transform - confirm.
                    xmins_a, ymins_a, xmaxs_a, ymaxs_a = util.bbox_transform(
                        np.transpose(mc.ANCHOR_BOX))
                    xmins = tf.identity(xmins_a + delta_xmin * anchor_w,
                                        name='bbox_xmin_uncropped')
                    ymins = tf.identity(ymins_a + delta_ymin * anchor_h,
                                        name='bbox_ymin_uncropped')
                    xmaxs = tf.identity(xmaxs_a + delta_xmax * anchor_w,
                                        name='bbox_xmax_uncropped')
                    ymaxs = tf.identity(ymaxs_a + delta_ymax * anchor_h,
                                        name='bbox_ymax_uncropped')
                    box_center_x, box_center_y, box_width, box_height = util.bbox_transform_inv(
                        [xmins, ymins, xmaxs, ymaxs])
                    self._activation_summary(delta_xmin, 'delta_xmin')
                    self._activation_summary(delta_ymin, 'delta_ymin')
                    self._activation_summary(delta_xmax, 'delta_xmax')
                    self._activation_summary(delta_ymax, 'delta_ymax')
                elif mc.ENCODING_TYPE == 'asymmetric_log':
                    # Each corner sits anchor_size * (exp(delta) - 0.5) away
                    # from the anchor centre, so a zero delta reproduces the
                    # anchor's own corner. This local EPSILON is distinct
                    # from mc.EPSILON.
                    EPSILON = 0.5
                    xmins = tf.identity(
                        anchor_x - (anchor_w * (util.safe_exp(delta_xmin, mc.EXP_THRESH) - EPSILON)),
                        name='bbox_xmin_uncropped')
                    ymins = tf.identity(
                        anchor_y - (anchor_h * (util.safe_exp(delta_ymin, mc.EXP_THRESH) - EPSILON)),
                        name='bbox_ymin_uncropped')
                    xmaxs = tf.identity(
                        anchor_x + (anchor_w * (util.safe_exp(delta_xmax, mc.EXP_THRESH) - EPSILON)),
                        name='bbox_xmax_uncropped')
                    ymaxs = tf.identity(
                        anchor_y + (anchor_h * (util.safe_exp(delta_ymax, mc.EXP_THRESH) - EPSILON)),
                        name='bbox_ymax_uncropped')
                    box_center_x, box_center_y, box_width, box_height = util.bbox_transform_inv(
                        [xmins, ymins, xmaxs, ymaxs])
                    self._activation_summary(delta_xmin, 'delta_xmin')
                    self._activation_summary(delta_ymin, 'delta_ymin')
                    self._activation_summary(delta_xmax, 'delta_xmax')
                    self._activation_summary(delta_ymax, 'delta_ymax')
                else:
                    # 'normal' encoding: centres move linearly with the
                    # anchor size; sizes scale exponentially.
                    # NOTE(review): an ENCODING_TYPE other than the three
                    # handled here unpacks delta_xmin... above but lands in
                    # this branch, which reads delta_x... and would raise
                    # NameError.
                    box_center_x = tf.identity(anchor_x + delta_x * anchor_w,
                                               name='bbox_cx')
                    box_center_y = tf.identity(anchor_y + delta_y * anchor_h,
                                               name='bbox_cy')
                    box_width = tf.identity(
                        anchor_w * util.safe_exp(delta_w, mc.EXP_THRESH),
                        name='bbox_width')
                    box_height = tf.identity(
                        anchor_h * util.safe_exp(delta_h, mc.EXP_THRESH),
                        name='bbox_height')
                    self._activation_summary(delta_x, 'delta_x')
                    self._activation_summary(delta_y, 'delta_y')
                    self._activation_summary(delta_w, 'delta_w')
                    self._activation_summary(delta_h, 'delta_h')

                # NOTE(review): assumed to apply to every encoding type
                # (all branches define these tensors) - confirm.
                self._activation_summary(box_center_x, 'bbox_cx')
                self._activation_summary(box_center_y, 'bbox_cy')
                self._activation_summary(box_width, 'bbox_width')
                self._activation_summary(box_height, 'bbox_height')

                if self.mc.EIGHT_POINT_REGRESSION:
                    # The four extra offsets are decoded as
                    # anchor_diagonal * exp(delta) - EPSILON, where the
                    # diagonal comes from the anchor width and height.
                    EPSILON = 1e-8
                    anchor_diag = (mc.ANCHOR_BOX[:, 2]**2 + mc.ANCHOR_BOX[:, 3]**2)**(0.5)
                    box_of1 = tf.identity(
                        (anchor_diag * util.safe_exp(delta_of1, mc.EXP_THRESH)) - EPSILON,
                        name='bbox_of1')
                    box_of2 = tf.identity(
                        (anchor_diag * util.safe_exp(delta_of2, mc.EXP_THRESH)) - EPSILON,
                        name='bbox_of2')
                    box_of3 = tf.identity(
                        (anchor_diag * util.safe_exp(delta_of3, mc.EXP_THRESH)) - EPSILON,
                        name='bbox_of3')
                    box_of4 = tf.identity(
                        (anchor_diag * util.safe_exp(delta_of4, mc.EXP_THRESH)) - EPSILON,
                        name='bbox_of4')
                    self._activation_summary(delta_of1, 'delta_of1')
                    self._activation_summary(delta_of2, 'delta_of2')
                    self._activation_summary(delta_of3, 'delta_of3')
                    self._activation_summary(delta_of4, 'delta_of4')
                    self._activation_summary(box_of1, 'box_of1')
                    self._activation_summary(box_of2, 'box_of2')
                    self._activation_summary(box_of3, 'box_of3')
                    self._activation_summary(box_of4, 'box_of4')

            with tf.variable_scope('trimming'):
                # Get corner coordinates. Without eight-point regression the
                # asymmetric encodings already produced xmins..ymaxs above,
                # so only the 'normal' encoding needs the conversion.
                if self.mc.EIGHT_POINT_REGRESSION:
                    xmins, ymins, xmaxs, ymaxs, box_of1, box_of2, box_of3, box_of4 = util.bbox_transform2(
                        [
                            box_center_x, box_center_y, box_width, box_height,
                            box_of1, box_of2, box_of3, box_of4
                        ])
                else:
                    if mc.ENCODING_TYPE == 'normal':
                        xmins, ymins, xmaxs, ymaxs = util.bbox_transform([
                            box_center_x, box_center_y, box_width, box_height
                        ])

                # Keep a copy of the boxes before they are clipped to the
                # image.
                if self.mc.EIGHT_POINT_REGRESSION:
                    self.det_boxes_uncropped = tf.transpose(
                        tf.stack(
                            util.bbox_transform_inv2([
                                xmins, ymins, xmaxs, ymaxs,
                                box_of1, box_of2, box_of3, box_of4
                            ])),
                        (1, 2, 0), name='bbox_uncropped')
                else:
                    self.det_boxes_uncropped = tf.transpose(
                        tf.stack(
                            util.bbox_transform_inv(
                                [xmins, ymins, xmaxs, ymaxs])),
                        (1, 2, 0), name='bbox_uncropped')

                # The max x position is mc.IMAGE_WIDTH - 1 since we use zero-based
                # pixels. Same for y.
                xmins = tf.minimum(tf.maximum(0.0, xmins), mc.IMAGE_WIDTH - 1.0,
                                   name='bbox_xmin')
                self._activation_summary(xmins, 'box_xmin')
                ymins = tf.minimum(tf.maximum(0.0, ymins), mc.IMAGE_HEIGHT - 1.0,
                                   name='bbox_ymin')
                self._activation_summary(ymins, 'box_ymin')
                xmaxs = tf.maximum(tf.minimum(mc.IMAGE_WIDTH - 1.0, xmaxs), 0.0,
                                   name='bbox_xmax')
                self._activation_summary(xmaxs, 'box_xmax')
                ymaxs = tf.maximum(tf.minimum(mc.IMAGE_HEIGHT - 1.0, ymaxs), 0.0,
                                   name='bbox_ymax')
                self._activation_summary(ymaxs, 'box_ymax')

                # Clipped boxes; the extra offsets (if any) are passed
                # through unclipped.
                if self.mc.EIGHT_POINT_REGRESSION:
                    self.det_boxes = tf.transpose(tf.stack(
                        util.bbox_transform_inv2([
                            xmins, ymins, xmaxs, ymaxs,
                            box_of1, box_of2, box_of3, box_of4
                        ])), (1, 2, 0), name='bbox')
                else:
                    self.det_boxes = tf.transpose(tf.stack(
                        util.bbox_transform_inv([xmins, ymins, xmaxs, ymaxs])),
                        (1, 2, 0), name='bbox')

        with tf.variable_scope('IOU'):
            def _tensor_iou(box1, box2):
                # Only elements 0..3 (xmin, ymin, xmax, ymax) of each box
                # are read, so in eight-point mode the extra offsets do not
                # influence the IOU.
                with tf.variable_scope('intersection'):
                    xmin = tf.maximum(box1[0], box2[0], name='xmin')
                    ymin = tf.maximum(box1[1], box2[1], name='ymin')
                    xmax = tf.minimum(box1[2], box2[2], name='xmax')
                    ymax = tf.minimum(box1[3], box2[3], name='ymax')
                    w = tf.maximum(0.0, xmax - xmin, name='inter_w')
                    h = tf.maximum(0.0, ymax - ymin, name='inter_h')
                    intersection = tf.multiply(w, h, name='intersection')
                with tf.variable_scope('union'):
                    w1 = tf.subtract(box1[2], box1[0], name='w1')
                    h1 = tf.subtract(box1[3], box1[1], name='h1')
                    w2 = tf.subtract(box2[2], box2[0], name='w2')
                    h2 = tf.subtract(box2[3], box2[1], name='h2')
                    union = w1 * h1 + w2 * h2 - intersection
                # mc.EPSILON keeps the division finite for empty boxes; the
                # result is multiplied by input_mask per anchor.
                return intersection/(union+mc.EPSILON) \
                    * tf.reshape(self.input_mask, [mc.BATCH_SIZE, mc.ANCHORS])

            if self.mc.EIGHT_POINT_REGRESSION:
                tensor_det_boxes = util.bbox_transform2(
                    tf.unstack(self.det_boxes, axis=2))
                tensor_input_boxes = util.bbox_transform2(
                    tf.unstack(self.box_input, axis=2))
            else:
                tensor_det_boxes = util.bbox_transform(
                    tf.unstack(self.det_boxes, axis=2))
                tensor_input_boxes = util.bbox_transform(
                    tf.unstack(self.box_input, axis=2))

            # self.ious is rebound to the result of its own assign op.
            self.ious = self.ious.assign(
                _tensor_iou(tensor_det_boxes, tensor_input_boxes))
            self._activation_summary(self.ious, 'conf_score')

        with tf.variable_scope('probability') as scope:
            self._activation_summary(self.pred_class_probs, 'class_probs')

            # Class probability weighted by the anchor's confidence.
            probs = tf.multiply(self.pred_class_probs,
                                tf.reshape(self.pred_conf, [mc.BATCH_SIZE, mc.ANCHORS, 1]),
                                name='final_class_prob')
            self._activation_summary(probs, 'final_class_prob')

            # Best class score and its index along axis 2 (the class axis).
            self.det_probs = tf.reduce_max(probs, 2, name='score')
            self.det_class = tf.argmax(probs, 2, name='class_idx')