def postprocess_detctions(refine_bbox_pred, refine_cls_prob, anchors,
                          is_training, gpu_id):
    """Decode refined boxes and collect the filtered detections of each class.

    Args:
        refine_bbox_pred: regression deltas passed to
            ``bbox_transform.rbbox_transform_inv``.
        refine_cls_prob: class probabilities; column ``j`` is used for class
            ``j``.
        anchors: reference boxes the deltas are decoded against.
        is_training: forwarded unchanged to ``filter_detections``.
        gpu_id: forwarded unchanged to ``filter_detections``.

    Returns:
        ``(boxes, scores, labels)`` concatenated over all classes. Boxes are
        reshaped to five columns and the label for class column ``j`` is
        ``j + 1``.
    """
    decoded_boxes = bbox_transform.rbbox_transform_inv(
        boxes=anchors,
        deltas=refine_bbox_pred,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)

    boxes_per_class, scores_per_class, labels_per_class = [], [], []
    for class_idx in range(cfgs.CLASS_NUM):
        class_prob = refine_cls_prob[:, class_idx]
        # filter_detections yields the row indices to keep for this class.
        keep = filter_detections(decoded_boxes, class_prob, is_training,
                                 gpu_id)

        kept_boxes = tf.reshape(tf.gather(decoded_boxes, keep), [-1, 5])
        kept_scores = tf.reshape(tf.gather(class_prob, keep), [-1])

        boxes_per_class.append(kept_boxes)
        scores_per_class.append(kept_scores)
        # Labels are 1-based: class column j is reported as j + 1.
        labels_per_class.append(tf.ones_like(kept_scores) * (class_idx + 1))

    return (tf.concat(boxes_per_class, axis=0),
            tf.concat(scores_per_class, axis=0),
            tf.concat(labels_per_class, axis=0))
def postprocess_detctions(rpn_bbox_pred, rpn_cls_prob, anchors, is_training):
    """Decode first-stage predictions and collect per-class detections.

    When ``cfgs.METHOD == 'H'`` the four-coordinate anchors are rewritten as
    ``[x_c, y_c, w, h, theta]`` rows with ``theta = -90``. When
    ``cfgs.ANGLE_RANGE == 180`` the anchors pass through
    ``coordinate_present_convert(..., -1)`` before decoding; after decoding
    only boxes with ``-180 <= theta < 0`` are kept (the class probabilities
    are filtered with the same indices) and the survivors pass through
    ``coordinate_present_convert(..., 1)``.

    Args:
        rpn_bbox_pred: regression deltas.
        rpn_cls_prob: class probabilities; column ``j`` is used for class
            ``j``.
        anchors: reference boxes.
        is_training: forwarded unchanged to ``filter_detections``.

    Returns:
        ``(boxes, scores, labels)`` concatenated over all classes; the label
        for class column ``j`` is ``j + 1``.
    """
    use_180_range = cfgs.ANGLE_RANGE == 180

    if cfgs.METHOD == 'H':
        center_x = (anchors[:, 2] + anchors[:, 0]) / 2
        center_y = (anchors[:, 3] + anchors[:, 1]) / 2
        box_h = anchors[:, 2] - anchors[:, 0] + 1
        box_w = anchors[:, 3] - anchors[:, 1] + 1
        angle = -90 * tf.ones_like(center_x)
        anchors = tf.transpose(
            tf.stack([center_x, center_y, box_w, box_h, angle]))

    # NOTE(review): this conversion is applied for every METHOD, not only
    # 'H' -- confirm that matches the intended nesting.
    if use_180_range:
        anchors = tf.py_func(coordinate_present_convert,
                             inp=[anchors, -1],
                             Tout=[tf.float32])
        anchors = tf.reshape(anchors, [-1, 5])

    decoded = bbox_transform.rbbox_transform_inv(boxes=anchors,
                                                 deltas=rpn_bbox_pred)

    if use_180_range:
        decoded_theta = tf.unstack(decoded, axis=1)[4]
        in_range = tf.logical_and(tf.less(decoded_theta, 0),
                                  tf.greater_equal(decoded_theta, -180))
        valid_rows = tf.reshape(tf.where(in_range), [-1])

        # Boxes and probabilities are filtered with the same row indices so
        # they stay aligned.
        decoded = tf.gather(decoded, valid_rows)
        rpn_cls_prob = tf.gather(rpn_cls_prob, valid_rows)

        decoded = tf.py_func(coordinate_present_convert,
                             inp=[decoded, 1],
                             Tout=[tf.float32])
        decoded = tf.reshape(decoded, [-1, 5])

    boxes_per_class, scores_per_class, labels_per_class = [], [], []
    for class_idx in range(cfgs.CLASS_NUM):
        class_prob = rpn_cls_prob[:, class_idx]
        keep = filter_detections(decoded, class_prob, is_training)

        kept_scores = tf.reshape(tf.gather(class_prob, keep), [-1])
        boxes_per_class.append(tf.reshape(tf.gather(decoded, keep), [-1, 5]))
        scores_per_class.append(kept_scores)
        labels_per_class.append(tf.ones_like(kept_scores) * (class_idx + 1))

    return (tf.concat(boxes_per_class, axis=0),
            tf.concat(scores_per_class, axis=0),
            tf.concat(labels_per_class, axis=0))
def iou_smooth_l1_loss(targets, preds, anchor_state, target_boxes, anchors,
                       sigma=3.0, is_refine=False):
    """Smooth-L1 regression loss re-weighted by a rotated-IoU factor.

    Only rows whose ``anchor_state`` equals 1 contribute. For those rows the
    element-wise smooth-L1 term is

        f(x) = 0.5 * (sigma * x)^2    if |x| < 1 / sigma^2
               |x| - 0.5 / sigma^2    otherwise

    summed over the last axis. Each row is then multiplied by
    ``-log(overlap) / (row_loss + cfgs.EPSILON)``, where ``overlap`` comes
    from ``iou_rotate_calculate2`` on the decoded prediction and the target
    box (all but its last column). The factor is wrapped in
    ``tf.stop_gradient`` so it acts as a constant weight.

    Args:
        targets: regression targets, same layout as ``preds``.
        preds: predicted regression deltas.
        anchor_state: per-anchor state; ``1`` selects a row.
        target_boxes: target boxes; the last column is dropped before the
            overlap computation.
        anchors: reference boxes. When ``cfgs.METHOD == 'H'`` and
            ``is_refine`` is false they are converted from four coordinates
            to ``[x_c, y_c, w, h, -90]`` first.
        sigma: smooth-L1 transition parameter.
        is_refine: set to skip the anchor conversion above.

    Returns:
        Scalar loss: the weighted sum divided by ``max(1, #selected rows)``.
    """
    if cfgs.METHOD == 'H' and not is_refine:
        center_x = (anchors[:, 2] + anchors[:, 0]) / 2
        center_y = (anchors[:, 3] + anchors[:, 1]) / 2
        box_h = anchors[:, 2] - anchors[:, 0] + 1
        box_w = anchors[:, 3] - anchors[:, 1] + 1
        angle = -90 * tf.ones_like(center_x)
        anchors = tf.transpose(
            tf.stack([center_x, center_y, box_w, box_h, angle]))

    sigma_sq = sigma ** 2
    is_positive = tf.equal(anchor_state, 1)
    positive_rows = tf.reshape(tf.where(is_positive), [-1])

    pos_preds = tf.gather(preds, positive_rows)
    pos_targets = tf.gather(targets, positive_rows)
    pos_target_boxes = tf.gather(target_boxes, positive_rows)
    pos_anchors = tf.gather(anchors, positive_rows)

    decoded = bbox_transform.rbbox_transform_inv(
        boxes=pos_anchors,
        deltas=pos_preds,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)

    abs_diff = tf.abs(pos_preds - pos_targets)
    quadratic_part = 0.5 * sigma_sq * tf.pow(abs_diff, 2)
    linear_part = abs_diff - 0.5 / sigma_sq
    elementwise_loss = tf.where(tf.less(abs_diff, 1.0 / sigma_sq),
                                quadratic_part, linear_part)

    overlaps = tf.py_func(iou_rotate_calculate2,
                          inp=[tf.reshape(decoded, [-1, 5]),
                               tf.reshape(pos_target_boxes[:, :-1], [-1, 5])],
                          Tout=[tf.float32])
    overlaps = tf.reshape(overlaps, [-1, 1])

    row_loss = tf.reshape(tf.reduce_sum(elementwise_loss, axis=1), [-1, 1])

    # Both numerator and denominator are constants for back-propagation.
    iou_weight = tf.stop_gradient(-1 * tf.log(overlaps)) / (
        tf.stop_gradient(row_loss) + cfgs.EPSILON)

    positive_count = tf.stop_gradient(tf.where(tf.equal(anchor_state, 1)))
    positive_count = tf.cast(tf.shape(positive_count)[0], tf.float32)
    positive_count = tf.maximum(1.0, positive_count)

    return tf.reduce_sum(row_loss * iou_weight) / positive_count
def postprocess_detctions(refine_bbox_pred, refine_cls_prob, refine_angle_prob,
                          anchors, is_training):
    """Decode refined boxes and pair them with a classification-based angle.

    Two box sets are produced: the regressed boxes, and a copy whose angle
    column is replaced by ``(-argmax(refine_angle_prob) - 0.5) * cfgs.OMEGA``.
    Filtering is driven by the angle-classified copy; the same indices are
    then applied to both sets.

    Args:
        refine_bbox_pred: regression deltas.
        refine_cls_prob: class probabilities; column ``j`` is used for class
            ``j``.
        refine_angle_prob: angle-bin probabilities, arg-maxed along axis 1.
        anchors: reference boxes the deltas are decoded against.
        is_training: forwarded unchanged to ``filter_detections``.

    Returns:
        ``(boxes, scores, labels, boxes_angle)`` concatenated over all
        classes; the label for class column ``j`` is ``j + 1``.
    """
    regressed_boxes = bbox_transform.rbbox_transform_inv(
        boxes=anchors,
        deltas=refine_bbox_pred,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)

    angle_bin = tf.cast(tf.argmax(refine_angle_prob, axis=1), tf.float32)
    classified_angle = (tf.reshape(angle_bin, [-1]) * -1 - 0.5) * cfgs.OMEGA

    ctr_x, ctr_y, width, height, _ = tf.unstack(regressed_boxes, axis=1)
    classified_boxes = tf.transpose(
        tf.stack([ctr_x, ctr_y, width, height, classified_angle]))

    kept_regressed, kept_classified = [], []
    kept_scores, kept_labels = [], []
    for class_idx in range(cfgs.CLASS_NUM):
        class_prob = refine_cls_prob[:, class_idx]
        keep = filter_detections(classified_boxes, class_prob, is_training)

        class_classified = tf.reshape(tf.gather(classified_boxes, keep),
                                      [-1, 5])
        class_regressed = tf.reshape(tf.gather(regressed_boxes, keep),
                                     [-1, 5])
        class_scores = tf.reshape(tf.gather(class_prob, keep), [-1])

        kept_regressed.append(class_regressed)
        kept_classified.append(class_classified)
        kept_scores.append(class_scores)
        kept_labels.append(tf.ones_like(class_scores) * (class_idx + 1))

    all_regressed = tf.concat(kept_regressed, axis=0)
    all_classified = tf.concat(kept_classified, axis=0)
    all_scores = tf.concat(kept_scores, axis=0)
    all_labels = tf.concat(kept_labels, axis=0)

    return all_regressed, all_scores, all_labels, all_classified
def postprocess_detctions(rpn_bbox_pred, rpn_cls_prob, anchors, is_training):
    """Decode first-stage predictions and collect per-class detections.

    When ``cfgs.METHOD == 'H'`` the four-coordinate anchors are rewritten as
    ``[x_c, y_c, w, h, theta]`` rows with ``theta = -90`` before decoding.

    Args:
        rpn_bbox_pred: regression deltas.
        rpn_cls_prob: class probabilities; column ``j`` is used for class
            ``j``.
        anchors: reference boxes.
        is_training: forwarded unchanged to ``filter_detections``.

    Returns:
        ``(boxes, scores, labels)`` concatenated over all classes; the label
        for class column ``j`` is ``j + 1``.
    """
    if cfgs.METHOD == 'H':
        center_x = (anchors[:, 2] + anchors[:, 0]) / 2
        center_y = (anchors[:, 3] + anchors[:, 1]) / 2
        box_h = anchors[:, 2] - anchors[:, 0] + 1
        box_w = anchors[:, 3] - anchors[:, 1] + 1
        angle = -90 * tf.ones_like(center_x)
        anchors = tf.transpose(
            tf.stack([center_x, center_y, box_w, box_h, angle]))

    decoded = bbox_transform.rbbox_transform_inv(boxes=anchors,
                                                 deltas=rpn_bbox_pred)

    boxes_per_class, scores_per_class, labels_per_class = [], [], []
    for class_idx in range(cfgs.CLASS_NUM):
        class_prob = rpn_cls_prob[:, class_idx]
        keep = filter_detections(decoded, class_prob, is_training)

        kept_scores = tf.reshape(tf.gather(class_prob, keep), [-1])
        boxes_per_class.append(tf.reshape(tf.gather(decoded, keep), [-1, 5]))
        scores_per_class.append(kept_scores)
        labels_per_class.append(tf.ones_like(kept_scores) * (class_idx + 1))

    return (tf.concat(boxes_per_class, axis=0),
            tf.concat(scores_per_class, axis=0),
            tf.concat(labels_per_class, axis=0))
def postprocess_detctions(refine_bbox_pred, refine_cls_prob, refine_angle_prob,
                          anchors, is_training):
    """Decode refined boxes, attach the classified angle, and filter per class.

    Two box sets are produced: the regressed boxes, and a copy whose angle
    column is replaced by ``(-argmax(refine_angle_prob) - 0.5) * cfgs.OMEGA``.
    Filtering uses the angle-classified copy; the same indices are applied to
    both sets. When ``cfgs.ANGLE_RANGE == 180`` the kept boxes of both sets
    are passed through ``coordinate_present_convert(..., 1)``.

    Args:
        refine_bbox_pred: regression deltas.
        refine_cls_prob: class probabilities; column ``j`` is used for class
            ``j``.
        refine_angle_prob: angle-bin probabilities, arg-maxed along axis 1.
        anchors: reference boxes the deltas are decoded against.
        is_training: forwarded unchanged to ``filter_detections``.

    Returns:
        ``(boxes, scores, labels, boxes_angle)`` concatenated over all
        classes; the label for class column ``j`` is ``j + 1``.
    """
    regressed_boxes = bbox_transform.rbbox_transform_inv(
        boxes=anchors,
        deltas=refine_bbox_pred,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)

    angle_bin = tf.cast(tf.argmax(refine_angle_prob, axis=1), tf.float32)
    classified_angle = (tf.reshape(angle_bin, [-1]) * -1 - 0.5) * cfgs.OMEGA

    ctr_x, ctr_y, width, height, _ = tf.unstack(regressed_boxes, axis=1)
    classified_boxes = tf.transpose(
        tf.stack([ctr_x, ctr_y, width, height, classified_angle]))

    kept_regressed, kept_classified = [], []
    kept_scores, kept_labels = [], []
    for class_idx in range(cfgs.CLASS_NUM):
        class_prob = refine_cls_prob[:, class_idx]
        keep = filter_detections(classified_boxes, class_prob, is_training)

        class_classified = tf.reshape(tf.gather(classified_boxes, keep),
                                      [-1, 5])
        class_regressed = tf.reshape(tf.gather(regressed_boxes, keep),
                                     [-1, 5])
        class_scores = tf.reshape(tf.gather(class_prob, keep), [-1])

        if cfgs.ANGLE_RANGE == 180:
            # The conversion runs after filtering, on the kept rows only.
            class_classified = tf.py_func(coordinate_present_convert,
                                          inp=[class_classified, 1],
                                          Tout=[tf.float32])
            class_classified = tf.reshape(class_classified, [-1, 5])

            class_regressed = tf.py_func(coordinate_present_convert,
                                         inp=[class_regressed, 1],
                                         Tout=[tf.float32])
            class_regressed = tf.reshape(class_regressed, [-1, 5])

        kept_regressed.append(class_regressed)
        kept_classified.append(class_classified)
        kept_scores.append(class_scores)
        kept_labels.append(tf.ones_like(class_scores) * (class_idx + 1))

    all_regressed = tf.concat(kept_regressed, axis=0)
    all_classified = tf.concat(kept_classified, axis=0)
    all_scores = tf.concat(kept_scores, axis=0)
    all_labels = tf.concat(kept_labels, axis=0)

    return all_regressed, all_scores, all_labels, all_classified
def iou_smooth_l1_loss_1(preds, anchor_state, target_boxes, anchors,
                         sigma=3.0, alpha=1.0, beta=1.0, is_refine=False):
    """Angle smooth-L1 minus IoU, re-weighted by a rotated-IoU factor.

    Only rows whose ``anchor_state`` equals 1 contribute. For those rows the
    surrogate loss is ``smooth_l1(theta_pred - theta_target) - iou`` where
    ``iou`` comes from ``iou_calculate`` on the ``[x, y, w, h]`` columns and
    the smooth-L1 term is

        f(x) = 0.5 * (sigma * x)^2    if |x| < 1 / sigma^2
               |x| - 0.5 / sigma^2    otherwise

    Each row is multiplied by
    ``(exp(alpha * (1 - overlap) ** beta) - 1) / (surrogate + cfgs.EPSILON)``
    with ``overlap`` from ``iou_rotate_calculate2``; the factor is wrapped in
    ``tf.stop_gradient``.

    Args:
        preds: predicted regression deltas.
        anchor_state: per-anchor state; ``1`` selects a row.
        target_boxes: target boxes, reshaped to six columns; the last column
            is ignored.
        anchors: reference boxes. When ``cfgs.METHOD == 'H'`` and
            ``is_refine`` is false they are converted from four coordinates
            to ``[x_c, y_c, w, h, -90]`` first.
        sigma: smooth-L1 transition parameter.
        alpha: scale inside the exponential weighting.
        beta: exponent applied to ``1 - overlap``.
        is_refine: set to skip the anchor conversion above.

    Returns:
        Scalar loss: the weighted sum divided by ``max(1, #selected rows)``.
    """
    if cfgs.METHOD == 'H' and not is_refine:
        center_x = (anchors[:, 2] + anchors[:, 0]) / 2
        center_y = (anchors[:, 3] + anchors[:, 1]) / 2
        box_h = anchors[:, 2] - anchors[:, 0] + 1
        box_w = anchors[:, 3] - anchors[:, 1] + 1
        angle = -90 * tf.ones_like(center_x)
        anchors = tf.transpose(
            tf.stack([center_x, center_y, box_w, box_h, angle]))

    sigma_sq = sigma ** 2
    positive_rows = tf.reshape(tf.where(tf.equal(anchor_state, 1)), [-1])

    pos_preds = tf.gather(preds, positive_rows)
    pos_targets = tf.gather(target_boxes, positive_rows)
    pos_anchors = tf.gather(anchors, positive_rows)

    decoded = bbox_transform.rbbox_transform_inv(
        boxes=pos_anchors,
        deltas=pos_preds,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)
    decoded = tf.reshape(decoded, [-1, 5])
    pos_targets = tf.reshape(pos_targets, [-1, 6])

    pred_x, pred_y, pred_w, pred_h, pred_theta = tf.unstack(decoded, axis=-1)
    gt_x, gt_y, gt_w, gt_h, gt_theta, _ = tf.unstack(pos_targets, axis=-1)

    angle_abs_diff = tf.abs(pred_theta - gt_theta)
    angle_loss = tf.where(
        tf.less(angle_abs_diff, 1.0 / sigma_sq),
        0.5 * sigma_sq * tf.pow(angle_abs_diff, 2),
        angle_abs_diff - 0.5 / sigma_sq)

    pred_xywh = tf.transpose(tf.stack([pred_x, pred_y, pred_w, pred_h]))
    gt_xywh = tf.transpose(tf.stack([gt_x, gt_y, gt_w, gt_h]))
    # NOTE(review): iou_calculate's output shape is not visible from here;
    # confirm it yields one value per row so this subtraction is row-wise.
    surrogate = angle_loss - iou_calculate(pred_xywh, gt_xywh)

    overlaps = tf.py_func(iou_rotate_calculate2,
                          inp=[tf.reshape(decoded, [-1, 5]),
                               tf.reshape(pos_targets[:, :-1], [-1, 5])],
                          Tout=[tf.float32])
    overlaps = tf.reshape(overlaps, [-1, 1])
    surrogate = tf.reshape(surrogate, [-1, 1])

    # Both numerator and denominator are constants for back-propagation.
    iou_weight = tf.stop_gradient(
        tf.exp(alpha * (1 - overlaps) ** beta) - 1) / (
            tf.stop_gradient(surrogate) + cfgs.EPSILON)

    positive_count = tf.stop_gradient(tf.where(tf.equal(anchor_state, 1)))
    positive_count = tf.cast(tf.shape(positive_count)[0], tf.float32)
    positive_count = tf.maximum(1.0, positive_count)

    return tf.reduce_sum(surrogate * iou_weight) / positive_count
def build_whole_detection_network(self, input_img_batch, gtboxes_batch_h,
                                  gtboxes_batch_r, gpu_id=0):
    """Build the two-stage (rpn + refine) detection graph.

    The first stage predicts boxes against the generated anchors; the
    decoded first-stage boxes then serve as the reference boxes of the
    refine stage, whose head runs on a 1x5 -> 5x1 plus 1x1 convolutional
    re-encoding of every pyramid level.

    Args:
        input_img_batch: image batch fed to ``self.build_base_network``.
        gtboxes_batch_h: ground-truth boxes, reshaped to five columns
            (only touched when ``self.is_training``).
        gtboxes_batch_r: ground-truth boxes, reshaped to six columns
            (only touched when ``self.is_training``).
        gpu_id: forwarded to ``refinebox_target_layer``.

    Returns:
        ``(boxes, scores, category)`` at inference time, and
        ``(boxes, scores, category, self.losses_dict)`` when training.
    """
    if self.is_training:
        gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
        gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

        gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
        gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

    # 1. build base network
    feature_pyramid = self.build_base_network(input_img_batch)

    # 2. build rpn
    rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(
        feature_pyramid, 'rpn_net')

    # 3. generate_anchors
    anchor_list = self.make_anchors(feature_pyramid)
    rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
    rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
    # rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)
    anchors = tf.concat(anchor_list, axis=0)

    if self.is_training:
        with tf.variable_scope('build_loss'):
            labels, target_delta, anchor_states, target_boxes = tf.py_func(
                func=anchor_target_layer,
                inp=[gtboxes_batch_h, gtboxes_batch_r, anchors],
                Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

            if self.method == 'H':
                self.add_anchor_img_smry(input_img_batch, anchors,
                                         anchor_states, 0)
            else:
                self.add_anchor_img_smry(input_img_batch, anchors,
                                         anchor_states, 1)

            cls_loss = losses.focal_loss(labels, rpn_cls_score, anchor_states)

            if cfgs.USE_IOU_FACTOR:
                # First stage: anchors are still in their raw layout, so the
                # loss performs the METHOD == 'H' conversion itself.
                reg_loss = losses.iou_smooth_l1_loss(
                    target_delta, rpn_box_pred, anchor_states, target_boxes,
                    anchors)
            else:
                reg_loss = losses.smooth_l1_loss(target_delta, rpn_box_pred,
                                                 anchor_states)

            self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
            self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

    with tf.variable_scope('refine_feature_pyramid'):
        refine_feature_pyramid = {}
        for level in cfgs.LEVEL:
            feature_1x5 = slim.conv2d(
                inputs=feature_pyramid[level],
                num_outputs=256,
                kernel_size=[1, 5],
                weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                stride=1,
                activation_fn=None,
                scope='refine_1x5_{}'.format(level))

            feature5x1 = slim.conv2d(
                inputs=feature_1x5,
                num_outputs=256,
                kernel_size=[5, 1],
                weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                stride=1,
                activation_fn=None,
                scope='refine_5x1_{}'.format(level))

            feature_1x1 = slim.conv2d(
                inputs=feature_pyramid[level],
                num_outputs=256,
                kernel_size=[1, 1],
                weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                stride=1,
                activation_fn=None,
                scope='refine_1x1_{}'.format(level))

            # Separable 1x5/5x1 branch summed with the 1x1 branch.
            refine_feature_pyramid[level] = feature5x1 + feature_1x1

    refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list = self.refine_net(
        refine_feature_pyramid, 'refine_net')
    # refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list = self.refine_net(feature_pyramid, 'refine_net')

    refine_box_pred = tf.concat(refine_box_pred_list, axis=0)
    refine_cls_score = tf.concat(refine_cls_score_list, axis=0)
    refine_cls_prob = tf.concat(refine_cls_prob_list, axis=0)
    # refine_boxes = tf.concat(refine_boxes_list, axis=0)

    if cfgs.METHOD == 'H':
        # Rewrite four-coordinate anchors as [x_c, y_c, w, h, theta = -90].
        x_c = (anchors[:, 2] + anchors[:, 0]) / 2
        y_c = (anchors[:, 3] + anchors[:, 1]) / 2
        h = anchors[:, 2] - anchors[:, 0] + 1
        w = anchors[:, 3] - anchors[:, 1] + 1
        theta = -90 * tf.ones_like(x_c)
        anchors = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))

    # Decoded first-stage boxes become the refine stage's reference boxes.
    refine_boxes = bbox_transform.rbbox_transform_inv(boxes=anchors,
                                                      deltas=rpn_box_pred)

    # 4. postprocess rpn proposals. such as: decode, clip, filter
    if not self.is_training:
        with tf.variable_scope('postprocess_detctions'):
            boxes, scores, category = postprocess_detctions(
                refine_bbox_pred=refine_box_pred,
                refine_cls_prob=refine_cls_prob,
                anchors=refine_boxes,
                is_training=self.is_training)
            return boxes, scores, category

    # 5. build loss
    else:
        with tf.variable_scope('build_refine_loss'):
            refine_labels, refine_target_delta, refine_box_states, refine_target_boxes = tf.py_func(
                func=refinebox_target_layer,
                inp=[
                    gtboxes_batch_r, refine_boxes,
                    cfgs.REFINE_IOU_POSITIVE_THRESHOLD[0],
                    cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[0], gpu_id
                ],
                Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

            self.add_anchor_img_smry(input_img_batch, refine_boxes,
                                     refine_box_states, 1)

            refine_cls_loss = losses.focal_loss(refine_labels,
                                                refine_cls_score,
                                                refine_box_states)

            if cfgs.USE_IOU_FACTOR:
                # refine_boxes were decoded from the five-column anchors
                # above, so is_refine=True tells the loss to skip its own
                # METHOD == 'H' corner-to-centre conversion; without it the
                # boxes would be converted a second time.
                refine_reg_loss = losses.iou_smooth_l1_loss(
                    refine_target_delta, refine_box_pred, refine_box_states,
                    refine_target_boxes, refine_boxes, is_refine=True)
            else:
                refine_reg_loss = losses.smooth_l1_loss(
                    refine_target_delta, refine_box_pred, refine_box_states)

            self.losses_dict[
                'refine_cls_loss'] = refine_cls_loss * cfgs.CLS_WEIGHT
            self.losses_dict[
                'refine_reg_loss'] = refine_reg_loss * cfgs.REG_WEIGHT

        with tf.variable_scope('postprocess_detctions'):
            boxes, scores, category = postprocess_detctions(
                refine_bbox_pred=refine_box_pred,
                refine_cls_prob=refine_cls_prob,
                anchors=refine_boxes,
                is_training=self.is_training)
            # Detections are only reported during training; no gradient
            # flows back through them.
            boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)

        return boxes, scores, category, self.losses_dict
def refine_stage(self, input_img_batch, gtboxes_batch_r, gt_smooth_label,
                 box_pred_list, cls_prob_list, proposal_list, angle_cls_list,
                 feature_pyramid, gpu_id, pos_threshold, neg_threshold, stage,
                 proposal_filter=False):
    """Run one refinement stage on the previous stage's per-level outputs.

    For every pyramid level the previous predictions are decoded against
    their proposals, the decoded box centres (divided by the level stride)
    are handed to ``self.refine_feature_op`` to build the refined feature
    map, and ``self.refine_net`` is run on the result. When training, the
    stage's classification, regression and angle-classification losses are
    stored in ``self.losses_dict`` under keys suffixed with ``stage``.

    Args:
        input_img_batch: image batch, used only for the anchor summary.
        gtboxes_batch_r: ground-truth boxes passed to
            ``refinebox_target_layer``.
        gt_smooth_label: ground-truth angle labels passed to
            ``refinebox_target_layer``.
        box_pred_list: per-level box deltas from the previous stage.
        cls_prob_list: per-level class probabilities from the previous stage.
        proposal_list: per-level reference boxes of the previous stage.
        angle_cls_list: per-level angle logits from the previous stage; an
            entry may be ``None``.
        feature_pyramid: mapping from level name to feature map.
        gpu_id: forwarded to ``refinebox_target_layer``.
        pos_threshold: forwarded to ``refinebox_target_layer``.
        neg_threshold: forwarded to ``refinebox_target_layer``.
        stage: stage identifier appended to scope names and loss keys.
        proposal_filter: when true, keep only the highest-scoring anchor of
            each location before decoding.

    Returns:
        ``(refine_box_pred_list, refine_cls_prob_list, refine_boxes_list,
        refine_angle_cls_list)`` -- the per-level outputs of this stage.
    """
    with tf.variable_scope('refine_feature_pyramid{}'.format(stage)):
        refine_feature_pyramid = {}
        refine_boxes_list = []
        refine_boxes_angle_list = []

        for box_pred, cls_prob, proposal, angle_prob, stride, level in \
                zip(box_pred_list, cls_prob_list, proposal_list, angle_cls_list,
                    cfgs.ANCHOR_STRIDE, cfgs.LEVEL):

            if proposal_filter:
                # Regroup the flat rows as [location, anchor, ...].
                box_pred = tf.reshape(
                    box_pred, [-1, self.num_anchors_per_location, 5])
                proposal = tf.reshape(proposal, [
                    -1, self.num_anchors_per_location,
                    5 if self.method == 'R' else 4
                ])

                cls_prob = tf.reshape(
                    cls_prob,
                    [-1, self.num_anchors_per_location, cfgs.CLASS_NUM])

                # Best class score of every anchor, then the best anchor of
                # every location.
                cls_max_prob = tf.reduce_max(cls_prob, axis=-1)
                box_pred_argmax = tf.cast(
                    tf.reshape(tf.argmax(cls_max_prob, axis=-1), [-1, 1]),
                    tf.int32)

                # cumsum(ones) - 1 yields 0, 1, 2, ... i.e. the location
                # index; paired with the anchor index it addresses gather_nd.
                indices = tf.cast(
                    tf.cumsum(tf.ones_like(box_pred_argmax), axis=0),
                    tf.int32) - tf.constant(1, tf.int32)
                indices = tf.concat([indices, box_pred_argmax], axis=-1)

                box_pred = tf.reshape(tf.gather_nd(box_pred, indices), [-1, 5])
                proposal = tf.reshape(tf.gather_nd(proposal, indices),
                                      [-1, 5 if self.method == 'R' else 4])

                if cfgs.METHOD == 'H':
                    # Rewrite four-coordinate proposals as
                    # [x_c, y_c, w, h, theta = -90].
                    x_c = (proposal[:, 2] + proposal[:, 0]) / 2
                    y_c = (proposal[:, 3] + proposal[:, 1]) / 2
                    h = proposal[:, 2] - proposal[:, 0] + 1
                    w = proposal[:, 3] - proposal[:, 1] + 1
                    theta = -90 * tf.ones_like(x_c)
                    proposal = tf.transpose(
                        tf.stack([x_c, y_c, w, h, theta]))
            else:
                # No filtering: proposals are used as five-column rows as-is.
                box_pred = tf.reshape(box_pred, [-1, 5])
                proposal = tf.reshape(proposal, [-1, 5])

            bboxes = bbox_transform.rbbox_transform_inv(boxes=proposal,
                                                        deltas=box_pred)

            if angle_prob is not None:
                angle_cls = tf.cast(
                    tf.argmax(tf.sigmoid(angle_prob), axis=1), tf.float32)
                # NOTE(review): unlike the postprocess_detctions variants in
                # this file, the bin index is not multiplied by cfgs.OMEGA
                # here -- confirm that is intended.
                angle_cls = tf.reshape(angle_cls, [
                    -1,
                ]) * -1 - 0.5

                x, y, w, h, theta = tf.unstack(bboxes, axis=1)
                bboxes_angle = tf.transpose(
                    tf.stack([x, y, w, h, angle_cls]))
                refine_boxes_angle_list.append(bboxes_angle)
                center_point = bboxes_angle[:, :2] / stride
            else:
                center_point = bboxes[:, :2] / stride

            refine_boxes_list.append(bboxes)

            refine_feature_pyramid[level] = self.refine_feature_op(
                points=center_point,
                feature_map=feature_pyramid[level],
                name=level)

    refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list, refine_angle_cls_list = self.refine_net(
        refine_feature_pyramid, 'refine_net{}'.format(stage))

    refine_box_pred = tf.concat(refine_box_pred_list, axis=0)
    refine_cls_score = tf.concat(refine_cls_score_list, axis=0)
    # refine_cls_prob = tf.concat(refine_cls_prob_list, axis=0)
    refine_boxes = tf.concat(refine_boxes_list, axis=0)
    refine_angle_cls = tf.concat(refine_angle_cls_list, axis=0)

    if self.is_training:
        with tf.variable_scope('build_refine_loss{}'.format(stage)):
            refine_labels, refine_target_delta, refine_box_states, refine_target_boxes, refine_target_smooth_label = tf.py_func(
                func=refinebox_target_layer,
                inp=[
                    gtboxes_batch_r, gt_smooth_label, refine_boxes,
                    pos_threshold, neg_threshold, gpu_id
                ],
                Tout=[
                    tf.float32, tf.float32, tf.float32, tf.float32,
                    tf.float32
                ])

            self.add_anchor_img_smry(input_img_batch, refine_boxes,
                                     refine_box_states, 1)

            refine_cls_loss = losses.focal_loss(refine_labels,
                                                refine_cls_score,
                                                refine_box_states)

            # The IoU-factor branch is hard-disabled; the plain smooth-L1
            # loss below is always the one used.
            if False:  # cfgs.USE_IOU_FACTOR:
                refine_reg_loss = losses.iou_smooth_l1_loss(
                    refine_target_delta,
                    refine_box_pred,
                    refine_box_states,
                    refine_target_boxes,
                    refine_boxes,
                    is_refine=True)
            else:
                refine_reg_loss = losses.smooth_l1_loss(
                    refine_target_delta, refine_box_pred, refine_box_states)

            angle_cls_loss = losses.angle_focal_loss(
                refine_target_smooth_label, refine_angle_cls,
                refine_box_states)

            self.losses_dict['refine_cls_loss{}'.format(
                stage)] = refine_cls_loss * cfgs.CLS_WEIGHT
            self.losses_dict['refine_reg_loss{}'.format(
                stage)] = refine_reg_loss * cfgs.REG_WEIGHT
            self.losses_dict['angle_cls_loss{}'.format(
                stage)] = angle_cls_loss * cfgs.ANGLE_CLS_WEIGHT

    return refine_box_pred_list, refine_cls_prob_list, refine_boxes_list, refine_angle_cls_list
def postprocess_detctions(rpn_bbox_pred, rpn_cls_prob, rpn_angle_prob, anchors,
                          is_training):
    """Score-threshold, decode and rotate-NMS the detections of every class.

    For each class column the rows scoring above ``cfgs.VIS_SCORE``
    (training) or ``cfgs.FILTERED_SCORE`` (otherwise) are decoded. A second
    copy of the boxes gets its angle column replaced by
    ``(-argmax(rpn_angle_prob) - 0.5) * cfgs.OMEGA``; rotated NMS runs on that
    copy and its indices select rows from both box sets.

    Args:
        rpn_bbox_pred: regression deltas.
        rpn_cls_prob: class probabilities; column ``j`` is used for class
            ``j``.
        rpn_angle_prob: angle-bin probabilities, arg-maxed along axis 1.
        anchors: reference boxes.
        is_training: selects which score threshold is applied.

    Returns:
        ``(boxes, scores, labels, boxes_angle)`` concatenated over all
        classes; the label for class column ``j`` is ``j + 1``.
    """
    use_180_range = cfgs.ANGLE_RANGE == 180

    kept_regressed, kept_classified = [], []
    kept_scores, kept_labels = [], []

    for class_idx in range(cfgs.CLASS_NUM):
        class_scores = rpn_cls_prob[:, class_idx]
        score_floor = cfgs.VIS_SCORE if is_training else cfgs.FILTERED_SCORE
        above = tf.reshape(tf.where(tf.greater(class_scores, score_floor)),
                           [-1])

        class_anchors = tf.gather(anchors, above)
        class_deltas = tf.gather(rpn_bbox_pred, above)
        class_scores = tf.gather(class_scores, above)
        class_angle_prob = tf.gather(rpn_angle_prob, above)

        angle_bin = tf.cast(tf.argmax(class_angle_prob, axis=1), tf.float32)

        if cfgs.METHOD == 'H':
            center_x = (class_anchors[:, 2] + class_anchors[:, 0]) / 2
            center_y = (class_anchors[:, 3] + class_anchors[:, 1]) / 2
            box_h = class_anchors[:, 2] - class_anchors[:, 0] + 1
            box_w = class_anchors[:, 3] - class_anchors[:, 1] + 1
            angle = -90 * tf.ones_like(center_x)
            class_anchors = tf.transpose(
                tf.stack([center_x, center_y, box_w, box_h, angle]))

        # NOTE(review): this conversion is applied for every METHOD, not
        # only 'H' -- confirm that matches the intended nesting.
        if use_180_range:
            class_anchors = tf.py_func(coordinate_present_convert,
                                       inp=[class_anchors, -1],
                                       Tout=[tf.float32])
            class_anchors = tf.reshape(class_anchors, [-1, 5])

        regressed = bbox_transform.rbbox_transform_inv(boxes=class_anchors,
                                                       deltas=class_deltas)
        regressed = tf.reshape(regressed, [-1, 5])

        classified_angle = (tf.reshape(angle_bin, [-1]) * -1 -
                            0.5) * cfgs.OMEGA
        ctr_x, ctr_y, width, height, _ = tf.unstack(regressed, axis=1)
        classified = tf.transpose(
            tf.stack([ctr_x, ctr_y, width, height, classified_angle]))

        if use_180_range:
            regressed = tf.py_func(coordinate_present_convert,
                                   inp=[regressed, 1],
                                   Tout=[tf.float32])
            regressed = tf.reshape(regressed, [-1, 5])

            classified = tf.py_func(coordinate_present_convert,
                                    inp=[classified, 1],
                                    Tout=[tf.float32])
            classified = tf.reshape(classified, [-1, 5])

        # NMS is driven by the angle-classified boxes.
        nms_keep = nms_rotate.nms_rotate(
            decode_boxes=classified,
            scores=class_scores,
            iou_threshold=cfgs.NMS_IOU_THRESHOLD,
            max_output_size=100,
            use_angle_condition=False,
            angle_threshold=15,
            use_gpu=False)

        final_regressed = tf.reshape(tf.gather(regressed, nms_keep), [-1, 5])
        final_classified = tf.reshape(tf.gather(classified, nms_keep),
                                      [-1, 5])
        final_scores = tf.reshape(tf.gather(class_scores, nms_keep), [-1])

        kept_regressed.append(final_regressed)
        kept_classified.append(final_classified)
        kept_scores.append(final_scores)
        kept_labels.append(tf.ones_like(final_scores) * (class_idx + 1))

    all_regressed = tf.concat(kept_regressed, axis=0)
    all_classified = tf.concat(kept_classified, axis=0)
    all_scores = tf.concat(kept_scores, axis=0)
    all_labels = tf.concat(kept_labels, axis=0)

    return all_regressed, all_scores, all_labels, all_classified
def build_whole_detection_network(self, input_img_batch, gtboxes_batch_h,
                                  gtboxes_batch_r, gt_encode_label, gpu_id=0):
    """Build the rpn + feature-refined detection graph with an angle head.

    The first stage predicts boxes against the generated anchors. Per
    pyramid level, the highest-scoring anchor of each location is decoded,
    and the decoded centres (divided by the level stride) drive
    ``self.refine_feature_op``. ``self.refine_net`` then predicts boxes,
    class scores and angle codes on the refined features.

    Args:
        input_img_batch: image batch fed to ``self.build_base_network``.
        gtboxes_batch_h: ground-truth boxes, reshaped to five columns
            (only touched when ``self.is_training``).
        gtboxes_batch_r: ground-truth boxes, reshaped to six columns
            (only touched when ``self.is_training``).
        gt_encode_label: ground-truth angle codes, reshaped to
            ``self.coding_len`` columns (only touched when training).
        gpu_id: forwarded to ``refinebox_target_layer``.

    Returns:
        ``(scores, category, boxes_angle)`` at inference time, and
        ``(scores, category, boxes_angle, self.losses_dict)`` when training.
    """
    if self.is_training:
        gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
        gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

        gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
        gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        gt_encode_label = tf.reshape(gt_encode_label, [-1, self.coding_len])
        gt_encode_label = tf.cast(gt_encode_label, tf.float32)

    # 1. build base network
    feature_pyramid = self.build_base_network(input_img_batch)

    # 2. build rpn
    rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(
        feature_pyramid, 'rpn_net')

    # 3. generate_anchors
    anchor_list = self.make_anchors(feature_pyramid)
    rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
    rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
    # rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)
    anchors = tf.concat(anchor_list, axis=0)

    if self.is_training:
        with tf.variable_scope('build_loss'):
            labels, target_delta, anchor_states, target_boxes = tf.py_func(
                func=anchor_target_layer,
                inp=[gtboxes_batch_h, gtboxes_batch_r, anchors],
                Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

            # The last argument of the summary call is 0 for method 'H'
            # and 1 otherwise.
            if self.method == 'H':
                self.add_anchor_img_smry(input_img_batch, anchors,
                                         anchor_states, 0)
            else:
                self.add_anchor_img_smry(input_img_batch, anchors,
                                         anchor_states, 1)

            cls_loss = losses.focal_loss(labels, rpn_cls_score, anchor_states)

            if cfgs.USE_IOU_FACTOR:
                reg_loss = losses.iou_smooth_l1_loss_(
                    target_delta, rpn_box_pred, anchor_states, target_boxes,
                    anchors)
            else:
                reg_loss = losses.smooth_l1_loss(target_delta, rpn_box_pred,
                                                 anchor_states)

            self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
            self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

    with tf.variable_scope('refine_feature_pyramid'):
        refine_feature_pyramid = {}
        refine_boxes_list = []

        for box_pred, cls_prob, anchor, stride, level in \
                zip(rpn_box_pred_list, rpn_cls_prob_list, anchor_list,
                    cfgs.ANCHOR_STRIDE, cfgs.LEVEL):

            # Regroup the flat rows as [location, anchor, ...].
            box_pred = tf.reshape(box_pred,
                                  [-1, self.num_anchors_per_location, 5])
            anchor = tf.reshape(anchor, [
                -1, self.num_anchors_per_location,
                5 if self.method == 'R' else 4
            ])

            cls_prob = tf.reshape(
                cls_prob, [-1, self.num_anchors_per_location, cfgs.CLASS_NUM])

            # Best class score of every anchor, then the best anchor of
            # every location.
            cls_max_prob = tf.reduce_max(cls_prob, axis=-1)
            box_pred_argmax = tf.cast(
                tf.reshape(tf.argmax(cls_max_prob, axis=-1), [-1, 1]),
                tf.int32)

            # cumsum(ones) - 1 yields 0, 1, 2, ... i.e. the location index;
            # paired with the anchor index it addresses gather_nd.
            indices = tf.cast(
                tf.cumsum(tf.ones_like(box_pred_argmax), axis=0),
                tf.int32) - tf.constant(1, tf.int32)
            indices = tf.concat([indices, box_pred_argmax], axis=-1)

            box_pred_filter = tf.reshape(tf.gather_nd(box_pred, indices),
                                         [-1, 5])
            anchor_filter = tf.reshape(tf.gather_nd(
                anchor, indices), [-1, 5 if self.method == 'R' else 4])

            if cfgs.METHOD == 'H':
                # Rewrite four-coordinate anchors as
                # [x_c, y_c, w, h, theta = -90].
                x_c = (anchor_filter[:, 2] + anchor_filter[:, 0]) / 2
                y_c = (anchor_filter[:, 3] + anchor_filter[:, 1]) / 2
                h = anchor_filter[:, 2] - anchor_filter[:, 0] + 1
                w = anchor_filter[:, 3] - anchor_filter[:, 1] + 1
                theta = -90 * tf.ones_like(x_c)
                anchor_filter = tf.transpose(
                    tf.stack([x_c, y_c, w, h, theta]))

            boxes_filter = bbox_transform.rbbox_transform_inv(
                boxes=anchor_filter, deltas=box_pred_filter)
            refine_boxes_list.append(boxes_filter)

            # Box centres divided by the level stride.
            center_point = boxes_filter[:, :2] / stride

            refine_feature_pyramid[level] = self.refine_feature_op(
                points=center_point,
                feature_map=feature_pyramid[level],
                name=level)

    refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list, refine_angle_cls_list = self.refine_net(
        refine_feature_pyramid, 'refine_net')

    refine_box_pred = tf.concat(refine_box_pred_list, axis=0)
    refine_cls_score = tf.concat(refine_cls_score_list, axis=0)
    refine_cls_prob = tf.concat(refine_cls_prob_list, axis=0)
    refine_angle_cls = tf.concat(refine_angle_cls_list, axis=0)
    refine_boxes = tf.concat(refine_boxes_list, axis=0)

    # 4. postprocess rpn proposals. such as: decode, clip, filter
    if self.is_training:
        with tf.variable_scope('build_refine_loss'):
            refine_labels, refine_target_delta, refine_box_states, refine_target_boxes, refine_target_encode_label = tf.py_func(
                func=refinebox_target_layer,
                inp=[
                    gtboxes_batch_r, gt_encode_label, refine_boxes,
                    cfgs.REFINE_IOU_POSITIVE_THRESHOLD[0],
                    cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[0], gpu_id
                ],
                Tout=[
                    tf.float32, tf.float32, tf.float32, tf.float32,
                    tf.float32
                ])

            self.add_anchor_img_smry(input_img_batch, refine_boxes,
                                     refine_box_states, 1)

            refine_cls_loss = losses.focal_loss(refine_labels,
                                                refine_cls_score,
                                                refine_box_states)
            refine_reg_loss = losses.smooth_l1_loss(
                refine_target_delta, refine_box_pred, refine_box_states)

            # decimal_weight is enabled only when the dataset name starts
            # with 'DOTA'.
            angle_cls_loss = losses_dcl.angle_cls_period_focal_loss(
                refine_target_encode_label,
                refine_angle_cls,
                refine_box_states,
                refine_target_boxes,
                decimal_weight=cfgs.DATASET_NAME.startswith('DOTA'))

            self.losses_dict[
                'refine_cls_loss'] = refine_cls_loss * cfgs.CLS_WEIGHT
            self.losses_dict[
                'refine_reg_loss'] = refine_reg_loss * cfgs.REG_WEIGHT
            self.losses_dict[
                'angle_cls_loss'] = angle_cls_loss * cfgs.ANGLE_WEIGHT

    with tf.variable_scope('postprocess_detctions'):
        # The angle head emits logits; they are squashed with a sigmoid
        # before post-processing.
        scores, category, boxes_angle = postprocess_detctions(
            refine_bbox_pred=refine_box_pred,
            refine_cls_prob=refine_cls_prob,
            refine_angle_prob=tf.sigmoid(refine_angle_cls),
            refine_boxes=refine_boxes,
            is_training=self.is_training)
        scores = tf.stop_gradient(scores)
        category = tf.stop_gradient(category)
        boxes_angle = tf.stop_gradient(boxes_angle)

    if self.is_training:
        return scores, category, boxes_angle, self.losses_dict
    else:
        return scores, category, boxes_angle
def build_whole_detection_network(self, input_img_batch, gtboxes_batch_h,
                                  gtboxes_batch_r, gt_encode_label, gpu_id=0):
    """Build the rpn + refine detection graph with an angle-code head.

    The refine head runs directly on the base feature pyramid; its reference
    boxes are the first-stage predictions decoded against the anchors.

    Args:
        input_img_batch: image batch fed to ``self.build_base_network``.
        gtboxes_batch_h: ground-truth boxes, reshaped to five columns
            (only touched when ``self.is_training``).
        gtboxes_batch_r: ground-truth boxes, reshaped to six columns
            (only touched when ``self.is_training``).
        gt_encode_label: ground-truth angle codes, reshaped to
            ``self.coding_len`` columns (only touched when training).
        gpu_id: forwarded to ``refinebox_target_layer``.

    Returns:
        ``(scores, category, boxes_angle)`` at inference time, and
        ``(scores, category, boxes_angle, self.losses_dict)`` when training.
    """
    if self.is_training:
        gtboxes_batch_h = tf.cast(tf.reshape(gtboxes_batch_h, [-1, 5]),
                                  tf.float32)
        gtboxes_batch_r = tf.cast(tf.reshape(gtboxes_batch_r, [-1, 6]),
                                  tf.float32)
        gt_encode_label = tf.cast(
            tf.reshape(gt_encode_label, [-1, self.coding_len]), tf.float32)

    # Stage 0: backbone features.
    feature_pyramid = self.build_base_network(input_img_batch)

    # Stage 1: first-stage head and anchors.
    rpn_outputs = self.rpn_net(feature_pyramid, 'rpn_net')
    rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = rpn_outputs

    anchor_list = self.make_anchors(feature_pyramid)
    rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
    rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
    anchors = tf.concat(anchor_list, axis=0)

    if self.is_training:
        with tf.variable_scope('build_loss'):
            labels, target_delta, anchor_states, target_boxes = tf.py_func(
                func=anchor_target_layer,
                inp=[gtboxes_batch_h, gtboxes_batch_r, anchors],
                Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

            # The summary call gets 0 for method 'H' and 1 otherwise.
            smry_mode = 0 if self.method == 'H' else 1
            self.add_anchor_img_smry(input_img_batch, anchors, anchor_states,
                                     smry_mode)

            cls_loss = losses.focal_loss(labels, rpn_cls_score, anchor_states)

            if cfgs.USE_IOU_FACTOR:
                reg_loss = losses.iou_smooth_l1_loss_(
                    target_delta, rpn_box_pred, anchor_states, target_boxes,
                    anchors)
            else:
                reg_loss = losses.smooth_l1_loss(target_delta, rpn_box_pred,
                                                 anchor_states)

            self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
            self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

    # Stage 2: refine head on the unmodified feature pyramid.
    refine_outputs = self.refine_net(feature_pyramid, 'refine_net')
    (refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list,
     refine_angle_cls_list) = refine_outputs

    refine_box_pred = tf.concat(refine_box_pred_list, axis=0)
    refine_cls_score = tf.concat(refine_cls_score_list, axis=0)
    refine_cls_prob = tf.concat(refine_cls_prob_list, axis=0)
    refine_angle_cls = tf.concat(refine_angle_cls_list, axis=0)

    if cfgs.METHOD == 'H':
        # Rewrite four-coordinate anchors as [x_c, y_c, w, h, theta = -90].
        center_x = (anchors[:, 2] + anchors[:, 0]) / 2
        center_y = (anchors[:, 3] + anchors[:, 1]) / 2
        box_h = anchors[:, 2] - anchors[:, 0] + 1
        box_w = anchors[:, 3] - anchors[:, 1] + 1
        angle = -90 * tf.ones_like(center_x)
        anchors = tf.transpose(
            tf.stack([center_x, center_y, box_w, box_h, angle]))

    # if cfgs.ANGLE_RANGE == 180:
    #     anchors = tf.py_func(coordinate_present_convert,
    #                          inp=[anchors, -1],
    #                          Tout=[tf.float32])
    #     anchors = tf.reshape(anchors, [-1, 5])

    # Decoded first-stage boxes become the refine stage's reference boxes.
    refine_boxes = bbox_transform.rbbox_transform_inv(boxes=anchors,
                                                      deltas=rpn_box_pred)

    if self.is_training:
        with tf.variable_scope('build_refine_loss'):
            refine_targets = tf.py_func(
                func=refinebox_target_layer,
                inp=[
                    gtboxes_batch_r, gt_encode_label, refine_boxes,
                    cfgs.REFINE_IOU_POSITIVE_THRESHOLD[0],
                    cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[0], gpu_id
                ],
                Tout=[
                    tf.float32, tf.float32, tf.float32, tf.float32,
                    tf.float32
                ])
            (refine_labels, refine_target_delta, refine_box_states,
             refine_target_boxes, refine_target_encode_label) = refine_targets

            self.add_anchor_img_smry(input_img_batch, refine_boxes,
                                     refine_box_states, 1)

            refine_cls_loss = losses.focal_loss(refine_labels,
                                                refine_cls_score,
                                                refine_box_states)
            refine_reg_loss = losses.smooth_l1_loss(
                refine_target_delta, refine_box_pred, refine_box_states)
            angle_cls_loss = losses_dcl.angle_cls_period_focal_loss(
                refine_target_encode_label,
                refine_angle_cls,
                refine_box_states,
                refine_target_boxes,
                decimal_weight=True)

            self.losses_dict['refine_cls_loss'] = (refine_cls_loss *
                                                   cfgs.CLS_WEIGHT)
            self.losses_dict['refine_reg_loss'] = (refine_reg_loss *
                                                   cfgs.REG_WEIGHT)
            self.losses_dict['angle_cls_loss'] = (angle_cls_loss *
                                                  cfgs.ANGLE_WEIGHT)

    with tf.variable_scope('postprocess_detctions'):
        # The angle head emits logits; they are squashed with a sigmoid
        # before post-processing.
        scores, category, boxes_angle = postprocess_detctions(
            refine_bbox_pred=refine_box_pred,
            refine_cls_prob=refine_cls_prob,
            refine_angle_prob=tf.sigmoid(refine_angle_cls),
            refine_boxes=refine_boxes,
            is_training=self.is_training)
        scores = tf.stop_gradient(scores)
        category = tf.stop_gradient(category)
        boxes_angle = tf.stop_gradient(boxes_angle)

    if self.is_training:
        return scores, category, boxes_angle, self.losses_dict
    return scores, category, boxes_angle