def __init__(self, cfgs, is_training):
    """Set up the samplers, loss helper and angle coding length.

    Args:
        cfgs: configuration object; this constructor reads ``ANGLE_RANGE``,
            ``OMEGA`` and ``ANGLE_MODE`` from it and forwards it to the
            parent constructor and to both samplers.
        is_training: forwarded unchanged to the parent constructor.
    """
    super(DetectionNetworkR3DetDCL, self).__init__(cfgs, is_training)

    # Target samplers for the first stage and for the refinement stage.
    self.anchor_sampler_retinenet = AnchorSamplerR3DetDCL(cfgs)
    self.refine_anchor_sampler_r3det_dcl = RefineAnchorSamplerR3DetDCL(cfgs)

    # The loss helper is built from ``self.cfgs``, so it must come after the
    # parent constructor has run (presumably that is where ``self.cfgs`` is
    # stored -- confirm in the base class).
    self.losses = LossDCL(self.cfgs)

    # ANGLE_RANGE / OMEGA is truncated to an int before being handed to
    # ``get_code_len``.
    range_over_omega = int(cfgs.ANGLE_RANGE / cfgs.OMEGA)
    self.coding_len = get_code_len(range_over_omega, mode=cfgs.ANGLE_MODE)
class DetectionNetworkR3DetDCL(DetectionNetworkBase):
    """Two-stage detection graph: a first-stage ``rpn_net`` followed by a refinement head.

    The refinement head predicts, for every spatial location, a 4-value box
    delta, ``CLASS_NUM`` class scores and a ``coding_len``-long angle code.
    ``rpn_net``, ``make_anchors``, ``build_backbone``, ``add_anchor_img_smry``,
    ``losses_dict``, ``method`` and ``num_anchors_per_location`` are not defined
    here; they are expected to come from ``DetectionNetworkBase``.
    """

    def __init__(self, cfgs, is_training):
        """Create the target samplers, the loss helper and the angle coding length.

        Args:
            cfgs: configuration object (reads ``ANGLE_RANGE``, ``OMEGA``, ``ANGLE_MODE``).
            is_training: forwarded to the base class constructor.
        """
        super(DetectionNetworkR3DetDCL, self).__init__(cfgs, is_training)
        self.anchor_sampler_retinenet = AnchorSamplerRetinaNet(cfgs)
        self.refine_anchor_sampler_r3det_dcl = RefineAnchorSamplerR3DetDCL(cfgs)
        self.losses = LossDCL(self.cfgs)
        self.coding_len = get_code_len(int(cfgs.ANGLE_RANGE / cfgs.OMEGA), mode=cfgs.ANGLE_MODE)

    def refine_cls_net(self, inputs, scope_list, reuse_flag, level):
        """Build the refinement classification branch for one pyramid level.

        Args:
            inputs: feature map fed to the first 3x3 convolution.
            scope_list: list of scope names; index 0 names the conv tower and
                index 2 names the final classification convolution.
            reuse_flag: passed as ``reuse`` to every ``slim.conv2d`` call.
            level: level name, only used to name the reshape/sigmoid ops.

        Returns:
            Tuple ``(scores, probs)``: scores reshaped to ``[-1, CLASS_NUM]`` and
            their element-wise sigmoid.
        """
        rpn_conv2d_3x3 = inputs
        for i in range(self.cfgs.NUM_SUBNET_CONV):
            rpn_conv2d_3x3 = slim.conv2d(inputs=rpn_conv2d_3x3,
                                         num_outputs=self.cfgs.FPN_CHANNEL,
                                         kernel_size=[3, 3],
                                         stride=1,
                                         # With group norm the ReLU is applied after normalisation below.
                                         activation_fn=None if self.cfgs.USE_GN else tf.nn.relu,
                                         weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                                         biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
                                         trainable=self.is_training,
                                         scope='{}_{}'.format(scope_list[0], i),
                                         reuse=reuse_flag)

            if self.cfgs.USE_GN:
                rpn_conv2d_3x3 = tf.contrib.layers.group_norm(rpn_conv2d_3x3)
                rpn_conv2d_3x3 = tf.nn.relu(rpn_conv2d_3x3)

        rpn_box_scores = slim.conv2d(rpn_conv2d_3x3,
                                     num_outputs=self.cfgs.CLASS_NUM,
                                     kernel_size=[3, 3],
                                     stride=1,
                                     weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                                     biases_initializer=self.cfgs.FINAL_CONV_BIAS_INITIALIZER,
                                     trainable=self.is_training,
                                     scope=scope_list[2],
                                     activation_fn=None,
                                     reuse=reuse_flag)

        rpn_box_scores = tf.reshape(rpn_box_scores, [-1, self.cfgs.CLASS_NUM],
                                    name='refine_{}_classification_reshape'.format(level))
        rpn_box_probs = tf.sigmoid(rpn_box_scores, name='refine_{}_classification_sigmoid'.format(level))

        return rpn_box_scores, rpn_box_probs

    def refine_reg_net(self, inputs, scope_list, reuse_flag, level):
        """Build the refinement regression and angle-code branch for one pyramid level.

        Args:
            inputs: feature map fed to the first 3x3 convolution.
            scope_list: list of scope names; index 1 names the conv tower,
                index 3 the box-delta convolution and index 4 the angle-code
                convolution.
            reuse_flag: passed as ``reuse`` to every ``slim.conv2d`` call.
            level: level name, only used to name the reshape ops.

        Returns:
            Tuple ``(delta_boxes, angle_cls)`` reshaped to ``[-1, 4]`` and
            ``[-1, coding_len]`` respectively. Both heads share the same tower.
        """
        rpn_conv2d_3x3 = inputs
        for i in range(self.cfgs.NUM_SUBNET_CONV):
            rpn_conv2d_3x3 = slim.conv2d(inputs=rpn_conv2d_3x3,
                                         num_outputs=self.cfgs.FPN_CHANNEL,
                                         kernel_size=[3, 3],
                                         weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                                         biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
                                         stride=1,
                                         activation_fn=None if self.cfgs.USE_GN else tf.nn.relu,
                                         trainable=self.is_training,
                                         scope='{}_{}'.format(scope_list[1], i),
                                         reuse=reuse_flag)

            if self.cfgs.USE_GN:
                rpn_conv2d_3x3 = tf.contrib.layers.group_norm(rpn_conv2d_3x3)
                rpn_conv2d_3x3 = tf.nn.relu(rpn_conv2d_3x3)

        rpn_delta_boxes = slim.conv2d(rpn_conv2d_3x3,
                                      num_outputs=4,
                                      kernel_size=[3, 3],
                                      stride=1,
                                      weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                                      biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
                                      trainable=self.is_training,
                                      scope=scope_list[3],
                                      activation_fn=None,
                                      reuse=reuse_flag)

        rpn_angle_cls = slim.conv2d(rpn_conv2d_3x3,
                                    num_outputs=self.coding_len,
                                    kernel_size=[3, 3],
                                    stride=1,
                                    weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                                    biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
                                    trainable=self.is_training,
                                    scope=scope_list[4],
                                    activation_fn=None,
                                    reuse=reuse_flag)

        rpn_delta_boxes = tf.reshape(rpn_delta_boxes, [-1, 4],
                                     name='refine_{}_regression_reshape'.format(level))
        rpn_angle_cls = tf.reshape(rpn_angle_cls, [-1, self.coding_len],
                                   name='rpn_{}_angle_cls_reshape'.format(level))

        return rpn_delta_boxes, rpn_angle_cls

    def refine_net(self, feature_pyramid, name):
        """Run the refinement heads over every level in ``cfgs.LEVEL``.

        Args:
            feature_pyramid: mapping from level name to feature map.
            name: variable scope that wraps all refinement layers.

        Returns:
            Four lists with one entry per level, in ``cfgs.LEVEL`` order:
            box deltas, class scores, class probabilities, angle codes.
        """
        refine_delta_boxes_list = []
        refine_scores_list = []
        refine_probs_list = []
        refine_angle_cls_list = []
        with tf.variable_scope(name):
            with slim.arg_scope([slim.conv2d], weights_regularizer=slim.l2_regularizer(self.cfgs.WEIGHT_DECAY)):
                for level in self.cfgs.LEVEL:

                    if self.cfgs.SHARE_NET:
                        # The first configured level creates the shared variables and
                        # every later level reuses them. Comparing against LEVEL[0]
                        # (instead of a literal level name) keeps this valid for any
                        # LEVEL list.
                        reuse_flag = None if level == self.cfgs.LEVEL[0] else True
                        scope_list = ['conv2d_3x3_cls', 'conv2d_3x3_reg', 'refine_classification',
                                      'refine_regression', 'refine_angle_cls']
                    else:
                        reuse_flag = None
                        scope_list = ['conv2d_3x3_cls_' + level, 'conv2d_3x3_reg_' + level,
                                      'refine_classification_' + level, 'refine_regression_' + level,
                                      'refine_angle_cls_' + level]

                    refine_box_scores, refine_box_probs = self.refine_cls_net(feature_pyramid[level],
                                                                              scope_list, reuse_flag,
                                                                              level)
                    refine_delta_boxes, refine_angle_cls = self.refine_reg_net(feature_pyramid[level],
                                                                               scope_list, reuse_flag,
                                                                               level)

                    refine_scores_list.append(refine_box_scores)
                    refine_probs_list.append(refine_box_probs)
                    refine_delta_boxes_list.append(refine_delta_boxes)
                    refine_angle_cls_list.append(refine_angle_cls)

        return refine_delta_boxes_list, refine_scores_list, refine_probs_list, refine_angle_cls_list

    def refine_feature_op(self, points, feature_map):
        """Re-sample ``feature_map`` at ``points`` and add the result to the original map.

        For every point the features at the four surrounding integer locations
        (clamped to the map) are gathered and summed with weights formed from the
        absolute products of the coordinate offsets.

        Args:
            points: 2-D tensor; column 0 is used as x and column 1 as y, in
                feature-map coordinates. The final reshape requires exactly one
                point per spatial location (``h * w`` rows).
            feature_map: 4-D tensor whose leading (batch) dimension must be 1 and
                whose last dimension is ``cfgs.FPN_CHANNEL``.

        Returns:
            Tensor of shape ``[1, h, w, FPN_CHANNEL]``: re-sampled features plus
            ``feature_map``.
        """
        h, w = tf.cast(tf.shape(feature_map)[1], tf.int32), tf.cast(tf.shape(feature_map)[2], tf.int32)

        xmin = tf.maximum(0.0, tf.floor(points[:, 0]))
        xmin = tf.minimum(tf.cast(w - 1, tf.float32), tf.ceil(xmin))

        ymin = tf.maximum(0.0, tf.floor(points[:, 1]))
        ymin = tf.minimum(tf.cast(h - 1, tf.float32), tf.ceil(ymin))

        xmax = tf.minimum(tf.cast(w - 1, tf.float32), tf.ceil(points[:, 0]))
        xmax = tf.maximum(0.0, tf.floor(xmax))

        ymax = tf.minimum(tf.cast(h - 1, tf.float32), tf.ceil(points[:, 1]))
        ymax = tf.maximum(0.0, tf.floor(ymax))

        # gather_nd indices are (row, col) == (y, x).
        left_top = tf.cast(tf.transpose(tf.stack([ymin, xmin], axis=0)), tf.int32)
        right_bottom = tf.cast(tf.transpose(tf.stack([ymax, xmax], axis=0)), tf.int32)
        left_bottom = tf.cast(tf.transpose(tf.stack([ymax, xmin], axis=0)), tf.int32)
        right_top = tf.cast(tf.transpose(tf.stack([ymin, xmax], axis=0)), tf.int32)

        feature = feature_map

        # Remove only the batch axis. An axis-less squeeze would also drop a
        # height or width of 1 and leave gather_nd with the wrong rank.
        feature_hwc = tf.squeeze(feature, axis=[0])

        left_top_feature = tf.gather_nd(feature_hwc, left_top)
        right_bottom_feature = tf.gather_nd(feature_hwc, right_bottom)
        left_bottom_feature = tf.gather_nd(feature_hwc, left_bottom)
        right_top_feature = tf.gather_nd(feature_hwc, right_top)

        refine_feature = right_bottom_feature * tf.tile(
            tf.reshape((tf.abs((points[:, 0] - xmin) * (points[:, 1] - ymin))), [-1, 1]),
            [1, self.cfgs.FPN_CHANNEL]) \
                         + left_top_feature * tf.tile(
            tf.reshape((tf.abs((xmax - points[:, 0]) * (ymax - points[:, 1]))), [-1, 1]),
            [1, self.cfgs.FPN_CHANNEL]) \
                         + right_top_feature * tf.tile(
            tf.reshape((tf.abs((points[:, 0] - xmin) * (ymax - points[:, 1]))), [-1, 1]),
            [1, self.cfgs.FPN_CHANNEL]) \
                         + left_bottom_feature * tf.tile(
            tf.reshape((tf.abs((xmax - points[:, 0]) * (points[:, 1] - ymin))), [-1, 1]),
            [1, self.cfgs.FPN_CHANNEL])

        refine_feature = tf.reshape(refine_feature,
                                    [1, tf.cast(h, tf.int32), tf.cast(w, tf.int32), self.cfgs.FPN_CHANNEL])

        return refine_feature + feature

    def build_whole_detection_network(self, input_img_batch, gtboxes_batch_h=None, gtboxes_batch_r=None,
                                      gt_encode_label=None, gpu_id=0):
        """Assemble backbone, first stage, feature refinement, refinement heads, losses and post-processing.

        Args:
            input_img_batch: input image tensor; reshaped to
                ``[1, IMG_SHORT_SIDE_LEN, IMG_MAX_LENGTH, 3]`` when ``USE_GN`` is set.
            gtboxes_batch_h: ground-truth tensor reshaped to ``[-1, 5]`` (training only).
            gtboxes_batch_r: ground-truth tensor reshaped to ``[-1, 6]`` (training only).
            gt_encode_label: ground-truth angle codes reshaped to
                ``[-1, coding_len]`` (training only).
            gpu_id: forwarded to the samplers and to rotated NMS.

        Returns:
            ``(boxes_angle, scores, category, losses_dict)`` when training,
            otherwise ``(boxes_angle, scores, category)``. The first three carry
            no gradient.
        """
        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

            gt_encode_label = tf.reshape(gt_encode_label, [-1, self.coding_len])
            gt_encode_label = tf.cast(gt_encode_label, tf.float32)

        if self.cfgs.USE_GN:
            input_img_batch = tf.reshape(input_img_batch, [1, self.cfgs.IMG_SHORT_SIDE_LEN,
                                                           self.cfgs.IMG_MAX_LENGTH, 3])

        # 1. build backbone
        feature_pyramid = self.build_backbone(input_img_batch)

        # 2. build rpn
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(feature_pyramid, 'rpn_net')
        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)

        # 3. generate anchors
        anchor_list = self.make_anchors(feature_pyramid)
        anchors = tf.concat(anchor_list, axis=0)

        # 4. build first-stage loss
        if self.is_training:
            with tf.variable_scope('build_loss'):
                labels, target_delta, anchor_states, target_boxes = tf.py_func(
                    func=self.anchor_sampler_retinenet.anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors, gpu_id],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 1)

                cls_loss = self.losses.focal_loss(labels, rpn_cls_score, anchor_states)

                if self.cfgs.USE_IOU_FACTOR:
                    reg_loss = self.losses.iou_smooth_l1_loss_exp(target_delta, rpn_box_pred, anchor_states,
                                                                  target_boxes, anchors,
                                                                  alpha=self.cfgs.ALPHA, beta=self.cfgs.BETA)
                else:
                    reg_loss = self.losses.smooth_l1_loss(target_delta, rpn_box_pred, anchor_states)

                self.losses_dict['cls_loss'] = cls_loss * self.cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * self.cfgs.REG_WEIGHT

        # 5. per level: keep the best-scoring anchor at each location, decode it,
        #    and re-sample the feature map at the decoded box centres
        with tf.variable_scope('refine_feature_pyramid'):
            refine_feature_pyramid = {}
            refine_boxes_list = []

            for box_pred, cls_prob, anchor, stride, level in \
                    zip(rpn_box_pred_list, rpn_cls_prob_list, anchor_list,
                        self.cfgs.ANCHOR_STRIDE, self.cfgs.LEVEL):

                box_pred = tf.reshape(box_pred, [-1, self.num_anchors_per_location, 5])
                anchor = tf.reshape(anchor, [-1, self.num_anchors_per_location, 5 if self.method == 'R' else 4])
                cls_prob = tf.reshape(cls_prob, [-1, self.num_anchors_per_location, self.cfgs.CLASS_NUM])

                # Highest class probability per anchor, then the winning anchor per location.
                cls_max_prob = tf.reduce_max(cls_prob, axis=-1)
                box_pred_argmax = tf.cast(tf.reshape(tf.argmax(cls_max_prob, axis=-1), [-1, 1]), tf.int32)
                # cumsum(ones) - 1 yields the row index 0..N-1 for gather_nd.
                indices = tf.cast(tf.cumsum(tf.ones_like(box_pred_argmax), axis=0), tf.int32) - tf.constant(1, tf.int32)
                indices = tf.concat([indices, box_pred_argmax], axis=-1)

                box_pred_filter = tf.reshape(tf.gather_nd(box_pred, indices), [-1, 5])
                anchor_filter = tf.reshape(tf.gather_nd(anchor, indices), [-1, 5 if self.method == 'R' else 4])

                if self.cfgs.METHOD == 'H':
                    # Turn 4-value corner anchors into 5-value (x_c, y_c, w, h, theta) with theta = -90.
                    x_c = (anchor_filter[:, 2] + anchor_filter[:, 0]) / 2
                    y_c = (anchor_filter[:, 3] + anchor_filter[:, 1]) / 2
                    h = anchor_filter[:, 2] - anchor_filter[:, 0] + 1
                    w = anchor_filter[:, 3] - anchor_filter[:, 1] + 1
                    theta = -90 * tf.ones_like(x_c)
                    anchor_filter = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))

                boxes_filter = bbox_transform.rbbox_transform_inv(boxes=anchor_filter, deltas=box_pred_filter)
                refine_boxes_list.append(boxes_filter)
                # Box centres expressed in feature-map coordinates of this level.
                center_point = boxes_filter[:, :2] / stride

                refine_feature_pyramid[level] = self.refine_feature_op(points=center_point,
                                                                       feature_map=feature_pyramid[level])

        refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list, refine_angle_cls_list = \
            self.refine_net(refine_feature_pyramid, 'refine_net')

        refine_box_pred = tf.concat(refine_box_pred_list, axis=0)
        refine_cls_score = tf.concat(refine_cls_score_list, axis=0)
        refine_cls_prob = tf.concat(refine_cls_prob_list, axis=0)
        refine_angle_cls = tf.concat(refine_angle_cls_list, axis=0)
        refine_boxes = tf.concat(refine_boxes_list, axis=0)

        # 6. build refinement-stage loss
        if self.is_training:
            with tf.variable_scope('build_refine_loss'):
                refine_labels, refine_target_delta, refine_box_states, refine_target_boxes, refine_target_encode_label = tf.py_func(
                    func=self.refine_anchor_sampler_r3det_dcl.refine_anchor_target_layer,
                    inp=[gtboxes_batch_r, gt_encode_label, refine_boxes,
                         self.cfgs.REFINE_IOU_POSITIVE_THRESHOLD[0],
                         self.cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[0], gpu_id],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32, tf.float32])

                self.add_anchor_img_smry(input_img_batch, refine_boxes, refine_box_states, 1)

                refine_cls_loss = self.losses.focal_loss(refine_labels, refine_cls_score, refine_box_states)
                refine_reg_loss = self.losses.smooth_l1_loss(refine_target_delta, refine_box_pred,
                                                             refine_box_states)
                angle_cls_loss = self.losses.angle_cls_period_focal_loss(
                    refine_target_encode_label, refine_angle_cls, refine_box_states, refine_target_boxes,
                    decimal_weight=self.cfgs.DATASET_NAME.startswith('DOTA'))

                self.losses_dict['refine_cls_loss'] = refine_cls_loss * self.cfgs.CLS_WEIGHT
                self.losses_dict['refine_reg_loss'] = refine_reg_loss * self.cfgs.REG_WEIGHT
                self.losses_dict['angle_cls_loss'] = angle_cls_loss * self.cfgs.ANGLE_WEIGHT

        # 7. postprocess
        with tf.variable_scope('postprocess_detctions'):
            scores, category, boxes_angle = self.postprocess_detctions(refine_bbox_pred=refine_box_pred,
                                                                       refine_cls_prob=refine_cls_prob,
                                                                       refine_angle_prob=tf.sigmoid(refine_angle_cls),
                                                                       refine_boxes=refine_boxes,
                                                                       gpu_id=gpu_id)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)
            boxes_angle = tf.stop_gradient(boxes_angle)

        if self.is_training:
            return boxes_angle, scores, category, self.losses_dict
        else:
            return boxes_angle, scores, category

    def postprocess_detctions(self, refine_bbox_pred, refine_cls_prob, refine_angle_prob, refine_boxes, gpu_id):
        """Decode refined boxes, attach the decoded angle and run per-class rotated NMS.

        Args:
            refine_bbox_pred: box deltas from the refinement head.
            refine_cls_prob: class probabilities; column ``j`` is class ``j``.
            refine_angle_prob: sigmoid of the angle code logits.
            refine_boxes: boxes produced by the first stage, used as decode reference.
            gpu_id: forwarded to ``nms_rotate.nms_rotate``.

        Returns:
            ``(scores, labels, boxes)`` concatenated over all classes; labels are
            1-based (``j + 1``) and boxes have 5 columns with the decoded angle last.
        """
        return_boxes_pred_angle = []
        return_scores = []
        return_labels = []
        for j in range(0, self.cfgs.CLASS_NUM):
            scores = refine_cls_prob[:, j]
            # A different score threshold is used while training (VIS_SCORE)
            # than at inference (FILTERED_SCORE).
            if self.is_training:
                indices = tf.reshape(tf.where(tf.greater(scores, self.cfgs.VIS_SCORE)), [-1, ])
            else:
                indices = tf.reshape(tf.where(tf.greater(scores, self.cfgs.FILTERED_SCORE)), [-1, ])

            refine_boxes_ = tf.gather(refine_boxes, indices)
            refine_bbox_pred_ = tf.gather(refine_bbox_pred, indices)
            scores = tf.gather(scores, indices)
            refine_angle_prob_ = tf.gather(refine_angle_prob, indices)

            angle_cls = tf.py_func(angle_label_decode,
                                   inp=[refine_angle_prob_, self.cfgs.ANGLE_RANGE,
                                        self.cfgs.OMEGA, self.cfgs.ANGLE_MODE],
                                   Tout=[tf.float32])
            # The decoded angle is negated before it is attached to the boxes.
            angle_cls = tf.reshape(angle_cls, [-1, ]) * -1

            if self.cfgs.ANGLE_RANGE == 180:
                refine_boxes_ = tf.py_func(coordinate_present_convert,
                                           inp=[refine_boxes_, -1],
                                           Tout=[tf.float32])
                refine_boxes_ = tf.reshape(refine_boxes_, [-1, 5])

            refine_boxes_pred = bbox_transform.rbbox_transform_inv_dcl(boxes=refine_boxes_,
                                                                       deltas=refine_bbox_pred_)
            refine_boxes_pred = tf.reshape(refine_boxes_pred, [-1, 4])

            x, y, w, h = tf.unstack(refine_boxes_pred, axis=1)
            refine_boxes_pred_angle = tf.transpose(tf.stack([x, y, w, h, angle_cls]))

            if self.cfgs.ANGLE_RANGE == 180:
                refine_boxes_pred_angle = tf.py_func(coordinate_present_convert,
                                                     inp=[refine_boxes_pred_angle, 1],
                                                     Tout=[tf.float32])
                refine_boxes_pred_angle = tf.reshape(refine_boxes_pred_angle, [-1, 5])

            nms_indices = nms_rotate.nms_rotate(decode_boxes=refine_boxes_pred_angle,
                                                scores=scores,
                                                iou_threshold=self.cfgs.NMS_IOU_THRESHOLD,
                                                max_output_size=100 if self.is_training else 1000,
                                                use_gpu=True,
                                                gpu_id=gpu_id)

            tmp_refine_boxes_pred_angle = tf.reshape(tf.gather(refine_boxes_pred_angle, nms_indices), [-1, 5])
            tmp_scores = tf.reshape(tf.gather(scores, nms_indices), [-1, ])

            return_boxes_pred_angle.append(tmp_refine_boxes_pred_angle)
            return_scores.append(tmp_scores)
            return_labels.append(tf.ones_like(tmp_scores) * (j + 1))

        return_boxes_pred_angle = tf.concat(return_boxes_pred_angle, axis=0)
        return_scores = tf.concat(return_scores, axis=0)
        return_labels = tf.concat(return_labels, axis=0)

        return return_scores, return_labels, return_boxes_pred_angle
def __init__(self, cfgs, is_training):
    """Set up the anchor sampler, loss helper and angle coding length.

    Args:
        cfgs: configuration object; this constructor reads ``ANGLE_RANGE``,
            ``OMEGA`` and ``ANGLE_MODE`` from it and forwards it to the
            parent constructor and to the sampler.
        is_training: forwarded unchanged to the parent constructor.
    """
    super(DetectionNetworkDCL, self).__init__(cfgs, is_training)

    self.anchor_sampler_dcl = AnchorSamplerDCL(cfgs)

    # Built from ``self.cfgs``, so it has to follow the parent constructor
    # (presumably where ``self.cfgs`` is stored -- confirm in the base class).
    self.losses = LossDCL(self.cfgs)

    # ANGLE_RANGE / OMEGA is truncated to an int before being handed to
    # ``get_code_len``.
    range_over_omega = int(cfgs.ANGLE_RANGE / cfgs.OMEGA)
    self.coding_len = get_code_len(range_over_omega, mode=cfgs.ANGLE_MODE)
class DetectionNetworkDCL(DetectionNetworkBase):
    """Single-stage detection graph whose regression branch also emits an angle code.

    ``rpn_cls_net``, ``make_anchors``, ``build_backbone``, ``add_anchor_img_smry``,
    ``losses_dict``, ``method`` and ``num_anchors_per_location`` are not defined
    here; they are expected to come from ``DetectionNetworkBase``.
    """

    def __init__(self, cfgs, is_training):
        """Create the anchor sampler, the loss helper and the angle coding length."""
        super(DetectionNetworkDCL, self).__init__(cfgs, is_training)
        self.anchor_sampler_dcl = AnchorSamplerDCL(cfgs)
        self.losses = LossDCL(self.cfgs)
        range_over_omega = int(cfgs.ANGLE_RANGE / cfgs.OMEGA)
        self.coding_len = get_code_len(range_over_omega, mode=cfgs.ANGLE_MODE)

    def rpn_reg_net(self, inputs, scope_list, reuse_flag, level):
        """Build the box-delta and angle-code heads for one pyramid level.

        Args:
            inputs: feature map fed to the conv tower.
            scope_list: scope names; index 1 names the tower, index 3 the
                box-delta conv and index 4 the angle-code conv.
            reuse_flag: passed as ``reuse`` to every ``slim.conv2d`` call.
            level: level name, only used to name the reshape ops.

        Returns:
            ``(delta_boxes, angle_cls)`` reshaped to ``[-1, 4]`` and
            ``[-1, coding_len]``.
        """
        tower = inputs
        for conv_idx in range(self.cfgs.NUM_SUBNET_CONV):
            tower = slim.conv2d(
                inputs=tower,
                num_outputs=self.cfgs.FPN_CHANNEL,
                kernel_size=[3, 3],
                weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
                stride=1,
                # ReLU is deferred until after group norm when USE_GN is on.
                activation_fn=None if self.cfgs.USE_GN else tf.nn.relu,
                scope='{}_{}'.format(scope_list[1], conv_idx),
                trainable=self.is_training,
                reuse=reuse_flag)

            if self.cfgs.USE_GN:
                tower = tf.nn.relu(tf.contrib.layers.group_norm(tower))

        # Both output convolutions take the same settings apart from their
        # channel count and scope.
        head_kwargs = dict(
            kernel_size=[3, 3],
            stride=1,
            weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
            activation_fn=None,
            trainable=self.is_training,
            reuse=reuse_flag)

        delta_map = slim.conv2d(
            tower,
            num_outputs=4 * self.num_anchors_per_location,
            scope=scope_list[3],
            **head_kwargs)

        angle_map = slim.conv2d(
            tower,
            num_outputs=self.coding_len * self.num_anchors_per_location,
            scope=scope_list[4],
            **head_kwargs)

        delta_flat = tf.reshape(
            delta_map, [-1, 4],
            name='rpn_{}_regression_reshape'.format(level))
        angle_flat = tf.reshape(
            angle_map, [-1, self.coding_len],
            name='rpn_{}_angle_cls_reshape'.format(level))

        return delta_flat, angle_flat

    def rpn_net(self, feature_pyramid, name):
        """Run the classification and regression/angle heads over every level.

        Args:
            feature_pyramid: mapping from level name to feature map.
            name: variable scope wrapping all head layers.

        Returns:
            Four per-level lists in ``cfgs.LEVEL`` order: box deltas, class
            scores, class probabilities, angle codes.
        """
        scope_stems = ('conv2d_3x3_cls', 'conv2d_3x3_reg', 'rpn_classification',
                       'rpn_regression', 'rpn_angle_cls')

        deltas_per_level = []
        scores_per_level = []
        probs_per_level = []
        angles_per_level = []

        regularizer = slim.l2_regularizer(self.cfgs.WEIGHT_DECAY)
        with tf.variable_scope(name):
            with slim.arg_scope([slim.conv2d], weights_regularizer=regularizer):
                for level in self.cfgs.LEVEL:
                    if self.cfgs.SHARE_NET:
                        # First level creates the shared variables, later ones reuse them.
                        reuse_flag = True if level != self.cfgs.LEVEL[0] else None
                        scope_list = list(scope_stems)
                    else:
                        reuse_flag = None
                        scope_list = [stem + '_' + level for stem in scope_stems]

                    level_feature = feature_pyramid[level]
                    level_scores, level_probs = self.rpn_cls_net(
                        level_feature, scope_list, reuse_flag, level)
                    level_deltas, level_angles = self.rpn_reg_net(
                        level_feature, scope_list, reuse_flag, level)

                    scores_per_level.append(level_scores)
                    probs_per_level.append(level_probs)
                    deltas_per_level.append(level_deltas)
                    angles_per_level.append(level_angles)

        return deltas_per_level, scores_per_level, probs_per_level, angles_per_level

    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h=None,
                                      gtboxes_batch_r=None,
                                      gt_encode_label=None,
                                      gpu_id=0):
        """Assemble backbone, heads, anchors, losses (training only) and post-processing.

        Args:
            input_img_batch: input image tensor; reshaped to
                ``[1, IMG_SHORT_SIDE_LEN, IMG_MAX_LENGTH, 3]`` when ``USE_GN`` is set.
            gtboxes_batch_h: ground-truth tensor reshaped to ``[-1, 5]`` (training only).
            gtboxes_batch_r: ground-truth tensor reshaped to ``[-1, 6]`` (training only).
            gt_encode_label: ground-truth angle codes reshaped to
                ``[-1, coding_len]`` (training only).
            gpu_id: forwarded to the anchor sampler and to rotated NMS.

        Returns:
            ``(boxes_angle, scores, category, losses_dict)`` when training,
            otherwise ``(boxes_angle, scores, category)``.
        """
        if self.is_training:
            gtboxes_batch_h = tf.cast(tf.reshape(gtboxes_batch_h, [-1, 5]), tf.float32)
            gtboxes_batch_r = tf.cast(tf.reshape(gtboxes_batch_r, [-1, 6]), tf.float32)
            gt_encode_label = tf.cast(
                tf.reshape(gt_encode_label, [-1, self.coding_len]), tf.float32)

        if self.cfgs.USE_GN:
            fixed_shape = [1, self.cfgs.IMG_SHORT_SIDE_LEN, self.cfgs.IMG_MAX_LENGTH, 3]
            input_img_batch = tf.reshape(input_img_batch, fixed_shape)

        # 1. build backbone
        feature_pyramid = self.build_backbone(input_img_batch)

        # 2. build rpn
        box_pred_levels, cls_score_levels, cls_prob_levels, angle_cls_levels = self.rpn_net(
            feature_pyramid, 'rpn_net')
        rpn_box_pred = tf.concat(box_pred_levels, axis=0)
        rpn_cls_score = tf.concat(cls_score_levels, axis=0)
        rpn_cls_prob = tf.concat(cls_prob_levels, axis=0)
        rpn_angle_cls = tf.concat(angle_cls_levels, axis=0)

        # 3. generate anchors
        anchors = tf.concat(self.make_anchors(feature_pyramid), axis=0)

        # 4. build loss
        if self.is_training:
            with tf.variable_scope('build_loss'):
                sampler_outputs = tf.py_func(
                    func=self.anchor_sampler_dcl.anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, gt_encode_label, anchors, gpu_id],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32, tf.float32])
                labels, target_delta, anchor_states, target_boxes, target_encode_label = sampler_outputs

                # Last argument of the summary call: 0 for method 'H', 1 otherwise.
                self.add_anchor_img_smry(input_img_batch, anchors, anchor_states,
                                         0 if self.method == 'H' else 1)

                cls_loss = self.losses.focal_loss(labels, rpn_cls_score, anchor_states)

                loss_mode = self.cfgs.REG_LOSS_MODE
                if loss_mode == 0:
                    reg_loss = self.losses.iou_smooth_l1_loss_log(
                        target_delta, rpn_box_pred, anchor_states, target_boxes, anchors)
                elif loss_mode == 1:
                    reg_loss = self.losses.iou_smooth_l1_loss_exp(
                        target_delta, rpn_box_pred, anchor_states, target_boxes, anchors,
                        alpha=self.cfgs.ALPHA, beta=self.cfgs.BETA)
                else:
                    reg_loss = self.losses.smooth_l1_loss(
                        target_delta, rpn_box_pred, anchor_states)

                angle_cls_loss = self.losses.angle_cls_period_focal_loss(
                    target_encode_label, rpn_angle_cls, anchor_states, target_boxes,
                    decimal_weight=self.cfgs.DATASET_NAME.startswith('DOTA'))

                self.losses_dict['cls_loss'] = cls_loss * self.cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * self.cfgs.REG_WEIGHT
                self.losses_dict['angle_cls_loss'] = angle_cls_loss * self.cfgs.ANGLE_WEIGHT

        # 5. postprocess
        with tf.variable_scope('postprocess_detctions'):
            scores, category, boxes_angle = self.postprocess_detctions(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                rpn_angle_prob=tf.sigmoid(rpn_angle_cls),
                anchors=anchors,
                gpu_id=gpu_id)
            # Detections are outputs only; no gradient flows back through them.
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)
            boxes_angle = tf.stop_gradient(boxes_angle)

        if self.is_training:
            return boxes_angle, scores, category, self.losses_dict
        return boxes_angle, scores, category

    def postprocess_detctions(self, rpn_bbox_pred, rpn_cls_prob, rpn_angle_prob, anchors, gpu_id):
        """Decode boxes, attach the decoded angle and run per-class rotated NMS.

        Args:
            rpn_bbox_pred: box deltas, one row per anchor.
            rpn_cls_prob: class probabilities; column ``j`` is class ``j``.
            rpn_angle_prob: sigmoid of the angle code logits.
            anchors: anchors used as decode reference.
            gpu_id: forwarded to ``nms_rotate.nms_rotate``.

        Returns:
            ``(scores, labels, boxes)`` concatenated over all classes; labels are
            1-based (``j + 1``) and boxes have 5 columns with the decoded angle last.
        """
        kept_boxes = []
        kept_scores = []
        kept_labels = []

        for j in range(0, self.cfgs.CLASS_NUM):
            scores = rpn_cls_prob[:, j]

            # Training uses VIS_SCORE as threshold, inference uses FILTERED_SCORE.
            score_floor = self.cfgs.VIS_SCORE if self.is_training else self.cfgs.FILTERED_SCORE
            indices = tf.reshape(tf.where(tf.greater(scores, score_floor)), [-1, ])

            anchors_ = tf.gather(anchors, indices)
            rpn_bbox_pred_ = tf.gather(rpn_bbox_pred, indices)
            scores = tf.gather(scores, indices)
            rpn_angle_prob_ = tf.gather(rpn_angle_prob, indices)

            decoded_angle = tf.py_func(
                angle_label_decode,
                inp=[rpn_angle_prob_, self.cfgs.ANGLE_RANGE, self.cfgs.OMEGA, self.cfgs.ANGLE_MODE],
                Tout=[tf.float32])
            # The decoded angle is negated before it is attached to the boxes.
            decoded_angle = tf.reshape(decoded_angle, [-1, ]) * -1

            if self.cfgs.METHOD == 'H':
                # 4-value corner anchors -> (x_c, y_c, w, h, theta) with theta = -90.
                x_c = (anchors_[:, 2] + anchors_[:, 0]) / 2
                y_c = (anchors_[:, 3] + anchors_[:, 1]) / 2
                h = anchors_[:, 2] - anchors_[:, 0] + 1
                w = anchors_[:, 3] - anchors_[:, 1] + 1
                theta = -90 * tf.ones_like(x_c)
                anchors_ = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))

            if self.cfgs.ANGLE_RANGE == 180:
                anchors_ = tf.py_func(coordinate_present_convert,
                                      inp=[anchors_, -1],
                                      Tout=[tf.float32])
                anchors_ = tf.reshape(anchors_, [-1, 5])

            decoded = bbox_transform.rbbox_transform_inv_dcl(boxes=anchors_, deltas=rpn_bbox_pred_)
            decoded = tf.reshape(decoded, [-1, 4])

            x, y, w, h = tf.unstack(decoded, axis=1)
            boxes_with_angle = tf.transpose(tf.stack([x, y, w, h, decoded_angle]))

            if self.cfgs.ANGLE_RANGE == 180:
                boxes_with_angle = tf.py_func(coordinate_present_convert,
                                              inp=[boxes_with_angle, 1],
                                              Tout=[tf.float32])
                boxes_with_angle = tf.reshape(boxes_with_angle, [-1, 5])

            max_output_size = 4000 if 'DOTA' in self.cfgs.NET_NAME else 200
            nms_indices = nms_rotate.nms_rotate(
                decode_boxes=boxes_with_angle,
                scores=scores,
                iou_threshold=self.cfgs.NMS_IOU_THRESHOLD,
                max_output_size=100 if self.is_training else max_output_size,
                use_gpu=True,
                gpu_id=gpu_id)

            class_boxes = tf.reshape(tf.gather(boxes_with_angle, nms_indices), [-1, 5])
            class_scores = tf.reshape(tf.gather(scores, nms_indices), [-1, ])

            kept_boxes.append(class_boxes)
            kept_scores.append(class_scores)
            kept_labels.append(tf.ones_like(class_scores) * (j + 1))

        return_boxes_pred_angle = tf.concat(kept_boxes, axis=0)
        return_scores = tf.concat(kept_scores, axis=0)
        return_labels = tf.concat(kept_labels, axis=0)

        return return_scores, return_labels, return_boxes_pred_angle