def __init__(self, cfgs, is_training):
    """Build the R2CNN detector's components.

    Args:
        cfgs: project configuration object (read for every sub-module).
        is_training: bool; forwarded to the base network.
    """
    super(DetectionNetworkR2CNN, self).__init__(cfgs, is_training)
    # Target assigners for the two stages: RPN anchors and RCNN proposals.
    self.anchor_sampler_r2cnn = AnchorSamplerR2CNN(cfgs)
    self.proposal_sampler_r2cnn = ProposalSamplerR2CNN(cfgs)
    self.losses = Loss(cfgs)
    # RoI feature extraction and the second-stage box head.
    self.roi_extractor = RoIExtractor(cfgs)
    self.box_head = BoxHead(cfgs)
def __init__(self, cfgs, is_training):
    """Build the refine-RetinaNet detector's components.

    Args:
        cfgs: project configuration object.
        is_training: bool; forwarded to the base network.
    """
    super(DetectionNetworkRefineRetinaNet, self).__init__(cfgs, is_training)
    # Anchor target assigner for the first (RetinaNet) stage.
    self.anchor_sampler_retinenet = AnchorSamplerRetinaNet(cfgs)
    # Target assigner for the refinement stage(s) (R3Det-style).
    self.refine_anchor_sampler_r3det = RefineAnchorSamplerR3Det(cfgs)
    self.losses = Loss(self.cfgs)
class DetectionNetworkRefineRetinaNet(DetectionNetworkBase):
    """RetinaNet with one or more box-refinement stages (rotated boxes).

    A standard RetinaNet head produces coarse rotated-box predictions; each
    refinement stage decodes the current boxes and re-predicts class scores
    and box deltas from the (unchanged) feature pyramid.
    Boxes are 5-tuples (x_c, y_c, w, h, theta) — see ``refine_stage``.
    """

    def __init__(self, cfgs, is_training):
        """Create samplers and the loss module (see class docstring)."""
        super(DetectionNetworkRefineRetinaNet, self).__init__(cfgs, is_training)
        self.anchor_sampler_retinenet = AnchorSamplerRetinaNet(cfgs)
        self.refine_anchor_sampler_r3det = RefineAnchorSamplerR3Det(cfgs)
        self.losses = Loss(self.cfgs)

    def refine_cls_net(self, inputs, scope_list, reuse_flag, level):
        """Refinement classification subnet for one pyramid level.

        Args:
            inputs: feature map of this pyramid level.
            scope_list: variable-scope names; [0] is the conv tower,
                [2] the final classification conv.
            reuse_flag: None or True — TF1 variable reuse across levels.
            level: level name, used only in op names.

        Returns:
            (scores, probs): both reshaped to [-1, CLASS_NUM]; probs are
            per-class sigmoid scores.
        """
        rpn_conv2d_3x3 = inputs
        for i in range(self.cfgs.NUM_SUBNET_CONV):
            rpn_conv2d_3x3 = slim.conv2d(
                inputs=rpn_conv2d_3x3,
                num_outputs=self.cfgs.FPN_CHANNEL,
                kernel_size=[3, 3],
                stride=1,
                activation_fn=tf.nn.relu,
                weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
                trainable=self.is_training,
                scope='{}_{}'.format(scope_list[0], i),
                reuse=reuse_flag)
        # Final conv emits CLASS_NUM scores per anchor location; bias uses the
        # dedicated FINAL_CONV_BIAS_INITIALIZER (focal-loss prior, presumably).
        rpn_box_scores = slim.conv2d(
            rpn_conv2d_3x3,
            num_outputs=self.cfgs.CLASS_NUM * self.num_anchors_per_location,
            kernel_size=[3, 3],
            stride=1,
            weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=self.cfgs.FINAL_CONV_BIAS_INITIALIZER,
            trainable=self.is_training,
            scope=scope_list[2],
            activation_fn=None,
            reuse=reuse_flag)
        rpn_box_scores = tf.reshape(
            rpn_box_scores, [-1, self.cfgs.CLASS_NUM],
            name='refine_{}_classification_reshape'.format(level))
        rpn_box_probs = tf.sigmoid(
            rpn_box_scores,
            name='refine_{}_classification_sigmoid'.format(level))
        return rpn_box_scores, rpn_box_probs

    def refine_reg_net(self, inputs, scope_list, reuse_flag, level):
        """Refinement regression subnet for one pyramid level.

        Returns box deltas reshaped to [-1, 5] (rotated-box encoding).
        scope_list[1] is the conv tower, scope_list[3] the final reg conv.
        """
        rpn_conv2d_3x3 = inputs
        for i in range(self.cfgs.NUM_SUBNET_CONV):
            rpn_conv2d_3x3 = slim.conv2d(
                inputs=rpn_conv2d_3x3,
                num_outputs=self.cfgs.FPN_CHANNEL,
                kernel_size=[3, 3],
                weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
                stride=1,
                activation_fn=tf.nn.relu,
                trainable=self.is_training,
                scope='{}_{}'.format(scope_list[1], i),
                reuse=reuse_flag)
        rpn_delta_boxes = slim.conv2d(
            rpn_conv2d_3x3,
            num_outputs=5 * self.num_anchors_per_location,
            kernel_size=[3, 3],
            stride=1,
            weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
            trainable=self.is_training,
            scope=scope_list[3],
            activation_fn=None,
            reuse=reuse_flag)
        rpn_delta_boxes = tf.reshape(
            rpn_delta_boxes, [-1, 5],
            name='refine_{}_regression_reshape'.format(level))
        return rpn_delta_boxes

    def refine_net(self, feature_pyramid, name):
        """Run the refinement cls/reg subnets over every pyramid level.

        When cfgs.SHARE_NET is set, weights are shared across levels
        (variables created on the first level, reused on the rest).

        Returns:
            (delta_boxes_list, scores_list, probs_list): per-level outputs.
        """
        refine_delta_boxes_list = []
        refine_scores_list = []
        refine_probs_list = []
        with tf.variable_scope(name):
            with slim.arg_scope([slim.conv2d],
                                weights_regularizer=slim.l2_regularizer(
                                    self.cfgs.WEIGHT_DECAY)):
                for level in self.cfgs.LEVEL:
                    if self.cfgs.SHARE_NET:
                        # Reuse variables for all but the first level.
                        reuse_flag = None if level == self.cfgs.LEVEL[
                            0] else True
                        scope_list = [
                            'conv2d_3x3_cls', 'conv2d_3x3_reg',
                            'refine_classification', 'refine_regression'
                        ]
                    else:
                        reuse_flag = None
                        scope_list = [
                            'conv2d_3x3_cls_' + level,
                            'conv2d_3x3_reg_' + level,
                            'refine_classification_' + level,
                            'refine_regression_' + level
                        ]
                    refine_box_scores, refine_box_probs = self.refine_cls_net(
                        feature_pyramid[level], scope_list, reuse_flag, level)
                    refine_delta_boxes = self.refine_reg_net(
                        feature_pyramid[level], scope_list, reuse_flag, level)
                    refine_scores_list.append(refine_box_scores)
                    refine_probs_list.append(refine_box_probs)
                    refine_delta_boxes_list.append(refine_delta_boxes)
        return refine_delta_boxes_list, refine_scores_list, refine_probs_list

    def refine_stage(self, input_img_batch, gtboxes_batch_r, box_pred_list,
                     cls_prob_list, proposal_list, feature_pyramid, gpu_id,
                     pos_threshold, neg_threshold, stage):
        """One refinement stage: decode current boxes, re-predict, add losses.

        Args:
            input_img_batch: image tensor (used only for summaries here).
            gtboxes_batch_r: rotated ground-truth boxes (training only).
            box_pred_list / cls_prob_list / proposal_list: per-level outputs
                of the previous stage (or the RPN for the first stage).
            feature_pyramid: dict level -> feature map.
            gpu_id: forwarded to the py_func target layer.
            pos_threshold / neg_threshold: IoU thresholds for this stage.
            stage: '' for the first stage, '_stageN' afterwards; used in
                scope/loss-dict names.

        Returns:
            (refine_box_pred_list, refine_cls_prob_list, refine_boxes_list)
            — per-level predictions plus the decoded boxes they refine.

        Side effects (training): adds 'refine_cls_loss{stage}' and
        'refine_reg_loss{stage}' to self.losses_dict and an anchor summary.
        """
        with tf.variable_scope('refine_feature_pyramid{}'.format(stage)):
            refine_boxes_list = []
            for box_pred, cls_prob, proposal, stride, level in \
                    zip(box_pred_list, cls_prob_list, proposal_list,
                        self.cfgs.ANCHOR_STRIDE, self.cfgs.LEVEL):
                if stage == '' and self.cfgs.METHOD == 'H':
                    # First stage with horizontal anchors: convert the
                    # (x1, y1, x2, y2) anchors to rotated form
                    # (x_c, y_c, w, h, theta=-90) before decoding.
                    x_c = (proposal[:, 2] + proposal[:, 0]) / 2
                    y_c = (proposal[:, 3] + proposal[:, 1]) / 2
                    h = proposal[:, 2] - proposal[:, 0] + 1
                    w = proposal[:, 3] - proposal[:, 1] + 1
                    theta = -90 * tf.ones_like(x_c)
                    proposal = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))
                bboxes = bbox_transform.rbbox_transform_inv(boxes=proposal,
                                                            deltas=box_pred)
                refine_boxes_list.append(bboxes)

        refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list = self.refine_net(
            feature_pyramid, 'refine_net{}'.format(stage))

        refine_box_pred = tf.concat(refine_box_pred_list, axis=0)
        refine_cls_score = tf.concat(refine_cls_score_list, axis=0)
        # refine_cls_prob = tf.concat(refine_cls_prob_list, axis=0)
        refine_boxes = tf.concat(refine_boxes_list, axis=0)

        if self.is_training:
            with tf.variable_scope('build_refine_loss{}'.format(stage)):
                refine_labels, refine_target_delta, refine_box_states, refine_target_boxes = tf.py_func(
                    func=self.refine_anchor_sampler_r3det.
                    refine_anchor_target_layer,
                    inp=[
                        gtboxes_batch_r, refine_boxes, pos_threshold,
                        neg_threshold, gpu_id
                    ],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                self.add_anchor_img_smry(input_img_batch, refine_boxes,
                                         refine_box_states, 1)

                refine_cls_loss = self.losses.focal_loss(
                    refine_labels, refine_cls_score, refine_box_states)
                if self.cfgs.USE_IOU_FACTOR:
                    refine_reg_loss = self.losses.iou_smooth_l1_loss_exp(
                        refine_target_delta,
                        refine_box_pred,
                        refine_box_states,
                        refine_target_boxes,
                        refine_boxes,
                        is_refine=True)
                else:
                    refine_reg_loss = self.losses.smooth_l1_loss(
                        refine_target_delta, refine_box_pred,
                        refine_box_states)

                self.losses_dict['refine_cls_loss{}'.format(
                    stage)] = refine_cls_loss * self.cfgs.CLS_WEIGHT
                self.losses_dict['refine_reg_loss{}'.format(
                    stage)] = refine_reg_loss * self.cfgs.REG_WEIGHT

        return refine_box_pred_list, refine_cls_prob_list, refine_boxes_list

    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h=None,
                                      gtboxes_batch_r=None,
                                      gpu_id=0):
        """Assemble the full graph: backbone, RPN, losses, refinement, NMS.

        Args:
            input_img_batch: input image tensor.
            gtboxes_batch_h: horizontal GT boxes, reshaped to [-1, 5]
                (training only).
            gtboxes_batch_r: rotated GT boxes, reshaped to [-1, 6]
                (training only).
            gpu_id: forwarded to py_func samplers / NMS.

        Returns:
            (boxes, scores, category) plus self.losses_dict when training.
        """
        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)
            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        # 1. build backbone
        feature_pyramid = self.build_backbone(input_img_batch)

        # 2. build rpn
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(
            feature_pyramid, 'rpn_net')
        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
        # rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)

        # 3. generate anchors
        anchor_list = self.make_anchors(feature_pyramid)
        anchors = tf.concat(anchor_list, axis=0)

        # 4. build loss
        if self.is_training:
            with tf.variable_scope('build_loss'):
                labels, target_delta, anchor_states, target_boxes = tf.py_func(
                    func=self.anchor_sampler_retinenet.anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors, gpu_id],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                # Summary mode 0 for horizontal anchors, 1 for rotated.
                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 1)

                cls_loss = self.losses.focal_loss(labels, rpn_cls_score,
                                                  anchor_states)
                if self.cfgs.USE_IOU_FACTOR:
                    reg_loss = self.losses.iou_smooth_l1_loss_exp(
                        target_delta,
                        rpn_box_pred,
                        anchor_states,
                        target_boxes,
                        anchors,
                        alpha=self.cfgs.ALPHA,
                        beta=self.cfgs.BETA)
                else:
                    reg_loss = self.losses.smooth_l1_loss(
                        target_delta, rpn_box_pred, anchor_states)

                self.losses_dict['cls_loss'] = cls_loss * self.cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * self.cfgs.REG_WEIGHT

        box_pred_list, cls_prob_list, proposal_list = rpn_box_pred_list, rpn_cls_prob_list, anchor_list

        all_box_pred_list, all_cls_prob_list, all_proposal_list = [], [], []
        for i in range(self.cfgs.NUM_REFINE_STAGE):
            box_pred_list, cls_prob_list, proposal_list = self.refine_stage(
                input_img_batch,
                gtboxes_batch_r,
                box_pred_list,
                cls_prob_list,
                proposal_list,
                feature_pyramid,
                gpu_id,
                pos_threshold=self.cfgs.REFINE_IOU_POSITIVE_THRESHOLD[i],
                neg_threshold=self.cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[i],
                stage='' if i == 0 else '_stage{}'.format(i + 2))

            # Inference accumulates every stage's outputs; training keeps
            # only the last stage (losses were added inside refine_stage).
            if not self.is_training:
                all_box_pred_list.extend(box_pred_list)
                all_cls_prob_list.extend(cls_prob_list)
                all_proposal_list.extend(proposal_list)
            else:
                all_box_pred_list, all_cls_prob_list, all_proposal_list = box_pred_list, cls_prob_list, proposal_list

        # 5. postprocess
        with tf.variable_scope('postprocess_detctions'):
            box_pred = tf.concat(all_box_pred_list, axis=0)
            cls_prob = tf.concat(all_cls_prob_list, axis=0)
            proposal = tf.concat(all_proposal_list, axis=0)
            boxes, scores, category = self.postprocess_detctions(
                refine_bbox_pred=box_pred,
                refine_cls_prob=cls_prob,
                anchors=proposal)
            boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)

        if self.is_training:
            return boxes, scores, category, self.losses_dict
        else:
            return boxes, scores, category

    def postprocess_detctions(self, refine_bbox_pred, refine_cls_prob,
                              anchors):
        """Decode refined predictions and run per-class score filter + NMS.

        Returns concatenated (boxes [-1, 5], scores [-1], labels [-1]);
        labels are 1-based class ids.
        """
        def filter_detections(boxes, scores):
            """
            :param boxes: [-1, 4]
            :param scores: [-1, ]
            :param labels: [-1, ]
            :return:
            """
            # VIS_SCORE is the (higher) training-visualization threshold;
            # FILTERED_SCORE the inference threshold.
            if self.is_training:
                indices = tf.reshape(
                    tf.where(tf.greater(scores, self.cfgs.VIS_SCORE)), [
                        -1,
                    ])
            else:
                indices = tf.reshape(
                    tf.where(tf.greater(scores, self.cfgs.FILTERED_SCORE)), [
                        -1,
                    ])
            if self.cfgs.NMS:
                filtered_boxes = tf.gather(boxes, indices)
                filtered_scores = tf.gather(scores, indices)
                # perform NMS
                nms_indices = nms_rotate.nms_rotate(
                    decode_boxes=filtered_boxes,
                    scores=filtered_scores,
                    iou_threshold=self.cfgs.NMS_IOU_THRESHOLD,
                    max_output_size=100 if self.is_training else 1000,
                    use_gpu=False)
                # filter indices based on NMS
                indices = tf.gather(indices, nms_indices)
            # add indices to list of all indices
            return indices

        boxes_pred = bbox_transform.rbbox_transform_inv(
            boxes=anchors,
            deltas=refine_bbox_pred,
            scale_factors=self.cfgs.ANCHOR_SCALE_FACTORS)

        return_boxes_pred = []
        return_scores = []
        return_labels = []
        for j in range(0, self.cfgs.CLASS_NUM):
            indices = filter_detections(boxes_pred, refine_cls_prob[:, j])
            tmp_boxes_pred = tf.reshape(tf.gather(boxes_pred, indices),
                                        [-1, 5])
            tmp_scores = tf.reshape(tf.gather(refine_cls_prob[:, j], indices),
                                    [
                                        -1,
                                    ])
            return_boxes_pred.append(tmp_boxes_pred)
            return_scores.append(tmp_scores)
            # Class ids are shifted by 1 (0 presumably reserved for background).
            return_labels.append(tf.ones_like(tmp_scores) * (j + 1))

        return_boxes_pred = tf.concat(return_boxes_pred, axis=0)
        return_scores = tf.concat(return_scores, axis=0)
        return_labels = tf.concat(return_labels, axis=0)

        return return_boxes_pred, return_scores, return_labels
class DetectionNetworkR3Det(DetectionNetworkBase):
    """R3Det: RetinaNet plus feature-refinement stages for rotated boxes.

    Unlike DetectionNetworkRefineRetinaNet, each refinement stage here also
    re-samples the feature pyramid at the decoded box centers
    (``refine_feature_op``) before re-predicting.
    """

    def __init__(self, cfgs, is_training):
        """Create samplers and the loss module."""
        super(DetectionNetworkR3Det, self).__init__(cfgs, is_training)
        self.anchor_sampler_retinenet = AnchorSamplerRetinaNet(cfgs)
        self.refine_anchor_sampler_r3det = RefineAnchorSamplerR3Det(cfgs)
        self.losses = Loss(self.cfgs)

    def refine_cls_net(self, inputs, scope_list, reuse_flag, level):
        """Refinement classification subnet (optionally with GroupNorm).

        With cfgs.USE_GN the conv is linear and GN + ReLU are applied
        afterwards; otherwise the conv applies ReLU directly.
        Returns (scores, probs), both [-1, CLASS_NUM].
        """
        rpn_conv2d_3x3 = inputs
        for i in range(self.cfgs.NUM_SUBNET_CONV):
            rpn_conv2d_3x3 = slim.conv2d(
                inputs=rpn_conv2d_3x3,
                num_outputs=self.cfgs.FPN_CHANNEL,
                kernel_size=[3, 3],
                stride=1,
                activation_fn=None if self.cfgs.USE_GN else tf.nn.relu,
                weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
                trainable=self.is_training,
                scope='{}_{}'.format(scope_list[0], i),
                reuse=reuse_flag)
            if self.cfgs.USE_GN:
                rpn_conv2d_3x3 = tf.contrib.layers.group_norm(rpn_conv2d_3x3)
                rpn_conv2d_3x3 = tf.nn.relu(rpn_conv2d_3x3)
        # Note: only CLASS_NUM outputs here (one refined box per location),
        # not CLASS_NUM * num_anchors_per_location as in the coarse head.
        rpn_box_scores = slim.conv2d(
            rpn_conv2d_3x3,
            num_outputs=self.cfgs.CLASS_NUM,
            kernel_size=[3, 3],
            stride=1,
            weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=self.cfgs.FINAL_CONV_BIAS_INITIALIZER,
            scope=scope_list[2],
            trainable=self.is_training,
            activation_fn=None,
            reuse=reuse_flag)
        rpn_box_scores = tf.reshape(
            rpn_box_scores, [-1, self.cfgs.CLASS_NUM],
            name='refine_{}_classification_reshape'.format(level))
        rpn_box_probs = tf.sigmoid(
            rpn_box_scores,
            name='refine_{}_classification_sigmoid'.format(level))
        return rpn_box_scores, rpn_box_probs

    def refine_reg_net(self, inputs, scope_list, reuse_flag, level):
        """Refinement regression subnet (optionally with GroupNorm).

        Returns box deltas reshaped to [-1, 5] — a single refined rotated
        box per feature location.
        """
        rpn_conv2d_3x3 = inputs
        for i in range(self.cfgs.NUM_SUBNET_CONV):
            rpn_conv2d_3x3 = slim.conv2d(
                inputs=rpn_conv2d_3x3,
                num_outputs=self.cfgs.FPN_CHANNEL,
                kernel_size=[3, 3],
                weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
                stride=1,
                activation_fn=None if self.cfgs.USE_GN else tf.nn.relu,
                scope='{}_{}'.format(scope_list[1], i),
                trainable=self.is_training,
                reuse=reuse_flag)
            if self.cfgs.USE_GN:
                rpn_conv2d_3x3 = tf.contrib.layers.group_norm(rpn_conv2d_3x3)
                rpn_conv2d_3x3 = tf.nn.relu(rpn_conv2d_3x3)
        rpn_delta_boxes = slim.conv2d(
            rpn_conv2d_3x3,
            num_outputs=5,
            kernel_size=[3, 3],
            stride=1,
            weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
            scope=scope_list[3],
            trainable=self.is_training,
            activation_fn=None,
            reuse=reuse_flag)
        rpn_delta_boxes = tf.reshape(
            rpn_delta_boxes, [-1, 5],
            name='refine_{}_regression_reshape'.format(level))
        return rpn_delta_boxes

    def refine_net(self, feature_pyramid, name):
        """Run refinement cls/reg subnets on every pyramid level.

        Weight sharing across levels is controlled by cfgs.SHARE_NET via
        TF1 variable reuse. Returns per-level (deltas, scores, probs) lists.
        """
        refine_delta_boxes_list = []
        refine_scores_list = []
        refine_probs_list = []
        with tf.variable_scope(name):
            with slim.arg_scope([slim.conv2d],
                                weights_regularizer=slim.l2_regularizer(
                                    self.cfgs.WEIGHT_DECAY)):
                for level in self.cfgs.LEVEL:
                    if self.cfgs.SHARE_NET:
                        reuse_flag = None if level == self.cfgs.LEVEL[
                            0] else True
                        scope_list = [
                            'conv2d_3x3_cls', 'conv2d_3x3_reg',
                            'refine_classification', 'refine_regression'
                        ]
                    else:
                        reuse_flag = None
                        scope_list = [
                            'conv2d_3x3_cls_' + level,
                            'conv2d_3x3_reg_' + level,
                            'refine_classification_' + level,
                            'refine_regression_' + level
                        ]
                    refine_box_scores, refine_box_probs = self.refine_cls_net(
                        feature_pyramid[level], scope_list, reuse_flag, level)
                    refine_delta_boxes = self.refine_reg_net(
                        feature_pyramid[level], scope_list, reuse_flag, level)
                    refine_scores_list.append(refine_box_scores)
                    refine_probs_list.append(refine_box_probs)
                    refine_delta_boxes_list.append(refine_delta_boxes)
        return refine_delta_boxes_list, refine_scores_list, refine_probs_list

    def refine_feature_op(self, points, feature_map, name):
        """Bilinearly re-sample the feature map at given points (FRM module).

        Args:
            points: [-1, 2] (x, y) sample locations in feature-map
                coordinates, one per spatial location of feature_map —
                the reshape below assumes exactly h*w points.
            feature_map: [1, h, w, C] feature map (batch size 1 is assumed
                by the tf.squeeze calls below).
            name: level name used in conv scopes.

        Returns:
            A [1, h, w, FPN_CHANNEL] refined map: bilinear sample of a
            (5x1∘1x5 + 1x1) conv of the input, added residually to that conv
            output.
        """
        h, w = tf.cast(tf.shape(feature_map)[1],
                       tf.int32), tf.cast(tf.shape(feature_map)[2], tf.int32)

        # Clamp the four neighbouring integer coordinates into the map.
        xmin = tf.maximum(0.0, tf.floor(points[:, 0]))
        xmin = tf.minimum(tf.cast(w - 1, tf.float32), tf.ceil(xmin))
        ymin = tf.maximum(0.0, tf.floor(points[:, 1]))
        ymin = tf.minimum(tf.cast(h - 1, tf.float32), tf.ceil(ymin))
        xmax = tf.minimum(tf.cast(w - 1, tf.float32), tf.ceil(points[:, 0]))
        xmax = tf.maximum(0.0, tf.floor(xmax))
        ymax = tf.minimum(tf.cast(h - 1, tf.float32), tf.ceil(points[:, 1]))
        ymax = tf.maximum(0.0, tf.floor(ymax))

        left_top = tf.cast(tf.transpose(tf.stack([ymin, xmin], axis=0)),
                           tf.int32)
        right_bottom = tf.cast(tf.transpose(tf.stack([ymax, xmax], axis=0)),
                               tf.int32)
        left_bottom = tf.cast(tf.transpose(tf.stack([ymax, xmin], axis=0)),
                              tf.int32)
        right_top = tf.cast(tf.transpose(tf.stack([ymin, xmax], axis=0)),
                            tf.int32)

        # 1x5 then 5x1 (separable 5x5) branch plus a 1x1 branch.
        feature_1x5 = slim.conv2d(
            inputs=feature_map,
            num_outputs=self.cfgs.FPN_CHANNEL,
            kernel_size=[1, 5],
            weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
            stride=1,
            activation_fn=None,
            trainable=self.is_training,
            scope='refine_1x5_{}'.format(name))

        feature5x1 = slim.conv2d(
            inputs=feature_1x5,
            num_outputs=self.cfgs.FPN_CHANNEL,
            kernel_size=[5, 1],
            weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
            stride=1,
            activation_fn=None,
            trainable=self.is_training,
            scope='refine_5x1_{}'.format(name))

        feature_1x1 = slim.conv2d(
            inputs=feature_map,
            num_outputs=self.cfgs.FPN_CHANNEL,
            kernel_size=[1, 1],
            weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
            stride=1,
            activation_fn=None,
            trainable=self.is_training,
            scope='refine_1x1_{}'.format(name))

        feature = feature5x1 + feature_1x1
        # feature = feature_map

        left_top_feature = tf.gather_nd(tf.squeeze(feature), left_top)
        right_bottom_feature = tf.gather_nd(tf.squeeze(feature), right_bottom)
        left_bottom_feature = tf.gather_nd(tf.squeeze(feature), left_bottom)
        right_top_feature = tf.gather_nd(tf.squeeze(feature), right_top)

        # Bilinear interpolation: weight each corner feature by the area of
        # the opposite sub-rectangle.
        refine_feature = right_bottom_feature * tf.tile(
            tf.reshape((tf.abs((points[:, 0] - xmin) * (points[:, 1] - ymin))), [-1, 1]),
            [1, self.cfgs.FPN_CHANNEL]) \
            + left_top_feature * tf.tile(
            tf.reshape((tf.abs((xmax - points[:, 0]) * (ymax - points[:, 1]))), [-1, 1]),
            [1, self.cfgs.FPN_CHANNEL]) \
            + right_top_feature * tf.tile(
            tf.reshape((tf.abs((points[:, 0] - xmin) * (ymax - points[:, 1]))), [-1, 1]),
            [1, self.cfgs.FPN_CHANNEL]) \
            + left_bottom_feature * tf.tile(
            tf.reshape((tf.abs((xmax - points[:, 0]) * (points[:, 1] - ymin))), [-1, 1]),
            [1, self.cfgs.FPN_CHANNEL])

        refine_feature = tf.reshape(refine_feature, [
            1,
            tf.cast(h, tf.int32),
            tf.cast(w, tf.int32), self.cfgs.FPN_CHANNEL
        ])

        # refine_feature = tf.reshape(refine_feature, [1, tf.cast(feature_size[1], tf.int32),
        #                                              tf.cast(feature_size[0], tf.int32), 256])

        return refine_feature + feature

    def refine_feature_five_op(self, points, feature_map, name):
        """Variant of refine_feature_op sampling five points per box.

        Args:
            points: per-location list of five (x, y) pairs flattened along
                axis 1 (see the 2*i indexing below).
            feature_map: [1, h, w, C] feature map.
            name: level name used in conv scopes.

        NOTE(review): the loop uses ``points[:, 0 + 2 * (i - 1)]`` with
        i in range(5), so i == 0 yields index -2 (Python negative
        indexing wraps to the last pairs). Possibly intentional, possibly
        an off-by-one — confirm against the five-point layout before
        relying on this path (it is commented out at the call site).
        """
        h, w = tf.cast(tf.shape(feature_map)[1],
                       tf.int32), tf.cast(tf.shape(feature_map)[2], tf.int32)

        feature_1x5 = slim.conv2d(
            inputs=feature_map,
            num_outputs=self.cfgs.FPN_CHANNEL,
            kernel_size=[1, 5],
            weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
            stride=1,
            activation_fn=None,
            trainable=self.is_training,
            scope='refine_1x5_{}'.format(name))

        feature5x1 = slim.conv2d(
            inputs=feature_1x5,
            num_outputs=self.cfgs.FPN_CHANNEL,
            kernel_size=[5, 1],
            weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
            stride=1,
            activation_fn=None,
            trainable=self.is_training,
            scope='refine_5x1_{}'.format(name))

        feature_1x1 = slim.conv2d(
            inputs=feature_map,
            num_outputs=self.cfgs.FPN_CHANNEL,
            kernel_size=[1, 1],
            weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
            stride=1,
            activation_fn=None,
            trainable=self.is_training,
            scope='refine_1x1_{}'.format(name))

        feature = feature5x1 + feature_1x1

        for i in range(5):
            xmin = tf.maximum(0.0, tf.floor(points[:, 0 + 2 * (i - 1)]))
            ymin = tf.maximum(0.0, tf.floor(points[:, 1 + 2 * (i - 1)]))
            xmax = tf.minimum(tf.cast(w - 1, tf.float32),
                              tf.ceil(points[:, 0 + 2 * (i - 1)]))
            ymax = tf.minimum(tf.cast(h - 1, tf.float32),
                              tf.ceil(points[:, 1 + 2 * (i - 1)]))

            left_top = tf.cast(tf.transpose(tf.stack([ymin, xmin], axis=0)),
                               tf.int32)
            right_bottom = tf.cast(
                tf.transpose(tf.stack([ymax, xmax], axis=0)), tf.int32)
            left_bottom = tf.cast(tf.transpose(tf.stack([ymax, xmin],
                                                        axis=0)), tf.int32)
            right_top = tf.cast(tf.transpose(tf.stack([ymin, xmax], axis=0)),
                                tf.int32)

            left_top_feature = tf.gather_nd(tf.squeeze(feature), left_top)
            right_bottom_feature = tf.gather_nd(tf.squeeze(feature),
                                                right_bottom)
            left_bottom_feature = tf.gather_nd(tf.squeeze(feature),
                                               left_bottom)
            right_top_feature = tf.gather_nd(tf.squeeze(feature), right_top)

            # Bilinear interpolation at this sample point (see
            # refine_feature_op for the weighting scheme).
            refine_feature = right_bottom_feature * tf.tile(
                tf.reshape((tf.abs((points[:, 0+2*(i-1)] - xmin) * (points[:, 1+2*(i-1)] - ymin))), [-1, 1]),
                [1, self.cfgs.FPN_CHANNEL]) \
                + left_top_feature * tf.tile(
                tf.reshape((tf.abs((xmax - points[:, 0+2*(i-1)]) * (ymax - points[:, 1+2*(i-1)]))), [-1, 1]),
                [1, self.cfgs.FPN_CHANNEL]) \
                + right_top_feature * tf.tile(
                tf.reshape((tf.abs((points[:, 0+2*(i-1)] - xmin) * (ymax - points[:, 1+2*(i-1)]))), [-1, 1]),
                [1, self.cfgs.FPN_CHANNEL]) \
                + left_bottom_feature * tf.tile(
                tf.reshape((tf.abs((xmax - points[:, 0+2*(i-1)]) * (points[:, 1+2*(i-1)] - ymin))), [-1, 1]),
                [1, self.cfgs.FPN_CHANNEL])

            refine_feature = tf.reshape(refine_feature, [
                1,
                tf.cast(h, tf.int32),
                tf.cast(w, tf.int32), self.cfgs.FPN_CHANNEL
            ])

            feature += refine_feature

        return feature

    def refine_stage(self,
                     input_img_batch,
                     gtboxes_batch_r,
                     box_pred_list,
                     cls_prob_list,
                     proposal_list,
                     feature_pyramid,
                     gpu_id,
                     pos_threshold,
                     neg_threshold,
                     stage,
                     proposal_filter=False):
        """One R3Det refinement stage.

        Decodes the current boxes, keeps the best-scoring anchor per
        location when ``proposal_filter`` is set (first stage only),
        re-samples features at box centers, re-predicts, and (training)
        adds this stage's focal/regression losses to self.losses_dict.

        Returns:
            (refine_box_pred_list, refine_cls_prob_list, refine_boxes_list).
        """
        with tf.variable_scope('refine_feature_pyramid{}'.format(stage)):
            refine_feature_pyramid = {}
            refine_boxes_list = []

            for box_pred, cls_prob, proposal, stride, level in \
                    zip(box_pred_list, cls_prob_list, proposal_list,
                        self.cfgs.ANCHOR_STRIDE, self.cfgs.LEVEL):
                if proposal_filter:
                    # Keep, per spatial location, the anchor whose max class
                    # probability is highest; collapses anchors -> one box.
                    box_pred = tf.reshape(
                        box_pred, [-1, self.num_anchors_per_location, 5])
                    proposal = tf.reshape(proposal, [
                        -1, self.num_anchors_per_location,
                        5 if self.method == 'R' else 4
                    ])
                    cls_prob = tf.reshape(cls_prob, [
                        -1, self.num_anchors_per_location, self.cfgs.CLASS_NUM
                    ])

                    cls_max_prob = tf.reduce_max(cls_prob, axis=-1)
                    box_pred_argmax = tf.cast(
                        tf.reshape(tf.argmax(cls_max_prob, axis=-1), [-1, 1]),
                        tf.int32)
                    # Row indices 0..N-1 paired with the argmax column.
                    indices = tf.cast(
                        tf.cumsum(tf.ones_like(box_pred_argmax), axis=0),
                        tf.int32) - tf.constant(1, tf.int32)
                    indices = tf.concat([indices, box_pred_argmax], axis=-1)

                    box_pred = tf.reshape(tf.gather_nd(box_pred, indices),
                                          [-1, 5])
                    proposal = tf.reshape(tf.gather_nd(proposal, indices),
                                          [-1, 5 if self.method == 'R' else 4])

                    if self.cfgs.METHOD == 'H':
                        # Convert horizontal (x1, y1, x2, y2) anchors to
                        # rotated (x_c, y_c, w, h, theta=-90) form.
                        x_c = (proposal[:, 2] + proposal[:, 0]) / 2
                        y_c = (proposal[:, 3] + proposal[:, 1]) / 2
                        h = proposal[:, 2] - proposal[:, 0] + 1
                        w = proposal[:, 3] - proposal[:, 1] + 1
                        theta = -90 * tf.ones_like(x_c)
                        proposal = tf.transpose(
                            tf.stack([x_c, y_c, w, h, theta]))
                else:
                    box_pred = tf.reshape(box_pred, [-1, 5])
                    proposal = tf.reshape(proposal, [-1, 5])

                bboxes = bbox_transform.rbbox_transform_inv(boxes=proposal,
                                                            deltas=box_pred)
                refine_boxes_list.append(bboxes)
                # Re-sample features at the decoded box centers (in
                # feature-map coordinates via the level stride).
                center_point = bboxes[:, :2] / stride
                refine_feature_pyramid[level] = self.refine_feature_op(
                    points=center_point,
                    feature_map=feature_pyramid[level],
                    name=level)

                # points = coordinate5_2_8_tf(bboxes) / stride
                # refine_feature_pyramid[level] = self.refine_feature_five_op(points=points,
                #                                                             feature_map=feature_pyramid[level],
                #                                                             name=level)

        refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list = self.refine_net(
            refine_feature_pyramid, 'refine_net{}'.format(stage))

        refine_box_pred = tf.concat(refine_box_pred_list, axis=0)
        refine_cls_score = tf.concat(refine_cls_score_list, axis=0)
        # refine_cls_prob = tf.concat(refine_cls_prob_list, axis=0)
        refine_boxes = tf.concat(refine_boxes_list, axis=0)

        if self.is_training:
            with tf.variable_scope('build_refine_loss{}'.format(stage)):
                refine_labels, refine_target_delta, refine_box_states, refine_target_boxes = tf.py_func(
                    func=self.refine_anchor_sampler_r3det.
                    refine_anchor_target_layer,
                    inp=[
                        gtboxes_batch_r, refine_boxes, pos_threshold,
                        neg_threshold, gpu_id
                    ],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                self.add_anchor_img_smry(input_img_batch, refine_boxes,
                                         refine_box_states, 1)

                refine_cls_loss = self.losses.focal_loss(
                    refine_labels, refine_cls_score, refine_box_states)
                if self.cfgs.USE_IOU_FACTOR:
                    refine_reg_loss = self.losses.iou_smooth_l1_loss_exp(
                        refine_target_delta,
                        refine_box_pred,
                        refine_box_states,
                        refine_target_boxes,
                        refine_boxes,
                        is_refine=True)
                else:
                    refine_reg_loss = self.losses.smooth_l1_loss(
                        refine_target_delta, refine_box_pred,
                        refine_box_states)

                self.losses_dict['refine_cls_loss{}'.format(
                    stage)] = refine_cls_loss * self.cfgs.CLS_WEIGHT
                self.losses_dict['refine_reg_loss{}'.format(
                    stage)] = refine_reg_loss * self.cfgs.REG_WEIGHT

        return refine_box_pred_list, refine_cls_prob_list, refine_boxes_list

    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h=None,
                                      gtboxes_batch_r=None,
                                      gpu_id=0):
        """Assemble the full R3Det graph.

        Unlike the other detectors in this file, this returns the raw
        concatenated (box_pred, cls_prob, proposal) tensors — callers
        presumably post-process externally.
        """
        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)
            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        if self.cfgs.USE_GN:
            # GroupNorm path requires a static shape — TODO confirm the
            # short-side/max-length layout matches the input pipeline.
            input_img_batch = tf.reshape(
                input_img_batch,
                [1, self.cfgs.IMG_SHORT_SIDE_LEN, self.cfgs.IMG_MAX_LENGTH, 3])

        # 1. build backbone
        feature_pyramid = self.build_backbone(input_img_batch)

        # 2. build rpn
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(
            feature_pyramid, 'rpn_net')
        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
        # rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)

        # 3. generate anchors
        anchor_list = self.make_anchors(feature_pyramid, use_tf=True)
        anchors = tf.concat(anchor_list, axis=0)

        # 4. build loss
        if self.is_training:
            with tf.variable_scope('build_loss'):
                labels, target_delta, anchor_states, target_boxes = tf.py_func(
                    func=self.anchor_sampler_retinenet.anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors, gpu_id],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 1)

                cls_loss = self.losses.focal_loss(labels, rpn_cls_score,
                                                  anchor_states)
                if self.cfgs.USE_IOU_FACTOR:
                    reg_loss = self.losses.iou_smooth_l1_loss_exp(
                        target_delta,
                        rpn_box_pred,
                        anchor_states,
                        target_boxes,
                        anchors,
                        alpha=self.cfgs.ALPHA,
                        beta=self.cfgs.BETA)
                else:
                    reg_loss = self.losses.smooth_l1_loss(
                        target_delta, rpn_box_pred, anchor_states)

                self.losses_dict['cls_loss'] = cls_loss * self.cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * self.cfgs.REG_WEIGHT

        box_pred_list, cls_prob_list, proposal_list = rpn_box_pred_list, rpn_cls_prob_list, anchor_list

        all_box_pred_list, all_cls_prob_list, all_proposal_list = [], [], []

        for i in range(self.cfgs.NUM_REFINE_STAGE):
            # Only the first stage collapses multiple anchors per location.
            box_pred_list, cls_prob_list, proposal_list = self.refine_stage(
                input_img_batch,
                gtboxes_batch_r,
                box_pred_list,
                cls_prob_list,
                proposal_list,
                feature_pyramid,
                gpu_id,
                pos_threshold=self.cfgs.REFINE_IOU_POSITIVE_THRESHOLD[i],
                neg_threshold=self.cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[i],
                stage='' if i == 0 else '_stage{}'.format(i + 2),
                proposal_filter=True if i == 0 else False)

            if not self.is_training:
                all_box_pred_list.extend(box_pred_list)
                all_cls_prob_list.extend(cls_prob_list)
                all_proposal_list.extend(proposal_list)
            else:
                all_box_pred_list, all_cls_prob_list, all_proposal_list = box_pred_list, cls_prob_list, proposal_list

        box_pred = tf.concat(all_box_pred_list, axis=0)
        cls_prob = tf.concat(all_cls_prob_list, axis=0)
        proposal = tf.concat(all_proposal_list, axis=0)

        return box_pred, cls_prob, proposal
class DetectionNetworkRetinaNet(DetectionNetworkBase):
    """Single-stage RetinaNet detector for rotated boxes (no refinement)."""

    def __init__(self, cfgs, is_training):
        """Create the anchor sampler and loss module."""
        super(DetectionNetworkRetinaNet, self).__init__(cfgs, is_training)
        self.anchor_sampler_retinenet = AnchorSamplerRetinaNet(cfgs)
        self.losses = Loss(self.cfgs)

    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h=None,
                                      gtboxes_batch_r=None,
                                      gpu_id=0):
        """Assemble backbone, RetinaNet head, losses, and postprocessing.

        Returns (boxes, scores, category) plus self.losses_dict when
        training. The regression-loss variant is selected by
        cfgs.REG_LOSS_MODE (0: iou-smooth-l1 log, 1: iou-smooth-l1 exp,
        otherwise plain smooth-l1).
        """
        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)
            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        if self.cfgs.USE_GN:
            # GroupNorm path requires a static shape — TODO confirm layout.
            input_img_batch = tf.reshape(
                input_img_batch,
                [1, self.cfgs.IMG_SHORT_SIDE_LEN, self.cfgs.IMG_MAX_LENGTH, 3])

        # 1. build backbone
        feature_pyramid = self.build_backbone(input_img_batch)

        # 2. build rpn
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(
            feature_pyramid, 'rpn_net')
        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
        rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)

        # 3. generate anchors
        anchor_list = self.make_anchors(feature_pyramid)
        anchors = tf.concat(anchor_list, axis=0)

        # 4. build loss
        if self.is_training:
            with tf.variable_scope('build_loss'):
                labels, target_delta, anchor_states, target_boxes = tf.py_func(
                    func=self.anchor_sampler_retinenet.anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors, gpu_id],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 1)

                cls_loss = self.losses.focal_loss(labels, rpn_cls_score,
                                                  anchor_states)
                if self.cfgs.REG_LOSS_MODE == 0:
                    reg_loss = self.losses.iou_smooth_l1_loss_log(
                        target_delta, rpn_box_pred, anchor_states,
                        target_boxes, anchors)
                elif self.cfgs.REG_LOSS_MODE == 1:
                    reg_loss = self.losses.iou_smooth_l1_loss_exp(
                        target_delta,
                        rpn_box_pred,
                        anchor_states,
                        target_boxes,
                        anchors,
                        alpha=self.cfgs.ALPHA,
                        beta=self.cfgs.BETA)
                else:
                    reg_loss = self.losses.smooth_l1_loss(
                        target_delta, rpn_box_pred, anchor_states)

                self.losses_dict['cls_loss'] = cls_loss * self.cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * self.cfgs.REG_WEIGHT

        # 5. postprocess
        with tf.variable_scope('postprocess_detctions'):
            boxes, scores, category = self.postprocess_detctions(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                anchors=anchors,
                gpu_id=gpu_id)
            boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)

        if self.is_training:
            return boxes, scores, category, self.losses_dict
        else:
            return boxes, scores, category

    def postprocess_detctions(self, rpn_bbox_pred, rpn_cls_prob, anchors,
                              gpu_id):
        """Per-class score filtering, box decoding, and rotated (GPU) NMS.

        Handles the optional 180-degree angle representation
        (cfgs.ANGLE_RANGE == 180) by converting anchors before decoding
        and converting boxes back afterwards.

        Returns concatenated (boxes [-1, 5], scores [-1], labels [-1]);
        labels are 1-based class ids.
        """
        return_boxes_pred = []
        return_scores = []
        return_labels = []
        for j in range(0, self.cfgs.CLASS_NUM):
            scores = rpn_cls_prob[:, j]
            # VIS_SCORE in training (visualization), FILTERED_SCORE otherwise.
            if self.is_training:
                indices = tf.reshape(
                    tf.where(tf.greater(scores, self.cfgs.VIS_SCORE)), [
                        -1,
                    ])
            else:
                indices = tf.reshape(
                    tf.where(tf.greater(scores, self.cfgs.FILTERED_SCORE)), [
                        -1,
                    ])

            anchors_ = tf.gather(anchors, indices)
            rpn_bbox_pred_ = tf.gather(rpn_bbox_pred, indices)
            scores = tf.gather(scores, indices)

            if self.method == 'H':
                # Horizontal anchors -> rotated (x_c, y_c, w, h, theta=-90).
                x_c = (anchors_[:, 2] + anchors_[:, 0]) / 2
                y_c = (anchors_[:, 3] + anchors_[:, 1]) / 2
                h = anchors_[:, 2] - anchors_[:, 0] + 1
                w = anchors_[:, 3] - anchors_[:, 1] + 1
                theta = -90 * tf.ones_like(x_c)
                anchors_ = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))

            if self.cfgs.ANGLE_RANGE == 180:
                anchors_ = tf.py_func(coordinate_present_convert,
                                      inp=[anchors_, -1],
                                      Tout=[tf.float32])
                anchors_ = tf.reshape(anchors_, [-1, 5])

            boxes_pred = bbox_transform.rbbox_transform_inv(
                boxes=anchors_, deltas=rpn_bbox_pred_)

            if self.cfgs.ANGLE_RANGE == 180:
                # Keep boxes with theta in [-180, 0), then convert back to
                # the 90-degree representation.
                _, _, _, _, theta = tf.unstack(boxes_pred, axis=1)
                indx = tf.reshape(
                    tf.where(
                        tf.logical_and(tf.less(theta, 0),
                                       tf.greater_equal(theta, -180))), [
                                           -1,
                                       ])
                boxes_pred = tf.gather(boxes_pred, indx)
                scores = tf.gather(scores, indx)

                boxes_pred = tf.py_func(coordinate_present_convert,
                                        inp=[boxes_pred, 1],
                                        Tout=[tf.float32])
                boxes_pred = tf.reshape(boxes_pred, [-1, 5])

            nms_indices = nms_rotate.nms_rotate(
                decode_boxes=boxes_pred,
                scores=scores,
                iou_threshold=self.cfgs.NMS_IOU_THRESHOLD,
                max_output_size=100 if self.is_training else 1000,
                use_gpu=True,
                gpu_id=gpu_id)

            tmp_boxes_pred = tf.reshape(tf.gather(boxes_pred, nms_indices),
                                        [-1, 5])
            tmp_scores = tf.reshape(tf.gather(scores, nms_indices), [
                -1,
            ])

            return_boxes_pred.append(tmp_boxes_pred)
            return_scores.append(tmp_scores)
            # Class ids are shifted by 1 (0 presumably reserved for background).
            return_labels.append(tf.ones_like(tmp_scores) * (j + 1))

        return_boxes_pred = tf.concat(return_boxes_pred, axis=0)
        return_scores = tf.concat(return_scores, axis=0)
        return_labels = tf.concat(return_labels, axis=0)

        return return_boxes_pred, return_scores, return_labels
class DetectionNetworkR2CNN(DetectionNetworkBase):
    """R2CNN two-stage detector: FPN RPN proposals + rotated Fast-RCNN head."""

    def __init__(self, cfgs, is_training):
        super(DetectionNetworkR2CNN, self).__init__(cfgs, is_training)
        self.anchor_sampler_r2cnn = AnchorSamplerR2CNN(cfgs)
        self.proposal_sampler_r2cnn = ProposalSamplerR2CNN(cfgs)
        self.losses = Loss(cfgs)
        self.roi_extractor = RoIExtractor(cfgs)
        self.box_head = BoxHead(cfgs)

    def build_loss(self, rpn_box_pred, rpn_bbox_targets, rpn_cls_score, rpn_labels,
                   bbox_pred, bbox_targets, cls_score, labels):
        """Accumulate RPN and Fast-RCNN losses into self.losses_dict.

        :param rpn_box_pred: [-1, 4] RPN regression outputs.
        :param rpn_bbox_targets: [-1, 4] RPN regression targets.
        :param rpn_cls_score: [-1, 2] RPN objectness logits.
        :param rpn_labels: [-1] anchor labels; -1 entries are ignored.
        :param bbox_pred: [-1, 5*(CLASS_NUM+1)] rotated box regression outputs.
        :param bbox_targets: [-1, 5*(CLASS_NUM+1)] rotated box targets.
        :param cls_score: [-1, CLASS_NUM+1] classification logits.
        :param labels: [-1] per-RoI class labels.
        """
        with tf.variable_scope('build_loss'):
            with tf.variable_scope('rpn_loss'):
                rpn_reg_loss = self.losses.smooth_l1_loss_rpn(bbox_pred=rpn_box_pred,
                                                              bbox_targets=rpn_bbox_targets,
                                                              label=rpn_labels,
                                                              sigma=self.cfgs.RPN_SIGMA)
                # Drop "ignore" anchors (label == -1) before the cls loss.
                rpn_select = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)), [-1])
                rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score, rpn_select), [-1, 2])
                rpn_labels = tf.reshape(tf.gather(rpn_labels, rpn_select), [-1])
                rpn_cls_loss = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score,
                                                                   labels=rpn_labels))
                self.losses_dict['rpn_cls_loss'] = rpn_cls_loss * self.cfgs.RPN_CLASSIFICATION_LOSS_WEIGHT
                self.losses_dict['rpn_reg_loss'] = rpn_reg_loss * self.cfgs.RPN_LOCATION_LOSS_WEIGHT

            with tf.variable_scope('FastRCNN_loss'):
                reg_loss = self.losses.smooth_l1_loss_rcnn_r(bbox_pred=bbox_pred,
                                                             bbox_targets=bbox_targets,
                                                             label=labels,
                                                             num_classes=self.cfgs.CLASS_NUM + 1,
                                                             sigma=self.cfgs.FASTRCNN_SIGMA)

                # cls_score = tf.reshape(cls_score, [-1, cfgs.CLASS_NUM + 1])
                # labels = tf.reshape(labels, [-1])
                cls_loss = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=cls_score,
                        labels=labels))  # because RoIs were already sampled/balanced upstream

                self.losses_dict['fast_cls_loss'] = cls_loss * self.cfgs.FAST_RCNN_CLASSIFICATION_LOSS_WEIGHT
                self.losses_dict['fast_reg_loss'] = reg_loss * self.cfgs.FAST_RCNN_LOCATION_LOSS_WEIGHT

    def build_whole_detection_network(self, input_img_batch, gtboxes_batch_h=None,
                                      gtboxes_batch_r=None, gpu_id=0):
        """Assemble the full R2CNN graph.

        :param input_img_batch: input image tensor (batch of 1 assumed upstream
            — TODO confirm against caller).
        :param gtboxes_batch_h: horizontal GT boxes, reshaped to [-1, 5]; training only.
        :param gtboxes_batch_r: rotated GT boxes, reshaped to [-1, 6]; training only.
        :param gpu_id: device index for rotated NMS.
        :return: (boxes, scores, category[, losses_dict when training]).
        """
        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)
            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build backbone
        feature_pyramid = self.build_backbone(input_img_batch)

        # 2. build rpn
        fpn_box_pred, fpn_cls_score, fpn_cls_prob = self.rpn(feature_pyramid)

        # 3. generate anchors
        anchor_list = self.make_anchors(feature_pyramid)
        anchors = tf.concat(anchor_list, axis=0)

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        with tf.variable_scope('postprocess_FPN'):
            rois, roi_scores = self.postprocess_rpn_proposals(rpn_bbox_pred=fpn_box_pred,
                                                              rpn_cls_prob=fpn_cls_prob,
                                                              img_shape=img_shape,
                                                              anchors=anchors,
                                                              is_training=self.is_training)

        # 5. sample minibatch
        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                # Anchor targets are computed in numpy via py_func.
                fpn_labels, fpn_bbox_targets = \
                    tf.py_func(
                        self.anchor_sampler_r2cnn.anchor_target_layer,
                        [gtboxes_batch_h, img_shape, anchors],
                        [tf.float32, tf.float32])
                fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
                fpn_labels = tf.to_int32(fpn_labels, name="to_int32")
                fpn_labels = tf.reshape(fpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, anchors, fpn_labels, method=0)

            # RPN accuracy summary over non-ignored anchors.
            fpn_cls_category = tf.argmax(fpn_cls_prob, axis=1)
            kept_rpppn = tf.reshape(tf.where(tf.not_equal(fpn_labels, -1)), [-1])
            fpn_cls_category = tf.gather(fpn_cls_category, kept_rpppn)
            acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(fpn_cls_category,
                             tf.to_int64(tf.gather(fpn_labels, kept_rpppn)))))
            tf.summary.scalar('ACC/fpn_accuracy', acc)

            with tf.control_dependencies([fpn_labels]):
                with tf.variable_scope('sample_RCNN_minibatch'):
                    rois, labels, _, bbox_targets, _, _ = \
                        tf.py_func(self.proposal_sampler_r2cnn.proposal_target_layer,
                                   [rois, gtboxes_batch_h, gtboxes_batch_r],
                                   [tf.float32, tf.float32, tf.float32,
                                    tf.float32, tf.float32, tf.float32])
                    rois = tf.reshape(rois, [-1, 4])
                    labels = tf.to_int32(labels)
                    labels = tf.reshape(labels, [-1])
                    bbox_targets = tf.reshape(bbox_targets,
                                              [-1, 5 * (self.cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(input_img_batch, rois, labels, method=0)

        # 6. assign level
        if self.is_training:
            rois_list, labels, bbox_targets = self.assign_levels(all_rois=rois,
                                                                 labels=labels,
                                                                 bbox_targets=bbox_targets)
        else:
            rois_list = self.assign_levels(all_rois=rois)

        # 7. build Fast-RCNN, include roi align/pooling, box head
        bbox_pred, cls_score = self.box_head.fpn_fc_head(self.roi_extractor, rois_list,
                                                         feature_pyramid, img_shape,
                                                         self.is_training)
        rois = tf.concat(rois_list, axis=0, name='concat_rois')
        cls_prob = slim.softmax(cls_score, 'cls_prob')

        if self.is_training:
            cls_category = tf.argmax(cls_prob, axis=1)
            fast_acc = tf.reduce_mean(tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc', fast_acc)

        # 8. build loss
        if self.is_training:
            self.build_loss(rpn_box_pred=fpn_box_pred,
                            rpn_bbox_targets=fpn_bbox_targets,
                            rpn_cls_score=fpn_cls_score,
                            rpn_labels=fpn_labels,
                            bbox_pred=bbox_pred,
                            bbox_targets=bbox_targets,
                            cls_score=cls_score,
                            labels=labels)

        # 9. postprocess_fastrcnn
        final_bbox, final_scores, final_category = self.postprocess_fastrcnn(rois=rois,
                                                                             bbox_ppred=bbox_pred,
                                                                             scores=cls_prob,
                                                                             gpu_id=gpu_id)
        if self.is_training:
            return final_bbox, final_scores, final_category, self.losses_dict
        else:
            return final_bbox, final_scores, final_category

    def postprocess_fastrcnn(self, rois, bbox_ppred, scores, gpu_id):
        '''
        Per-class decode + rotated NMS of the Fast-RCNN head outputs.

        :param rois: [-1, 4]
        :param bbox_ppred: [-1, (cfgs.Class_num+1) * 5]
        :param scores: [-1, cfgs.Class_num + 1]
        :return: (final_boxes [-1, 5], final_scores [-1], final_category [-1])
        '''
        with tf.name_scope('postprocess_fastrcnn'):
            rois = tf.stop_gradient(rois)
            scores = tf.stop_gradient(scores)
            bbox_ppred = tf.reshape(bbox_ppred, [-1, self.cfgs.CLASS_NUM + 1, 5])
            bbox_ppred = tf.stop_gradient(bbox_ppred)

            bbox_pred_list = tf.unstack(bbox_ppred, axis=1)
            score_list = tf.unstack(scores, axis=1)

            allclasses_boxes = []
            allclasses_scores = []
            categories = []

            # Convert axis-aligned RoIs to rotated form [x_c, y_c, w, h, -90].
            x_c = (rois[:, 2] + rois[:, 0]) / 2
            y_c = (rois[:, 3] + rois[:, 1]) / 2
            h = rois[:, 2] - rois[:, 0] + 1
            w = rois[:, 3] - rois[:, 1] + 1
            theta = -90 * tf.ones_like(x_c)
            rois = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))

            for i in range(1, self.cfgs.CLASS_NUM + 1):
                # 1. decode boxes in each class
                tmp_encoded_box = bbox_pred_list[i]
                tmp_score = score_list[i]
                tmp_decoded_boxes = bbox_transform.rbbox_transform_inv(
                    boxes=rois, deltas=tmp_encoded_box,
                    scale_factors=self.cfgs.ROI_SCALE_FACTORS)

                # 2. clip to img boundaries
                # tmp_decoded_boxes = boxes_utils.clip_boxes_to_img_boundaries(decode_boxes=tmp_decoded_boxes,
                #                                                              img_shape=img_shape)

                # 3. NMS
                if self.cfgs.SOFT_NMS:
                    print("Using Soft NMS.......")
                    raise NotImplementedError("soft NMS for rotate has not implemented")
                else:
                    # Larger per-class cap for DOTA-style dense scenes.
                    max_output_size = 4000 if 'DOTA' in self.cfgs.NET_NAME else 200
                    keep = nms_rotate.nms_rotate(decode_boxes=tmp_decoded_boxes,
                                                 scores=tmp_score,
                                                 iou_threshold=self.cfgs.FAST_RCNN_NMS_IOU_THRESHOLD,
                                                 max_output_size=100 if self.is_training else max_output_size,
                                                 use_gpu=self.cfgs.ROTATE_NMS_USE_GPU,
                                                 gpu_id=gpu_id)

                perclass_boxes = tf.gather(tmp_decoded_boxes, keep)
                perclass_scores = tf.gather(tmp_score, keep)

                allclasses_boxes.append(perclass_boxes)
                allclasses_scores.append(perclass_scores)
                categories.append(tf.ones_like(perclass_scores) * i)

            final_boxes = tf.concat(allclasses_boxes, axis=0)
            final_scores = tf.concat(allclasses_scores, axis=0)
            final_category = tf.concat(categories, axis=0)

            if self.is_training:
                '''
                in training. We should show the detecitons in the tensorboard. So we add this.
                '''
                kept_indices = tf.reshape(
                    tf.where(tf.greater_equal(final_scores, self.cfgs.VIS_SCORE)), [-1])
            else:
                kept_indices = tf.reshape(
                    tf.where(tf.greater_equal(final_scores, self.cfgs.FILTERED_SCORE)), [-1])

            final_boxes = tf.gather(final_boxes, kept_indices)
            final_scores = tf.gather(final_scores, kept_indices)
            final_category = tf.gather(final_category, kept_indices)

            return final_boxes, final_scores, final_category
# NOTE(review): this module-level def duplicates DetectionNetwork.__init__
# defined in the class below and is never bound to a class here — it looks like
# a concatenation/merge artifact. Confirm and consider removing.
def __init__(self, cfgs, is_training):
    """Initialize CSL components: anchor sampler, losses, and the angle-bin count."""
    super(DetectionNetwork, self).__init__(cfgs, is_training)
    self.anchor_sampler_csl = AnchorSamplerCSL(cfgs)
    self.losses = Loss(self.cfgs)
    # Number of CSL angle-classification bins.
    self.coding_len = cfgs.ANGLE_RANGE // cfgs.OMEGA
class DetectionNetwork(DetectionNetworkBase):
    """CSL detector: RetinaNet-style dense head plus an angle-classification
    branch with `coding_len` bins (ANGLE_RANGE // OMEGA)."""

    def __init__(self, cfgs, is_training):
        super(DetectionNetwork, self).__init__(cfgs, is_training)
        self.anchor_sampler_csl = AnchorSamplerCSL(cfgs)
        self.losses = Loss(self.cfgs)
        # Number of CSL angle-classification bins.
        self.coding_len = cfgs.ANGLE_RANGE // cfgs.OMEGA

    def rpn_reg_net(self, inputs, scope_list, reuse_flag, level):
        """Regression tower for one pyramid level.

        :param inputs: feature map of this level.
        :param scope_list: variable-scope names; [1]=tower, [3]=box head, [4]=angle head.
        :param reuse_flag: None to create variables, True to reuse shared ones.
        :param level: pyramid level name, used only in reshape op names.
        :return: (rpn_delta_boxes [-1, 5], rpn_angle_cls [-1, coding_len]).
        """
        # NOTE(review): unlike sibling networks, these convs do not pass
        # trainable=self.is_training — confirm whether that is intended.
        rpn_conv2d_3x3 = inputs
        for i in range(self.cfgs.NUM_SUBNET_CONV):
            rpn_conv2d_3x3 = slim.conv2d(inputs=rpn_conv2d_3x3,
                                         num_outputs=self.cfgs.FPN_CHANNEL,
                                         kernel_size=[3, 3],
                                         weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                                         biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
                                         stride=1,
                                         activation_fn=tf.nn.relu,
                                         scope='{}_{}'.format(scope_list[1], i),
                                         reuse=reuse_flag)
        # 5 regression targets (x, y, w, h, theta) per anchor.
        rpn_delta_boxes = slim.conv2d(rpn_conv2d_3x3,
                                      num_outputs=5 * self.num_anchors_per_location,
                                      kernel_size=[3, 3],
                                      stride=1,
                                      weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                                      biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
                                      scope=scope_list[3],
                                      activation_fn=None,
                                      reuse=reuse_flag)
        # One CSL angle logit per bin per anchor.
        rpn_angle_cls = slim.conv2d(rpn_conv2d_3x3,
                                    num_outputs=self.coding_len * self.num_anchors_per_location,
                                    kernel_size=[3, 3],
                                    stride=1,
                                    weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                                    biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
                                    scope=scope_list[4],
                                    activation_fn=None,
                                    reuse=reuse_flag)

        rpn_delta_boxes = tf.reshape(rpn_delta_boxes, [-1, 5],
                                     name='rpn_{}_regression_reshape'.format(level))
        rpn_angle_cls = tf.reshape(rpn_angle_cls, [-1, self.coding_len],
                                   name='rpn_{}_angle_cls_reshape'.format(level))
        return rpn_delta_boxes, rpn_angle_cls

    def rpn_net(self, feature_pyramid, name):
        """Run cls/reg/angle towers over every pyramid level.

        :param feature_pyramid: dict level-name -> feature map.
        :param name: variable scope for the whole head.
        :return: per-level lists (delta_boxes, scores, probs, angle_cls).
        """
        rpn_delta_boxes_list = []
        rpn_scores_list = []
        rpn_probs_list = []
        rpn_angle_cls_list = []
        with tf.variable_scope(name):
            with slim.arg_scope([slim.conv2d],
                                weights_regularizer=slim.l2_regularizer(self.cfgs.WEIGHT_DECAY)):
                for level in self.cfgs.LEVEL:
                    if self.cfgs.SHARE_NET:
                        # Shared towers: create variables on the first level,
                        # reuse on the rest.
                        reuse_flag = None if level == self.cfgs.LEVEL[0] else True
                        scope_list = ['conv2d_3x3_cls', 'conv2d_3x3_reg',
                                      'rpn_classification', 'rpn_regression',
                                      'rpn_angle_cls']
                    else:
                        reuse_flag = None
                        scope_list = ['conv2d_3x3_cls_' + level, 'conv2d_3x3_reg_' + level,
                                      'rpn_classification_' + level,
                                      'rpn_regression_' + level,
                                      'rpn_angle_cls_' + level]
                    rpn_box_scores, rpn_box_probs = self.rpn_cls_net(
                        feature_pyramid[level], scope_list, reuse_flag, level)
                    rpn_delta_boxes, rpn_angle_cls = self.rpn_reg_net(
                        feature_pyramid[level], scope_list, reuse_flag, level)

                    rpn_scores_list.append(rpn_box_scores)
                    rpn_probs_list.append(rpn_box_probs)
                    rpn_delta_boxes_list.append(rpn_delta_boxes)
                    rpn_angle_cls_list.append(rpn_angle_cls)

        return rpn_delta_boxes_list, rpn_scores_list, rpn_probs_list, rpn_angle_cls_list

    def build_whole_detection_network(self, input_img_batch, gtboxes_batch_h=None,
                                      gtboxes_batch_r=None, gt_smooth_label=None, gpu_id=0):
        """Assemble the full CSL graph.

        :param input_img_batch: input image tensor.
        :param gtboxes_batch_h: horizontal GT boxes -> [-1, 5]; training only.
        :param gtboxes_batch_r: rotated GT boxes -> [-1, 6]; training only.
        :param gt_smooth_label: CSL smooth angle labels -> [-1, coding_len]; training only.
        :param gpu_id: device index passed to the anchor sampler.
        :return: training: (boxes, scores, category, boxes_angle, losses_dict);
                 inference: (boxes_angle, scores, category).
        """
        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)
            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)
            gt_smooth_label = tf.reshape(gt_smooth_label, [-1, self.coding_len])
            gt_smooth_label = tf.cast(gt_smooth_label, tf.float32)

        # 1. build backbone
        feature_pyramid = self.build_backbone(input_img_batch)

        # 2. build rpn
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list, rpn_angle_cls_list = \
            self.rpn_net(feature_pyramid, 'rpn_net')
        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
        rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)
        rpn_angle_cls = tf.concat(rpn_angle_cls_list, axis=0)

        # 3. generate anchors
        anchor_list = self.make_anchors(feature_pyramid)
        anchors = tf.concat(anchor_list, axis=0)

        # 4. build loss
        if self.is_training:
            with tf.variable_scope('build_loss'):
                labels, target_delta, anchor_states, target_boxes, target_smooth_label = tf.py_func(
                    func=self.anchor_sampler_csl.anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, gt_smooth_label, anchors, gpu_id],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32, tf.float32])

                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 1)

                cls_loss = self.losses.focal_loss(labels, rpn_cls_score, anchor_states)

                if self.cfgs.REG_LOSS_MODE == 0:
                    reg_loss = self.losses.iou_smooth_l1_loss_log(target_delta, rpn_box_pred,
                                                                  anchor_states, target_boxes,
                                                                  anchors)
                elif self.cfgs.REG_LOSS_MODE == 1:
                    reg_loss = self.losses.iou_smooth_l1_loss_exp(target_delta, rpn_box_pred,
                                                                  anchor_states, target_boxes,
                                                                  anchors,
                                                                  alpha=self.cfgs.ALPHA,
                                                                  beta=self.cfgs.BETA)
                else:
                    reg_loss = self.losses.smooth_l1_loss(target_delta, rpn_box_pred,
                                                          anchor_states)

                angle_cls_loss = self.losses.angle_focal_loss(target_smooth_label,
                                                              rpn_angle_cls, anchor_states)

                self.losses_dict['cls_loss'] = cls_loss * self.cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * self.cfgs.REG_WEIGHT
                self.losses_dict['angle_cls_loss'] = angle_cls_loss * self.cfgs.ANGLE_WEIGHT

        # 5. postprocess
        with tf.variable_scope('postprocess_detctions'):
            boxes, scores, category, boxes_angle = self.postprocess_detctions(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                rpn_angle_prob=tf.sigmoid(rpn_angle_cls),
                anchors=anchors)
            boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)
            boxes_angle = tf.stop_gradient(boxes_angle)

        if self.is_training:
            # FIX: previously returned `boxes` twice, discarding boxes_angle
            # even though it is computed (and stop_gradient-ed) above and is
            # what the inference branch returns.
            return boxes, scores, category, boxes_angle, self.losses_dict
        else:
            return boxes_angle, scores, category

    def postprocess_detctions(self, rpn_bbox_pred, rpn_cls_prob, rpn_angle_prob, anchors):
        """Per-class decode + rotated NMS, producing both regression-angle and
        CSL-classified-angle boxes.

        :param rpn_bbox_pred: [-1, 5] regression deltas.
        :param rpn_cls_prob: [-1, CLASS_NUM] sigmoid class scores.
        :param rpn_angle_prob: [-1, coding_len] sigmoid angle-bin scores.
        :param anchors: anchors, [-1, 4] (METHOD 'H') or [-1, 5].
        :return: (boxes [-1, 5], scores [-1], labels [-1], boxes_angle [-1, 5]).
        """
        return_boxes_pred = []
        return_boxes_pred_angle = []
        return_scores = []
        return_labels = []
        for j in range(0, self.cfgs.CLASS_NUM):
            scores = rpn_cls_prob[:, j]
            # Training uses the visualization threshold; inference the final one.
            if self.is_training:
                indices = tf.reshape(tf.where(tf.greater(scores, self.cfgs.VIS_SCORE)), [-1, ])
            else:
                indices = tf.reshape(tf.where(tf.greater(scores, self.cfgs.FILTERED_SCORE)), [-1, ])

            anchors_ = tf.gather(anchors, indices)
            rpn_bbox_pred_ = tf.gather(rpn_bbox_pred, indices)
            scores = tf.gather(scores, indices)
            rpn_angle_prob_ = tf.gather(rpn_angle_prob, indices)

            # Hard-argmax over the CSL angle bins.
            angle_cls = tf.cast(tf.argmax(rpn_angle_prob_, axis=1), tf.float32)

            if self.cfgs.METHOD == 'H':
                # Horizontal anchors -> rotated form [x_c, y_c, w, h, -90].
                x_c = (anchors_[:, 2] + anchors_[:, 0]) / 2
                y_c = (anchors_[:, 3] + anchors_[:, 1]) / 2
                h = anchors_[:, 2] - anchors_[:, 0] + 1
                w = anchors_[:, 3] - anchors_[:, 1] + 1
                theta = -90 * tf.ones_like(x_c)
                anchors_ = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))

            if self.cfgs.ANGLE_RANGE == 180:
                # Switch angle convention (mode -1) before decoding.
                anchors_ = tf.py_func(coordinate_present_convert,
                                      inp=[anchors_, -1],
                                      Tout=[tf.float32])
                anchors_ = tf.reshape(anchors_, [-1, 5])

            boxes_pred = bbox_transform.rbbox_transform_inv(boxes=anchors_,
                                                            deltas=rpn_bbox_pred_)
            boxes_pred = tf.reshape(boxes_pred, [-1, 5])
            # Map bin index to a concrete (negative) angle at the bin center.
            angle_cls = (tf.reshape(angle_cls, [-1, ]) * -1 - 0.5) * self.cfgs.OMEGA

            # Same box geometry, with theta replaced by the classified angle.
            x, y, w, h, theta = tf.unstack(boxes_pred, axis=1)
            boxes_pred_angle = tf.transpose(tf.stack([x, y, w, h, angle_cls]))

            if self.cfgs.ANGLE_RANGE == 180:
                # _, _, _, _, theta = tf.unstack(boxes_pred, axis=1)
                # indx = tf.reshape(tf.where(tf.logical_and(tf.less(theta, 0), tf.greater_equal(theta, -180))), [-1, ])
                # boxes_pred = tf.gather(boxes_pred, indx)
                # scores = tf.gather(scores, indx)
                boxes_pred = tf.py_func(coordinate_present_convert,
                                        inp=[boxes_pred, 1],
                                        Tout=[tf.float32])
                boxes_pred = tf.reshape(boxes_pred, [-1, 5])
                boxes_pred_angle = tf.py_func(coordinate_present_convert,
                                              inp=[boxes_pred_angle, 1],
                                              Tout=[tf.float32])
                boxes_pred_angle = tf.reshape(boxes_pred_angle, [-1, 5])

            # NMS runs on the CSL-angle boxes; both box variants are gathered
            # with the surviving indices.
            nms_indices = nms_rotate.nms_rotate(decode_boxes=boxes_pred_angle,
                                                scores=scores,
                                                iou_threshold=self.cfgs.NMS_IOU_THRESHOLD,
                                                max_output_size=100 if self.is_training else 1000,
                                                use_gpu=False)

            tmp_boxes_pred = tf.reshape(tf.gather(boxes_pred, nms_indices), [-1, 5])
            tmp_boxes_pred_angle = tf.reshape(tf.gather(boxes_pred_angle, nms_indices), [-1, 5])
            tmp_scores = tf.reshape(tf.gather(scores, nms_indices), [-1, ])

            return_boxes_pred.append(tmp_boxes_pred)
            return_boxes_pred_angle.append(tmp_boxes_pred_angle)
            return_scores.append(tmp_scores)
            # Label 0 is background, so class j maps to label j + 1.
            return_labels.append(tf.ones_like(tmp_scores) * (j + 1))

        return_boxes_pred = tf.concat(return_boxes_pred, axis=0)
        return_boxes_pred_angle = tf.concat(return_boxes_pred_angle, axis=0)
        return_scores = tf.concat(return_scores, axis=0)
        return_labels = tf.concat(return_labels, axis=0)

        return return_boxes_pred, return_scores, return_labels, return_boxes_pred_angle
class DetectionNetworkSCRDet(DetectionNetworkBase):
    """SCRDet: single-feature-map two-stage detector with both horizontal (h)
    and rotated (r) Fast-RCNN branches plus a pixel-attention mask loss."""

    def __init__(self, cfgs, is_training):
        super(DetectionNetworkSCRDet, self).__init__(cfgs, is_training)
        self.proposal_sampler_r2cnn = ProposalSamplerR2CNN(cfgs)
        self.anchor_sampler_r2cnn = AnchorSamplerR2CNN(cfgs)
        self.losses = Loss(cfgs)
        self.roi_extractor = RoIExtractor(cfgs)
        self.box_head = BoxHead(cfgs)

    def rpn(self, inputs):
        """Classic single-level RPN: shared 3x3 conv, then 1x1 cls/box heads.

        :param inputs: backbone feature map.
        :return: (rpn_box_pred, rpn_cls_score, rpn_cls_prob).
        """
        rpn_conv3x3 = slim.conv2d(inputs, 512, [3, 3],
                                  trainable=self.is_training,
                                  weights_initializer=self.cfgs.INITIALIZER,
                                  activation_fn=tf.nn.relu,
                                  scope='rpn_conv/3x3')
        rpn_cls_score = slim.conv2d(rpn_conv3x3, self.num_anchors_per_location * 2, [1, 1],
                                    stride=1,
                                    trainable=self.is_training,
                                    weights_initializer=self.cfgs.INITIALIZER,
                                    activation_fn=None,
                                    scope='rpn_cls_score')
        rpn_box_pred = slim.conv2d(rpn_conv3x3, self.num_anchors_per_location * 4, [1, 1],
                                   stride=1,
                                   trainable=self.is_training,
                                   weights_initializer=self.cfgs.BBOX_INITIALIZER,
                                   activation_fn=None,
                                   scope='rpn_bbox_pred')
        rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')
        return rpn_box_pred, rpn_cls_score, rpn_cls_prob

    def make_anchors(self, feature_to_cropped):
        """Generate anchors for the single RPN feature map."""
        featuremap_height, featuremap_width = tf.shape(feature_to_cropped)[1], \
                                              tf.shape(feature_to_cropped)[2]
        featuremap_height = tf.cast(featuremap_height, tf.float32)
        featuremap_width = tf.cast(featuremap_width, tf.float32)
        anchors = anchor_utils.make_anchors(base_anchor_size=self.cfgs.BASE_ANCHOR_SIZE_LIST,
                                            anchor_scales=self.cfgs.ANCHOR_SCALES,
                                            anchor_ratios=self.cfgs.ANCHOR_RATIOS,
                                            featuremap_height=featuremap_height,
                                            featuremap_width=featuremap_width,
                                            stride=self.cfgs.ANCHOR_STRIDE,
                                            name="make_anchors_forRPN")
        return anchors

    def build_loss(self, rpn_box_pred, rpn_bbox_targets, rpn_cls_score, rpn_labels,
                   bbox_pred_h, bbox_targets_h, cls_score_h,
                   bbox_pred_r, bbox_targets_r, rois, target_gt_r,
                   cls_score_r, labels, mask_gt, pa_mask_pred):
        '''
        Accumulate RPN, horizontal/rotated Fast-RCNN, and attention losses
        into self.losses_dict.

        :param rpn_box_pred: [-1, 4]
        :param rpn_bbox_targets: [-1, 4]
        :param rpn_cls_score: [-1]
        :param rpn_labels: [-1]
        :param bbox_pred_h: [-1, 4*(cls_num+1)]
        :param bbox_targets_h: [-1, 4*(cls_num+1)]
        :param cls_score_h: [-1, cls_num+1]
        :param bbox_pred_r: [-1, 5*(cls_num+1)]
        :param bbox_targets_r: [-1, 5*(cls_num+1)]
        :param cls_score_r: [-1, cls_num+1]
        :param labels: [-1]
        :return:
        '''
        with tf.variable_scope('build_loss'):
            with tf.variable_scope('rpn_loss'):
                rpn_reg_loss = self.losses.smooth_l1_loss_rpn(bbox_pred=rpn_box_pred,
                                                              bbox_targets=rpn_bbox_targets,
                                                              label=rpn_labels,
                                                              sigma=self.cfgs.RPN_SIGMA)
                # Drop "ignore" anchors (label == -1) before the cls loss.
                rpn_select = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)), [-1])
                rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score, rpn_select), [-1, 2])
                rpn_labels = tf.reshape(tf.gather(rpn_labels, rpn_select), [-1])
                rpn_cls_loss = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score,
                                                                   labels=rpn_labels))
                self.losses_dict['rpn_cls_loss'] = rpn_cls_loss * self.cfgs.RPN_CLASSIFICATION_LOSS_WEIGHT
                self.losses_dict['rpn_reg_loss'] = rpn_reg_loss * self.cfgs.RPN_LOCATION_LOSS_WEIGHT

            with tf.variable_scope('FastRCNN_loss'):
                reg_loss_h = self.losses.smooth_l1_loss_rcnn_h(bbox_pred=bbox_pred_h,
                                                               bbox_targets=bbox_targets_h,
                                                               label=labels,
                                                               num_classes=self.cfgs.CLASS_NUM + 1,
                                                               sigma=self.cfgs.FASTRCNN_SIGMA)
                if self.cfgs.USE_IOU_FACTOR:
                    reg_loss_r = self.losses.iou_smooth_l1_loss_rcnn_r(bbox_pred=bbox_pred_r,
                                                                       bbox_targets=bbox_targets_r,
                                                                       label=labels,
                                                                       rois=rois,
                                                                       target_gt_r=target_gt_r,
                                                                       num_classes=self.cfgs.CLASS_NUM + 1,
                                                                       sigma=self.cfgs.FASTRCNN_SIGMA)
                else:
                    reg_loss_r = self.losses.smooth_l1_loss_rcnn_r(bbox_pred=bbox_pred_r,
                                                                   bbox_targets=bbox_targets_r,
                                                                   label=labels,
                                                                   num_classes=self.cfgs.CLASS_NUM + 1,
                                                                   sigma=self.cfgs.FASTRCNN_SIGMA)

                cls_loss_h = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=cls_score_h,
                        labels=labels))  # because RoIs were already sampled/balanced upstream
                cls_loss_r = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=cls_score_r,
                        labels=labels))
                self.losses_dict['fast_cls_loss_h'] = cls_loss_h * self.cfgs.FAST_RCNN_CLASSIFICATION_LOSS_WEIGHT
                self.losses_dict['fast_reg_loss_h'] = reg_loss_h * self.cfgs.FAST_RCNN_LOCATION_LOSS_WEIGHT
                self.losses_dict['fast_cls_loss_r'] = cls_loss_r * self.cfgs.FAST_RCNN_CLASSIFICATION_LOSS_WEIGHT
                self.losses_dict['fast_reg_loss_r'] = reg_loss_r * self.cfgs.FAST_RCNN_LOCATION_LOSS_WEIGHT

            with tf.variable_scope('build_attention_loss',
                                   regularizer=slim.l2_regularizer(self.cfgs.WEIGHT_DECAY)):
                attention_loss = self.losses.build_attention_loss(mask_gt, pa_mask_pred)
                self.losses_dict['attention_loss'] = attention_loss

    def build_whole_detection_network(self, input_img_batch, gtboxes_batch_h=None,
                                      gtboxes_batch_r=None, mask_batch=None, gpu_id=0):
        """Assemble the full SCRDet graph.

        :param input_img_batch: input image tensor.
        :param gtboxes_batch_h: horizontal GT boxes -> [-1, 5]; training only.
        :param gtboxes_batch_r: rotated GT boxes -> [-1, 6]; training only.
        :param mask_batch: GT attention mask for the attention loss; training only.
        :param gpu_id: device index for rotated NMS.
        :return: h-branch and r-branch detections (+ losses_dict when training).
        """
        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)
            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build backbone
        feature, pa_mask = self.build_backbone(input_img_batch)

        # 2. build rpn
        rpn_box_pred, rpn_cls_score, rpn_cls_prob = self.rpn(feature)
        rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
        rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
        # NOTE(review): rpn_cls_prob from self.rpn() is recomputed here after the
        # reshape (same scope name) — the earlier value is discarded.
        rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

        # 3. generate anchors
        anchors = self.make_anchors(feature)

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        with tf.variable_scope('postprocess_RPN'):
            rois, roi_scores = self.postprocess_rpn_proposals(rpn_bbox_pred=rpn_box_pred,
                                                              rpn_cls_prob=rpn_cls_prob,
                                                              img_shape=img_shape,
                                                              anchors=anchors,
                                                              is_training=self.is_training)

        # 5. sample minibatch
        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                rpn_labels, rpn_bbox_targets = \
                    tf.py_func(
                        self.anchor_sampler_r2cnn.anchor_target_layer,
                        [gtboxes_batch_h, img_shape, anchors],
                        [tf.float32, tf.float32])
                rpn_bbox_targets = tf.reshape(rpn_bbox_targets, [-1, 4])
                rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
                rpn_labels = tf.reshape(rpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels, method=0)

            # RPN accuracy summary over non-ignored anchors.
            rpn_cls_category = tf.argmax(rpn_cls_prob, axis=1)
            kept_rpppn = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)), [-1])
            rpn_cls_category = tf.gather(rpn_cls_category, kept_rpppn)
            acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(rpn_cls_category,
                             tf.to_int64(tf.gather(rpn_labels, kept_rpppn)))))
            tf.summary.scalar('ACC/fpn_accuracy', acc)

            with tf.control_dependencies([rpn_labels]):
                with tf.variable_scope('sample_RCNN_minibatch'):
                    rois, labels, bbox_targets_h, bbox_targets_r, target_gt_h, target_gt_r = \
                        tf.py_func(self.proposal_sampler_r2cnn.proposal_target_layer,
                                   [rois, gtboxes_batch_h, gtboxes_batch_r],
                                   [tf.float32, tf.float32, tf.float32,
                                    tf.float32, tf.float32, tf.float32])
                    rois = tf.reshape(rois, [-1, 4])
                    labels = tf.to_int32(labels)
                    labels = tf.reshape(labels, [-1])
                    bbox_targets_h = tf.reshape(bbox_targets_h,
                                                [-1, 4 * (self.cfgs.CLASS_NUM + 1)])
                    bbox_targets_r = tf.reshape(bbox_targets_r,
                                                [-1, 5 * (self.cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(input_img_batch, rois, labels, method=0)

        # 6. build Fast-RCNN, include roi align/pooling, box head
        bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = self.box_head.fc_head(
            self.roi_extractor, rois, feature, img_shape, self.is_training, mode=0)
        cls_prob_h = slim.softmax(cls_score_h, 'cls_prob_h')
        cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r')

        if self.is_training:
            cls_category_h = tf.argmax(cls_prob_h, axis=1)
            fast_acc_h = tf.reduce_mean(tf.to_float(tf.equal(cls_category_h, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc_h', fast_acc_h)

            cls_category_r = tf.argmax(cls_prob_r, axis=1)
            fast_acc_r = tf.reduce_mean(tf.to_float(tf.equal(cls_category_r, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc_r', fast_acc_r)

        # 8. build loss
        if self.is_training:
            self.build_loss(rpn_box_pred=rpn_box_pred,
                            rpn_bbox_targets=rpn_bbox_targets,
                            rpn_cls_score=rpn_cls_score,
                            rpn_labels=rpn_labels,
                            bbox_pred_h=bbox_pred_h,
                            bbox_targets_h=bbox_targets_h,
                            cls_score_h=cls_score_h,
                            bbox_pred_r=bbox_pred_r,
                            bbox_targets_r=bbox_targets_r,
                            rois=rois,
                            target_gt_r=target_gt_r,
                            cls_score_r=cls_score_r,
                            labels=labels,
                            mask_gt=mask_batch,
                            pa_mask_pred=pa_mask)

        # 9. postprocess_fastrcnn
        final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
            rois=rois, bbox_ppred=bbox_pred_h, scores=cls_prob_h, img_shape=img_shape)
        final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
            rois=rois, bbox_ppred=bbox_pred_r, scores=cls_prob_r, gpu_id=gpu_id)
        if self.is_training:
            return final_boxes_h, final_scores_h, final_category_h, \
                   final_boxes_r, final_scores_r, final_category_r, self.losses_dict
        else:
            return final_boxes_h, final_scores_h, final_category_h, \
                   final_boxes_r, final_scores_r, final_category_r,

    def postprocess_fastrcnn_r(self, rois, bbox_ppred, scores, gpu_id):
        '''
        Per-class decode + rotated NMS of the rotated branch.

        :param rois: [-1, 4]
        :param bbox_ppred: [-1, (cfgs.Class_num+1) * 5]
        :param scores: [-1, cfgs.Class_num + 1]
        :return: (final_boxes [-1, 5], final_scores [-1], final_category [-1])
        '''
        with tf.name_scope('postprocess_fastrcnn'):
            rois = tf.stop_gradient(rois)
            scores = tf.stop_gradient(scores)
            bbox_ppred = tf.reshape(bbox_ppred, [-1, self.cfgs.CLASS_NUM + 1, 5])
            bbox_ppred = tf.stop_gradient(bbox_ppred)

            bbox_pred_list = tf.unstack(bbox_ppred, axis=1)
            score_list = tf.unstack(scores, axis=1)

            allclasses_boxes = []
            allclasses_scores = []
            categories = []

            # Convert axis-aligned RoIs to rotated form [x_c, y_c, w, h, -90].
            x_c = (rois[:, 2] + rois[:, 0]) / 2
            y_c = (rois[:, 3] + rois[:, 1]) / 2
            h = rois[:, 2] - rois[:, 0] + 1
            w = rois[:, 3] - rois[:, 1] + 1
            theta = -90 * tf.ones_like(x_c)
            rois = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))

            for i in range(1, self.cfgs.CLASS_NUM + 1):
                # 1. decode boxes in each class
                tmp_encoded_box = bbox_pred_list[i]
                tmp_score = score_list[i]
                tmp_decoded_boxes = bbox_transform.rbbox_transform_inv(
                    boxes=rois, deltas=tmp_encoded_box,
                    scale_factors=self.cfgs.ROI_SCALE_FACTORS)

                # 2. clip to img boundaries
                # tmp_decoded_boxes = boxes_utils.clip_boxes_to_img_boundaries(decode_boxes=tmp_decoded_boxes,
                #                                                              img_shape=img_shape)

                # 3. NMS
                if self.cfgs.SOFT_NMS:
                    print("Using Soft NMS.......")
                    raise NotImplementedError("soft NMS for rotate has not implemented")
                else:
                    keep = nms_rotate.nms_rotate(decode_boxes=tmp_decoded_boxes,
                                                 scores=tmp_score,
                                                 iou_threshold=self.cfgs.FAST_RCNN_R_NMS_IOU_THRESHOLD,
                                                 max_output_size=self.cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
                                                 use_gpu=self.cfgs.ROTATE_NMS_USE_GPU,
                                                 gpu_id=gpu_id)

                perclass_boxes = tf.gather(tmp_decoded_boxes, keep)
                perclass_scores = tf.gather(tmp_score, keep)

                allclasses_boxes.append(perclass_boxes)
                allclasses_scores.append(perclass_scores)
                categories.append(tf.ones_like(perclass_scores) * i)

            final_boxes = tf.concat(allclasses_boxes, axis=0)
            final_scores = tf.concat(allclasses_scores, axis=0)
            final_category = tf.concat(categories, axis=0)

            if self.is_training:
                '''
                in training. We should show the detecitons in the tensorboard. So we add this.
                '''
                kept_indices = tf.reshape(
                    tf.where(tf.greater_equal(final_scores, self.cfgs.VIS_SCORE)), [-1])
            else:
                kept_indices = tf.reshape(
                    tf.where(tf.greater_equal(final_scores, self.cfgs.FILTERED_SCORE)), [-1])

            final_boxes = tf.gather(final_boxes, kept_indices)
            final_scores = tf.gather(final_scores, kept_indices)
            final_category = tf.gather(final_category, kept_indices)

            return final_boxes, final_scores, final_category

    def postprocess_fastrcnn_h(self, rois, bbox_ppred, scores, img_shape):
        '''
        Per-class decode + axis-aligned NMS of the horizontal branch.

        :param rois: [-1, 4]
        :param bbox_ppred: [-1, (cfgs.Class_num+1) * 4]
        :param scores: [-1, cfgs.Class_num + 1]
        :return: (final_boxes [-1, 4], final_scores [-1], final_category [-1])
        '''
        with tf.name_scope('postprocess_fastrcnn_h'):
            rois = tf.stop_gradient(rois)
            scores = tf.stop_gradient(scores)
            bbox_ppred = tf.reshape(bbox_ppred, [-1, self.cfgs.CLASS_NUM + 1, 4])
            bbox_ppred = tf.stop_gradient(bbox_ppred)

            bbox_pred_list = tf.unstack(bbox_ppred, axis=1)
            score_list = tf.unstack(scores, axis=1)

            allclasses_boxes = []
            allclasses_scores = []
            categories = []
            for i in range(1, self.cfgs.CLASS_NUM + 1):
                # 1. decode boxes in each class
                tmp_encoded_box = bbox_pred_list[i]
                tmp_score = score_list[i]
                tmp_decoded_boxes = bbox_transform.bbox_transform_inv(
                    boxes=rois, deltas=tmp_encoded_box,
                    scale_factors=self.cfgs.ROI_SCALE_FACTORS)

                # 2. clip to img boundaries
                tmp_decoded_boxes = clip_boxes_to_img_boundaries(decode_boxes=tmp_decoded_boxes,
                                                                 img_shape=img_shape)

                # 3. NMS
                keep = tf.image.non_max_suppression(
                    boxes=tmp_decoded_boxes,
                    scores=tmp_score,
                    max_output_size=self.cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
                    iou_threshold=self.cfgs.FAST_RCNN_H_NMS_IOU_THRESHOLD)

                perclass_boxes = tf.gather(tmp_decoded_boxes, keep)
                perclass_scores = tf.gather(tmp_score, keep)

                allclasses_boxes.append(perclass_boxes)
                allclasses_scores.append(perclass_scores)
                categories.append(tf.ones_like(perclass_scores) * i)

            final_boxes = tf.concat(allclasses_boxes, axis=0)
            final_scores = tf.concat(allclasses_scores, axis=0)
            final_category = tf.concat(categories, axis=0)

            # if self.is_training:
            #     '''
            #     in training. We should show the detecitons in the tensorboard. So we add this.
            #     '''
            if self.is_training:
                '''
                in training. We should show the detecitons in the tensorboard. So we add this.
                '''
                kept_indices = tf.reshape(
                    tf.where(tf.greater_equal(final_scores, self.cfgs.VIS_SCORE)), [-1])
            else:
                kept_indices = tf.reshape(
                    tf.where(tf.greater_equal(final_scores, self.cfgs.FILTERED_SCORE)), [-1])

            final_boxes = tf.gather(final_boxes, kept_indices)
            final_scores = tf.gather(final_scores, kept_indices)
            final_category = tf.gather(final_category, kept_indices)

            return final_boxes, final_scores, final_category
class DetectionNetworkRetinaNet(DetectionNetworkBase):
    """RetinaNet variant that builds the dense head and (in training) the
    focal/regression losses, returning the raw head outputs — no
    postprocessing is done in this class."""

    def __init__(self, cfgs, is_training):
        super(DetectionNetworkRetinaNet, self).__init__(cfgs, is_training)
        self.anchor_sampler_retinenet = AnchorSamplerRetinaNet(cfgs)
        self.losses = Loss(self.cfgs)

    def build_whole_detection_network(self, input_img_batch, gtboxes_batch_h=None,
                                      gtboxes_batch_r=None, gpu_id=0):
        """Build backbone + dense head (+ losses when training).

        :param input_img_batch: input image tensor.
        :param gtboxes_batch_h: horizontal GT boxes -> [-1, 5]; training only.
        :param gtboxes_batch_r: rotated GT boxes -> [-1, 6]; training only.
        :param gpu_id: device index passed to the anchor sampler.
        :return: (rpn_box_pred, rpn_cls_prob) — undecoded head outputs.
        """
        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)
            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        if self.cfgs.USE_GN:
            # Group norm path pins a static input shape.
            input_img_batch = tf.reshape(input_img_batch,
                                         [1, self.cfgs.IMG_SHORT_SIDE_LEN,
                                          self.cfgs.IMG_MAX_LENGTH, 3])

        # 1. build backbone
        feature_pyramid = self.build_backbone(input_img_batch)

        # 2. build rpn
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(
            feature_pyramid, 'rpn_net')
        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
        rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)

        # 3. generate anchors
        anchor_list = self.make_anchors(feature_pyramid)
        anchors = tf.concat(anchor_list, axis=0)

        # 4. build loss
        if self.is_training:
            with tf.variable_scope('build_loss'):
                labels, target_delta, anchor_states, target_boxes = tf.py_func(
                    func=self.anchor_sampler_retinenet.anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors, gpu_id],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 1)

                cls_loss = self.losses.focal_loss(labels, rpn_cls_score, anchor_states)

                if self.cfgs.REG_LOSS_MODE == 0:
                    reg_loss = self.losses.iou_smooth_l1_loss_log(target_delta, rpn_box_pred,
                                                                  anchor_states, target_boxes,
                                                                  anchors)
                elif self.cfgs.REG_LOSS_MODE == 1:
                    reg_loss = self.losses.iou_smooth_l1_loss_exp(target_delta, rpn_box_pred,
                                                                  anchor_states, target_boxes,
                                                                  anchors,
                                                                  alpha=self.cfgs.ALPHA,
                                                                  beta=self.cfgs.BETA)
                else:
                    reg_loss = self.losses.smooth_l1_loss(target_delta, rpn_box_pred,
                                                          anchor_states)

                self.losses_dict['cls_loss'] = cls_loss * self.cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * self.cfgs.REG_WEIGHT

        return rpn_box_pred, rpn_cls_prob
class DetectionNetworkRetinaNet(DetectionNetworkBase):
    """Rotated RetinaNet variant that regresses the angle as (sin, cos).

    The regression head predicts 4 box deltas plus separate sin/cos maps
    per anchor; at decode time the angle is recovered with atan(sin/cos).

    NOTE(review): a class with this exact name is also defined earlier in
    this file; if both live in one module the later definition shadows the
    earlier one -- they likely belong in separate files. Confirm intent.
    """

    def __init__(self, cfgs, is_training):
        """Store config and create the anchor sampler and loss helpers."""
        super(DetectionNetworkRetinaNet, self).__init__(cfgs, is_training)
        # Assigns each anchor a label/target (run via tf.py_func below).
        self.anchor_sampler_retinenet = AnchorSamplerRetinaNet(cfgs)
        self.losses = Loss(self.cfgs)

    def rpn_reg_net(self, inputs, scope_list, reuse_flag, level):
        """Regression sub-net for one FPN level.

        Args:
            inputs: feature map for this level.
            scope_list: variable-scope names; index 1 is the conv tower,
                index 3 the final regression layer (shared across levels
                when ``cfgs.SHARE_NET`` via ``reuse_flag``).
            reuse_flag: None on first use, True when re-using shared vars.
            level: FPN level name, used only in op names.

        Returns:
            Tensor of shape [-1, 6]: 4 box deltas + predicted sin + cos.
        """
        rpn_conv2d_3x3 = inputs
        for i in range(self.cfgs.NUM_SUBNET_CONV):
            rpn_conv2d_3x3 = slim.conv2d(
                inputs=rpn_conv2d_3x3,
                num_outputs=self.cfgs.FPN_CHANNEL,
                kernel_size=[3, 3],
                weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
                stride=1,
                # With GroupNorm the ReLU is applied after normalization.
                activation_fn=None if self.cfgs.USE_GN else tf.nn.relu,
                scope='{}_{}'.format(scope_list[1], i),
                trainable=self.is_training,
                reuse=reuse_flag)
            if self.cfgs.USE_GN:
                rpn_conv2d_3x3 = tf.contrib.layers.group_norm(rpn_conv2d_3x3)
                rpn_conv2d_3x3 = tf.nn.relu(rpn_conv2d_3x3)

        # Linear head for the 4 geometric deltas.
        rpn_delta_boxes = slim.conv2d(
            rpn_conv2d_3x3,
            num_outputs=4 * self.num_anchors_per_location,
            kernel_size=[3, 3],
            stride=1,
            weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
            scope=scope_list[3],
            activation_fn=None,
            trainable=self.is_training,
            reuse=reuse_flag)
        # Sigmoid heads for sin/cos, rescaled below to the target range.
        rpn_delta_sin = slim.conv2d(
            rpn_conv2d_3x3,
            num_outputs=self.num_anchors_per_location,
            kernel_size=[3, 3],
            stride=1,
            weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
            scope=scope_list[3] + '_sin',
            activation_fn=tf.nn.sigmoid,
            trainable=self.is_training,
            reuse=reuse_flag)
        rpn_delta_cos = slim.conv2d(
            rpn_conv2d_3x3,
            num_outputs=self.num_anchors_per_location,
            kernel_size=[3, 3],
            stride=1,
            weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
            scope=scope_list[3] + '_cos',
            activation_fn=tf.nn.sigmoid,
            trainable=self.is_training,
            reuse=reuse_flag)

        if self.cfgs.ANGLE_RANGE == 180:
            # [-90, 90] sin in [-1, 1] cos in [0, 1]
            # rpn_delta_sin = 2 * (rpn_delta_sin - 0.5)
            # [-90, 90] sin in [-1, 1] cos in [-1, 1]
            # Map sigmoid output (0, 1) onto (-1, 1) for both channels.
            rpn_delta_sin, rpn_delta_cos = 2 * (rpn_delta_sin - 0.5), 2 * (
                rpn_delta_cos - 0.5)  # better
        else:
            # [-90, 0] sin in [-1, 0] cos in [0, 1]
            rpn_delta_sin *= -1

        # Flatten per-anchor predictions and append sin/cos as columns
        # 5 and 6 of the regression output.
        rpn_delta_boxes = tf.reshape(
            rpn_delta_boxes, [-1, 4],
            name='rpn_{}_regression_reshape'.format(level))
        rpn_delta_sin = tf.reshape(rpn_delta_sin, [-1, 1],
                                   name='rpn_{}_sin_reshape'.format(level))
        rpn_delta_cos = tf.reshape(rpn_delta_cos, [-1, 1],
                                   name='rpn_{}_cos_reshape'.format(level))
        rpn_delta_boxes = tf.concat(
            [rpn_delta_boxes, rpn_delta_sin, rpn_delta_cos], axis=-1)
        return rpn_delta_boxes

    def rpn_net(self, feature_pyramid, name):
        """Run cls/reg sub-nets over every FPN level.

        Args:
            feature_pyramid: dict level-name -> feature map.
            name: variable scope wrapping all sub-net variables.

        Returns:
            Three per-level lists: box deltas, class scores, class probs
            (kept as lists; concatenation happens in the caller).
        """
        rpn_delta_boxes_list = []
        rpn_scores_list = []
        rpn_probs_list = []
        with tf.variable_scope(name):
            with slim.arg_scope([slim.conv2d],
                                weights_regularizer=slim.l2_regularizer(
                                    self.cfgs.WEIGHT_DECAY)):
                for level in self.cfgs.LEVEL:
                    if self.cfgs.SHARE_NET:
                        # Shared towers: create vars on the first level,
                        # reuse them on every subsequent level.
                        reuse_flag = None if level == self.cfgs.LEVEL[
                            0] else True
                        scope_list = [
                            'conv2d_3x3_cls', 'conv2d_3x3_reg',
                            'rpn_classification', 'rpn_regression'
                        ]
                    else:
                        reuse_flag = None
                        scope_list = [
                            'conv2d_3x3_cls_' + level,
                            'conv2d_3x3_reg_' + level,
                            'rpn_classification_' + level,
                            'rpn_regression_' + level
                        ]
                    rpn_box_scores, rpn_box_probs = self.rpn_cls_net(
                        feature_pyramid[level], scope_list, reuse_flag,
                        level)
                    rpn_delta_boxes = self.rpn_reg_net(feature_pyramid[level],
                                                       scope_list, reuse_flag,
                                                       level)
                    rpn_scores_list.append(rpn_box_scores)
                    rpn_probs_list.append(rpn_box_probs)
                    rpn_delta_boxes_list.append(rpn_delta_boxes)

            # rpn_all_delta_boxes = tf.concat(rpn_delta_boxes_list, axis=0)
            # rpn_all_boxes_scores = tf.concat(rpn_scores_list, axis=0)
            # rpn_all_boxes_probs = tf.concat(rpn_probs_list, axis=0)

            return rpn_delta_boxes_list, rpn_scores_list, rpn_probs_list

    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h=None,
                                      gtboxes_batch_r=None,
                                      gpu_id=0):
        """Assemble the full graph: backbone, head, losses, postprocess.

        Returns (boxes, scores, category[, losses_dict]) -- the losses
        dict is appended only in training mode.
        """
        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)
            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        if self.cfgs.USE_GN:
            input_img_batch = tf.reshape(
                input_img_batch,
                [1, self.cfgs.IMG_SHORT_SIDE_LEN, self.cfgs.IMG_MAX_LENGTH, 3])

        # 1. build backbone
        feature_pyramid = self.build_backbone(input_img_batch)

        # 2. build rpn
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(
            feature_pyramid, 'rpn_net')
        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
        rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)

        # 3. generate anchors
        anchor_list = self.make_anchors(feature_pyramid)
        anchors = tf.concat(anchor_list, axis=0)

        # 4. build loss
        if self.is_training:
            with tf.variable_scope('build_loss'):
                labels, target_delta, anchor_states, target_boxes = tf.py_func(
                    func=self.anchor_sampler_retinenet.anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors, gpu_id],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 1)

                cls_loss = self.losses.focal_loss(labels, rpn_cls_score,
                                                  anchor_states)
                # Geometry loss on the first 4 delta columns; the last
                # target column (theta) and last 2 pred columns (sin/cos)
                # are handled separately below.
                reg_xywh_loss = self.losses.smooth_l1_loss(
                    target_delta[:, :-1], rpn_box_pred[:, :-2], anchor_states)

                # NOTE(review): conditional-expression precedence makes
                # this `(theta + 90.) if range==180 else 0.` -- the else
                # branch drops the GT angle entirely and yields scalar 0.
                # Possibly `theta + (90. if ... else 0.)` was intended;
                # confirm against the upstream codebase before changing.
                target_theta = tf.reshape(target_boxes, [
                    -1, 6
                ])[:, -2] + 90. if self.cfgs.ANGLE_RANGE == 180 else 0.
                target_theta = target_theta * 3.1415926 / 180.
                target_theta_sin = tf.reshape(tf.sin(target_theta), [-1, 1])
                target_theta_cos = tf.reshape(tf.cos(target_theta), [-1, 1])
                reg_theta_loss = self.losses.smooth_l1_loss(
                    tf.concat([target_theta_sin, target_theta_cos], axis=-1),
                    rpn_box_pred[:, -2:], anchor_states)

                reg_loss = reg_xywh_loss + reg_theta_loss
                self.losses_dict['cls_loss'] = cls_loss * self.cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * self.cfgs.REG_WEIGHT

        # 5. postprocess
        with tf.variable_scope('postprocess_detctions'):
            boxes, scores, category = self.postprocess_detctions(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                anchors=anchors,
                gpu_id=gpu_id)
            # Detections feed summaries/eval only; never backprop through.
            boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)

        if self.is_training:
            return boxes, scores, category, self.losses_dict
        else:
            return boxes, scores, category

    def postprocess_detctions(self, rpn_bbox_pred, rpn_cls_prob, anchors,
                              gpu_id):
        """Per-class score filtering, box decoding, and rotated NMS.

        Args:
            rpn_bbox_pred: [-1, 6] deltas (4 geometric + sin + cos).
            rpn_cls_prob: [-1, CLASS_NUM] sigmoid class probabilities.
            anchors: flattened anchors matching the prediction order.
            gpu_id: device for the GPU rotated-NMS kernel.

        Returns:
            (boxes [-1, 5], scores [-1], labels [-1]); labels are 1-based.
        """
        return_boxes_pred = []
        return_scores = []
        return_labels = []
        for j in range(0, self.cfgs.CLASS_NUM):
            scores = rpn_cls_prob[:, j]
            # Lower viz threshold while training, stricter one at test.
            if self.is_training:
                indices = tf.reshape(
                    tf.where(tf.greater(scores, self.cfgs.VIS_SCORE)), [
                        -1,
                    ])
            else:
                indices = tf.reshape(
                    tf.where(tf.greater(scores, self.cfgs.FILTERED_SCORE)), [
                        -1,
                    ])

            anchors_ = tf.gather(anchors, indices)
            rpn_bbox_pred_ = tf.gather(rpn_bbox_pred, indices)
            scores = tf.gather(scores, indices)

            if self.method == 'H':
                # Convert axis-aligned anchors to rotated form with
                # theta = -90. NOTE(review): h is taken from indices
                # 2/0 and w from 3/1, implying a [y1, x1, y2, x2]-style
                # layout -- verify against make_anchors.
                x_c = (anchors_[:, 2] + anchors_[:, 0]) / 2
                y_c = (anchors_[:, 3] + anchors_[:, 1]) / 2
                h = anchors_[:, 2] - anchors_[:, 0] + 1
                w = anchors_[:, 3] - anchors_[:, 1] + 1
                theta = -90 * tf.ones_like(x_c)
                anchors_ = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))

            if self.cfgs.ANGLE_RANGE == 180:
                anchors_ = tf.py_func(coordinate_present_convert,
                                      inp=[anchors_, -1],
                                      Tout=[tf.float32])
                anchors_ = tf.reshape(anchors_, [-1, 5])

            boxes_pred = bbox_transform.rbbox_transform_inv(
                boxes=anchors_, deltas=rpn_bbox_pred_)
            x, y, w, h, _ = tf.unstack(boxes_pred, axis=1)
            # Angle recovered from the sin/cos channels.
            # NOTE(review): atan(sin/cos) is undefined at cos == 0 and
            # only spans (-90, 90) -- confirm cos cannot be 0 here.
            theta = tf.atan(rpn_bbox_pred_[:, -2] /
                            rpn_bbox_pred_[:, -1]) * 180 / 3.1415926
            boxes_pred = tf.transpose(tf.stack([x, y, w, h, theta]))

            if self.cfgs.ANGLE_RANGE == 180:
                boxes_pred = tf.py_func(coordinate_present_convert,
                                        inp=[boxes_pred, 1, False],
                                        Tout=[tf.float32])
                boxes_pred = tf.reshape(boxes_pred, [-1, 5])

            # max_output_size = 4000 if 'DOTA' in self.cfgs.NET_NAME else 200
            max_output_size = 100
            nms_indices = nms_rotate.nms_rotate(
                decode_boxes=boxes_pred,
                scores=scores,
                iou_threshold=self.cfgs.NMS_IOU_THRESHOLD,
                # Both branches are currently 100 -- vestigial switch.
                max_output_size=100 if self.is_training else max_output_size,
                use_gpu=True,
                gpu_id=gpu_id)

            tmp_boxes_pred = tf.reshape(tf.gather(boxes_pred, nms_indices),
                                        [-1, 5])
            tmp_scores = tf.reshape(tf.gather(scores, nms_indices), [
                -1,
            ])

            return_boxes_pred.append(tmp_boxes_pred)
            return_scores.append(tmp_scores)
            # 1-based class labels (0 is background by convention here).
            return_labels.append(tf.ones_like(tmp_scores) * (j + 1))

        return_boxes_pred = tf.concat(return_boxes_pred, axis=0)
        return_scores = tf.concat(return_scores, axis=0)
        return_labels = tf.concat(return_labels, axis=0)

        return return_boxes_pred, return_scores, return_labels