Exemple #1
0
 def __init__(self, cfgs, is_training):
     """Initialise the R2CNN detection network and its helper components.

     :param cfgs: configuration object; forwarded to the base class and to
         every helper constructed here.
     :param is_training: forwarded to the base-class constructor.
     """
     super(DetectionNetworkR2CNN, self).__init__(cfgs, is_training)
     # Every helper below is built from the same configuration object.
     self.anchor_sampler_r2cnn = AnchorSamplerR2CNN(cfgs)
     self.proposal_sampler_r2cnn = ProposalSamplerR2CNN(cfgs)
     self.losses = Loss(cfgs)
     self.roi_extractor = RoIExtractor(cfgs)
     self.box_head = BoxHead(cfgs)
Exemple #2
0
 def __init__(self, cfgs, is_training):
     """Initialise the refine-RetinaNet detection network.

     :param cfgs: configuration object; forwarded to the base class and to
         the two anchor samplers.
     :param is_training: forwarded to the base-class constructor.
     """
     super(DetectionNetworkRefineRetinaNet,
           self).__init__(cfgs, is_training)
     self.anchor_sampler_retinenet = AnchorSamplerRetinaNet(cfgs)
     self.refine_anchor_sampler_r3det = RefineAnchorSamplerR3Det(cfgs)
     # NOTE(review): `self.cfgs` is presumably set by the base-class
     # constructor called above -- confirm against DetectionNetworkBase.
     self.losses = Loss(self.cfgs)
Exemple #3
0
class DetectionNetworkRefineRetinaNet(DetectionNetworkBase):
    def __init__(self, cfgs, is_training):
        """Initialise the refine-RetinaNet detection network.

        :param cfgs: configuration object; forwarded to the base class and to
            the two anchor samplers.
        :param is_training: forwarded to the base-class constructor.
        """
        super(DetectionNetworkRefineRetinaNet,
              self).__init__(cfgs, is_training)
        # First-stage and refine-stage target samplers.
        self.anchor_sampler_retinenet = AnchorSamplerRetinaNet(cfgs)
        self.refine_anchor_sampler_r3det = RefineAnchorSamplerR3Det(cfgs)
        # NOTE(review): `self.cfgs` is presumably set by the base-class
        # constructor called above -- confirm against DetectionNetworkBase.
        self.losses = Loss(self.cfgs)

    def refine_cls_net(self, inputs, scope_list, reuse_flag, level):
        """Build the classification branch of the refine head for one level.

        ``cfgs.NUM_SUBNET_CONV`` 3x3 ReLU convolutions are followed by a 3x3
        prediction convolution (no activation) with
        ``CLASS_NUM * num_anchors_per_location`` output channels.

        :param inputs: feature map fed to the first convolution.
        :param scope_list: scope names; entry 0 prefixes the tower layers and
            entry 2 names the prediction layer.
        :param reuse_flag: forwarded as ``reuse`` to every ``slim.conv2d``.
        :param level: pyramid level label, used only inside op names.
        :return: ``(scores, probs)`` -- the logits reshaped to
            ``[-1, CLASS_NUM]`` and their element-wise sigmoid.
        """
        cfgs = self.cfgs
        # Arguments that every convolution of this branch has in common.
        shared_conv_args = dict(
            kernel_size=[3, 3],
            stride=1,
            weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            trainable=self.is_training,
            reuse=reuse_flag)

        tower = inputs
        for layer_idx in range(cfgs.NUM_SUBNET_CONV):
            tower = slim.conv2d(
                inputs=tower,
                num_outputs=cfgs.FPN_CHANNEL,
                activation_fn=tf.nn.relu,
                biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                scope='{}_{}'.format(scope_list[0], layer_idx),
                **shared_conv_args)

        # The prediction layer uses its own bias initializer and emits logits.
        logits = slim.conv2d(
            tower,
            num_outputs=cfgs.CLASS_NUM * self.num_anchors_per_location,
            activation_fn=None,
            biases_initializer=cfgs.FINAL_CONV_BIAS_INITIALIZER,
            scope=scope_list[2],
            **shared_conv_args)

        logits = tf.reshape(
            logits, [-1, cfgs.CLASS_NUM],
            name='refine_{}_classification_reshape'.format(level))
        probs = tf.sigmoid(
            logits, name='refine_{}_classification_sigmoid'.format(level))

        return logits, probs

    def refine_reg_net(self, inputs, scope_list, reuse_flag, level):
        """Build the box-regression branch of the refine head for one level.

        ``cfgs.NUM_SUBNET_CONV`` 3x3 ReLU convolutions are followed by a 3x3
        prediction convolution (no activation) with
        ``5 * num_anchors_per_location`` output channels.

        :param inputs: feature map fed to the first convolution.
        :param scope_list: scope names; entry 1 prefixes the tower layers and
            entry 3 names the prediction layer.
        :param reuse_flag: forwarded as ``reuse`` to every ``slim.conv2d``.
        :param level: pyramid level label, used only inside op names.
        :return: predicted deltas reshaped to ``[-1, 5]``.
        """
        cfgs = self.cfgs
        # All convolutions of this branch share every argument except the
        # output width, activation and scope.
        shared_conv_args = dict(
            kernel_size=[3, 3],
            stride=1,
            weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
            trainable=self.is_training,
            reuse=reuse_flag)

        tower = inputs
        for layer_idx in range(cfgs.NUM_SUBNET_CONV):
            tower = slim.conv2d(
                inputs=tower,
                num_outputs=cfgs.FPN_CHANNEL,
                activation_fn=tf.nn.relu,
                scope='{}_{}'.format(scope_list[1], layer_idx),
                **shared_conv_args)

        deltas = slim.conv2d(
            tower,
            num_outputs=5 * self.num_anchors_per_location,
            activation_fn=None,
            scope=scope_list[3],
            **shared_conv_args)

        return tf.reshape(
            deltas, [-1, 5],
            name='refine_{}_regression_reshape'.format(level))

    def refine_net(self, feature_pyramid, name):
        """Run the refine classification and regression heads on every level.

        :param feature_pyramid: mapping indexed by the entries of
            ``cfgs.LEVEL``.
        :param name: variable scope wrapping every layer built here.
        :return: three lists with one entry per level, in ``cfgs.LEVEL``
            order: box deltas, class scores and class probabilities.
        """
        base_scopes = [
            'conv2d_3x3_cls', 'conv2d_3x3_reg',
            'refine_classification', 'refine_regression'
        ]
        delta_boxes_per_level, scores_per_level, probs_per_level = [], [], []

        with tf.variable_scope(name), \
                slim.arg_scope([slim.conv2d],
                               weights_regularizer=slim.l2_regularizer(
                                   self.cfgs.WEIGHT_DECAY)):
            for level in self.cfgs.LEVEL:
                if self.cfgs.SHARE_NET:
                    # One set of weights for all levels: variables are
                    # created on the first level and reused afterwards.
                    scope_list = list(base_scopes)
                    is_first = level == self.cfgs.LEVEL[0]
                    reuse_flag = None if is_first else True
                else:
                    # Separate weights per level: suffix each scope name.
                    scope_list = [
                        prefix + '_' + level for prefix in base_scopes
                    ]
                    reuse_flag = None

                level_features = feature_pyramid[level]
                level_scores, level_probs = self.refine_cls_net(
                    level_features, scope_list, reuse_flag, level)
                level_deltas = self.refine_reg_net(
                    level_features, scope_list, reuse_flag, level)

                scores_per_level.append(level_scores)
                probs_per_level.append(level_probs)
                delta_boxes_per_level.append(level_deltas)

        return delta_boxes_per_level, scores_per_level, probs_per_level

    def refine_stage(self, input_img_batch, gtboxes_batch_r, box_pred_list,
                     cls_prob_list, proposal_list, feature_pyramid, gpu_id,
                     pos_threshold, neg_threshold, stage):
        """Build one refine stage: decode boxes, run the head, add losses.

        The previous stage's deltas are decoded against its proposals, the
        refine head is run on ``feature_pyramid`` and, when training, the
        stage's classification and regression losses are written to
        ``self.losses_dict``.

        :param input_img_batch: image batch, used for the anchor summary.
        :param gtboxes_batch_r: ground-truth boxes handed to the refine
            target sampler.
        :param box_pred_list: per-level deltas from the previous stage.
        :param cls_prob_list: per-level class probabilities from the previous
            stage (zipped with the other lists but not read here).
        :param proposal_list: per-level boxes the deltas refer to.
        :param feature_pyramid: per-level feature maps for the refine head.
        :param gpu_id: forwarded to the refine target sampler.
        :param pos_threshold: forwarded to the refine target sampler.
        :param neg_threshold: forwarded to the refine target sampler.
        :param stage: suffix for scope names and loss keys.
        :return: ``(refine_box_pred_list, refine_cls_prob_list,
            refine_boxes_list)`` for this stage.
        """
        # Four-coordinate proposals are expanded to five parameters only when
        # the stage suffix is empty and the configured method is 'H'.
        expand_horizontal = stage == '' and self.cfgs.METHOD == 'H'

        with tf.variable_scope('refine_feature_pyramid{}'.format(stage)):
            refine_boxes_list = []

            # The unused members stay in the zip so the number of iterations
            # is still bounded by the shortest of the five sequences.
            for deltas, _, rois, _, _ in zip(box_pred_list, cls_prob_list,
                                             proposal_list,
                                             self.cfgs.ANCHOR_STRIDE,
                                             self.cfgs.LEVEL):
                if expand_horizontal:
                    first_x, first_y = rois[:, 0], rois[:, 1]
                    second_x, second_y = rois[:, 2], rois[:, 3]
                    x_c = (second_x + first_x) / 2
                    y_c = (second_y + first_y) / 2
                    x_extent = second_x - first_x + 1
                    y_extent = second_y - first_y + 1
                    theta = -90 * tf.ones_like(x_c)
                    # With theta fixed at -90 the y extent is stored in the
                    # third slot and the x extent in the fourth.
                    rois = tf.transpose(
                        tf.stack([x_c, y_c, y_extent, x_extent, theta]))

                refine_boxes_list.append(
                    bbox_transform.rbbox_transform_inv(boxes=rois,
                                                       deltas=deltas))

            head_outputs = self.refine_net(feature_pyramid,
                                           'refine_net{}'.format(stage))
            refine_box_pred_list = head_outputs[0]
            refine_cls_score_list = head_outputs[1]
            refine_cls_prob_list = head_outputs[2]

            refine_box_pred = tf.concat(refine_box_pred_list, axis=0)
            refine_cls_score = tf.concat(refine_cls_score_list, axis=0)
            refine_boxes = tf.concat(refine_boxes_list, axis=0)

        if not self.is_training:
            return refine_box_pred_list, refine_cls_prob_list, refine_boxes_list

        with tf.variable_scope('build_refine_loss{}'.format(stage)):
            sampler = self.refine_anchor_sampler_r3det
            # Target assignment runs in Python through tf.py_func.
            targets = tf.py_func(
                func=sampler.refine_anchor_target_layer,
                inp=[
                    gtboxes_batch_r, refine_boxes, pos_threshold,
                    neg_threshold, gpu_id
                ],
                Tout=[tf.float32, tf.float32, tf.float32, tf.float32])
            (refine_labels, refine_target_delta, refine_box_states,
             refine_target_boxes) = targets

            self.add_anchor_img_smry(input_img_batch, refine_boxes,
                                     refine_box_states, 1)

            refine_cls_loss = self.losses.focal_loss(
                refine_labels, refine_cls_score, refine_box_states)

            if self.cfgs.USE_IOU_FACTOR:
                refine_reg_loss = self.losses.iou_smooth_l1_loss_exp(
                    refine_target_delta,
                    refine_box_pred,
                    refine_box_states,
                    refine_target_boxes,
                    refine_boxes,
                    is_refine=True)
            else:
                refine_reg_loss = self.losses.smooth_l1_loss(
                    refine_target_delta, refine_box_pred, refine_box_states)

            cls_key = 'refine_cls_loss{}'.format(stage)
            reg_key = 'refine_reg_loss{}'.format(stage)
            self.losses_dict[cls_key] = refine_cls_loss * self.cfgs.CLS_WEIGHT
            self.losses_dict[reg_key] = refine_reg_loss * self.cfgs.REG_WEIGHT

        return refine_box_pred_list, refine_cls_prob_list, refine_boxes_list

    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h=None,
                                      gtboxes_batch_r=None,
                                      gpu_id=0):
        """Assemble backbone, first-stage head, refine stages and post-processing.

        :param input_img_batch: image batch passed to the backbone.
        :param gtboxes_batch_h: ground-truth boxes, reshaped here to
            ``[-1, 5]``; only touched when training.
        :param gtboxes_batch_r: ground-truth boxes, reshaped here to
            ``[-1, 6]``; only touched when training.
        :param gpu_id: forwarded to the Python target-assignment functions.
        :return: ``(boxes, scores, category)``; when training
            ``self.losses_dict`` is appended as a fourth element.
        """
        if self.is_training:
            gtboxes_batch_h = tf.cast(
                tf.reshape(gtboxes_batch_h, [-1, 5]), tf.float32)
            gtboxes_batch_r = tf.cast(
                tf.reshape(gtboxes_batch_r, [-1, 6]), tf.float32)

        # Step 1: backbone features.
        feature_pyramid = self.build_backbone(input_img_batch)

        # Step 2: first-stage head.
        rpn_outputs = self.rpn_net(feature_pyramid, 'rpn_net')
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = rpn_outputs
        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)

        # Step 3: anchors for every level.
        anchor_list = self.make_anchors(feature_pyramid)
        anchors = tf.concat(anchor_list, axis=0)

        # Step 4: first-stage losses (training only).
        if self.is_training:
            with tf.variable_scope('build_loss'):
                targets = tf.py_func(
                    func=self.anchor_sampler_retinenet.anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors, gpu_id],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])
                labels, target_delta, anchor_states, target_boxes = targets

                # The last summary argument is 0 for method 'H', 1 otherwise.
                smry_flag = 0 if self.method == 'H' else 1
                self.add_anchor_img_smry(input_img_batch, anchors,
                                         anchor_states, smry_flag)

                cls_loss = self.losses.focal_loss(labels, rpn_cls_score,
                                                  anchor_states)

                if self.cfgs.USE_IOU_FACTOR:
                    reg_loss = self.losses.iou_smooth_l1_loss_exp(
                        target_delta,
                        rpn_box_pred,
                        anchor_states,
                        target_boxes,
                        anchors,
                        alpha=self.cfgs.ALPHA,
                        beta=self.cfgs.BETA)
                else:
                    reg_loss = self.losses.smooth_l1_loss(
                        target_delta, rpn_box_pred, anchor_states)

                self.losses_dict['cls_loss'] = cls_loss * self.cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * self.cfgs.REG_WEIGHT

        # Each refine stage consumes the outputs of the stage before it; the
        # first one starts from the first-stage head and the raw anchors.
        box_pred_list = rpn_box_pred_list
        cls_prob_list = rpn_cls_prob_list
        proposal_list = anchor_list

        all_box_pred_list, all_cls_prob_list, all_proposal_list = [], [], []

        for stage_idx in range(self.cfgs.NUM_REFINE_STAGE):
            pos_thr = self.cfgs.REFINE_IOU_POSITIVE_THRESHOLD[stage_idx]
            neg_thr = self.cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[stage_idx]
            # The first refine stage has no suffix; later ones are numbered
            # from 3 upwards.
            if stage_idx == 0:
                stage_suffix = ''
            else:
                stage_suffix = '_stage{}'.format(stage_idx + 2)

            box_pred_list, cls_prob_list, proposal_list = self.refine_stage(
                input_img_batch,
                gtboxes_batch_r,
                box_pred_list,
                cls_prob_list,
                proposal_list,
                feature_pyramid,
                gpu_id,
                pos_threshold=pos_thr,
                neg_threshold=neg_thr,
                stage=stage_suffix)

            if self.is_training:
                # Training keeps only the most recent stage's outputs.
                all_box_pred_list = box_pred_list
                all_cls_prob_list = cls_prob_list
                all_proposal_list = proposal_list
            else:
                # Inference accumulates the outputs of every refine stage.
                all_box_pred_list.extend(box_pred_list)
                all_cls_prob_list.extend(cls_prob_list)
                all_proposal_list.extend(proposal_list)

        # Step 5: post-processing; no gradient flows through the detections.
        with tf.variable_scope('postprocess_detctions'):
            box_pred = tf.concat(all_box_pred_list, axis=0)
            cls_prob = tf.concat(all_cls_prob_list, axis=0)
            proposal = tf.concat(all_proposal_list, axis=0)

            boxes, scores, category = self.postprocess_detctions(
                refine_bbox_pred=box_pred,
                refine_cls_prob=cls_prob,
                anchors=proposal)
            boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)

        if self.is_training:
            return boxes, scores, category, self.losses_dict
        return boxes, scores, category

    def postprocess_detctions(self, refine_bbox_pred, refine_cls_prob,
                              anchors):
        """Decode boxes and select detections independently for every class.

        :param refine_bbox_pred: predicted deltas, decoded against
            ``anchors``.
        :param refine_cls_prob: class probabilities; column ``j`` holds the
            scores of class ``j``.
        :param anchors: reference boxes for the deltas.
        :return: ``(boxes, scores, labels)`` concatenated over all classes;
            boxes are reshaped to ``[-1, 5]`` and labels are ``j + 1`` for
            class column ``j``.
        """

        def filter_detections(boxes, scores):
            """Return indices of the boxes kept for a single class.

            :param boxes: decoded boxes for all candidates.
            :param scores: scores of one class for the same candidates.
            :return: 1-D indices into ``boxes`` that pass the score threshold
                and, if ``cfgs.NMS`` is set, rotated NMS.
            """
            # Training uses the visualisation threshold, inference the
            # filtering threshold.
            if self.is_training:
                score_threshold = self.cfgs.VIS_SCORE
            else:
                score_threshold = self.cfgs.FILTERED_SCORE
            keep = tf.reshape(
                tf.where(tf.greater(scores, score_threshold)), [-1])

            if not self.cfgs.NMS:
                return keep

            candidate_boxes = tf.gather(boxes, keep)
            candidate_scores = tf.gather(scores, keep)
            nms_keep = nms_rotate.nms_rotate(
                decode_boxes=candidate_boxes,
                scores=candidate_scores,
                iou_threshold=self.cfgs.NMS_IOU_THRESHOLD,
                max_output_size=100 if self.is_training else 1000,
                use_gpu=False)

            # Map the NMS survivors back to indices into the full box set.
            return tf.gather(keep, nms_keep)

        boxes_pred = bbox_transform.rbbox_transform_inv(
            boxes=anchors,
            deltas=refine_bbox_pred,
            scale_factors=self.cfgs.ANCHOR_SCALE_FACTORS)

        boxes_per_class, scores_per_class, labels_per_class = [], [], []
        for cls_idx in range(0, self.cfgs.CLASS_NUM):
            cls_scores = refine_cls_prob[:, cls_idx]
            kept = filter_detections(boxes_pred, cls_scores)

            kept_boxes = tf.reshape(tf.gather(boxes_pred, kept), [-1, 5])
            kept_scores = tf.reshape(tf.gather(cls_scores, kept), [-1])

            boxes_per_class.append(kept_boxes)
            scores_per_class.append(kept_scores)
            labels_per_class.append(tf.ones_like(kept_scores) * (cls_idx + 1))

        return (tf.concat(boxes_per_class, axis=0),
                tf.concat(scores_per_class, axis=0),
                tf.concat(labels_per_class, axis=0))
class DetectionNetworkR3Det(DetectionNetworkBase):
    def __init__(self, cfgs, is_training):
        """Initialise the R3Det detection network.

        :param cfgs: configuration object; forwarded to the base class and to
            the two anchor samplers.
        :param is_training: forwarded to the base-class constructor.
        """
        super(DetectionNetworkR3Det, self).__init__(cfgs, is_training)
        # First-stage and refine-stage target samplers.
        self.anchor_sampler_retinenet = AnchorSamplerRetinaNet(cfgs)
        self.refine_anchor_sampler_r3det = RefineAnchorSamplerR3Det(cfgs)
        # NOTE(review): `self.cfgs` is presumably set by the base-class
        # constructor called above -- confirm against DetectionNetworkBase.
        self.losses = Loss(self.cfgs)

    def refine_cls_net(self, inputs, scope_list, reuse_flag, level):
        """Build the classification branch of the refine head for one level.

        ``cfgs.NUM_SUBNET_CONV`` 3x3 convolutions are followed by a 3x3
        prediction convolution (no activation) with ``CLASS_NUM`` output
        channels. With ``cfgs.USE_GN`` each tower convolution is linear and is
        followed by group normalisation and ReLU; otherwise ReLU is applied
        by the convolution itself.

        :param inputs: feature map fed to the first convolution.
        :param scope_list: scope names; entry 0 prefixes the tower layers and
            entry 2 names the prediction layer.
        :param reuse_flag: forwarded as ``reuse`` to every ``slim.conv2d``.
        :param level: pyramid level label, used only inside op names.
        :return: ``(scores, probs)`` -- the logits reshaped to
            ``[-1, CLASS_NUM]`` and their element-wise sigmoid.
        """
        cfgs = self.cfgs
        # Arguments that every convolution of this branch has in common.
        shared_conv_args = dict(
            kernel_size=[3, 3],
            stride=1,
            weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            trainable=self.is_training,
            reuse=reuse_flag)

        tower = inputs
        for layer_idx in range(cfgs.NUM_SUBNET_CONV):
            use_gn = cfgs.USE_GN
            tower = slim.conv2d(
                inputs=tower,
                num_outputs=cfgs.FPN_CHANNEL,
                activation_fn=None if use_gn else tf.nn.relu,
                biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                scope='{}_{}'.format(scope_list[0], layer_idx),
                **shared_conv_args)

            if use_gn:
                # Normalise first, then apply the non-linearity.
                tower = tf.nn.relu(tf.contrib.layers.group_norm(tower))

        # The prediction layer uses its own bias initializer and emits logits.
        logits = slim.conv2d(
            tower,
            num_outputs=cfgs.CLASS_NUM,
            activation_fn=None,
            biases_initializer=cfgs.FINAL_CONV_BIAS_INITIALIZER,
            scope=scope_list[2],
            **shared_conv_args)

        logits = tf.reshape(
            logits, [-1, cfgs.CLASS_NUM],
            name='refine_{}_classification_reshape'.format(level))
        probs = tf.sigmoid(
            logits, name='refine_{}_classification_sigmoid'.format(level))

        return logits, probs

    def refine_reg_net(self, inputs, scope_list, reuse_flag, level):
        """Build the box-regression branch of the refine head for one level.

        ``cfgs.NUM_SUBNET_CONV`` 3x3 convolutions are followed by a 3x3
        prediction convolution (no activation) with 5 output channels. With
        ``cfgs.USE_GN`` each tower convolution is linear and is followed by
        group normalisation and ReLU; otherwise ReLU is applied by the
        convolution itself.

        :param inputs: feature map fed to the first convolution.
        :param scope_list: scope names; entry 1 prefixes the tower layers and
            entry 3 names the prediction layer.
        :param reuse_flag: forwarded as ``reuse`` to every ``slim.conv2d``.
        :param level: pyramid level label, used only inside op names.
        :return: predicted deltas reshaped to ``[-1, 5]``.
        """
        cfgs = self.cfgs
        # All convolutions of this branch share every argument except the
        # output width, activation and scope.
        shared_conv_args = dict(
            kernel_size=[3, 3],
            stride=1,
            weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
            trainable=self.is_training,
            reuse=reuse_flag)

        tower = inputs
        for layer_idx in range(cfgs.NUM_SUBNET_CONV):
            use_gn = cfgs.USE_GN
            tower = slim.conv2d(
                inputs=tower,
                num_outputs=cfgs.FPN_CHANNEL,
                activation_fn=None if use_gn else tf.nn.relu,
                scope='{}_{}'.format(scope_list[1], layer_idx),
                **shared_conv_args)

            if use_gn:
                # Normalise first, then apply the non-linearity.
                tower = tf.nn.relu(tf.contrib.layers.group_norm(tower))

        deltas = slim.conv2d(
            tower,
            num_outputs=5,
            activation_fn=None,
            scope=scope_list[3],
            **shared_conv_args)

        return tf.reshape(
            deltas, [-1, 5],
            name='refine_{}_regression_reshape'.format(level))

    def refine_net(self, feature_pyramid, name):
        """Run the refine classification and regression heads on every level.

        :param feature_pyramid: mapping indexed by the entries of
            ``cfgs.LEVEL``.
        :param name: variable scope wrapping every layer built here.
        :return: three lists with one entry per level, in ``cfgs.LEVEL``
            order: box deltas, class scores and class probabilities.
        """
        base_scopes = [
            'conv2d_3x3_cls', 'conv2d_3x3_reg',
            'refine_classification', 'refine_regression'
        ]
        delta_boxes_per_level, scores_per_level, probs_per_level = [], [], []

        with tf.variable_scope(name), \
                slim.arg_scope([slim.conv2d],
                               weights_regularizer=slim.l2_regularizer(
                                   self.cfgs.WEIGHT_DECAY)):
            for level in self.cfgs.LEVEL:
                if self.cfgs.SHARE_NET:
                    # One set of weights for all levels: variables are
                    # created on the first level and reused afterwards.
                    scope_list = list(base_scopes)
                    is_first = level == self.cfgs.LEVEL[0]
                    reuse_flag = None if is_first else True
                else:
                    # Separate weights per level: suffix each scope name.
                    scope_list = [
                        prefix + '_' + level for prefix in base_scopes
                    ]
                    reuse_flag = None

                level_features = feature_pyramid[level]
                level_scores, level_probs = self.refine_cls_net(
                    level_features, scope_list, reuse_flag, level)
                level_deltas = self.refine_reg_net(
                    level_features, scope_list, reuse_flag, level)

                scores_per_level.append(level_scores)
                probs_per_level.append(level_probs)
                delta_boxes_per_level.append(level_deltas)

        return delta_boxes_per_level, scores_per_level, probs_per_level

    def refine_feature_op(self, points, feature_map, name):
        """Resample a feature map at ``points`` and add the result back.

        ``feature_map`` is first transformed by a 1x5 -> 5x1 convolution
        branch plus a parallel 1x1 convolution. The transformed map is then
        sampled at every point as a weighted sum of the four surrounding grid
        cells; each cell is weighted by ``|dx * dy|`` measured to the opposite
        cell. The samples are reshaped to ``[1, h, w, FPN_CHANNEL]`` and added
        to the transformed map.

        The final reshape requires a batch of one and exactly ``h * w``
        points. A point that lies exactly on a grid coordinate has
        ``floor == ceil``, so all four weights are zero and its sample is
        zero.

        :param points: 2-D tensor; column 0 is read as x and column 1 as y,
            in feature-map cell units.
        :param feature_map: 4-D feature map; axes 1 and 2 are taken as height
            and width.
        :param name: suffix for the scopes of the three convolutions.
        :return: sampled features plus the transformed feature map.
        """
        num_channels = self.cfgs.FPN_CHANNEL

        # tf.shape already yields int32, so no further casts are needed.
        map_shape = tf.shape(feature_map)
        h, w = map_shape[1], map_shape[2]
        max_x = tf.cast(w - 1, tf.float32)
        max_y = tf.cast(h - 1, tf.float32)

        px, py = points[:, 0], points[:, 1]

        # Surrounding integer cell coordinates, clamped to the map.
        xmin = tf.minimum(max_x, tf.maximum(0.0, tf.floor(px)))
        ymin = tf.minimum(max_y, tf.maximum(0.0, tf.floor(py)))
        xmax = tf.maximum(0.0, tf.minimum(max_x, tf.ceil(px)))
        ymax = tf.maximum(0.0, tf.minimum(max_y, tf.ceil(py)))

        def corner_index(y, x):
            # [N, 2] int32 (row, col) indices for tf.gather_nd.
            return tf.cast(tf.stack([y, x], axis=1), tf.int32)

        left_top = corner_index(ymin, xmin)
        right_bottom = corner_index(ymax, xmax)
        left_bottom = corner_index(ymax, xmin)
        right_top = corner_index(ymin, xmax)

        conv_args = dict(
            num_outputs=num_channels,
            weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
            stride=1,
            activation_fn=None,
            trainable=self.is_training)

        feature_1x5 = slim.conv2d(inputs=feature_map,
                                  kernel_size=[1, 5],
                                  scope='refine_1x5_{}'.format(name),
                                  **conv_args)
        feature5x1 = slim.conv2d(inputs=feature_1x5,
                                 kernel_size=[5, 1],
                                 scope='refine_5x1_{}'.format(name),
                                 **conv_args)
        feature_1x1 = slim.conv2d(inputs=feature_map,
                                  kernel_size=[1, 1],
                                  scope='refine_1x1_{}'.format(name),
                                  **conv_args)

        feature = feature5x1 + feature_1x1

        # Remove only the batch axis. An unrestricted tf.squeeze would also
        # drop a height, width or channel axis of size 1 and break the
        # (row, col) lookups below.
        feature_hw = tf.squeeze(feature, axis=0)

        left_top_feature = tf.gather_nd(feature_hw, left_top)
        right_bottom_feature = tf.gather_nd(feature_hw, right_bottom)
        left_bottom_feature = tf.gather_nd(feature_hw, left_bottom)
        right_top_feature = tf.gather_nd(feature_hw, right_top)

        def corner_weight(dx, dy):
            # |dx * dy| replicated across the channel axis.
            return tf.tile(tf.reshape(tf.abs(dx * dy), [-1, 1]),
                           [1, num_channels])

        # Each cell is weighted by the area spanned towards the opposite one.
        refine_feature = (
            right_bottom_feature * corner_weight(px - xmin, py - ymin)
            + left_top_feature * corner_weight(xmax - px, ymax - py)
            + right_top_feature * corner_weight(px - xmin, ymax - py)
            + left_bottom_feature * corner_weight(xmax - px, py - ymin))

        refine_feature = tf.reshape(refine_feature, [1, h, w, num_channels])

        return refine_feature + feature

    def refine_feature_five_op(self, points, feature_map, name):
        """Resample a feature map at five points per location and accumulate.

        ``feature_map`` is first transformed by a 1x5 -> 5x1 convolution
        branch plus a parallel 1x1 convolution. For each of five (x, y)
        column pairs of ``points`` the current map is sampled as a weighted
        sum of the four surrounding grid cells (weights ``|dx * dy|`` to the
        opposite cell), reshaped to ``[1, h, w, FPN_CHANNEL]`` and added to
        the map.

        The reshape requires a batch of one and exactly ``h * w`` rows in
        ``points``.

        NOTE(review): each iteration samples from the map that already
        contains the previous iterations' contributions -- confirm that this
        accumulation order is intended.

        :param points: 2-D tensor of (x, y) column pairs in feature-map cell
            units. Pairs are read at column offsets ``2 * (i - 1)`` for
            ``i = 0..4``, so the first iteration reads the last two columns
            through negative indexing. NOTE(review): presumably ten columns
            are expected; with eight the last pair would be sampled twice --
            verify against the caller.
        :param feature_map: 4-D feature map; axes 1 and 2 are taken as height
            and width.
        :param name: suffix for the scopes of the three convolutions.
        :return: transformed feature map plus the five sampled contributions.
        """
        num_channels = self.cfgs.FPN_CHANNEL

        # tf.shape already yields int32, so no further casts are needed.
        map_shape = tf.shape(feature_map)
        h, w = map_shape[1], map_shape[2]
        max_x = tf.cast(w - 1, tf.float32)
        max_y = tf.cast(h - 1, tf.float32)

        conv_args = dict(
            num_outputs=num_channels,
            weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
            biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
            stride=1,
            activation_fn=None,
            trainable=self.is_training)

        feature_1x5 = slim.conv2d(inputs=feature_map,
                                  kernel_size=[1, 5],
                                  scope='refine_1x5_{}'.format(name),
                                  **conv_args)
        feature5x1 = slim.conv2d(inputs=feature_1x5,
                                 kernel_size=[5, 1],
                                 scope='refine_5x1_{}'.format(name),
                                 **conv_args)
        feature_1x1 = slim.conv2d(inputs=feature_map,
                                  kernel_size=[1, 1],
                                  scope='refine_1x1_{}'.format(name),
                                  **conv_args)

        feature = feature5x1 + feature_1x1

        def corner_index(y, x):
            # [N, 2] int32 (row, col) indices for tf.gather_nd.
            return tf.cast(tf.stack([y, x], axis=1), tf.int32)

        def corner_weight(dx, dy):
            # |dx * dy| replicated across the channel axis.
            return tf.tile(tf.reshape(tf.abs(dx * dy), [-1, 1]),
                           [1, num_channels])

        for i in range(5):
            # Offset -2 on the first pass selects the last (x, y) pair.
            col = 2 * (i - 1)
            px, py = points[:, col], points[:, col + 1]

            # Clamp both neighbours on both sides, as refine_feature_op does,
            # so out-of-range points cannot produce out-of-bounds indices.
            # Points inside the map are unaffected.
            xmin = tf.minimum(max_x, tf.maximum(0.0, tf.floor(px)))
            ymin = tf.minimum(max_y, tf.maximum(0.0, tf.floor(py)))
            xmax = tf.maximum(0.0, tf.minimum(max_x, tf.ceil(px)))
            ymax = tf.maximum(0.0, tf.minimum(max_y, tf.ceil(py)))

            left_top = corner_index(ymin, xmin)
            right_bottom = corner_index(ymax, xmax)
            left_bottom = corner_index(ymax, xmin)
            right_top = corner_index(ymin, xmax)

            # Remove only the batch axis. An unrestricted tf.squeeze would
            # also drop a height, width or channel axis of size 1. The
            # squeeze stays inside the loop because `feature` changes on
            # every pass.
            feature_hw = tf.squeeze(feature, axis=0)

            left_top_feature = tf.gather_nd(feature_hw, left_top)
            right_bottom_feature = tf.gather_nd(feature_hw, right_bottom)
            left_bottom_feature = tf.gather_nd(feature_hw, left_bottom)
            right_top_feature = tf.gather_nd(feature_hw, right_top)

            refine_feature = (
                right_bottom_feature * corner_weight(px - xmin, py - ymin)
                + left_top_feature * corner_weight(xmax - px, ymax - py)
                + right_top_feature * corner_weight(px - xmin, ymax - py)
                + left_bottom_feature * corner_weight(xmax - px, py - ymin))

            refine_feature = tf.reshape(refine_feature,
                                        [1, h, w, num_channels])

            feature += refine_feature

        return feature

    def refine_stage(self,
                     input_img_batch,
                     gtboxes_batch_r,
                     box_pred_list,
                     cls_prob_list,
                     proposal_list,
                     feature_pyramid,
                     gpu_id,
                     pos_threshold,
                     neg_threshold,
                     stage,
                     proposal_filter=False):
        with tf.variable_scope('refine_feature_pyramid{}'.format(stage)):
            refine_feature_pyramid = {}
            refine_boxes_list = []

            for box_pred, cls_prob, proposal, stride, level in \
                    zip(box_pred_list, cls_prob_list, proposal_list,
                        self.cfgs.ANCHOR_STRIDE, self.cfgs.LEVEL):

                if proposal_filter:
                    box_pred = tf.reshape(
                        box_pred, [-1, self.num_anchors_per_location, 5])
                    proposal = tf.reshape(proposal, [
                        -1, self.num_anchors_per_location,
                        5 if self.method == 'R' else 4
                    ])
                    cls_prob = tf.reshape(cls_prob, [
                        -1, self.num_anchors_per_location, self.cfgs.CLASS_NUM
                    ])

                    cls_max_prob = tf.reduce_max(cls_prob, axis=-1)
                    box_pred_argmax = tf.cast(
                        tf.reshape(tf.argmax(cls_max_prob, axis=-1), [-1, 1]),
                        tf.int32)
                    indices = tf.cast(
                        tf.cumsum(tf.ones_like(box_pred_argmax), axis=0),
                        tf.int32) - tf.constant(1, tf.int32)
                    indices = tf.concat([indices, box_pred_argmax], axis=-1)

                    box_pred = tf.reshape(tf.gather_nd(box_pred, indices),
                                          [-1, 5])
                    proposal = tf.reshape(tf.gather_nd(proposal, indices),
                                          [-1, 5 if self.method == 'R' else 4])

                    if self.cfgs.METHOD == 'H':
                        x_c = (proposal[:, 2] + proposal[:, 0]) / 2
                        y_c = (proposal[:, 3] + proposal[:, 1]) / 2
                        h = proposal[:, 2] - proposal[:, 0] + 1
                        w = proposal[:, 3] - proposal[:, 1] + 1
                        theta = -90 * tf.ones_like(x_c)
                        proposal = tf.transpose(
                            tf.stack([x_c, y_c, w, h, theta]))
                else:
                    box_pred = tf.reshape(box_pred, [-1, 5])
                    proposal = tf.reshape(proposal, [-1, 5])

                bboxes = bbox_transform.rbbox_transform_inv(boxes=proposal,
                                                            deltas=box_pred)
                refine_boxes_list.append(bboxes)

                center_point = bboxes[:, :2] / stride
                refine_feature_pyramid[level] = self.refine_feature_op(
                    points=center_point,
                    feature_map=feature_pyramid[level],
                    name=level)
                # points = coordinate5_2_8_tf(bboxes) / stride
                # refine_feature_pyramid[level] = self.refine_feature_five_op(points=points,
                #                                                             feature_map=feature_pyramid[level],
                #                                                             name=level)

            refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list = self.refine_net(
                refine_feature_pyramid, 'refine_net{}'.format(stage))

            refine_box_pred = tf.concat(refine_box_pred_list, axis=0)
            refine_cls_score = tf.concat(refine_cls_score_list, axis=0)
            # refine_cls_prob = tf.concat(refine_cls_prob_list, axis=0)
            refine_boxes = tf.concat(refine_boxes_list, axis=0)

        if self.is_training:
            with tf.variable_scope('build_refine_loss{}'.format(stage)):
                refine_labels, refine_target_delta, refine_box_states, refine_target_boxes = tf.py_func(
                    func=self.refine_anchor_sampler_r3det.
                    refine_anchor_target_layer,
                    inp=[
                        gtboxes_batch_r, refine_boxes, pos_threshold,
                        neg_threshold, gpu_id
                    ],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                self.add_anchor_img_smry(input_img_batch, refine_boxes,
                                         refine_box_states, 1)

                refine_cls_loss = self.losses.focal_loss(
                    refine_labels, refine_cls_score, refine_box_states)
                if self.cfgs.USE_IOU_FACTOR:
                    refine_reg_loss = self.losses.iou_smooth_l1_loss_exp(
                        refine_target_delta,
                        refine_box_pred,
                        refine_box_states,
                        refine_target_boxes,
                        refine_boxes,
                        is_refine=True)
                else:
                    refine_reg_loss = self.losses.smooth_l1_loss(
                        refine_target_delta, refine_box_pred,
                        refine_box_states)

                self.losses_dict['refine_cls_loss{}'.format(
                    stage)] = refine_cls_loss * self.cfgs.CLS_WEIGHT
                self.losses_dict['refine_reg_loss{}'.format(
                    stage)] = refine_reg_loss * self.cfgs.REG_WEIGHT

        return refine_box_pred_list, refine_cls_prob_list, refine_boxes_list

    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h=None,
                                      gtboxes_batch_r=None,
                                      gpu_id=0):
        """Assemble the graph: backbone, first-stage heads, anchors, refine stages.

        When ``self.is_training`` is set, the first-stage classification and
        regression losses are written to ``self.losses_dict`` under the keys
        ``'cls_loss'`` and ``'reg_loss'``.

        :param input_img_batch: image tensor handed to the backbone; reshaped
            to [1, IMG_SHORT_SIDE_LEN, IMG_MAX_LENGTH, 3] when cfgs.USE_GN is set
        :param gtboxes_batch_h: ground truth reshaped to [-1, 5] and cast to
            float32 (only touched in training)
        :param gtboxes_batch_r: ground truth reshaped to [-1, 6] and cast to
            float32 (only touched in training)
        :param gpu_id: forwarded to the anchor sampler and to each refine stage
        :return: (box_pred, cls_prob, proposal), each concatenated along
            axis 0. In training only the outputs of the last refine stage are
            concatenated; otherwise the outputs of every stage are.
        """
        cfgs = self.cfgs

        if self.is_training:
            gtboxes_batch_h = tf.cast(tf.reshape(gtboxes_batch_h, [-1, 5]),
                                      tf.float32)
            gtboxes_batch_r = tf.cast(tf.reshape(gtboxes_batch_r, [-1, 6]),
                                      tf.float32)

        if cfgs.USE_GN:
            fixed_shape = [1, cfgs.IMG_SHORT_SIDE_LEN, cfgs.IMG_MAX_LENGTH, 3]
            input_img_batch = tf.reshape(input_img_batch, fixed_shape)

        # Step 1: backbone features.
        feature_pyramid = self.build_backbone(input_img_batch)

        # Step 2: first-stage heads.
        rpn_outputs = self.rpn_net(feature_pyramid, 'rpn_net')
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = rpn_outputs
        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)

        # Step 3: anchors.
        anchor_list = self.make_anchors(feature_pyramid, use_tf=True)
        anchors = tf.concat(anchor_list, axis=0)

        # Step 4: first-stage losses (training only).
        if self.is_training:
            with tf.variable_scope('build_loss'):
                sampled = tf.py_func(
                    func=self.anchor_sampler_retinenet.anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors, gpu_id],
                    Tout=[tf.float32] * 4)
                labels, target_delta, anchor_states, target_boxes = sampled

                # The summary helper takes 0 for method 'H' and 1 otherwise.
                smry_method = 0 if self.method == 'H' else 1
                self.add_anchor_img_smry(input_img_batch, anchors,
                                         anchor_states, smry_method)

                cls_loss = self.losses.focal_loss(labels, rpn_cls_score,
                                                  anchor_states)

                if cfgs.USE_IOU_FACTOR:
                    reg_loss = self.losses.iou_smooth_l1_loss_exp(
                        target_delta, rpn_box_pred, anchor_states,
                        target_boxes, anchors,
                        alpha=cfgs.ALPHA, beta=cfgs.BETA)
                else:
                    reg_loss = self.losses.smooth_l1_loss(
                        target_delta, rpn_box_pred, anchor_states)

                self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

        # Step 5: refine stages; each stage consumes the previous stage's lists.
        box_pred_list = rpn_box_pred_list
        cls_prob_list = rpn_cls_prob_list
        proposal_list = anchor_list

        all_box_pred_list = []
        all_cls_prob_list = []
        all_proposal_list = []

        for stage_idx in range(cfgs.NUM_REFINE_STAGE):
            first_stage = stage_idx == 0
            stage_suffix = '' if first_stage else '_stage{}'.format(
                stage_idx + 2)

            box_pred_list, cls_prob_list, proposal_list = self.refine_stage(
                input_img_batch,
                gtboxes_batch_r,
                box_pred_list,
                cls_prob_list,
                proposal_list,
                feature_pyramid,
                gpu_id,
                pos_threshold=cfgs.REFINE_IOU_POSITIVE_THRESHOLD[stage_idx],
                neg_threshold=cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[stage_idx],
                stage=stage_suffix,
                proposal_filter=first_stage)

            if self.is_training:
                # Training keeps only the most recent stage.
                all_box_pred_list = box_pred_list
                all_cls_prob_list = cls_prob_list
                all_proposal_list = proposal_list
            else:
                # Otherwise the outputs of every stage are accumulated.
                all_box_pred_list.extend(box_pred_list)
                all_cls_prob_list.extend(cls_prob_list)
                all_proposal_list.extend(proposal_list)

        box_pred = tf.concat(all_box_pred_list, axis=0)
        cls_prob = tf.concat(all_cls_prob_list, axis=0)
        proposal = tf.concat(all_proposal_list, axis=0)
        return box_pred, cls_prob, proposal
class DetectionNetworkRetinaNet(DetectionNetworkBase):
    """Detector built from a backbone, first-stage heads and per-class rotated NMS."""

    def __init__(self, cfgs, is_training):
        """Initialise the base network, the anchor sampler and the loss helper."""
        super(DetectionNetworkRetinaNet, self).__init__(cfgs, is_training)
        self.anchor_sampler_retinenet = AnchorSamplerRetinaNet(cfgs)
        self.losses = Loss(self.cfgs)

    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h=None,
                                      gtboxes_batch_r=None,
                                      gpu_id=0):
        """Assemble the graph: backbone, heads, anchors, losses and post-processing.

        :param input_img_batch: image tensor handed to the backbone; reshaped
            to [1, IMG_SHORT_SIDE_LEN, IMG_MAX_LENGTH, 3] when cfgs.USE_GN is set
        :param gtboxes_batch_h: ground truth reshaped to [-1, 5] and cast to
            float32 (only touched in training)
        :param gtboxes_batch_r: ground truth reshaped to [-1, 6] and cast to
            float32 (only touched in training)
        :param gpu_id: forwarded to the anchor sampler and to the NMS step
        :return: (boxes, scores, category) with gradients stopped; in training
            ``self.losses_dict`` is appended as a fourth element
        """
        cfgs = self.cfgs

        if self.is_training:
            gtboxes_batch_h = tf.cast(tf.reshape(gtboxes_batch_h, [-1, 5]),
                                      tf.float32)
            gtboxes_batch_r = tf.cast(tf.reshape(gtboxes_batch_r, [-1, 6]),
                                      tf.float32)

        if cfgs.USE_GN:
            fixed_shape = [1, cfgs.IMG_SHORT_SIDE_LEN, cfgs.IMG_MAX_LENGTH, 3]
            input_img_batch = tf.reshape(input_img_batch, fixed_shape)

        # Step 1: backbone features.
        feature_pyramid = self.build_backbone(input_img_batch)

        # Step 2: first-stage heads, concatenated along axis 0.
        head_outputs = self.rpn_net(feature_pyramid, 'rpn_net')
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = head_outputs
        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
        rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)

        # Step 3: anchors.
        anchor_list = self.make_anchors(feature_pyramid)
        anchors = tf.concat(anchor_list, axis=0)

        # Step 4: losses (training only).
        if self.is_training:
            with tf.variable_scope('build_loss'):
                sampled = tf.py_func(
                    func=self.anchor_sampler_retinenet.anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors, gpu_id],
                    Tout=[tf.float32] * 4)
                labels, target_delta, anchor_states, target_boxes = sampled

                # The summary helper takes 0 for method 'H' and 1 otherwise.
                smry_method = 0 if self.method == 'H' else 1
                self.add_anchor_img_smry(input_img_batch, anchors,
                                         anchor_states, smry_method)

                cls_loss = self.losses.focal_loss(labels, rpn_cls_score,
                                                  anchor_states)

                # REG_LOSS_MODE: 0 -> log variant, 1 -> exp variant,
                # anything else -> plain smooth L1.
                reg_mode = cfgs.REG_LOSS_MODE
                if reg_mode == 0:
                    reg_loss = self.losses.iou_smooth_l1_loss_log(
                        target_delta, rpn_box_pred, anchor_states,
                        target_boxes, anchors)
                elif reg_mode == 1:
                    reg_loss = self.losses.iou_smooth_l1_loss_exp(
                        target_delta, rpn_box_pred, anchor_states,
                        target_boxes, anchors,
                        alpha=cfgs.ALPHA, beta=cfgs.BETA)
                else:
                    reg_loss = self.losses.smooth_l1_loss(
                        target_delta, rpn_box_pred, anchor_states)

                self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

        # Step 5: decode + NMS; the detections never feed gradients back.
        with tf.variable_scope('postprocess_detctions'):
            detections = self.postprocess_detctions(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                anchors=anchors,
                gpu_id=gpu_id)
            boxes, scores, category = (tf.stop_gradient(tensor)
                                       for tensor in detections)

        if self.is_training:
            return boxes, scores, category, self.losses_dict
        return boxes, scores, category

    def postprocess_detctions(self, rpn_bbox_pred, rpn_cls_prob, anchors,
                              gpu_id):
        """Score-filter, decode and NMS the predictions one class at a time.

        :param rpn_bbox_pred: regression deltas, gathered with the same row
            indices as ``anchors``
        :param rpn_cls_prob: class scores; column j is emitted with label j + 1
        :param anchors: anchor boxes; rebuilt as (x_c, y_c, w, h, theta) rows
            when ``self.method == 'H'``
        :param gpu_id: forwarded to ``nms_rotate.nms_rotate``
        :return: (boxes, scores, labels), each concatenated over all classes
        """
        # VIS_SCORE / 100 kept boxes while training,
        # FILTERED_SCORE / 1000 kept boxes otherwise.
        if self.is_training:
            score_threshold = self.cfgs.VIS_SCORE
            nms_keep = 100
        else:
            score_threshold = self.cfgs.FILTERED_SCORE
            nms_keep = 1000
        wide_angle = self.cfgs.ANGLE_RANGE == 180

        boxes_per_class = []
        scores_per_class = []
        labels_per_class = []

        for class_idx in range(self.cfgs.CLASS_NUM):
            class_scores = rpn_cls_prob[:, class_idx]
            passing = tf.reshape(
                tf.where(tf.greater(class_scores, score_threshold)), [-1])

            class_anchors = tf.gather(anchors, passing)
            class_deltas = tf.gather(rpn_bbox_pred, passing)
            class_scores = tf.gather(class_scores, passing)

            if self.method == 'H':
                # Columns 0-3 are combined into centre / extent values and a
                # constant theta of -90 is attached (assumes corner-style
                # anchors -- TODO confirm against make_anchors).
                x_c = (class_anchors[:, 2] + class_anchors[:, 0]) / 2
                y_c = (class_anchors[:, 3] + class_anchors[:, 1]) / 2
                h = class_anchors[:, 2] - class_anchors[:, 0] + 1
                w = class_anchors[:, 3] - class_anchors[:, 1] + 1
                theta = -90 * tf.ones_like(x_c)
                class_anchors = tf.transpose(
                    tf.stack([x_c, y_c, w, h, theta]))

            if wide_angle:
                converted = tf.py_func(coordinate_present_convert,
                                       inp=[class_anchors, -1],
                                       Tout=[tf.float32])
                class_anchors = tf.reshape(converted, [-1, 5])

            decoded = bbox_transform.rbbox_transform_inv(
                boxes=class_anchors, deltas=class_deltas)

            if wide_angle:
                # Keep only boxes whose decoded angle lies in [-180, 0).
                _x, _y, _w, _h, decoded_theta = tf.unstack(decoded, axis=1)
                in_range = tf.reshape(
                    tf.where(
                        tf.logical_and(
                            tf.less(decoded_theta, 0),
                            tf.greater_equal(decoded_theta, -180))), [-1])
                decoded = tf.gather(decoded, in_range)
                class_scores = tf.gather(class_scores, in_range)

                converted = tf.py_func(coordinate_present_convert,
                                       inp=[decoded, 1],
                                       Tout=[tf.float32])
                decoded = tf.reshape(converted, [-1, 5])

            nms_indices = nms_rotate.nms_rotate(
                decode_boxes=decoded,
                scores=class_scores,
                iou_threshold=self.cfgs.NMS_IOU_THRESHOLD,
                max_output_size=nms_keep,
                use_gpu=True,
                gpu_id=gpu_id)

            kept_boxes = tf.reshape(tf.gather(decoded, nms_indices), [-1, 5])
            kept_scores = tf.reshape(tf.gather(class_scores, nms_indices),
                                     [-1])

            boxes_per_class.append(kept_boxes)
            scores_per_class.append(kept_scores)
            labels_per_class.append(
                tf.ones_like(kept_scores) * (class_idx + 1))

        all_boxes = tf.concat(boxes_per_class, axis=0)
        all_scores = tf.concat(scores_per_class, axis=0)
        all_labels = tf.concat(labels_per_class, axis=0)

        return all_boxes, all_scores, all_labels
Exemple #6
0
class DetectionNetworkR2CNN(DetectionNetworkBase):
    """Two-stage detector: RPN proposals followed by a Fast-RCNN style box head."""

    def __init__(self, cfgs, is_training):
        """Initialise the base network, both samplers, losses, RoI extractor and box head."""
        super(DetectionNetworkR2CNN, self).__init__(cfgs, is_training)
        self.anchor_sampler_r2cnn = AnchorSamplerR2CNN(cfgs)
        self.proposal_sampler_r2cnn = ProposalSamplerR2CNN(cfgs)
        self.losses = Loss(cfgs)
        self.roi_extractor = RoIExtractor(cfgs)
        self.box_head = BoxHead(cfgs)

    def build_loss(self, rpn_box_pred, rpn_bbox_targets, rpn_cls_score,
                   rpn_labels, bbox_pred, bbox_targets, cls_score, labels):
        """Compute the four weighted losses and store them in ``self.losses_dict``.

        Keys written: ``'rpn_cls_loss'``, ``'rpn_reg_loss'``,
        ``'fast_cls_loss'`` and ``'fast_reg_loss'``.

        :param rpn_box_pred: RPN regression output
        :param rpn_bbox_targets: RPN regression targets
        :param rpn_cls_score: RPN logits; reshaped to [-1, 2] after filtering
        :param rpn_labels: RPN labels; entries equal to -1 are dropped from
            the classification loss
        :param bbox_pred: box-head regression output
        :param bbox_targets: box-head regression targets
        :param cls_score: box-head logits
        :param labels: box-head labels
        """
        with tf.variable_scope('build_loss'):

            with tf.variable_scope('rpn_loss'):
                # The regression loss receives the unfiltered labels.
                rpn_reg_loss = self.losses.smooth_l1_loss_rpn(
                    bbox_pred=rpn_box_pred,
                    bbox_targets=rpn_bbox_targets,
                    label=rpn_labels,
                    sigma=self.cfgs.RPN_SIGMA)

                # Classification only looks at rows whose label is not -1.
                valid_rows = tf.reshape(
                    tf.where(tf.not_equal(rpn_labels, -1)), [-1])
                valid_scores = tf.reshape(
                    tf.gather(rpn_cls_score, valid_rows), [-1, 2])
                valid_labels = tf.reshape(
                    tf.gather(rpn_labels, valid_rows), [-1])
                rpn_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=valid_scores, labels=valid_labels)
                rpn_cls_loss = tf.reduce_mean(rpn_xent)

                self.losses_dict['rpn_cls_loss'] = (
                    rpn_cls_loss * self.cfgs.RPN_CLASSIFICATION_LOSS_WEIGHT)
                self.losses_dict['rpn_reg_loss'] = (
                    rpn_reg_loss * self.cfgs.RPN_LOCATION_LOSS_WEIGHT)

            with tf.variable_scope('FastRCNN_loss'):
                reg_loss = self.losses.smooth_l1_loss_rcnn_r(
                    bbox_pred=bbox_pred,
                    bbox_targets=bbox_targets,
                    label=labels,
                    num_classes=self.cfgs.CLASS_NUM + 1,
                    sigma=self.cfgs.FASTRCNN_SIGMA)

                # No filtering here: the RoIs were already sampled upstream.
                fast_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=cls_score, labels=labels)
                cls_loss = tf.reduce_mean(fast_xent)

                self.losses_dict['fast_cls_loss'] = (
                    cls_loss * self.cfgs.FAST_RCNN_CLASSIFICATION_LOSS_WEIGHT)
                self.losses_dict['fast_reg_loss'] = (
                    reg_loss * self.cfgs.FAST_RCNN_LOCATION_LOSS_WEIGHT)

    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h=None,
                                      gtboxes_batch_r=None,
                                      gpu_id=0):
        """Assemble the graph: backbone, RPN, proposal sampling, box head, post-processing.

        :param input_img_batch: image tensor handed to the backbone
        :param gtboxes_batch_h: ground truth reshaped to [-1, 5] and cast to
            float32 (only touched in training)
        :param gtboxes_batch_r: ground truth reshaped to [-1, 6] and cast to
            float32 (only touched in training)
        :param gpu_id: forwarded to the rotated NMS step
        :return: (final_bbox, final_scores, final_category); in training
            ``self.losses_dict`` is appended as a fourth element
        """
        if self.is_training:
            gtboxes_batch_h = tf.cast(tf.reshape(gtboxes_batch_h, [-1, 5]),
                                      tf.float32)
            gtboxes_batch_r = tf.cast(tf.reshape(gtboxes_batch_r, [-1, 6]),
                                      tf.float32)

        img_shape = tf.shape(input_img_batch)

        # Step 1: backbone features.
        feature_pyramid = self.build_backbone(input_img_batch)

        # Step 2: RPN heads.
        fpn_box_pred, fpn_cls_score, fpn_cls_prob = self.rpn(feature_pyramid)

        # Step 3: anchors.
        anchor_list = self.make_anchors(feature_pyramid)
        anchors = tf.concat(anchor_list, axis=0)

        # Step 4: turn RPN outputs into proposals (decode, clip, NMS).
        with tf.variable_scope('postprocess_FPN'):
            rois, roi_scores = self.postprocess_rpn_proposals(
                rpn_bbox_pred=fpn_box_pred,
                rpn_cls_prob=fpn_cls_prob,
                img_shape=img_shape,
                anchors=anchors,
                is_training=self.is_training)

        # Step 5: sample anchor and RoI minibatches (training only).
        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                fpn_labels, fpn_bbox_targets = tf.py_func(
                    self.anchor_sampler_r2cnn.anchor_target_layer,
                    [gtboxes_batch_h, img_shape, anchors],
                    [tf.float32, tf.float32])
                fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
                fpn_labels = tf.reshape(
                    tf.to_int32(fpn_labels, name="to_int32"), [-1])
                self.add_anchor_img_smry(input_img_batch,
                                         anchors,
                                         fpn_labels,
                                         method=0)

            # RPN accuracy summary over anchors whose label is not -1.
            rpn_category = tf.argmax(fpn_cls_prob, axis=1)
            valid_anchors = tf.reshape(
                tf.where(tf.not_equal(fpn_labels, -1)), [-1])
            rpn_category = tf.gather(rpn_category, valid_anchors)
            rpn_targets = tf.to_int64(tf.gather(fpn_labels, valid_anchors))
            acc = tf.reduce_mean(
                tf.to_float(tf.equal(rpn_category, rpn_targets)))
            tf.summary.scalar('ACC/fpn_accuracy', acc)

            with tf.control_dependencies([fpn_labels]), \
                    tf.variable_scope('sample_RCNN_minibatch'):
                rois, labels, _, bbox_targets, _, _ = tf.py_func(
                    self.proposal_sampler_r2cnn.proposal_target_layer,
                    [rois, gtboxes_batch_h, gtboxes_batch_r],
                    [tf.float32] * 6)
                rois = tf.reshape(rois, [-1, 4])
                labels = tf.reshape(tf.to_int32(labels), [-1])
                bbox_targets = tf.reshape(
                    bbox_targets, [-1, 5 * (self.cfgs.CLASS_NUM + 1)])
                self.add_roi_batch_img_smry(input_img_batch,
                                            rois,
                                            labels,
                                            method=0)

        # Step 6: assign RoIs to pyramid levels.
        if self.is_training:
            rois_list, labels, bbox_targets = self.assign_levels(
                all_rois=rois, labels=labels, bbox_targets=bbox_targets)
        else:
            rois_list = self.assign_levels(all_rois=rois)

        # Step 7: Fast-RCNN part (RoI align/pooling + box head).
        bbox_pred, cls_score = self.box_head.fpn_fc_head(
            self.roi_extractor, rois_list, feature_pyramid, img_shape,
            self.is_training)
        rois = tf.concat(rois_list, axis=0, name='concat_rois')
        cls_prob = slim.softmax(cls_score, 'cls_prob')

        # Step 8: box-head accuracy summary and losses (training only).
        if self.is_training:
            cls_category = tf.argmax(cls_prob, axis=1)
            fast_acc = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc', fast_acc)

            self.build_loss(rpn_box_pred=fpn_box_pred,
                            rpn_bbox_targets=fpn_bbox_targets,
                            rpn_cls_score=fpn_cls_score,
                            rpn_labels=fpn_labels,
                            bbox_pred=bbox_pred,
                            bbox_targets=bbox_targets,
                            cls_score=cls_score,
                            labels=labels)

        # Step 9: decode the box head output and run per-class NMS.
        final_bbox, final_scores, final_category = self.postprocess_fastrcnn(
            rois=rois, bbox_ppred=bbox_pred, scores=cls_prob, gpu_id=gpu_id)

        if self.is_training:
            return final_bbox, final_scores, final_category, self.losses_dict
        return final_bbox, final_scores, final_category

    def postprocess_fastrcnn(self, rois, bbox_ppred, scores, gpu_id):
        '''
        Decode the box-head output per class, apply rotated NMS and drop
        low-scoring detections.

        :param rois: [-1, 4]
        :param bbox_ppred: [-1, (cfgs.CLASS_NUM + 1) * 5]
        :param scores: [-1, cfgs.CLASS_NUM + 1]
        :param gpu_id: forwarded to nms_rotate.nms_rotate
        :return: (final_boxes, final_scores, final_category)
        '''

        with tf.name_scope('postprocess_fastrcnn'):
            rois = tf.stop_gradient(rois)
            scores = tf.stop_gradient(scores)
            bbox_ppred = tf.stop_gradient(
                tf.reshape(bbox_ppred, [-1, self.cfgs.CLASS_NUM + 1, 5]))

            deltas_by_class = tf.unstack(bbox_ppred, axis=1)
            scores_by_class = tf.unstack(scores, axis=1)

            # Rebuild the 4-column RoIs as (x_c, y_c, w, h, theta) rows with
            # a constant theta of -90.
            x_c = (rois[:, 2] + rois[:, 0]) / 2
            y_c = (rois[:, 3] + rois[:, 1]) / 2
            h = rois[:, 2] - rois[:, 0] + 1
            w = rois[:, 3] - rois[:, 1] + 1
            theta = -90 * tf.ones_like(x_c)
            rois = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))

            kept_boxes = []
            kept_scores = []
            kept_categories = []

            # Index 0 of the class axis is skipped.
            for class_id in range(1, self.cfgs.CLASS_NUM + 1):
                class_deltas = deltas_by_class[class_id]
                class_scores = scores_by_class[class_id]

                # 1. decode boxes for this class
                decoded = bbox_transform.rbbox_transform_inv(
                    boxes=rois,
                    deltas=class_deltas,
                    scale_factors=self.cfgs.ROI_SCALE_FACTORS)

                # 2. (clipping to image boundaries is not performed)

                # 3. NMS
                if self.cfgs.SOFT_NMS:
                    print("Using Soft NMS.......")
                    raise NotImplementedError(
                        "soft NMS for rotate has not implemented")

                inference_cap = 4000 if 'DOTA' in self.cfgs.NET_NAME else 200
                keep = nms_rotate.nms_rotate(
                    decode_boxes=decoded,
                    scores=class_scores,
                    iou_threshold=self.cfgs.FAST_RCNN_NMS_IOU_THRESHOLD,
                    max_output_size=100 if self.is_training else inference_cap,
                    use_gpu=self.cfgs.ROTATE_NMS_USE_GPU,
                    gpu_id=gpu_id)

                class_boxes = tf.gather(decoded, keep)
                class_kept_scores = tf.gather(class_scores, keep)

                kept_boxes.append(class_boxes)
                kept_scores.append(class_kept_scores)
                kept_categories.append(
                    tf.ones_like(class_kept_scores) * class_id)

            final_boxes = tf.concat(kept_boxes, axis=0)
            final_scores = tf.concat(kept_scores, axis=0)
            final_category = tf.concat(kept_categories, axis=0)

            # In training the detections are shown in TensorBoard, so the
            # VIS_SCORE threshold is used; otherwise FILTERED_SCORE applies.
            if self.is_training:
                min_score = self.cfgs.VIS_SCORE
            else:
                min_score = self.cfgs.FILTERED_SCORE
            kept_indices = tf.reshape(
                tf.where(tf.greater_equal(final_scores, min_score)), [-1])

            final_boxes = tf.gather(final_boxes, kept_indices)
            final_scores = tf.gather(final_scores, kept_indices)
            final_category = tf.gather(final_category, kept_indices)

            return final_boxes, final_scores, final_category
 def __init__(self, cfgs, is_training):
     """Initialise the base network and attach the CSL-specific helpers.

     :param cfgs: configuration object; must expose ANGLE_RANGE and OMEGA
     :param is_training: forwarded unchanged to the base-class constructor
     """
     super(DetectionNetwork, self).__init__(cfgs, is_training)
     self.anchor_sampler_csl = AnchorSamplerCSL(cfgs)
     # Reads self.cfgs rather than the argument; presumably the base
     # constructor stores cfgs on self -- confirm against the base class.
     self.losses = Loss(self.cfgs)
     # Integer quotient of the configured angle range and OMEGA.
     self.coding_len = cfgs.ANGLE_RANGE // cfgs.OMEGA
class DetectionNetwork(DetectionNetworkBase):
    '''
    Single-stage rotated-box detector with an additional angle-classification head.

    For every pyramid level the network predicts class scores (through
    ``self.rpn_cls_net``, which is not defined in this class and is presumably
    inherited from ``DetectionNetworkBase``), five box deltas per anchor and
    ``coding_len`` angle logits per anchor.
    '''

    def __init__(self, cfgs, is_training):
        '''
        :param cfgs: configuration object; ``ANGLE_RANGE`` and ``OMEGA`` are read here
        :param is_training: forwarded unchanged to the base-class initialiser
        '''
        super(DetectionNetwork, self).__init__(cfgs, is_training)
        self.anchor_sampler_csl = AnchorSamplerCSL(cfgs)
        # ``self.cfgs`` is not assigned here; presumably set by the base-class initialiser.
        self.losses = Loss(self.cfgs)
        # Number of angle bins: the angle range divided into OMEGA-wide steps.
        self.coding_len = cfgs.ANGLE_RANGE // cfgs.OMEGA

    def rpn_reg_net(self, inputs, scope_list, reuse_flag, level):
        '''
        Build the regression tower of one pyramid level together with its two
        output heads (box deltas and angle classification).

        :param inputs: feature map of the level, fed to the first convolution
        :param scope_list: scope names; index 1 names the tower convolutions,
                           index 3 the box-delta head and index 4 the angle head
        :param reuse_flag: forwarded as ``reuse`` to every ``slim.conv2d`` call
        :param level: level identifier, only used to name the reshape ops
        :return: (rpn_delta_boxes reshaped to [-1, 5],
                  rpn_angle_cls reshaped to [-1, self.coding_len])
        '''
        rpn_conv2d_3x3 = inputs
        # Shared tower: NUM_SUBNET_CONV stacked 3x3 conv + ReLU layers.
        for i in range(self.cfgs.NUM_SUBNET_CONV):
            rpn_conv2d_3x3 = slim.conv2d(inputs=rpn_conv2d_3x3,
                                         num_outputs=self.cfgs.FPN_CHANNEL,
                                         kernel_size=[3, 3],
                                         weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                                         biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
                                         stride=1,
                                         activation_fn=tf.nn.relu,
                                         scope='{}_{}'.format(scope_list[1], i),
                                         reuse=reuse_flag)

        # Box-delta head: five raw outputs per anchor (no activation).
        rpn_delta_boxes = slim.conv2d(rpn_conv2d_3x3,
                                      num_outputs=5 * self.num_anchors_per_location,
                                      kernel_size=[3, 3],
                                      stride=1,
                                      weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                                      biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
                                      scope=scope_list[3],
                                      activation_fn=None,
                                      reuse=reuse_flag)

        # Angle head: ``coding_len`` raw logits per anchor, fed from the same tower.
        rpn_angle_cls = slim.conv2d(rpn_conv2d_3x3,
                                    num_outputs=self.coding_len * self.num_anchors_per_location,
                                    kernel_size=[3, 3],
                                    stride=1,
                                    weights_initializer=self.cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                                    biases_initializer=self.cfgs.SUBNETS_BIAS_INITIALIZER,
                                    scope=scope_list[4],
                                    activation_fn=None,
                                    reuse=reuse_flag)

        # Flatten to one row per anchor.
        rpn_delta_boxes = tf.reshape(rpn_delta_boxes, [-1, 5],
                                     name='rpn_{}_regression_reshape'.format(level))
        rpn_angle_cls = tf.reshape(rpn_angle_cls, [-1, self.coding_len],
                                   name='rpn_{}_angle_cls_reshape'.format(level))
        return rpn_delta_boxes, rpn_angle_cls

    def rpn_net(self, feature_pyramid, name):
        '''
        Run the classification and regression branches on every level listed
        in ``cfgs.LEVEL``.

        :param feature_pyramid: container indexed by the level names in ``cfgs.LEVEL``
        :param name: variable scope that wraps all heads
        :return: four lists with one entry per level, in this order:
                 box deltas, class scores, class probabilities, angle logits
        '''

        rpn_delta_boxes_list = []
        rpn_scores_list = []
        rpn_probs_list = []
        rpn_angle_cls_list = []
        with tf.variable_scope(name):
            with slim.arg_scope([slim.conv2d], weights_regularizer=slim.l2_regularizer(self.cfgs.WEIGHT_DECAY)):
                for level in self.cfgs.LEVEL:

                    if self.cfgs.SHARE_NET:
                        # Same scope names on every level; variables are created
                        # on the first level and reused on all later ones.
                        reuse_flag = None if level == self.cfgs.LEVEL[0] else True
                        scope_list = ['conv2d_3x3_cls', 'conv2d_3x3_reg', 'rpn_classification',
                                      'rpn_regression', 'rpn_angle_cls']
                    else:
                        # Per-level scope names, hence separate variables per level.
                        reuse_flag = None
                        scope_list = ['conv2d_3x3_cls_' + level, 'conv2d_3x3_reg_' + level,
                                      'rpn_classification_' + level, 'rpn_regression_' + level,
                                      'rpn_angle_cls_' + level]

                    rpn_box_scores, rpn_box_probs = self.rpn_cls_net(feature_pyramid[level], scope_list, reuse_flag, level)
                    rpn_delta_boxes, rpn_angle_cls = self.rpn_reg_net(feature_pyramid[level], scope_list, reuse_flag, level)

                    rpn_scores_list.append(rpn_box_scores)
                    rpn_probs_list.append(rpn_box_probs)
                    rpn_delta_boxes_list.append(rpn_delta_boxes)
                    rpn_angle_cls_list.append(rpn_angle_cls)

            return rpn_delta_boxes_list, rpn_scores_list, rpn_probs_list, rpn_angle_cls_list

    def build_whole_detection_network(self, input_img_batch, gtboxes_batch_h=None, gtboxes_batch_r=None,
                                      gt_smooth_label=None, gpu_id=0):
        '''
        Build the complete graph: backbone, prediction heads, anchors, the
        losses (training only) and the detection post-processing.

        :param input_img_batch: image tensor passed to ``self.build_backbone``
        :param gtboxes_batch_h: ground truth, reshaped to [-1, 5]; used only when training
        :param gtboxes_batch_r: ground truth, reshaped to [-1, 6]; used only when training
        :param gt_smooth_label: angle labels, reshaped to [-1, self.coding_len];
                                used only when training
        :param gpu_id: forwarded to the anchor sampler
        :return: when training, (boxes, scores, category, boxes_angle, self.losses_dict);
                 otherwise (boxes_angle, scores, category)
        '''

        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

            gt_smooth_label = tf.reshape(gt_smooth_label, [-1, self.coding_len])
            gt_smooth_label = tf.cast(gt_smooth_label, tf.float32)

        # 1. build backbone
        feature_pyramid = self.build_backbone(input_img_batch)

        # 2. build rpn (per-level outputs are concatenated along the anchor axis)
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list, rpn_angle_cls_list = self.rpn_net(feature_pyramid, 'rpn_net')
        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
        rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)
        rpn_angle_cls = tf.concat(rpn_angle_cls_list, axis=0)

        # 3. generate anchors
        anchor_list = self.make_anchors(feature_pyramid)
        anchors = tf.concat(anchor_list, axis=0)

        # 4. build loss
        if self.is_training:
            with tf.variable_scope('build_loss'):
                # Target assignment runs as a Python function inside the graph.
                labels, target_delta, anchor_states, target_boxes, target_smooth_label = tf.py_func(
                    func=self.anchor_sampler_csl.anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r,
                         gt_smooth_label, anchors, gpu_id],
                    Tout=[tf.float32, tf.float32, tf.float32,
                          tf.float32, tf.float32])

                # ``self.method`` is not set in this class; presumably provided by the base class.
                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors, anchor_states, 1)

                cls_loss = self.losses.focal_loss(labels, rpn_cls_score, anchor_states)

                # REG_LOSS_MODE selects the regression loss: 0 and 1 pick the two
                # IoU-smooth-L1 variants, anything else plain smooth L1.
                if self.cfgs.REG_LOSS_MODE == 0:
                    reg_loss = self.losses.iou_smooth_l1_loss_log(target_delta, rpn_box_pred, anchor_states,
                                                                  target_boxes, anchors)
                elif self.cfgs.REG_LOSS_MODE == 1:
                    reg_loss = self.losses.iou_smooth_l1_loss_exp(target_delta, rpn_box_pred, anchor_states,
                                                                  target_boxes, anchors, alpha=self.cfgs.ALPHA,
                                                                  beta=self.cfgs.BETA)
                else:
                    reg_loss = self.losses.smooth_l1_loss(target_delta, rpn_box_pred, anchor_states)

                angle_cls_loss = self.losses.angle_focal_loss(target_smooth_label, rpn_angle_cls, anchor_states)

                self.losses_dict['cls_loss'] = cls_loss * self.cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * self.cfgs.REG_WEIGHT
                self.losses_dict['angle_cls_loss'] = angle_cls_loss * self.cfgs.ANGLE_WEIGHT

        # 5. postprocess
        with tf.variable_scope('postprocess_detctions'):
            boxes, scores, category, boxes_angle = self.postprocess_detctions(rpn_bbox_pred=rpn_box_pred,
                                                                              rpn_cls_prob=rpn_cls_prob,
                                                                              rpn_angle_prob=tf.sigmoid(rpn_angle_cls),
                                                                              anchors=anchors)
            # Detections are outputs only; keep them out of back-propagation.
            boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)
            boxes_angle = tf.stop_gradient(boxes_angle)

        if self.is_training:
            # The fourth element is the box set whose angle comes from the
            # angle-classification head. Returning ``boxes`` twice here would
            # silently drop ``boxes_angle`` from the training outputs.
            return boxes, scores, category, boxes_angle, self.losses_dict
        else:
            return boxes_angle, scores, category

    def postprocess_detctions(self, rpn_bbox_pred, rpn_cls_prob, rpn_angle_prob, anchors):
        '''
        Turn raw head outputs into detections, class by class: score filtering,
        box decoding, angle decoding and rotated NMS.

        :param rpn_bbox_pred: box deltas, one row of five values per anchor
        :param rpn_cls_prob: class probabilities, one column per class
        :param rpn_angle_prob: angle-bin probabilities, one row per anchor
        :param anchors: anchors, one row per anchor
        :return: (boxes with the regressed angle, scores, labels,
                  boxes with the classified angle); labels are 1-based (j + 1)
        '''

        return_boxes_pred = []
        return_boxes_pred_angle = []
        return_scores = []
        return_labels = []
        for j in range(self.cfgs.CLASS_NUM):
            scores = rpn_cls_prob[:, j]
            # Use the visualisation threshold while training, the inference one otherwise.
            if self.is_training:
                indices = tf.reshape(tf.where(tf.greater(scores, self.cfgs.VIS_SCORE)), [-1, ])
            else:
                indices = tf.reshape(tf.where(tf.greater(scores, self.cfgs.FILTERED_SCORE)), [-1, ])

            anchors_ = tf.gather(anchors, indices)
            rpn_bbox_pred_ = tf.gather(rpn_bbox_pred, indices)
            scores = tf.gather(scores, indices)
            rpn_angle_prob_ = tf.gather(rpn_angle_prob, indices)
            # Index of the most probable angle bin for each kept anchor.
            angle_cls = tf.cast(tf.argmax(rpn_angle_prob_, axis=1), tf.float32)

            if self.cfgs.METHOD == 'H':
                # Horizontal anchors arrive as corners; convert them to
                # (x_c, y_c, w, h, theta) with theta fixed to -90. Note that
                # ``h`` is taken from columns 2/0 and ``w`` from columns 3/1.
                x_c = (anchors_[:, 2] + anchors_[:, 0]) / 2
                y_c = (anchors_[:, 3] + anchors_[:, 1]) / 2
                h = anchors_[:, 2] - anchors_[:, 0] + 1
                w = anchors_[:, 3] - anchors_[:, 1] + 1
                theta = -90 * tf.ones_like(x_c)
                anchors_ = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))

            if self.cfgs.ANGLE_RANGE == 180:
                # Convert the anchor representation before decoding (mode -1);
                # the inverse conversion (mode 1) is applied after decoding below.
                anchors_ = tf.py_func(coordinate_present_convert,
                                      inp=[anchors_, -1],
                                      Tout=[tf.float32])
                anchors_ = tf.reshape(anchors_, [-1, 5])

            boxes_pred = bbox_transform.rbbox_transform_inv(boxes=anchors_, deltas=rpn_bbox_pred_)

            boxes_pred = tf.reshape(boxes_pred, [-1, 5])
            # Bin index k maps to the angle -(k + 0.5) * OMEGA.
            angle_cls = (tf.reshape(angle_cls, [-1, ]) * -1 - 0.5) * self.cfgs.OMEGA

            # Keep the regressed centre and size but swap in the classified angle.
            x, y, w, h, _ = tf.unstack(boxes_pred, axis=1)
            boxes_pred_angle = tf.transpose(tf.stack([x, y, w, h, angle_cls]))

            if self.cfgs.ANGLE_RANGE == 180:
                # _, _, _, _, theta = tf.unstack(boxes_pred, axis=1)
                # indx = tf.reshape(tf.where(tf.logical_and(tf.less(theta, 0), tf.greater_equal(theta, -180))), [-1, ])
                # boxes_pred = tf.gather(boxes_pred, indx)
                # scores = tf.gather(scores, indx)

                boxes_pred = tf.py_func(coordinate_present_convert,
                                        inp=[boxes_pred, 1],
                                        Tout=[tf.float32])
                boxes_pred = tf.reshape(boxes_pred, [-1, 5])

                boxes_pred_angle = tf.py_func(coordinate_present_convert,
                                              inp=[boxes_pred_angle, 1],
                                              Tout=[tf.float32])
                boxes_pred_angle = tf.reshape(boxes_pred_angle, [-1, 5])

            # NMS is driven by the classified-angle boxes; the same indices are
            # then applied to both box sets and to the scores.
            nms_indices = nms_rotate.nms_rotate(decode_boxes=boxes_pred_angle,
                                                scores=scores,
                                                iou_threshold=self.cfgs.NMS_IOU_THRESHOLD,
                                                max_output_size=100 if self.is_training else 1000,
                                                use_gpu=False)

            tmp_boxes_pred = tf.reshape(tf.gather(boxes_pred, nms_indices), [-1, 5])
            tmp_boxes_pred_angle = tf.reshape(tf.gather(boxes_pred_angle, nms_indices), [-1, 5])
            tmp_scores = tf.reshape(tf.gather(scores, nms_indices), [-1, ])

            return_boxes_pred.append(tmp_boxes_pred)
            return_boxes_pred_angle.append(tmp_boxes_pred_angle)
            return_scores.append(tmp_scores)
            return_labels.append(tf.ones_like(tmp_scores) * (j + 1))

        return_boxes_pred = tf.concat(return_boxes_pred, axis=0)
        return_boxes_pred_angle = tf.concat(return_boxes_pred_angle, axis=0)
        return_scores = tf.concat(return_scores, axis=0)
        return_labels = tf.concat(return_labels, axis=0)

        return return_boxes_pred, return_scores, return_labels, return_boxes_pred_angle
Exemple #9
0
class DetectionNetworkSCRDet(DetectionNetworkBase):
    '''
    Two-stage detector: an RPN proposes horizontal boxes, then a box head
    predicts both horizontal (``_h``) and rotated (``_r``) boxes per proposal.
    The backbone additionally returns a mask prediction that feeds an
    attention loss during training.

    Helpers such as ``build_backbone``, ``postprocess_rpn_proposals``,
    ``add_anchor_img_smry`` and ``add_roi_batch_img_smry`` are not defined in
    this class; presumably they come from ``DetectionNetworkBase``.
    '''

    def __init__(self, cfgs, is_training):
        '''
        :param cfgs: configuration object shared with all helper components
        :param is_training: forwarded unchanged to the base-class initialiser
        '''
        super(DetectionNetworkSCRDet, self).__init__(cfgs, is_training)
        self.proposal_sampler_r2cnn = ProposalSamplerR2CNN(cfgs)
        self.anchor_sampler_r2cnn = AnchorSamplerR2CNN(cfgs)
        self.losses = Loss(cfgs)
        self.roi_extractor = RoIExtractor(cfgs)
        self.box_head = BoxHead(cfgs)

    def rpn(self, inputs):
        '''
        Build the RPN head: one 3x3 convolution with 512 outputs and ReLU,
        followed by two 1x1 convolutions without activation.

        :param inputs: feature map the RPN is applied to
        :return: (rpn_box_pred with 4 outputs per anchor,
                  rpn_cls_score with 2 outputs per anchor,
                  rpn_cls_prob, the softmax of rpn_cls_score)
        '''
        rpn_conv3x3 = slim.conv2d(inputs,
                                  512, [3, 3],
                                  trainable=self.is_training,
                                  weights_initializer=self.cfgs.INITIALIZER,
                                  activation_fn=tf.nn.relu,
                                  scope='rpn_conv/3x3')
        rpn_cls_score = slim.conv2d(rpn_conv3x3,
                                    self.num_anchors_per_location * 2, [1, 1],
                                    stride=1,
                                    trainable=self.is_training,
                                    weights_initializer=self.cfgs.INITIALIZER,
                                    activation_fn=None,
                                    scope='rpn_cls_score')
        rpn_box_pred = slim.conv2d(
            rpn_conv3x3,
            self.num_anchors_per_location * 4, [1, 1],
            stride=1,
            trainable=self.is_training,
            weights_initializer=self.cfgs.BBOX_INITIALIZER,
            activation_fn=None,
            scope='rpn_bbox_pred')
        # Softmax over the un-reshaped score map; build_whole_detection_network
        # recomputes the probabilities after reshaping the scores to [-1, 2].
        rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

        return rpn_box_pred, rpn_cls_score, rpn_cls_prob

    def make_anchors(self, feature_to_cropped):
        '''
        Generate anchors for a feature map.

        :param feature_to_cropped: feature map; dimensions 1 and 2 are read as
                                   height and width
        :return: anchors produced by ``anchor_utils.make_anchors``
        '''
        featuremap_height, featuremap_width = tf.shape(
            feature_to_cropped)[1], tf.shape(feature_to_cropped)[2]
        featuremap_height = tf.cast(featuremap_height, tf.float32)
        featuremap_width = tf.cast(featuremap_width, tf.float32)

        anchors = anchor_utils.make_anchors(
            base_anchor_size=self.cfgs.BASE_ANCHOR_SIZE_LIST,
            anchor_scales=self.cfgs.ANCHOR_SCALES,
            anchor_ratios=self.cfgs.ANCHOR_RATIOS,
            featuremap_height=featuremap_height,
            featuremap_width=featuremap_width,
            stride=self.cfgs.ANCHOR_STRIDE,
            name="make_anchors_forRPN")
        return anchors

    def build_loss(self, rpn_box_pred, rpn_bbox_targets, rpn_cls_score,
                   rpn_labels, bbox_pred_h, bbox_targets_h, cls_score_h,
                   bbox_pred_r, bbox_targets_r, rois, target_gt_r, cls_score_r,
                   labels, mask_gt, pa_mask_pred):
        '''
        Compute the RPN, Fast-RCNN and attention losses and store the weighted
        results in ``self.losses_dict``. Nothing is returned.

        :param rpn_box_pred: [-1, 4]
        :param rpn_bbox_targets: [-1, 4]
        :param rpn_cls_score: [-1, 2]
        :param rpn_labels: [-1]; entries equal to -1 are excluded from the RPN
                           classification loss
        :param bbox_pred_h: [-1, 4*(cls_num+1)]
        :param bbox_targets_h: [-1, 4*(cls_num+1)]
        :param cls_score_h: [-1, cls_num+1]
        :param bbox_pred_r: [-1, 5*(cls_num+1)]
        :param bbox_targets_r: [-1, 5*(cls_num+1)]
        :param rois: sampled proposals; only used when cfgs.USE_IOU_FACTOR is set
        :param target_gt_r: rotated ground truth matched to the proposals; only
                            used when cfgs.USE_IOU_FACTOR is set
        :param cls_score_r: [-1, cls_num+1]
        :param labels: [-1]
        :param mask_gt: ground-truth mask passed to the attention loss
        :param pa_mask_pred: predicted mask passed to the attention loss
        :return:
        '''

        with tf.variable_scope('build_loss'):

            with tf.variable_scope('rpn_loss'):

                rpn_reg_loss = self.losses.smooth_l1_loss_rpn(
                    bbox_pred=rpn_box_pred,
                    bbox_targets=rpn_bbox_targets,
                    label=rpn_labels,
                    sigma=self.cfgs.RPN_SIGMA)
                # Keep only anchors whose label is not -1 for the classification loss.
                rpn_select = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)),
                                        [-1])
                rpn_cls_score = tf.reshape(
                    tf.gather(rpn_cls_score, rpn_select), [-1, 2])
                rpn_labels = tf.reshape(tf.gather(rpn_labels, rpn_select),
                                        [-1])
                rpn_cls_loss = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=rpn_cls_score, labels=rpn_labels))

                self.losses_dict[
                    'rpn_cls_loss'] = rpn_cls_loss * self.cfgs.RPN_CLASSIFICATION_LOSS_WEIGHT
                self.losses_dict[
                    'rpn_reg_loss'] = rpn_reg_loss * self.cfgs.RPN_LOCATION_LOSS_WEIGHT

            with tf.variable_scope('FastRCNN_loss'):

                reg_loss_h = self.losses.smooth_l1_loss_rcnn_h(
                    bbox_pred=bbox_pred_h,
                    bbox_targets=bbox_targets_h,
                    label=labels,
                    num_classes=self.cfgs.CLASS_NUM + 1,
                    sigma=self.cfgs.FASTRCNN_SIGMA)
                # The rotated regression loss optionally uses the IoU-weighted variant.
                if self.cfgs.USE_IOU_FACTOR:
                    reg_loss_r = self.losses.iou_smooth_l1_loss_rcnn_r(
                        bbox_pred=bbox_pred_r,
                        bbox_targets=bbox_targets_r,
                        label=labels,
                        rois=rois,
                        target_gt_r=target_gt_r,
                        num_classes=self.cfgs.CLASS_NUM + 1,
                        sigma=self.cfgs.FASTRCNN_SIGMA)
                else:
                    reg_loss_r = self.losses.smooth_l1_loss_rcnn_r(
                        bbox_pred=bbox_pred_r,
                        bbox_targets=bbox_targets_r,
                        label=labels,
                        num_classes=self.cfgs.CLASS_NUM + 1,
                        sigma=self.cfgs.FASTRCNN_SIGMA)

                cls_loss_h = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=cls_score_h,
                        labels=labels))  # no extra selection: proposals were already sampled
                cls_loss_r = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=cls_score_r, labels=labels))

                self.losses_dict[
                    'fast_cls_loss_h'] = cls_loss_h * self.cfgs.FAST_RCNN_CLASSIFICATION_LOSS_WEIGHT
                self.losses_dict[
                    'fast_reg_loss_h'] = reg_loss_h * self.cfgs.FAST_RCNN_LOCATION_LOSS_WEIGHT
                self.losses_dict[
                    'fast_cls_loss_r'] = cls_loss_r * self.cfgs.FAST_RCNN_CLASSIFICATION_LOSS_WEIGHT
                self.losses_dict[
                    'fast_reg_loss_r'] = reg_loss_r * self.cfgs.FAST_RCNN_LOCATION_LOSS_WEIGHT

            with tf.variable_scope('build_attention_loss',
                                   regularizer=slim.l2_regularizer(
                                       self.cfgs.WEIGHT_DECAY)):
                # The attention loss is stored without an extra weighting factor.
                attention_loss = self.losses.build_attention_loss(
                    mask_gt, pa_mask_pred)
                self.losses_dict['attention_loss'] = attention_loss

    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h=None,
                                      gtboxes_batch_r=None,
                                      mask_batch=None,
                                      gpu_id=0):
        '''
        Build the complete graph: backbone, RPN, proposal post-processing,
        minibatch sampling and losses (training only), box head and the final
        horizontal / rotated detection post-processing.

        :param input_img_batch: image tensor passed to ``self.build_backbone``
        :param gtboxes_batch_h: ground truth, reshaped to [-1, 5]; used only when training
        :param gtboxes_batch_r: ground truth, reshaped to [-1, 6]; used only when training
        :param mask_batch: ground-truth mask for the attention loss; used only when training
        :param gpu_id: forwarded to the rotated NMS
        :return: (final_boxes_h, final_scores_h, final_category_h,
                  final_boxes_r, final_scores_r, final_category_r), with
                  ``self.losses_dict`` appended as a seventh element when training
        '''

        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build backbone (returns the feature map and the predicted mask)
        feature, pa_mask = self.build_backbone(input_img_batch)

        # 2. build rpn
        rpn_box_pred, rpn_cls_score, rpn_cls_prob = self.rpn(feature)
        rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
        rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
        # Recompute the probabilities on the reshaped scores; this replaces the
        # value returned by self.rpn above.
        rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

        # 3. generate anchors
        anchors = self.make_anchors(feature)

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        with tf.variable_scope('postprocess_RPN'):
            rois, roi_scores = self.postprocess_rpn_proposals(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                img_shape=img_shape,
                anchors=anchors,
                is_training=self.is_training)

        # 5. sample minibatch
        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                # Anchor target assignment runs as a Python function inside the graph.
                rpn_labels, rpn_bbox_targets = \
                    tf.py_func(
                        self.anchor_sampler_r2cnn.anchor_target_layer,
                        [gtboxes_batch_h, img_shape, anchors],
                        [tf.float32, tf.float32])
                rpn_bbox_targets = tf.reshape(rpn_bbox_targets, [-1, 4])
                rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
                rpn_labels = tf.reshape(rpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch,
                                         anchors,
                                         rpn_labels,
                                         method=0)

            # RPN accuracy summary, computed over anchors whose label is not -1.
            rpn_cls_category = tf.argmax(rpn_cls_prob, axis=1)
            kept_rpppn = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)),
                                    [-1])
            rpn_cls_category = tf.gather(rpn_cls_category, kept_rpppn)
            acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(rpn_cls_category,
                             tf.to_int64(tf.gather(rpn_labels, kept_rpppn)))))
            tf.summary.scalar('ACC/fpn_accuracy', acc)

            # Proposal sampling is ordered after the anchor labels are computed.
            with tf.control_dependencies([rpn_labels]):

                with tf.variable_scope('sample_RCNN_minibatch'):
                    # From here on ``rois`` holds the sampled proposals.
                    rois, labels, bbox_targets_h, bbox_targets_r, target_gt_h, target_gt_r = \
                        tf.py_func(self.proposal_sampler_r2cnn.proposal_target_layer,
                                   [rois, gtboxes_batch_h, gtboxes_batch_r],
                                   [tf.float32, tf.float32, tf.float32, tf.float32, tf.float32, tf.float32])
                    rois = tf.reshape(rois, [-1, 4])
                    labels = tf.to_int32(labels)
                    labels = tf.reshape(labels, [-1])
                    bbox_targets_h = tf.reshape(
                        bbox_targets_h, [-1, 4 * (self.cfgs.CLASS_NUM + 1)])
                    bbox_targets_r = tf.reshape(
                        bbox_targets_r, [-1, 5 * (self.cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(input_img_batch,
                                                rois,
                                                labels,
                                                method=0)

        # 6. build Fast-RCNN, include roi align/pooling, box head
        bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = self.box_head.fc_head(
            self.roi_extractor,
            rois,
            feature,
            img_shape,
            self.is_training,
            mode=0)
        cls_prob_h = slim.softmax(cls_score_h, 'cls_prob_h')
        cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r')

        if self.is_training:
            # Accuracy summaries for the horizontal and rotated classifiers.
            cls_category_h = tf.argmax(cls_prob_h, axis=1)
            fast_acc_h = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category_h, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc_h', fast_acc_h)

            cls_category_r = tf.argmax(cls_prob_r, axis=1)
            fast_acc_r = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category_r, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc_r', fast_acc_r)

        # 7. build loss
        if self.is_training:
            self.build_loss(rpn_box_pred=rpn_box_pred,
                            rpn_bbox_targets=rpn_bbox_targets,
                            rpn_cls_score=rpn_cls_score,
                            rpn_labels=rpn_labels,
                            bbox_pred_h=bbox_pred_h,
                            bbox_targets_h=bbox_targets_h,
                            cls_score_h=cls_score_h,
                            bbox_pred_r=bbox_pred_r,
                            bbox_targets_r=bbox_targets_r,
                            rois=rois,
                            target_gt_r=target_gt_r,
                            cls_score_r=cls_score_r,
                            labels=labels,
                            mask_gt=mask_batch,
                            pa_mask_pred=pa_mask)

        # 8. postprocess_fastrcnn
        final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
            rois=rois,
            bbox_ppred=bbox_pred_h,
            scores=cls_prob_h,
            img_shape=img_shape)
        final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
            rois=rois,
            bbox_ppred=bbox_pred_r,
            scores=cls_prob_r,
            gpu_id=gpu_id)
        if self.is_training:
            return final_boxes_h, final_scores_h, final_category_h, \
                   final_boxes_r, final_scores_r, final_category_r, self.losses_dict
        else:
            # The trailing comma is harmless: the result is a 6-tuple either way.
            return final_boxes_h, final_scores_h, final_category_h, \
                   final_boxes_r, final_scores_r, final_category_r,

    def postprocess_fastrcnn_r(self, rois, bbox_ppred, scores, gpu_id):
        '''
        Decode the rotated box predictions per class, apply rotated NMS and
        keep detections above the score threshold.

        :param rois:[-1, 4]
        :param bbox_ppred: [-1, (cfgs.Class_num+1) * 5]
        :param scores: [-1, cfgs.Class_num + 1]
        :param gpu_id: forwarded to ``nms_rotate.nms_rotate``
        :return: (final_boxes, final_scores, final_category); categories run
                 from 1 to cfgs.CLASS_NUM (index 0 is skipped)
        :raises NotImplementedError: when cfgs.SOFT_NMS is set
        '''

        with tf.name_scope('postprocess_fastrcnn'):
            rois = tf.stop_gradient(rois)
            scores = tf.stop_gradient(scores)
            bbox_ppred = tf.reshape(bbox_ppred,
                                    [-1, self.cfgs.CLASS_NUM + 1, 5])
            bbox_ppred = tf.stop_gradient(bbox_ppred)

            # One tensor per class for both the encoded boxes and the scores.
            bbox_pred_list = tf.unstack(bbox_ppred, axis=1)
            score_list = tf.unstack(scores, axis=1)

            allclasses_boxes = []
            allclasses_scores = []
            categories = []

            # Convert the corner-format rois to (x_c, y_c, w, h, theta) with
            # theta fixed to -90. Note that ``h`` is taken from columns 2/0 and
            # ``w`` from columns 3/1.
            x_c = (rois[:, 2] + rois[:, 0]) / 2
            y_c = (rois[:, 3] + rois[:, 1]) / 2
            h = rois[:, 2] - rois[:, 0] + 1
            w = rois[:, 3] - rois[:, 1] + 1
            theta = -90 * tf.ones_like(x_c)
            rois = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))
            for i in range(1, self.cfgs.CLASS_NUM + 1):

                # 1. decode boxes in each class
                tmp_encoded_box = bbox_pred_list[i]
                tmp_score = score_list[i]

                tmp_decoded_boxes = bbox_transform.rbbox_transform_inv(
                    boxes=rois,
                    deltas=tmp_encoded_box,
                    scale_factors=self.cfgs.ROI_SCALE_FACTORS)

                # 2. clip to img boundaries (disabled for rotated boxes)
                # tmp_decoded_boxes = boxes_utils.clip_boxes_to_img_boundaries(decode_boxes=tmp_decoded_boxes,
                #                                                              img_shape=img_shape)

                # 3. NMS
                if self.cfgs.SOFT_NMS:
                    print("Using Soft NMS.......")
                    raise NotImplementedError(
                        "soft NMS for rotate has not implemented")

                else:
                    keep = nms_rotate.nms_rotate(
                        decode_boxes=tmp_decoded_boxes,
                        scores=tmp_score,
                        iou_threshold=self.cfgs.FAST_RCNN_R_NMS_IOU_THRESHOLD,
                        max_output_size=self.cfgs.
                        FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
                        use_gpu=self.cfgs.ROTATE_NMS_USE_GPU,
                        gpu_id=gpu_id)

                perclass_boxes = tf.gather(tmp_decoded_boxes, keep)
                perclass_scores = tf.gather(tmp_score, keep)

                allclasses_boxes.append(perclass_boxes)
                allclasses_scores.append(perclass_scores)
                categories.append(tf.ones_like(perclass_scores) * i)

            final_boxes = tf.concat(allclasses_boxes, axis=0)
            final_scores = tf.concat(allclasses_scores, axis=0)
            final_category = tf.concat(categories, axis=0)

            # Final score filter: visualisation threshold while training,
            # inference threshold otherwise.
            if self.is_training:
                '''
                in training. We should show the detecitons in the tensorboard. So we add this.
                '''
                kept_indices = tf.reshape(
                    tf.where(
                        tf.greater_equal(final_scores, self.cfgs.VIS_SCORE)),
                    [-1])
            else:
                kept_indices = tf.reshape(
                    tf.where(
                        tf.greater_equal(final_scores,
                                         self.cfgs.FILTERED_SCORE)), [-1])
            final_boxes = tf.gather(final_boxes, kept_indices)
            final_scores = tf.gather(final_scores, kept_indices)
            final_category = tf.gather(final_category, kept_indices)

            return final_boxes, final_scores, final_category

    def postprocess_fastrcnn_h(self, rois, bbox_ppred, scores, img_shape):
        '''
        Decode the horizontal box predictions per class, clip them to the
        image, apply NMS and keep detections above the score threshold.

        :param rois:[-1, 4]
        :param bbox_ppred: [-1, (cfgs.Class_num+1) * 4]
        :param scores: [-1, cfgs.Class_num + 1]
        :param img_shape: image shape used for clipping the decoded boxes
        :return: (final_boxes, final_scores, final_category); categories run
                 from 1 to cfgs.CLASS_NUM (index 0 is skipped)
        '''

        with tf.name_scope('postprocess_fastrcnn_h'):
            rois = tf.stop_gradient(rois)
            scores = tf.stop_gradient(scores)
            bbox_ppred = tf.reshape(bbox_ppred,
                                    [-1, self.cfgs.CLASS_NUM + 1, 4])
            bbox_ppred = tf.stop_gradient(bbox_ppred)

            # One tensor per class for both the encoded boxes and the scores.
            bbox_pred_list = tf.unstack(bbox_ppred, axis=1)
            score_list = tf.unstack(scores, axis=1)

            allclasses_boxes = []
            allclasses_scores = []
            categories = []
            for i in range(1, self.cfgs.CLASS_NUM + 1):
                # 1. decode boxes in each class
                tmp_encoded_box = bbox_pred_list[i]
                tmp_score = score_list[i]
                tmp_decoded_boxes = bbox_transform.bbox_transform_inv(
                    boxes=rois,
                    deltas=tmp_encoded_box,
                    scale_factors=self.cfgs.ROI_SCALE_FACTORS)

                # 2. clip to img boundaries
                tmp_decoded_boxes = clip_boxes_to_img_boundaries(
                    decode_boxes=tmp_decoded_boxes, img_shape=img_shape)

                # 3. NMS
                keep = tf.image.non_max_suppression(
                    boxes=tmp_decoded_boxes,
                    scores=tmp_score,
                    max_output_size=self.cfgs.
                    FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
                    iou_threshold=self.cfgs.FAST_RCNN_H_NMS_IOU_THRESHOLD)

                perclass_boxes = tf.gather(tmp_decoded_boxes, keep)
                perclass_scores = tf.gather(tmp_score, keep)

                allclasses_boxes.append(perclass_boxes)
                allclasses_scores.append(perclass_scores)
                categories.append(tf.ones_like(perclass_scores) * i)

            final_boxes = tf.concat(allclasses_boxes, axis=0)
            final_scores = tf.concat(allclasses_scores, axis=0)
            final_category = tf.concat(categories, axis=0)

            # NOTE(review): the bare string below is a leftover no-op statement
            # that duplicates the one inside the training branch.
            '''
            in training. We should show the detecitons in the tensorboard. So we add this.
            '''
            # Final score filter: visualisation threshold while training,
            # inference threshold otherwise.
            if self.is_training:
                '''
                in training. We should show the detecitons in the tensorboard. So we add this.
                '''
                kept_indices = tf.reshape(
                    tf.where(
                        tf.greater_equal(final_scores, self.cfgs.VIS_SCORE)),
                    [-1])
            else:
                kept_indices = tf.reshape(
                    tf.where(
                        tf.greater_equal(final_scores,
                                         self.cfgs.FILTERED_SCORE)), [-1])
            final_boxes = tf.gather(final_boxes, kept_indices)
            final_scores = tf.gather(final_scores, kept_indices)
            final_category = tf.gather(final_category, kept_indices)

            return final_boxes, final_scores, final_category
class DetectionNetworkRetinaNet(DetectionNetworkBase):
    """Detector that wires a backbone, RPN-style heads and anchor losses.

    ``build_whole_detection_network`` stops at the raw head outputs: it
    returns the box regressions and class probabilities and, in training
    mode, stores the weighted losses in ``self.losses_dict``.
    """

    def __init__(self, cfgs, is_training):
        """Initialise the base network, the anchor sampler and the loss helper.

        Args:
            cfgs: configuration object, forwarded to the base class and to
                ``AnchorSamplerRetinaNet``.
            is_training: forwarded unchanged to the base-class constructor.
        """
        super(DetectionNetworkRetinaNet, self).__init__(cfgs, is_training)
        self.anchor_sampler_retinenet = AnchorSamplerRetinaNet(cfgs)
        self.losses = Loss(self.cfgs)

    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h=None,
                                      gtboxes_batch_r=None,
                                      gpu_id=0):
        """Build the graph from the input image up to the head outputs.

        Args:
            input_img_batch: image tensor fed to the backbone. When
                ``cfgs.USE_GN`` is set it is reshaped to
                ``[1, IMG_SHORT_SIDE_LEN, IMG_MAX_LENGTH, 3]``.
            gtboxes_batch_h: ground-truth boxes, reshaped to ``[-1, 5]``;
                only read in training mode.
            gtboxes_batch_r: ground-truth boxes, reshaped to ``[-1, 6]``;
                only read in training mode.
            gpu_id: passed through to the anchor target layer.

        Returns:
            Tuple ``(rpn_box_pred, rpn_cls_prob)``: the per-level head
            outputs concatenated along axis 0.
        """
        cfgs = self.cfgs
        training = self.is_training

        if training:
            gtboxes_batch_h = tf.cast(
                tf.reshape(gtboxes_batch_h, [-1, 5]), tf.float32)
            gtboxes_batch_r = tf.cast(
                tf.reshape(gtboxes_batch_r, [-1, 6]), tf.float32)

        if cfgs.USE_GN:
            # Group norm path pins the input to a fully static shape.
            static_shape = [
                1, cfgs.IMG_SHORT_SIDE_LEN, cfgs.IMG_MAX_LENGTH, 3
            ]
            input_img_batch = tf.reshape(input_img_batch, static_shape)

        # 1. backbone
        feature_pyramid = self.build_backbone(input_img_batch)

        # 2. heads, one entry per pyramid level, flattened into one tensor each
        box_preds, cls_scores, cls_probs = self.rpn_net(
            feature_pyramid, 'rpn_net')
        rpn_box_pred = tf.concat(box_preds, axis=0)
        rpn_cls_score = tf.concat(cls_scores, axis=0)
        rpn_cls_prob = tf.concat(cls_probs, axis=0)

        # 3. anchors for every level
        anchors = tf.concat(self.make_anchors(feature_pyramid), axis=0)

        # Inference needs nothing beyond the raw predictions.
        if not training:
            return rpn_box_pred, rpn_cls_prob

        # 4. losses (training only)
        with tf.variable_scope('build_loss'):
            labels, target_delta, anchor_states, target_boxes = tf.py_func(
                func=self.anchor_sampler_retinenet.anchor_target_layer,
                inp=[gtboxes_batch_h, gtboxes_batch_r, anchors, gpu_id],
                Tout=[tf.float32] * 4)

            # The last argument is 0 for method 'H' and 1 otherwise.
            smry_mode = 0 if self.method == 'H' else 1
            self.add_anchor_img_smry(input_img_batch, anchors, anchor_states,
                                     smry_mode)

            cls_loss = self.losses.focal_loss(labels, rpn_cls_score,
                                              anchor_states)

            # REG_LOSS_MODE selects the regression loss:
            # 0 -> IoU-smooth-L1 (log), 1 -> IoU-smooth-L1 (exp),
            # anything else -> plain smooth-L1.
            loss_mode = cfgs.REG_LOSS_MODE
            if loss_mode == 0:
                reg_loss = self.losses.iou_smooth_l1_loss_log(
                    target_delta, rpn_box_pred, anchor_states, target_boxes,
                    anchors)
            elif loss_mode == 1:
                reg_loss = self.losses.iou_smooth_l1_loss_exp(
                    target_delta,
                    rpn_box_pred,
                    anchor_states,
                    target_boxes,
                    anchors,
                    alpha=cfgs.ALPHA,
                    beta=cfgs.BETA)
            else:
                reg_loss = self.losses.smooth_l1_loss(
                    target_delta, rpn_box_pred, anchor_states)

            self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
            self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

        return rpn_box_pred, rpn_cls_prob
class DetectionNetworkRetinaNet(DetectionNetworkBase):
    def __init__(self, cfgs, is_training):
        """Initialise the base network and attach the sampler and loss helpers.

        Args:
            cfgs: configuration object; forwarded to the base class and to
                ``AnchorSamplerRetinaNet``.
            is_training: forwarded unchanged to the base-class constructor.
        """
        super(DetectionNetworkRetinaNet, self).__init__(cfgs, is_training)
        self.anchor_sampler_retinenet = AnchorSamplerRetinaNet(cfgs)
        # Reads self.cfgs rather than the argument, so this relies on the base
        # constructor having set it (presumably from cfgs; confirm in
        # DetectionNetworkBase).
        self.losses = Loss(self.cfgs)

    def rpn_reg_net(self, inputs, scope_list, reuse_flag, level):
        """Build the regression subnet for one pyramid level.

        A tower of ``cfgs.NUM_SUBNET_CONV`` 3x3 convolutions feeds three 3x3
        heads: box deltas (4 per anchor), a sigmoid "sin" output and a
        sigmoid "cos" output (1 per anchor each).

        Args:
            inputs: feature map for this level.
            scope_list: scope names; index 1 names the tower convolutions
                and index 3 names the heads (the angle heads append
                ``'_sin'`` / ``'_cos'``).
            reuse_flag: passed as ``reuse`` to every ``slim.conv2d`` call.
            level: only used to name the reshape ops.

        Returns:
            Tensor with 6 columns per anchor: the 4 box deltas followed by
            the sin and cos predictions.
        """
        cfgs = self.cfgs
        use_gn = cfgs.USE_GN

        # Shared conv tower. With group norm enabled the conv is linear and
        # the ReLU is applied after normalisation instead.
        tower = inputs
        for idx in range(cfgs.NUM_SUBNET_CONV):
            tower = slim.conv2d(
                inputs=tower,
                num_outputs=cfgs.FPN_CHANNEL,
                kernel_size=[3, 3],
                weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                stride=1,
                activation_fn=None if use_gn else tf.nn.relu,
                scope='{}_{}'.format(scope_list[1], idx),
                trainable=self.is_training,
                reuse=reuse_flag)

            if use_gn:
                tower = tf.contrib.layers.group_norm(tower)
                tower = tf.nn.relu(tower)

        def _head(num_outputs, scope, activation_fn):
            # One 3x3 output convolution on top of the shared tower.
            return slim.conv2d(
                tower,
                num_outputs=num_outputs,
                kernel_size=[3, 3],
                stride=1,
                weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                scope=scope,
                activation_fn=activation_fn,
                trainable=self.is_training,
                reuse=reuse_flag)

        head_scope = scope_list[3]
        box_out = _head(4 * self.num_anchors_per_location, head_scope, None)
        sin_out = _head(self.num_anchors_per_location, head_scope + '_sin',
                        tf.nn.sigmoid)
        cos_out = _head(self.num_anchors_per_location, head_scope + '_cos',
                        tf.nn.sigmoid)

        if cfgs.ANGLE_RANGE == 180:
            # Angles in [-90, 90]: stretch both sigmoid outputs from (0, 1)
            # to (-1, 1). Only sin strictly needs the full range, but the
            # original author found rescaling cos as well to work better.
            sin_out = 2 * (sin_out - 0.5)
            cos_out = 2 * (cos_out - 0.5)
        else:
            # Angles in [-90, 0]: sin lies in [-1, 0], cos stays in [0, 1].
            sin_out = sin_out * -1

        box_out = tf.reshape(
            box_out, [-1, 4], name='rpn_{}_regression_reshape'.format(level))
        sin_out = tf.reshape(
            sin_out, [-1, 1], name='rpn_{}_sin_reshape'.format(level))
        cos_out = tf.reshape(
            cos_out, [-1, 1], name='rpn_{}_cos_reshape'.format(level))

        return tf.concat([box_out, sin_out, cos_out], axis=-1)

    def rpn_net(self, feature_pyramid, name):
        """Run the classification and regression subnets on every level.

        Args:
            feature_pyramid: mapping indexed by the entries of
                ``cfgs.LEVEL``; each value is that level's feature map.
            name: variable scope that wraps all subnets.

        Returns:
            Tuple ``(delta_boxes_list, scores_list, probs_list)`` with one
            entry per level, in ``cfgs.LEVEL`` order.
        """
        base_scopes = ('conv2d_3x3_cls', 'conv2d_3x3_reg',
                       'rpn_classification', 'rpn_regression')
        levels = self.cfgs.LEVEL
        share_weights = self.cfgs.SHARE_NET

        deltas_per_level, scores_per_level, probs_per_level = [], [], []

        with tf.variable_scope(name), \
                slim.arg_scope([slim.conv2d],
                               weights_regularizer=slim.l2_regularizer(
                                   self.cfgs.WEIGHT_DECAY)):
            for level in levels:
                if share_weights:
                    # Same scope names on every level: the first level
                    # creates the variables, later levels reuse them.
                    scope_list = list(base_scopes)
                    reuse_flag = None if level == levels[0] else True
                else:
                    # Separate variables per level via a level suffix.
                    scope_list = [
                        scope + '_' + level for scope in base_scopes
                    ]
                    reuse_flag = None

                level_feature = feature_pyramid[level]
                level_scores, level_probs = self.rpn_cls_net(
                    level_feature, scope_list, reuse_flag, level)
                level_deltas = self.rpn_reg_net(level_feature, scope_list,
                                                reuse_flag, level)

                scores_per_level.append(level_scores)
                probs_per_level.append(level_probs)
                deltas_per_level.append(level_deltas)

        # Callers concatenate the per-level tensors themselves.
        return deltas_per_level, scores_per_level, probs_per_level

    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h=None,
                                      gtboxes_batch_r=None,
                                      gpu_id=0):
        """Build the full graph: backbone, heads, anchors, losses, detections.

        Args:
            input_img_batch: image tensor fed to the backbone. When
                ``cfgs.USE_GN`` is set it is reshaped to
                ``[1, IMG_SHORT_SIDE_LEN, IMG_MAX_LENGTH, 3]``.
            gtboxes_batch_h: ground-truth boxes, reshaped to ``[-1, 5]``;
                only read in training mode.
            gtboxes_batch_r: ground-truth boxes, reshaped to ``[-1, 6]``;
                only read in training mode.
            gpu_id: passed to the anchor target layer and to rotated NMS.

        Returns:
            ``(boxes, scores, category, losses_dict)`` in training mode,
            ``(boxes, scores, category)`` otherwise. The detection tensors
            are wrapped in ``tf.stop_gradient``.
        """

        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        if self.cfgs.USE_GN:
            input_img_batch = tf.reshape(
                input_img_batch,
                [1, self.cfgs.IMG_SHORT_SIDE_LEN, self.cfgs.IMG_MAX_LENGTH, 3])

        # 1. build backbone
        feature_pyramid = self.build_backbone(input_img_batch)

        # 2. build rpn
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(
            feature_pyramid, 'rpn_net')
        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
        rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)

        # 3. generate anchors
        anchor_list = self.make_anchors(feature_pyramid)
        anchors = tf.concat(anchor_list, axis=0)

        # 4. build loss
        if self.is_training:
            with tf.variable_scope('build_loss'):
                labels, target_delta, anchor_states, target_boxes = tf.py_func(
                    func=self.anchor_sampler_retinenet.anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors, gpu_id],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 1)

                cls_loss = self.losses.focal_loss(labels, rpn_cls_score,
                                                  anchor_states)

                # The last two prediction columns are the sin/cos outputs, so
                # only the leading columns take part in the box-delta loss.
                reg_xywh_loss = self.losses.smooth_l1_loss(
                    target_delta[:, :-1], rpn_box_pred[:, :-2], anchor_states)

                # Ground-truth angle is the second-to-last of the 6 target
                # columns (presumably [x, y, w, h, theta, label]; confirm
                # against anchor_target_layer). It is shifted by +90 degrees
                # only for the 180-degree range.
                #
                # The offset is computed separately on purpose: written
                # inline as `theta + 90. if ... else 0.`, the conditional
                # binds looser than `+`, which replaced the whole target with
                # the constant 0. for every non-180 angle range.
                theta_offset = 90. if self.cfgs.ANGLE_RANGE == 180 else 0.
                target_theta = tf.reshape(target_boxes,
                                          [-1, 6])[:, -2] + theta_offset
                # degrees -> radians
                target_theta = target_theta * 3.1415926 / 180.
                target_theta_sin = tf.reshape(tf.sin(target_theta), [-1, 1])
                target_theta_cos = tf.reshape(tf.cos(target_theta), [-1, 1])
                reg_theta_loss = self.losses.smooth_l1_loss(
                    tf.concat([target_theta_sin, target_theta_cos], axis=-1),
                    rpn_box_pred[:, -2:], anchor_states)

                reg_loss = reg_xywh_loss + reg_theta_loss

                self.losses_dict['cls_loss'] = cls_loss * self.cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * self.cfgs.REG_WEIGHT

        # 5. postprocess
        with tf.variable_scope('postprocess_detctions'):
            boxes, scores, category = self.postprocess_detctions(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                anchors=anchors,
                gpu_id=gpu_id)
            # Detections are outputs only; no gradient should flow through
            # the decoding / NMS path.
            boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)

        if self.is_training:
            return boxes, scores, category, self.losses_dict
        else:
            return boxes, scores, category

    def postprocess_detctions(self, rpn_bbox_pred, rpn_cls_prob, anchors,
                              gpu_id):
        """Decode, score-filter and rotate-NMS the predictions per class.

        Args:
            rpn_bbox_pred: per-anchor regression output; its last two
                columns are read as the sin and cos predictions.
            rpn_cls_prob: per-anchor class probabilities, one column per
                class.
            anchors: anchors aligned row-for-row with the predictions.
            gpu_id: forwarded to ``nms_rotate.nms_rotate``.

        Returns:
            Tuple ``(boxes, scores, labels)`` concatenated over all classes.
            ``boxes`` has 5 columns; ``labels`` holds the 1-based class
            index as a tensor shaped like ``scores``.
        """
        cfgs = self.cfgs
        # Training uses the visualisation threshold, inference the filter one.
        score_floor = (cfgs.VIS_SCORE
                       if self.is_training else cfgs.FILTERED_SCORE)
        wide_angle = cfgs.ANGLE_RANGE == 180
        # Cap on boxes kept by NMS per class, identical for training and
        # inference. (A dataset-dependent cap, 4000 for DOTA and 200
        # otherwise, was considered in an earlier revision.)
        nms_cap = 100

        boxes_per_class, scores_per_class, labels_per_class = [], [], []

        for cls_idx in range(cfgs.CLASS_NUM):
            cls_scores = rpn_cls_prob[:, cls_idx]
            keep = tf.reshape(
                tf.where(tf.greater(cls_scores, score_floor)), [-1])

            cls_anchors = tf.gather(anchors, keep)
            cls_deltas = tf.gather(rpn_bbox_pred, keep)
            cls_scores = tf.gather(cls_scores, keep)

            if self.method == 'H':
                # Rewrite 4-column corner anchors (presumably x1, y1, x2, y2)
                # as centre / size / angle rows with a fixed -90 angle.
                ctr_x = (cls_anchors[:, 2] + cls_anchors[:, 0]) / 2
                ctr_y = (cls_anchors[:, 3] + cls_anchors[:, 1]) / 2
                box_h = cls_anchors[:, 2] - cls_anchors[:, 0] + 1
                box_w = cls_anchors[:, 3] - cls_anchors[:, 1] + 1
                angle = -90 * tf.ones_like(ctr_x)
                cls_anchors = tf.transpose(
                    tf.stack([ctr_x, ctr_y, box_w, box_h, angle]))

            if wide_angle:
                cls_anchors = tf.py_func(coordinate_present_convert,
                                         inp=[cls_anchors, -1],
                                         Tout=[tf.float32])
                cls_anchors = tf.reshape(cls_anchors, [-1, 5])

            decoded = bbox_transform.rbbox_transform_inv(boxes=cls_anchors,
                                                         deltas=cls_deltas)

            # Keep the decoded centre and size but replace the angle with
            # atan(sin / cos) of the two angle outputs, in degrees.
            dec_x, dec_y, dec_w, dec_h, _ = tf.unstack(decoded, axis=1)
            dec_theta = tf.atan(cls_deltas[:, -2] /
                                cls_deltas[:, -1]) * 180 / 3.1415926
            decoded = tf.transpose(
                tf.stack([dec_x, dec_y, dec_w, dec_h, dec_theta]))

            if wide_angle:
                decoded = tf.py_func(coordinate_present_convert,
                                     inp=[decoded, 1, False],
                                     Tout=[tf.float32])
                decoded = tf.reshape(decoded, [-1, 5])

            picked = nms_rotate.nms_rotate(
                decode_boxes=decoded,
                scores=cls_scores,
                iou_threshold=cfgs.NMS_IOU_THRESHOLD,
                max_output_size=nms_cap,
                use_gpu=True,
                gpu_id=gpu_id)

            kept_boxes = tf.reshape(tf.gather(decoded, picked), [-1, 5])
            kept_scores = tf.reshape(tf.gather(cls_scores, picked), [-1])

            boxes_per_class.append(kept_boxes)
            scores_per_class.append(kept_scores)
            # Labels are 1-based.
            labels_per_class.append(
                tf.ones_like(kept_scores) * (cls_idx + 1))

        all_boxes = tf.concat(boxes_per_class, axis=0)
        all_scores = tf.concat(scores_per_class, axis=0)
        all_labels = tf.concat(labels_per_class, axis=0)

        return all_boxes, all_scores, all_labels