def build_whole_detection_network(self, input_img_batch, gtboxes_batch):
        """Build the full detection graph: backbone, RPN, proposal sampling and Fast-RCNN head.

        This variant works with 5-value boxes: the RPN box head emits
        ``num_anchors_per_location * 5`` channels and the anchors come from
        ``make_rotate_anchors`` (presumably rotated boxes with an angle as the
        fifth value -- confirm against the anchor helper).

        Args:
            input_img_batch: Image tensor fed to ``self.build_base_network``.
            gtboxes_batch: Ground-truth boxes. Only used when
                ``self.is_training`` is true, where it is reshaped to
                ``[-1, 6]`` and cast to float32. The last column is dropped
                before the rotated-IoU computation (presumably the class
                label -- confirm against the data pipeline).

        Returns:
            Inference (``self.is_training`` false):
                ``(final_boxes, final_scores, final_category)`` as returned by
                ``self.postprocess_fastrcnn``.
            Training:
                ``(final_boxes, final_scores, final_category, loss_dict)``
                where ``loss_dict`` is the result of ``self.build_loss``.
        """

        if self.is_training:
            # ensure shape is [M, 6] (the last column is sliced off before the IoU calls below)
            gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 6])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        feature_to_cropped = self.build_base_network(input_img_batch)

        # 2. build rpn
        # A shared 3x3 conv followed by two sibling 1x1 heads: 2 class logits and
        # 5 box values per anchor. All variables in this scope get L2 regularization.
        with tf.variable_scope('build_rpn', regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY)):

            rpn_conv3x3 = slim.conv2d(
                feature_to_cropped, 512, [3, 3],
                trainable=self.is_training, weights_initializer=cfgs.INITIALIZER,
                activation_fn=tf.nn.relu,
                scope='rpn_conv/3x3')
            rpn_cls_score = slim.conv2d(rpn_conv3x3, self.num_anchors_per_location*2, [1, 1], stride=1,
                                        trainable=self.is_training, weights_initializer=cfgs.INITIALIZER,
                                        activation_fn=None,
                                        scope='rpn_cls_score')
            rpn_box_pred = slim.conv2d(rpn_conv3x3, self.num_anchors_per_location*5, [1, 1], stride=1,
                                       trainable=self.is_training, weights_initializer=cfgs.BBOX_INITIALIZER,
                                       activation_fn=None,
                                       scope='rpn_bbox_pred')
            # Flatten to one row per anchor so the rows line up with `anchors` below.
            rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 5])
            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

        # 3. generate_anchors
        # The feature-map size is read dynamically from the graph and cast to float32
        # before being handed to the anchor generator.
        featuremap_height, featuremap_width = tf.shape(feature_to_cropped)[1], tf.shape(feature_to_cropped)[2]
        featuremap_height = tf.cast(featuremap_height, tf.float32)
        featuremap_width = tf.cast(featuremap_width, tf.float32)

        anchors = make_rotate_anchors.make_anchors(base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
                                                   anchor_scales=cfgs.ANCHOR_SCALES,
                                                   anchor_ratios=cfgs.ANCHOR_RATIOS,
                                                   anchor_angles=cfgs.ANCHOR_ANGLES,
                                                   featuremap_height=featuremap_height,
                                                   featuremap_width=featuremap_width,
                                                   stride=cfgs.ANCHOR_STRIDE[0],
                                                   name="make_anchors_forRPN")

        # Legacy (disabled) anchor generation via anchor_utils, kept for reference:
        # with tf.variable_scope('make_anchors'):
        #     anchors = anchor_utils.make_anchors(height=featuremap_height,
        #                                         width=featuremap_width,
        #                                         feat_stride=cfgs.ANCHOR_STRIDE[0],
        #                                         anchor_scales=cfgs.ANCHOR_SCALES,
        #                                         anchor_ratios=cfgs.ANCHOR_RATIOS, base_size=16
        #                                         )

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        with tf.variable_scope('postprocess_RPN'):
            # Legacy (disabled) reshape/softmax; now done inside the 'build_rpn' scope above:
            # rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2])
            # rpn_cls_prob = slim.softmax(rpn_cls_prob, scope='rpn_cls_prob')
            # rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rois, roi_scores = postprocess_rpn_proposals(rpn_bbox_pred=rpn_box_pred,
                                                         rpn_cls_prob=rpn_cls_prob,
                                                         img_shape=img_shape,
                                                         anchors=anchors,
                                                         is_training=self.is_training)
            # rois presumably has shape [-1, 5] here (it is reshaped to [-1, 5] after
            # sampling below) -- confirm against postprocess_rpn_proposals.
            # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++

            if self.is_training:
                # Image summary of every proposal; the text passed in is the proposal count.
                rois_in_img = show_box_in_tensor.draw_box_with_color_rotate(img_batch=input_img_batch,
                                                                            boxes=rois,
                                                                            text=tf.shape(rois)[0])
                tf.summary.image('all_rpn_rois', rois_in_img)

                # Image summary restricted to proposals whose score is >= 0.5.
                score_gre_05 = tf.reshape(tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
                score_gre_05_rois = tf.gather(rois, score_gre_05)
                # NOTE(review): score_gre_05_score is not used anywhere in this method.
                score_gre_05_score = tf.gather(roi_scores, score_gre_05)
                score_gre_05_in_img = show_box_in_tensor.draw_box_with_color_rotate(img_batch=input_img_batch,
                                                                                    boxes=score_gre_05_rois,
                                                                                    text=tf.shape(score_gre_05_rois)[0])
                tf.summary.image('score_greater_05_rois', score_gre_05_in_img)
            # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):

                # overlaps between the anchors and the gt boxes
                # (the last gt column is excluded from the IoU computation)
                overlaps = iou_rotate.iou_rotate_calculate(anchors, gtboxes_batch[:, :-1], use_gpu=True, gpu_id=0)

                # anchor_target_layer runs as a Python function inside the graph, so the
                # static shapes of its outputs are unknown and are restored by the
                # reshapes that follow.
                rpn_labels, rpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gtboxes_batch, anchors, overlaps],
                        [tf.float32, tf.float32])
                rpn_bbox_targets = tf.reshape(rpn_bbox_targets, [-1, 5])
                rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
                rpn_labels = tf.reshape(rpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels)

            # --------------------------------------add smry-----------------------------------------------------------

            # RPN accuracy over the anchors whose label is not -1
            # (presumably -1 marks anchors ignored during sampling -- confirm).
            rpn_cls_category = tf.argmax(rpn_cls_prob, axis=1)
            kept_rpppn = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)), [-1])
            rpn_cls_category = tf.gather(rpn_cls_category, kept_rpppn)
            acc = tf.reduce_mean(tf.to_float(tf.equal(rpn_cls_category, tf.to_int64(tf.gather(rpn_labels, kept_rpppn)))))
            tf.summary.scalar('ACC/rpn_accuracy', acc)

            # Ops created below carry a control dependency on rpn_labels, i.e. the
            # RCNN sampling runs only after the anchor targets have been computed.
            with tf.control_dependencies([rpn_labels]):
                with tf.variable_scope('sample_RCNN_minibatch'):

                    overlaps = iou_rotate.iou_rotate_calculate(rois, gtboxes_batch[:, :-1], use_gpu=True, gpu_id=0)

                    # `rois` is rebound here to the subset/ordering returned by
                    # proposal_target_layer; labels and bbox_targets are row-aligned with it.
                    rois, labels, bbox_targets = \
                    tf.py_func(proposal_target_layer,
                               [rois, gtboxes_batch, overlaps],
                               [tf.float32, tf.float32, tf.float32])

                    rois = tf.reshape(rois, [-1, 5])
                    labels = tf.to_int32(labels)
                    labels = tf.reshape(labels, [-1])
                    bbox_targets = tf.reshape(bbox_targets, [-1, 5*(cfgs.CLASS_NUM+1)])
                    self.add_roi_batch_img_smry(input_img_batch, rois, labels)

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)
        bbox_pred, cls_score = self.build_fastrcnn(feature_to_cropped=feature_to_cropped,
                                                   rois=rois,
                                                   img_shape=img_shape)

        # bbox_pred shape: presumably [-1, 5*(cls_num+1)], matching bbox_targets above -- confirm.
        # cls_score shape: presumably [-1, cls_num+1] -- confirm against build_fastrcnn.

        cls_prob = slim.softmax(cls_score, 'cls_prob')

        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            # Classification accuracy of the Fast-RCNN head on the sampled rois.
            cls_category = tf.argmax(cls_prob, axis=1)
            fast_acc = tf.reduce_mean(tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc', fast_acc)

        #  6. postprocess_fastrcnn
        if not self.is_training:
            final_boxes, final_scores, final_category = self.postprocess_fastrcnn(rois=rois,
                                                                                  bbox_ppred=bbox_pred,
                                                                                  scores=cls_prob,
                                                                                  img_shape=img_shape)
            return final_boxes, final_scores, final_category
        else:
            # Training: build the losses, and also run the post-processing so that
            # detections are returned alongside loss_dict.
            '''
            when trian. We need build Loss
            '''
            loss_dict = self.build_loss(rpn_box_pred=rpn_box_pred,
                                        rpn_bbox_targets=rpn_bbox_targets,
                                        rpn_cls_score=rpn_cls_score,
                                        rpn_labels=rpn_labels,
                                        bbox_pred=bbox_pred,
                                        bbox_targets=bbox_targets,
                                        cls_score=cls_score,
                                        labels=labels)

            final_boxes, final_scores, final_category = self.postprocess_fastrcnn(rois=rois,
                                                                                  bbox_ppred=bbox_pred,
                                                                                  scores=cls_prob,
                                                                                  img_shape=img_shape)

            return final_boxes, final_scores, final_category, loss_dict
    def build_whole_detection_network(self, input_img_batch, gtboxes_batch):
        """Build the full FPN detection graph: pyramid backbone, per-level RPN, sampling and Fast-RCNN head.

        The RPN heads are applied to every pyramid level returned by
        ``self.build_base_network``; per-level scores, box predictions and
        anchors are concatenated along axis 0 before proposal post-processing.

        Args:
            input_img_batch: Image tensor fed to ``self.build_base_network``.
            gtboxes_batch: Ground-truth boxes. Only used when
                ``self.is_training`` is true, where it is reshaped to
                ``[-1, 5]`` and cast to float32.

        Returns:
            Inference (``self.is_training`` false):
                whatever ``self.postprocess_fastrcnn`` returns.
            Training:
                ``(final_bbox, final_scores, final_category, loss_dict)``
                where ``loss_dict`` is the result of ``self.build_loss``.
        """

        if self.is_training:
            # ensure shape is [M, 5]
            gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        P_list = self.build_base_network(
            input_img_batch)  # [P2, P3, P4, P5, P6]

        # 2. build rpn
        # All variables in this scope get L2 regularization.
        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):

            fpn_cls_score = []
            fpn_box_pred = []
            # NOTE: `LEVLES` is the attribute name as spelled in cfgs; do not "fix" it here.
            for level_name, p in zip(cfgs.LEVLES, P_list):
                if cfgs.SHARE_HEADS:
                    # Shared heads: the first level creates the variables, every
                    # later level reuses them under the same scope names.
                    reuse_flag = None if level_name == cfgs.LEVLES[0] else True
                    scope_list = [
                        'rpn_conv/3x3', 'rpn_cls_score', 'rpn_bbox_pred'
                    ]
                else:
                    # Separate heads: each level gets its own scope names (and so
                    # its own variables), hence no reuse.
                    reuse_flag = None
                    scope_list = [
                        'rpn_conv/3x3_%s' % level_name,
                        'rpn_cls_score_%s' % level_name,
                        'rpn_bbox_pred_%s' % level_name
                    ]
                rpn_conv3x3 = slim.conv2d(p,
                                          512, [3, 3],
                                          trainable=self.is_training,
                                          weights_initializer=cfgs.INITIALIZER,
                                          padding="SAME",
                                          activation_fn=tf.nn.relu,
                                          scope=scope_list[0],
                                          reuse=reuse_flag)
                rpn_cls_score = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location * 2, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.INITIALIZER,
                    activation_fn=None,
                    padding="VALID",
                    scope=scope_list[1],
                    reuse=reuse_flag)
                rpn_box_pred = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location * 4, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.BBOX_INITIALIZER,
                    activation_fn=None,
                    padding="VALID",
                    scope=scope_list[2],
                    reuse=reuse_flag)
                # Flatten to one row per anchor before collecting across levels.
                rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
                rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])

                fpn_cls_score.append(rpn_cls_score)
                fpn_box_pred.append(rpn_box_pred)

            # Stack all levels; the level order here must match the anchor
            # concatenation order in step 3 so rows stay aligned.
            fpn_cls_score = tf.concat(fpn_cls_score,
                                      axis=0,
                                      name='fpn_cls_score')
            fpn_box_pred = tf.concat(fpn_box_pred, axis=0, name='fpn_box_pred')
            fpn_cls_prob = slim.softmax(fpn_cls_score, scope='fpn_cls_prob')

        # 3. generate_anchors
        # One anchor set per pyramid level, using that level's base size and stride.
        all_anchors = []
        for i in range(len(cfgs.LEVLES)):
            level_name, p = cfgs.LEVLES[i], P_list[i]

            p_h, p_w = tf.shape(p)[1], tf.shape(p)[2]
            featuremap_height = tf.cast(p_h, tf.float32)
            featuremap_width = tf.cast(p_w, tf.float32)
            anchors = anchor_utils.make_anchors(
                base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[i],
                anchor_scales=cfgs.ANCHOR_SCALES,
                anchor_ratios=cfgs.ANCHOR_RATIOS,
                featuremap_height=featuremap_height,
                featuremap_width=featuremap_width,
                stride=cfgs.ANCHOR_STRIDE_LIST[i],
                name="make_anchors_for%s" % level_name)
            all_anchors.append(anchors)
        all_anchors = tf.concat(all_anchors, axis=0, name='all_anchors_of_FPN')

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        with tf.variable_scope('postprocess_FPN'):
            rois, roi_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=fpn_box_pred,
                rpn_cls_prob=fpn_cls_prob,
                img_shape=img_shape,
                anchors=all_anchors,
                is_training=self.is_training)
            # rois shape [-1, 4]
            # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++

            if self.is_training:
                # Image summary of the proposals whose score is >= 0.5.
                score_gre_05 = tf.reshape(
                    tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
                score_gre_05_rois = tf.gather(rois, score_gre_05)
                score_gre_05_score = tf.gather(roi_scores, score_gre_05)
                score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_scores(
                    img_batch=input_img_batch,
                    boxes=score_gre_05_rois,
                    scores=score_gre_05_score)
                tf.summary.image('score_greater_05_rois', score_gre_05_in_img)
            # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                # anchor_target_layer runs as a Python function inside the graph, so
                # the static shapes of its outputs are restored by the reshapes below.
                fpn_labels, fpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gtboxes_batch, img_shape, all_anchors],
                        [tf.float32, tf.float32])
                # NOTE(review): fpn_bbox_targets is not used after this reshape;
                # build_loss below receives all_anchors as rpn_bbox_targets instead.
                fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
                fpn_labels = tf.to_int32(fpn_labels, name="to_int32")
                fpn_labels = tf.reshape(fpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, all_anchors,
                                         fpn_labels)

            # --------------------------------------add smry-----------------------------------------------------------

            # RPN accuracy over the anchors whose label is not -1
            # (presumably -1 marks anchors ignored during sampling -- confirm).
            fpn_cls_category = tf.argmax(fpn_cls_prob, axis=1)
            kept_rpppn = tf.reshape(tf.where(tf.not_equal(fpn_labels, -1)),
                                    [-1])
            fpn_cls_category = tf.gather(fpn_cls_category, kept_rpppn)
            acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(fpn_cls_category,
                             tf.to_int64(tf.gather(fpn_labels, kept_rpppn)))))
            tf.summary.scalar('ACC/fpn_accuracy', acc)

            # Ops created below carry a control dependency on fpn_labels, i.e. the
            # RCNN sampling runs only after the anchor targets have been computed.
            with tf.control_dependencies([fpn_labels]):
                with tf.variable_scope('sample_RCNN_minibatch'):
                    # `rois` is rebound to the sampled set returned by
                    # proposal_target_layer; labels and bbox_targets are row-aligned with it.
                    rois, labels, bbox_targets = \
                    tf.py_func(proposal_target_layer,
                               [rois, gtboxes_batch],
                               [tf.float32, tf.float32, tf.float32])
                    rois = tf.reshape(rois, [-1, 4])
                    labels = tf.to_int32(labels)
                    labels = tf.reshape(labels, [-1])
                    bbox_targets = tf.reshape(bbox_targets,
                                              [-1, 4 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(input_img_batch, rois, labels)
        # Distribute the rois over pyramid levels. In training, labels and
        # bbox_targets are passed through assign_levels too and rebound to its outputs.
        if self.is_training:
            rois_list, labels, bbox_targets = self.assign_levels(
                all_rois=rois, labels=labels, bbox_targets=bbox_targets)
        else:
            rois_list = self.assign_levels(
                all_rois=rois
            )  # rois_list: [P2_rois, P3_rois, P4_rois, P5_rois]

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)
        bbox_pred, cls_score = self.build_fastrcnn(P_list=P_list,
                                                   rois_list=rois_list,
                                                   img_shape=img_shape)
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        cls_prob = slim.softmax(cls_score, 'cls_prob')

        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            # Classification accuracy of the Fast-RCNN head on the sampled rois.
            cls_category = tf.argmax(cls_prob, axis=1)
            fast_acc = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc', fast_acc)

        # Re-concatenate the per-level rois in rois_list order for post-processing
        # (presumably the same row order as bbox_pred / cls_prob -- confirm
        # against build_fastrcnn).
        rois = tf.concat(rois_list, axis=0, name='concat_rois')
        #  6. postprocess_fastrcnn
        if not self.is_training:
            return self.postprocess_fastrcnn(rois=rois,
                                             bbox_ppred=bbox_pred,
                                             scores=cls_prob,
                                             img_shape=img_shape)
        else:
            # Training: build the losses, and also run the post-processing so that
            # detections are returned alongside loss_dict.
            '''
            when trian. We need build Loss
            '''

            # The GIoU loss requires the box predictions to be decoded first.
            # NOTE(review): the RPN predictions are decoded with cfgs.ROI_SCALE_FACTORS;
            # confirm this is the intended scale-factor set for the RPN branch.
            fpn_pred = encode_and_decode.decode_boxes(
                encoded_boxes=fpn_box_pred,
                reference_boxes=all_anchors,
                scale_factors=cfgs.ROI_SCALE_FACTORS)

            # NOTE(review): rpn_bbox_targets is given all_anchors (the reference boxes)
            # rather than fpn_bbox_targets from anchor_target_layer -- verify against
            # build_loss that this is intended.
            loss_dict = self.build_loss(rpn_box_pred=fpn_pred,
                                        rpn_bbox_targets=all_anchors,
                                        rpn_cls_score=fpn_cls_score,
                                        rpn_labels=fpn_labels,
                                        bbox_pred=bbox_pred,
                                        bbox_targets=bbox_targets,
                                        cls_score=cls_score,
                                        labels=labels)

            final_bbox, final_scores, final_category = self.postprocess_fastrcnn(
                rois=rois,
                bbox_ppred=bbox_pred,
                scores=cls_prob,
                img_shape=img_shape)
            return final_bbox, final_scores, final_category, loss_dict
# Example no. 3
# 0
    def build_whole_detection_network(self, input_img_batch, gtboxes_h_batch,
                                      gtboxes_r_batch):

        if self.is_training:
            # ensure shape is [M, 5]
            gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6])
            gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32)
            gtboxes_batch = tf.reshape(gtboxes_h_batch, [-1, 5])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        feature_to_cropped = self.build_base_network(input_img_batch)

        # 2. build rpn
        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):

            rpn_conv3x3 = slim.conv2d(feature_to_cropped,
                                      512, [3, 3],
                                      trainable=self.is_training,
                                      weights_initializer=cfgs.INITIALIZER,
                                      activation_fn=tf.nn.relu,
                                      scope='rpn_conv/3x3')
            rpn_cls_score = slim.conv2d(rpn_conv3x3,
                                        self.num_anchors_per_location * 2,
                                        [1, 1],
                                        stride=1,
                                        trainable=self.is_training,
                                        weights_initializer=cfgs.INITIALIZER,
                                        activation_fn=None,
                                        scope='rpn_cls_score')
            rpn_box_pred = slim.conv2d(
                rpn_conv3x3,
                self.num_anchors_per_location * 4, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.BBOX_INITIALIZER,
                activation_fn=None,
                scope='rpn_bbox_pred')
            rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

        # 3. generate_anchors
        featuremap_height, featuremap_width = tf.shape(
            feature_to_cropped)[1], tf.shape(feature_to_cropped)[2]
        featuremap_height = tf.cast(featuremap_height, tf.float32)
        featuremap_width = tf.cast(featuremap_width, tf.float32)

        anchors = anchor_utils.make_anchors(
            base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
            anchor_scales=cfgs.ANCHOR_SCALES,
            anchor_ratios=cfgs.ANCHOR_RATIOS,
            featuremap_height=featuremap_height,
            featuremap_width=featuremap_width,
            stride=cfgs.ANCHOR_STRIDE,
            name="make_anchors_forRPN")

        # with tf.variable_scope('make_anchors'):
        #     anchors = anchor_utils.make_anchors(height=featuremap_height,
        #                                         width=featuremap_width,
        #                                         feat_stride=cfgs.ANCHOR_STRIDE[0],
        #                                         anchor_scales=cfgs.ANCHOR_SCALES,
        #                                         anchor_ratios=cfgs.ANCHOR_RATIOS, base_size=16
        #                                         )

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        with tf.variable_scope('postprocess_RPN'):
            # rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2])
            # rpn_cls_prob = slim.softmax(rpn_cls_prob, scope='rpn_cls_prob')
            # rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rois, roi_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                img_shape=img_shape,
                anchors=anchors,
                is_training=self.is_training)
            # rois shape [-1, 4]
            # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++

            if self.is_training:
                rois_in_img = show_box_in_tensor.draw_boxes_with_scores(
                    img_batch=input_img_batch, boxes=rois, scores=roi_scores)
                tf.summary.image('all_rpn_rois', rois_in_img)

                score_gre_05 = tf.reshape(
                    tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
                score_gre_05_rois = tf.gather(rois, score_gre_05)
                score_gre_05_score = tf.gather(roi_scores, score_gre_05)
                score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_scores(
                    img_batch=input_img_batch,
                    boxes=score_gre_05_rois,
                    scores=score_gre_05_score)
                tf.summary.image('score_greater_05_rois', score_gre_05_in_img)
            # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                rpn_labels, rpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gtboxes_batch, img_shape, anchors],
                        [tf.float32, tf.float32])
                rpn_bbox_targets = tf.reshape(rpn_bbox_targets, [-1, 4])
                rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
                rpn_labels = tf.reshape(rpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels)

            # --------------------------------------add smry----------------------------------------------------------------

            rpn_cls_category = tf.argmax(rpn_cls_prob, axis=1)
            kept_rpppn = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)),
                                    [-1])
            rpn_cls_category = tf.gather(rpn_cls_category, kept_rpppn)
            acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(rpn_cls_category,
                             tf.to_int64(tf.gather(rpn_labels, kept_rpppn)))))
            tf.summary.scalar('ACC/rpn_accuracy', acc)

            with tf.control_dependencies([rpn_labels]):
                with tf.variable_scope('sample_RCNN_minibatch_stage1'):
                    stage1_rois, stage1_labels,  stage1_bbox_targets = \
                    tf.py_func(proposal_target_layer_3,
                               [rois, rois, gtboxes_batch, gtboxes_r_batch, cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD[0]],
                               [tf.float32, tf.float32, tf.float32])
                    stage1_rois = tf.reshape(stage1_rois, [-1, 4])
                    stage1_labels = tf.to_int32(stage1_labels)
                    stage1_labels = tf.reshape(stage1_labels, [-1])
                    stage1_bbox_targets = tf.reshape(
                        stage1_bbox_targets, [-1, 5 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(input_img_batch, stage1_rois,
                                                stage1_labels, 'stage1')

                    #stage1_bbox_targets_h = boxes_utils.get_horizen_minAreaRectangle(stage1_bbox_targets, False)
        else:
            stage1_rois = rois

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN-before1                                                 #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN-before1
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)

        stage1_bbox_pred_fliter, stage1_bbox_pred, stage1_cls_score = self.build_fastrcnn(
            feature_to_cropped=feature_to_cropped,
            rois=stage1_rois,
            img_shape=img_shape,
            scope='stage1')
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        stage1_cls_prob = slim.softmax(stage1_cls_score, 'stage1_cls_prob')
        stage1_cls_category = tf.argmax(stage1_cls_prob, axis=1)
        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            stage1_fast_acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(stage1_cls_category, tf.to_int64(stage1_labels))))
            tf.summary.scalar('ACC/stage1_fast_acc', stage1_fast_acc)

        #  postprocess_fastrcnn_before1
        # return x,y,w,h,theta

        stage1_bbox = self.postprocess_cascade(
            rois=stage1_rois,
            bbox_ppred=stage1_bbox_pred_fliter,
            scope='stage1',
            five=False)

        #stage1_bbox_h = boxes_utils.get_horizen_minAreaRectangle(stage1_bbox, with_label=False)
        if self.is_training:

            overlaps = iou_rotate.iou_rotate_calculate(stage1_bbox,
                                                       gtboxes_r_batch[:, :-1],
                                                       use_gpu=True,
                                                       gpu_id=0)

        if self.is_training:
            with tf.control_dependencies([stage1_bbox]):
                with tf.variable_scope('sample_RCNN_minibatch_stage2'):
                    stage2_rois, stage2_labels, stage2_bbox_targets = \
                    tf.py_func(proposal_target_layer_r,
                               [stage1_bbox,gtboxes_r_batch, overlaps, cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD[1]],
                               [tf.float32,  tf.float32,tf.float32])
                    stage2_rois = tf.reshape(stage2_rois, [-1, 5])  # 斜
                    stage2_labels = tf.to_int32(stage2_labels)
                    stage2_labels = tf.reshape(stage2_labels, [-1])
                    stage2_bbox_targets = tf.reshape(
                        stage2_bbox_targets, [-1, 5 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry_rotate(input_img_batch,
                                                       stage2_rois,
                                                       stage2_labels, 'stage2')
        else:
            stage2_rois = stage1_bbox

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN-before2                                                 #
        # -------------------------------------------------------------------------------------------------------------#

        # 6. build Fast-RCNN-before2
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)
        # stage2_rois = tf.stop_gradient(stage2_rois)
        stage2_rois_h = boxes_utils.get_horizen_minAreaRectangle(
            stage2_rois, with_label=False)  ##斜变正
        stage2_bbox_pred_fliter, stage2_bbox_pred, stage2_cls_score = self.build_fastrcnn(
            feature_to_cropped=feature_to_cropped,
            rois=stage2_rois_h,
            img_shape=img_shape,
            scope='stage2')
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        stage2_cls_prob = slim.softmax(stage2_cls_score, 'stage2_cls_prob')
        stage2_cls_category = tf.argmax(stage2_cls_prob, axis=1)
        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            stage2_fast_acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(stage2_cls_category, tf.to_int64(stage2_labels))))
            tf.summary.scalar('ACC/stage2_fast_acc', stage2_fast_acc)

        #  postprocess_fastrcnn_before2
        stage2_bbox = self.postprocess_cascade(
            rois=stage2_rois,
            bbox_ppred=stage2_bbox_pred_fliter,
            scope='stage2')
        #stage2_bbox_h = boxes_utils.get_horizen_minAreaRectangle(stage2_bbox, with_label=False)
        if self.is_training:
            overlaps = iou_rotate.iou_rotate_calculate(stage2_bbox,
                                                       gtboxes_r_batch[:, :-1],
                                                       use_gpu=True,
                                                       gpu_id=0)

        if self.is_training:
            with tf.control_dependencies([stage2_bbox]):
                with tf.variable_scope('sample_RCNN_minibatch_stage3'):
                    stage3_rois, stage3_labels,  stage3_bbox_targets = \
                    tf.py_func(proposal_target_layer_r,
                               [stage2_bbox,  gtboxes_r_batch, overlaps, cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD[2]],
                               [tf.float32,  tf.float32, tf.float32])
                    stage3_rois = tf.reshape(stage3_rois, [-1, 5])
                    stage3_labels = tf.to_int32(stage3_labels)
                    stage3_labels = tf.reshape(stage3_labels, [-1])
                    stage3_bbox_targets = tf.reshape(
                        stage3_bbox_targets, [-1, 5 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry_rotate(input_img_batch,
                                                       stage3_rois,
                                                       stage3_labels, 'stage3')
        else:
            stage3_rois = stage2_bbox

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 7. build Fast-RCNN
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)
        # stage3_rois = tf.stop_gradient(stage3_rois)
        stage3_rois_h = boxes_utils.get_horizen_minAreaRectangle(
            stage3_rois, with_label=False)  ##斜变正
        stage3_bbox_pred, stage3_cls_score = self.build_fastrcnn(
            feature_to_cropped=feature_to_cropped,
            rois=stage3_rois_h,
            img_shape=img_shape,
            scope='stage3')
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        stage3_cls_prob = slim.softmax(stage3_cls_score, 'stage3_cls_prob')
        stage3_cls_category = tf.argmax(stage3_cls_prob, axis=1)

        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            fast_acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(stage3_cls_category, tf.to_int64(stage3_labels))))
            tf.summary.scalar('ACC/fast_acc', fast_acc)

        #  postprocess_fastrcnn
        if not self.is_training:
            with slim.arg_scope([
                    slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose,
                    slim.separable_conv2d, slim.fully_connected
            ],
                                reuse=True):
                _, _, final_scores_stage2 = self.build_fastrcnn(
                    feature_to_cropped=feature_to_cropped,
                    rois=stage3_rois_h,
                    img_shape=img_shape,
                    scope='stage2')
                final_scores_stage2 = slim.softmax(final_scores_stage2,
                                                   'final_scores_stage2')

                _, _, final_scores_stage1 = self.build_fastrcnn(
                    feature_to_cropped=feature_to_cropped,
                    rois=stage3_rois_h,
                    img_shape=img_shape,
                    scope='stage1')
                final_scores_stage1 = slim.softmax(final_scores_stage1,
                                                   'final_scores_stage1')
                # choose which stage to export
                cls_prob = tf.add(final_scores_stage2, final_scores_stage1)
                cls_prob = tf.add(cls_prob, stage3_cls_prob) / 3
                return self.postprocess_fastrcnn_r(rois=stage3_rois,
                                                   bbox_ppred=stage3_bbox_pred,
                                                   scores=cls_prob,
                                                   img_shape=img_shape,
                                                   scope='stage3')
        else:
            '''
            when trian. We need build Loss
            '''
            loss_dict = self.build_loss(
                rpn_box_pred=rpn_box_pred,
                rpn_bbox_targets=rpn_bbox_targets,
                rpn_cls_score=rpn_cls_score,
                rpn_labels=rpn_labels,
                bbox_pred=stage3_bbox_pred,
                bbox_targets=stage3_bbox_targets,
                stage2_bbox_pred=stage2_bbox_pred,
                stage2_bbox_targets=stage2_bbox_targets,
                stage1_bbox_pred=stage1_bbox_pred,
                stage1_bbox_targets=stage1_bbox_targets,
                cls_score=stage3_cls_score,
                labels=stage3_labels,
                stage2_cls_score=stage2_cls_score,
                stage2_labels=stage2_labels,
                stage1_cls_score=stage1_cls_score,
                stage1_labels=stage1_labels)
            final_bbox, final_scores, final_category = self.postprocess_fastrcnn_r(
                rois=stage3_rois,
                bbox_ppred=stage3_bbox_pred,
                scores=stage3_cls_prob,
                img_shape=img_shape,
                scope='stage3')
            return final_bbox, final_scores, final_category, loss_dict
Esempio n. 4
0
    def build_whole_detection_network(self, input_img_batch, gtboxes_batch,
                                      gtboxes_r_batch, gpu_id):
        """Assemble the FPN detection graph: backbone, RPN, RoI sampling, Fast-RCNN.

        The graph is built in the numbered steps marked below. When
        ``self.is_training`` is true the anchor/RoI sampling ops, accuracy
        summaries and losses are added as well.

        Args:
            input_img_batch: image tensor fed to ``self.build_base_network``
                and to the image summaries.
            gtboxes_batch: ground-truth boxes; reshaped to ``[-1, 5]`` and
                cast to float32 when training.
            gtboxes_r_batch: second ground-truth tensor; reshaped to
                ``[-1, 6]`` and cast to float32 when training (presumably the
                rotated boxes plus label -- confirm against the data reader).
            gpu_id: forwarded unchanged to ``self.postprocess_fastrcnn``.

        Returns:
            ``(final_bbox, final_scores, final_category)`` at inference time;
            the same three values followed by ``self.loss_dict`` when training.

        Raises:
            ValueError: if a supervised ground-truth mask has to be built and
                ``cfgs.MASK_TYPE`` is neither ``'h'`` nor ``'r'``.
        """

        if self.is_training:
            # ensure shape is [M, 5]
            gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

            # ensure shape is [M, 6]
            gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6])
            gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        # mask_list stays empty unless the backbone also returns mask heads.
        mask_list = []
        if cfgs.USE_SUPERVISED_MASK:
            P_list, mask_list = self.build_base_network(
                input_img_batch)  # [P2, P3, P4, P5, P6], [mask_p2, mask_p3]
        else:
            P_list = self.build_base_network(
                input_img_batch)  # [P2, P3, P4, P5, P6]

        # 2. build rpn
        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):

            fpn_cls_score = []
            fpn_box_pred = []
            for level_name, p in zip(cfgs.LEVLES, P_list):
                if cfgs.SHARE_HEADS:
                    # Shared heads: the first level creates the variables,
                    # every later level reuses them under the same scopes.
                    reuse_flag = None if level_name == cfgs.LEVLES[0] else True
                    scope_list = [
                        'rpn_conv/3x3', 'rpn_cls_score', 'rpn_bbox_pred'
                    ]
                else:
                    # Independent heads: one set of scopes per level.
                    reuse_flag = None
                    scope_list = [
                        'rpn_conv/3x3_%s' % level_name,
                        'rpn_cls_score_%s' % level_name,
                        'rpn_bbox_pred_%s' % level_name
                    ]
                rpn_conv3x3 = slim.conv2d(p,
                                          512, [3, 3],
                                          trainable=self.is_training,
                                          weights_initializer=cfgs.INITIALIZER,
                                          padding="SAME",
                                          activation_fn=tf.nn.relu,
                                          scope=scope_list[0],
                                          reuse=reuse_flag)
                rpn_cls_score = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location * 2, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.INITIALIZER,
                    activation_fn=None,
                    padding="VALID",
                    scope=scope_list[1],
                    reuse=reuse_flag)
                rpn_box_pred = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location * 4, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.BBOX_INITIALIZER,
                    activation_fn=None,
                    padding="VALID",
                    scope=scope_list[2],
                    reuse=reuse_flag)
                # Flatten to one row per anchor: 4 box deltas / 2 class logits.
                rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
                rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])

                fpn_cls_score.append(rpn_cls_score)
                fpn_box_pred.append(rpn_box_pred)

            # Stack all levels in cfgs.LEVLES order; the anchors generated in
            # step 3 are concatenated in the same order.
            fpn_cls_score = tf.concat(fpn_cls_score,
                                      axis=0,
                                      name='fpn_cls_score')
            fpn_box_pred = tf.concat(fpn_box_pred, axis=0, name='fpn_box_pred')
            fpn_cls_prob = slim.softmax(fpn_cls_score, scope='fpn_cls_prob')

        # 3. generate_anchors (and, optionally, ground-truth masks per level)
        all_anchors = []
        mask_gt_list = []
        for i, level_name in enumerate(cfgs.LEVLES):
            p = P_list[i]

            p_h, p_w = tf.shape(p)[1], tf.shape(p)[2]
            # Only the levels that have a predicted mask get a ground-truth
            # mask, and only while training.
            if cfgs.USE_SUPERVISED_MASK and i < len(
                    mask_list) and self.is_training:
                mask_type = cfgs.MASK_TYPE.strip()
                if mask_type == 'h':
                    mask = tf.py_func(
                        mask_utils.make_gt_mask,
                        [p_h, p_w, img_shape[1], img_shape[2], gtboxes_batch],
                        Tout=tf.int32)
                elif mask_type == 'r':
                    mask = tf.py_func(mask_utils.make_r_gt_mask, [
                        p_h, p_w, img_shape[1], img_shape[2], gtboxes_r_batch
                    ],
                                      Tout=tf.int32)
                else:
                    # Previously `mask` was left unbound here and the code
                    # failed a few lines later with an UnboundLocalError.
                    raise ValueError(
                        "Unsupported cfgs.MASK_TYPE %r: expected 'h' or 'r'" %
                        (cfgs.MASK_TYPE, ))
                if cfgs.BINARY_MASK:
                    # Collapse every positive value to 1.
                    mask = tf.where(tf.greater(mask, 0), tf.ones_like(mask),
                                    tf.zeros_like(mask))
                mask_gt_list.append(mask)
                mask_utils.vis_mask_tfsmry(mask, name="MASK/%s" % level_name)

            featuremap_height = tf.cast(p_h, tf.float32)
            featuremap_width = tf.cast(p_w, tf.float32)
            anchors = anchor_utils.make_anchors(
                base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[i],
                anchor_scales=cfgs.ANCHOR_SCALES,
                anchor_ratios=cfgs.ANCHOR_RATIOS,
                featuremap_height=featuremap_height,
                featuremap_width=featuremap_width,
                stride=cfgs.ANCHOR_STRIDE_LIST[i],
                name="make_anchors_for%s" % level_name)
            all_anchors.append(anchors)
        all_anchors = tf.concat(all_anchors, axis=0, name='all_anchors_of_FPN')

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        with tf.variable_scope('postprocess_FPN'):
            rois, roi_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=fpn_box_pred,
                rpn_cls_prob=fpn_cls_prob,
                img_shape=img_shape,
                anchors=all_anchors,
                is_training=self.is_training)

        if self.is_training:
            # Label anchors for the RPN loss.
            with tf.variable_scope('sample_anchors_minibatch'):
                fpn_labels, fpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gtboxes_batch, img_shape, all_anchors],
                        [tf.float32, tf.float32])
                fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
                fpn_labels = tf.to_int32(fpn_labels, name="to_int32")
                fpn_labels = tf.reshape(fpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch,
                                         all_anchors,
                                         fpn_labels,
                                         method=0)

            # --------------------------------------add smry-----------------------------------------------------------

            # RPN accuracy over the anchors whose label is not -1.
            fpn_cls_category = tf.argmax(fpn_cls_prob, axis=1)
            kept_rpppn = tf.reshape(tf.where(tf.not_equal(fpn_labels, -1)),
                                    [-1])
            fpn_cls_category = tf.gather(fpn_cls_category, kept_rpppn)
            acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(fpn_cls_category,
                             tf.to_int64(tf.gather(fpn_labels, kept_rpppn)))))
            tf.summary.scalar('ACC/fpn_accuracy', acc)

            # Sample RoIs for the Fast-RCNN head once the anchor labels exist.
            with tf.control_dependencies([fpn_labels]):
                with tf.variable_scope('sample_RCNN_minibatch'):
                    rois, labels, bbox_targets = \
                        tf.py_func(proposal_target_layer,
                                   [rois, gtboxes_batch, gtboxes_r_batch],
                                   [tf.float32, tf.float32, tf.float32])
                    rois = tf.reshape(rois, [-1, 4])
                    labels = tf.to_int32(labels)
                    labels = tf.reshape(labels, [-1])
                    # 5 regression targets per class, background included.
                    bbox_targets = tf.reshape(bbox_targets,
                                              [-1, 5 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(input_img_batch,
                                                rois,
                                                labels,
                                                method=0)

        if not cfgs.USE_CONCAT:
            # Distribute RoIs over pyramid levels; labels/targets are returned
            # again by assign_levels when training.
            if self.is_training:
                rois_list, labels, bbox_targets = self.assign_levels(
                    all_rois=rois, labels=labels, bbox_targets=bbox_targets)
            else:
                rois_list = self.assign_levels(
                    all_rois=rois
                )  # rois_list: [P2_rois, P3_rois, P4_rois, P5_rois]

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN
        if not cfgs.USE_CONCAT:
            bbox_pred, cls_score = self.build_fastrcnn(P_list=P_list,
                                                       rois_list=rois_list,
                                                       img_shape=img_shape)
            # Re-join the per-level RoIs so they line up with the predictions.
            rois = tf.concat(rois_list, axis=0, name='concat_rois')
        else:
            bbox_pred, cls_score = self.build_concat_fastrcnn(
                P_list=P_list, all_rois=rois, img_shape=img_shape)

        cls_prob = slim.softmax(cls_score, 'cls_prob')

        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            cls_category = tf.argmax(cls_prob, axis=1)
            fast_acc = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc', fast_acc)

        #  6. postprocess_fastrcnn
        if self.is_training:
            # The return value is ignored here; self.loss_dict is read below
            # (presumably populated by build_loss -- confirm).
            self.build_loss(
                rpn_box_pred=fpn_box_pred,
                rpn_bbox_targets=fpn_bbox_targets,
                rpn_cls_score=fpn_cls_score,
                rpn_labels=fpn_labels,
                bbox_pred=bbox_pred,
                bbox_targets=bbox_targets,
                cls_score=cls_score,
                labels=labels,
                mask_list=mask_list if cfgs.USE_SUPERVISED_MASK else None,
                mask_gt_list=mask_gt_list
                if cfgs.USE_SUPERVISED_MASK else None)

        final_bbox, final_scores, final_category = self.postprocess_fastrcnn(
            rois=rois, bbox_ppred=bbox_pred, scores=cls_prob, gpu_id=gpu_id)
        if self.is_training:
            return final_bbox, final_scores, final_category, self.loss_dict
        else:
            return final_bbox, final_scores, final_category
Esempio n. 5
0
    def build_whole_detection_network(self, input_img_batch, gtboxes_batch):
        """Build the FPN detection graph with per-level proposal generation.

        Each pyramid level listed in ``cfgs.LEVLES`` gets its own RPN head and
        anchors, and ``postprocess_rpn_proposals`` is run per level. The
        per-level proposals are concatenated, capped with a top-k on their
        scores, and then routed through the Fast-RCNN head.

        Args:
            input_img_batch: image tensor passed to the backbone and to the
                image summaries.
            gtboxes_batch: ground-truth boxes; reshaped to ``[-1, 5]`` and
                cast to float32 when training.

        Returns:
            At inference time, whatever ``self.postprocess_fastrcnn`` returns.
            When training, ``(final_bbox, final_scores, final_category,
            loss_dict)``.
        """
        if self.is_training:
            # Force the ground truth into a float32 [M, 5] tensor.
            gtboxes_batch = tf.cast(tf.reshape(gtboxes_batch, [-1, 5]),
                                    tf.float32)

        img_shape = tf.shape(input_img_batch)

        # Step 1: backbone. The result is indexed by level name below.
        pyramid = self.build_base_network(input_img_batch)

        # Step 2: one RPN head per pyramid level.
        box_delta_by_level = {}
        cls_score_by_level = {}
        cls_prob_by_level = {}
        with tf.variable_scope("build_FPN",
                               regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY)):
            for level in cfgs.LEVLES:
                if cfgs.SHARE_HEADS:
                    # Shared heads: the first level creates the variables and
                    # every later level reuses them.
                    reuse = None if level == cfgs.LEVLES[0] else True
                    conv_scope = 'fpn_conv/3x3'
                    cls_scope = 'fpn_cls_score'
                    box_scope = 'fpn_bbox_pred'
                else:
                    # Independent heads: scopes carry the level name.
                    reuse = None
                    conv_scope = 'fpn_conv/3x3_%s' % level
                    cls_scope = 'fpn_cls_score_%s' % level
                    box_scope = 'fpn_bbox_pred_%s' % level

                hidden = slim.conv2d(
                    pyramid[level], 512, [3, 3],
                    trainable=self.is_training,
                    weights_initializer=cfgs.INITIALIZER,
                    padding="SAME",
                    activation_fn=tf.nn.relu,
                    scope=conv_scope,
                    reuse=reuse)
                raw_cls = slim.conv2d(
                    hidden, self.num_anchors_per_location*2, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.INITIALIZER,
                    activation_fn=None,
                    padding="VALID",
                    scope=cls_scope,
                    reuse=reuse)
                raw_box = slim.conv2d(
                    hidden, self.num_anchors_per_location*4, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.BBOX_INITIALIZER,
                    activation_fn=None,
                    padding="VALID",
                    scope=box_scope,
                    reuse=reuse)

                # One row per anchor: 4 box deltas, 2 class logits.
                box_delta_by_level[level] = tf.reshape(raw_box, [-1, 4])
                level_logits = tf.reshape(raw_cls, [-1, 2])
                cls_score_by_level[level] = level_logits
                cls_prob_by_level[level] = slim.softmax(level_logits)

        # Step 3: anchors, generated level by level.
        anchors_by_level = {}
        anchors_in_order = []
        with tf.name_scope("generate_FPN_anchors"):
            for level in cfgs.LEVLES:
                feature_map = pyramid[level]
                map_h = tf.to_float(tf.shape(feature_map)[1])
                map_w = tf.to_float(tf.shape(feature_map)[2])
                # Index into the per-level config lists: last character of the
                # level name minus that of the first level (such as: 2-2, 3-2).
                cfg_idx = int(level[-1]) - int(cfgs.LEVLES[0][-1])
                level_anchors = anchor_utils.make_anchors(
                    base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[cfg_idx],
                    anchor_scales=cfgs.ANCHOR_SCALES,
                    anchor_ratios=cfgs.ANCHOR_RATIOS,
                    stride=cfgs.ANCHOR_STRIDE_LIST[cfg_idx],
                    featuremap_height=map_h,
                    featuremap_width=map_w,
                    name='%s_make_anchors' % level)
                anchors_by_level[level] = level_anchors
                anchors_in_order.append(level_anchors)
        all_anchors = tf.concat(anchors_in_order, axis=0)

        # Step 4: turn RPN outputs into proposals. Post-processing runs once
        # per level rather than once over the concatenation of all anchors.
        per_level_rois = []
        per_level_scores = []
        with tf.name_scope("postproces_fpn"):
            for level in cfgs.LEVLES:
                level_rois, level_roi_scores = postprocess_rpn_proposals(
                    rpn_bbox_pred=box_delta_by_level[level],
                    rpn_cls_prob=cls_prob_by_level[level],
                    img_shape=img_shape,
                    anchors=anchors_by_level[level],
                    is_training=self.is_training)
                per_level_rois.append(level_rois)
                per_level_scores.append(level_roi_scores)
            allrois = tf.concat(per_level_rois, axis=0)
            allrois_scores = tf.concat(per_level_scores, axis=0)

            if self.is_training:
                keep_limit = cfgs.FPN_TOP_K_PER_LEVEL_TRAIN
            else:
                keep_limit = cfgs.FPN_TOP_K_PER_LEVEL_TEST
            # Never ask top_k for more entries than there are proposals.
            topk = tf.minimum(keep_limit, tf.shape(allrois)[0])

            rois_scores, topk_indices = tf.nn.top_k(allrois_scores, k=topk)

            # Proposals are treated as constants by the Fast-RCNN stage.
            rois = tf.stop_gradient(tf.gather(allrois, topk_indices))
            rois_scores = tf.stop_gradient(rois_scores)

            if self.is_training:
                # Image summary of the proposals scoring at least 0.5.
                confident_idx = tf.reshape(
                    tf.where(tf.greater_equal(rois_scores, 0.5)), [-1])
                confident_rois = tf.gather(rois, confident_idx)
                confident_scores = tf.gather(rois_scores, confident_idx)
                confident_img = show_box_in_tensor.draw_boxes_with_scores(
                    img_batch=input_img_batch,
                    boxes=confident_rois,
                    scores=confident_scores)
                tf.summary.image('score_greater_05_rois', confident_img)

        if self.is_training:
            # Anchor sampling works on the concatenation of all levels.
            with tf.variable_scope('sample_anchors_minibatch'):
                fpn_labels, fpn_bbox_targets = tf.py_func(
                    anchor_target_layer,
                    [gtboxes_batch, img_shape, all_anchors],
                    [tf.float32, tf.float32])
                fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
                fpn_labels = tf.to_int32(fpn_labels, name="to_int32")
                fpn_labels = tf.reshape(fpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, all_anchors, fpn_labels)

            # RoI sampling is ordered after the anchor labels.
            with tf.control_dependencies([fpn_labels]), \
                    tf.variable_scope('sample_RCNN_minibatch'):
                rois, labels, bbox_targets = tf.py_func(
                    proposal_target_layer,
                    [rois, gtboxes_batch],
                    [tf.float32, tf.float32, tf.float32])
                rois = tf.reshape(rois, [-1, 4])
                labels = tf.reshape(tf.to_int32(labels), [-1])
                # 4 regression targets per class, background included.
                bbox_targets = tf.reshape(bbox_targets, [-1, 4*(cfgs.CLASS_NUM+1)])
                self.add_roi_batch_img_smry(input_img_batch, rois, labels)

            # Route RoIs to pyramid levels; labels/targets are returned again
            # by assign_levels.
            rois_list, labels, bbox_targets = self.assign_levels(
                all_rois=rois,
                labels=labels,
                bbox_targets=bbox_targets)
        else:
            # rois_list: [P2_rois, P3_rois, P4_rois, P5_rois]
            rois_list = self.assign_levels(all_rois=rois)

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # Step 5: Fast-RCNN head over the level-assigned RoIs.
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]
        bbox_pred, cls_score = self.build_fastrcnn(
            P_list=[pyramid[level] for level in cfgs.LEVLES],
            rois_list=rois_list,
            img_shape=img_shape)

        cls_prob = slim.softmax(cls_score, 'cls_prob')

        if self.is_training:
            # Classification accuracy summary for the sampled RoIs.
            cls_category = tf.argmax(cls_prob, axis=1)
            labels_int64 = tf.to_int64(labels)
            hits = tf.to_float(tf.equal(cls_category, labels_int64))
            fast_acc = tf.reduce_mean(hits)
            tf.summary.scalar('ACC/fast_acc', fast_acc)

        # Re-join the per-level RoIs so they line up with the predictions.
        rois = tf.concat(rois_list, axis=0, name='concat_rois')

        # Step 6: post-process detections.
        if not self.is_training:
            return self.postprocess_fastrcnn(rois=rois,
                                             bbox_ppred=bbox_pred,
                                             scores=cls_prob,
                                             img_shape=img_shape)

        # Training only: the losses need the RPN outputs of all levels stacked
        # in cfgs.LEVLES order, matching all_anchors.
        stacked_box_delta = tf.concat(
            [box_delta_by_level[level] for level in cfgs.LEVLES], axis=0)
        stacked_cls_score = tf.concat(
            [cls_score_by_level[level] for level in cfgs.LEVLES], axis=0)
        loss_dict = self.build_loss(rpn_box_pred=stacked_box_delta,
                                    rpn_bbox_targets=fpn_bbox_targets,
                                    rpn_cls_score=stacked_cls_score,
                                    rpn_labels=fpn_labels,
                                    bbox_pred=bbox_pred,
                                    bbox_targets=bbox_targets,
                                    cls_score=cls_score,
                                    labels=labels)

        final_bbox, final_scores, final_category = self.postprocess_fastrcnn(
            rois=rois,
            bbox_ppred=bbox_pred,
            scores=cls_prob,
            img_shape=img_shape)
        return final_bbox, final_scores, final_category, loss_dict
Esempio n. 6
0
    def build_whole_detection_network(self, input_img_batch, gtboxes_batch):

        if self.is_training:
            # ensure shape is [M, 5]
            gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        P_list = self.build_base_network(input_img_batch)

        # 2. build rpn
        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):

            fpn_cls_score = []
            fpn_box_pred = []
            for level_name, p in zip(cfgs.LEVLES, P_list):
                if cfgs.SHARE_HEADS:
                    reuse_flag = None if level_name == cfgs.LEVLES[0] else True
                    scope_list = [
                        'rpn_conv/3x3', 'rpn_cls_score', 'rpn_bbox_pred'
                    ]
                else:
                    reuse_flag = None
                    scope_list = [
                        'rpn_conv/3x3_%s' % level_name,
                        'rpn_cls_score_%s' % level_name,
                        'rpn_bbox_pred_%s' % level_name
                    ]
                rpn_conv3x3 = slim.conv2d(p,
                                          512, [3, 3],
                                          trainable=self.is_training,
                                          weights_initializer=cfgs.INITIALIZER,
                                          padding="SAME",
                                          activation_fn=tf.nn.relu,
                                          scope=scope_list[0],
                                          reuse=reuse_flag)
                rpn_cls_score = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location * 2, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.INITIALIZER,
                    activation_fn=None,
                    padding="VALID",
                    scope=scope_list[1],
                    reuse=reuse_flag)
                rpn_box_pred = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location * 4, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.BBOX_INITIALIZER,
                    activation_fn=None,
                    padding="VALID",
                    scope=scope_list[2],
                    reuse=reuse_flag)
                rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
                rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])

                fpn_cls_score.append(rpn_cls_score)
                fpn_box_pred.append(rpn_box_pred)

            fpn_cls_score = tf.concat(fpn_cls_score,
                                      axis=0,
                                      name='fpn_cls_score')
            fpn_box_pred = tf.concat(fpn_box_pred, axis=0, name='fpn_box_pred')
            fpn_cls_prob = slim.softmax(fpn_cls_score, scope='fpn_cls_prob')

        # 3. generate_anchors
        all_anchors = []
        for i in range(len(cfgs.LEVLES)):
            level_name, p = cfgs.LEVLES[i], P_list[i]

            p_h, p_w = tf.shape(p)[1], tf.shape(p)[2]

            featuremap_height = tf.cast(p_h, tf.float32)
            featuremap_width = tf.cast(p_w, tf.float32)
            anchors = anchor_utils.make_anchors(
                base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[i],
                anchor_scales=cfgs.ANCHOR_SCALES,
                anchor_ratios=cfgs.ANCHOR_RATIOS,
                featuremap_height=featuremap_height,
                featuremap_width=featuremap_width,
                stride=cfgs.ANCHOR_STRIDE_LIST[i],
                name="make_anchors_for%s" % level_name)
            all_anchors.append(anchors)
        all_anchors = tf.concat(all_anchors, axis=0, name='all_anchors_of_FPN')

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        with tf.variable_scope('postprocess_FPN'):
            rois, roi_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=fpn_box_pred,
                rpn_cls_prob=fpn_cls_prob,
                img_shape=img_shape,
                anchors=all_anchors,
                is_training=self.is_training)
            # rois shape [-1, 4]
            # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++

            if self.is_training:
                score_gre_05 = tf.reshape(
                    tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
                score_gre_05_rois = tf.gather(rois, score_gre_05)
                score_gre_05_score = tf.gather(roi_scores, score_gre_05)
                score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_scores(
                    img_batch=input_img_batch,
                    boxes=score_gre_05_rois,
                    scores=score_gre_05_score)
                tf.summary.image('score_greater_05_rois', score_gre_05_in_img)
            # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                fpn_labels, fpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gtboxes_batch, img_shape, all_anchors],
                        [tf.float32, tf.float32])
                fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
                fpn_labels = tf.to_int32(fpn_labels, name="to_int32")
                fpn_labels = tf.reshape(fpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, all_anchors,
                                         fpn_labels)

            # --------------------------------------add smry-----------------------------------------------------------

            fpn_cls_category = tf.argmax(fpn_cls_prob, axis=1)
            kept_rpppn = tf.reshape(tf.where(tf.not_equal(fpn_labels, -1)),
                                    [-1])
            fpn_cls_category = tf.gather(fpn_cls_category, kept_rpppn)
            acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(fpn_cls_category,
                             tf.to_int64(tf.gather(fpn_labels, kept_rpppn)))))
            tf.summary.scalar('ACC/fpn_accuracy', acc)

        # cascade rcnn
        total_loss_dict = {}
        cascade_bbox_pred = []
        cascade_cls_prob = []
        cascade_rois = []
        fg_thresholds = [0.5, 0.6, 0.7]
        for i in range(len(fg_thresholds)):
            if self.is_training:
                rois, bbox_pred, cls_prob, loss_dict = self.cascade_rcnn(
                    rois,
                    gtboxes_batch,
                    input_img_batch,
                    P_list,
                    img_shape,
                    fg_thresholds[i],
                    fpn_box_pred,
                    fpn_bbox_targets,
                    fpn_cls_score,
                    fpn_labels,
                    stage=i + 1)

                for k in loss_dict.keys():
                    if k not in total_loss_dict.keys():
                        total_loss_dict[k] = loss_dict[k]
                    else:
                        total_loss_dict[k] += loss_dict[k]
            else:
                rois, bbox_pred, cls_prob = self.cascade_rcnn(rois,
                                                              gtboxes_batch,
                                                              input_img_batch,
                                                              P_list,
                                                              img_shape,
                                                              fg_thresholds[i],
                                                              fpn_box_pred,
                                                              None,
                                                              fpn_cls_score,
                                                              None,
                                                              stage=i + 1)
            cascade_bbox_pred.append(bbox_pred)
            cascade_cls_prob.append(cls_prob)
            cascade_rois.append(rois)

        final_bbox, final_scores, final_category = self.postprocess_fastrcnn(
            rois=cascade_rois[-1],
            bbox_ppred=cascade_bbox_pred[-1],
            scores=cascade_cls_prob[-1],
            img_shape=img_shape)

        if self.is_training:
            return final_bbox, final_scores, final_category, total_loss_dict
        else:
            return final_bbox, final_scores, final_category
Esempio n. 7
0
    def build_whole_detection_network(self, input_img_batch, gtboxes_batch):
        """Assemble the complete R-FCN detection graph.

        Graph pieces are created in this order: base network, backbone
        specific head, RPN head, anchors, RPN proposal post-processing,
        (training only) anchor and RoI target sampling, the R-FCN head and
        finally the detection post-processing.

        Args:
            input_img_batch: image tensor handed to ``build_base_network``.
            gtboxes_batch: ground-truth boxes. Only touched when
                ``self.is_training`` is set, where it is reshaped to
                ``[-1, 5]`` and cast to float32 (presumably four box
                coordinates plus a label -- confirm against the data reader).

        Returns:
            Inference: whatever ``self.postprocess_rfcn`` returns.
            Training: ``(final_bbox, final_scores, final_category,
            loss_dict)`` with ``loss_dict`` produced by ``self.build_loss``.

        Raises:
            ValueError: if ``self.base_network_name`` starts with neither
                ``'resnet_v1'`` nor ``'MobilenetV2'``.
        """
        if self.is_training:
            # Normalise the ground truth to float32 rows of width 5.
            gtboxes_batch = tf.cast(tf.reshape(gtboxes_batch, [-1, 5]),
                                    tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1.0 backbone features consumed by the RPN
        rpn_base = self.build_base_network(input_img_batch)

        # 1.1 backbone specific head producing the R-FCN feature map
        backbone = self.base_network_name
        if backbone.startswith('resnet_v1'):
            rfcn_base = resnet.restnet_head(
                rpn_base, scope_name=backbone, is_training=self.is_training)
        elif backbone.startswith('MobilenetV2'):
            rfcn_base = mobilenet_v2.mobilenetv2_head(
                rpn_base, is_training=self.is_training)
        else:
            raise ValueError('Sorry, we only support resnet or mobilenet_v2')

        # 2. RPN head: 3x3 conv followed by two sibling 1x1 convs
        rpn_regularizer = slim.l2_regularizer(cfgs.WEIGHT_DECAY)
        with tf.variable_scope('build_rpn', regularizer=rpn_regularizer):
            rpn_hidden = slim.conv2d(
                rpn_base, 128, [3, 3],
                trainable=self.is_training,
                weights_initializer=cfgs.INITIALIZER,
                activation_fn=tf.nn.relu,
                scope='rpn_conv/3x3')
            rpn_cls_score = slim.conv2d(
                rpn_hidden, self.num_anchors_per_location * 2, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.INITIALIZER,
                activation_fn=None,
                scope='rpn_cls_score')
            rpn_box_pred = slim.conv2d(
                rpn_hidden, self.num_anchors_per_location * 4, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.BBOX_INITIALIZER,
                activation_fn=None,
                scope='rpn_bbox_pred')
            # One row per anchor: 4 regression values / 2 class scores.
            rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

        # 3. anchors laid out over the RPN feature map
        fmap_h = tf.shape(rpn_base)[1]
        fmap_w = tf.shape(rpn_base)[2]
        fmap_h = tf.cast(fmap_h, tf.float32)
        fmap_w = tf.cast(fmap_w, tf.float32)

        anchors = anchor_utils.make_anchors(
            base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
            anchor_scales=cfgs.ANCHOR_SCALES,
            anchor_ratios=cfgs.ANCHOR_RATIOS,
            featuremap_height=fmap_h,
            featuremap_width=fmap_w,
            stride=cfgs.ANCHOR_STRIDE,
            name="make_anchors_forRPN")

        # 4. turn RPN outputs into proposals (decode, clip, NMS)
        with tf.variable_scope('postprocess_RPN'):
            # rois shape [-1, 4]
            rois, roi_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                img_shape=img_shape,
                anchors=anchors,
                is_training=self.is_training)

            if self.is_training:
                # Image summary with every proposal ...
                all_rois_img = show_box_in_tensor.draw_boxes_with_scores(
                    img_batch=input_img_batch, boxes=rois, scores=roi_scores)
                tf.summary.image('all_rpn_rois', all_rois_img)

                # ... and one restricted to proposals scoring >= 0.5.
                confident_mask = tf.greater_equal(roi_scores, 0.5)
                confident_idx = tf.reshape(tf.where(confident_mask), [-1])
                confident_rois = tf.gather(rois, confident_idx)
                confident_scores = tf.gather(roi_scores, confident_idx)
                confident_img = show_box_in_tensor.draw_boxes_with_scores(
                    img_batch=input_img_batch,
                    boxes=confident_rois,
                    scores=confident_scores)
                tf.summary.image('score_greater_05_rois', confident_img)

        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                rpn_labels, rpn_bbox_targets = tf.py_func(
                    anchor_target_layer,
                    [gtboxes_batch, img_shape, anchors],
                    [tf.float32, tf.float32])
                rpn_bbox_targets = tf.reshape(rpn_bbox_targets, [-1, 4])
                rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
                rpn_labels = tf.reshape(rpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels)

            # RPN accuracy summary, computed only over anchors whose label
            # is not -1.
            rpn_predicted = tf.argmax(rpn_cls_prob, axis=1)
            kept_idx = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)), [-1])
            rpn_predicted = tf.gather(rpn_predicted, kept_idx)
            kept_labels = tf.to_int64(tf.gather(rpn_labels, kept_idx))
            rpn_accuracy = tf.reduce_mean(
                tf.to_float(tf.equal(rpn_predicted, kept_labels)))
            tf.summary.scalar('ACC/rpn_accuracy', rpn_accuracy)

            # RoI sampling is sequenced after the anchor labels exist.
            with tf.control_dependencies([rpn_labels]), \
                    tf.variable_scope('sample_RFCN_minibatch'):
                rois, labels, bbox_targets = tf.py_func(
                    proposal_target_layer,
                    [rois, gtboxes_batch],
                    [tf.float32, tf.float32, tf.float32])
                rois = tf.reshape(rois, [-1, 4])
                labels = tf.to_int32(labels)
                labels = tf.reshape(labels, [-1])
                bbox_targets = tf.reshape(
                    bbox_targets, [-1, 4 * (cfgs.CLASS_NUM + 1)])
                self.add_roi_batch_img_smry(input_img_batch, rois, labels)

        # 5. R-FCN head
        # bbox_pred shape: [-1, 4*(cls_num+1)], cls_score shape: [-1, cls_num+1]
        bbox_pred, cls_score = self.build_rfcn_head(
            rfcn_base=rfcn_base,
            rois=rois,
            img_shape=img_shape,
            bin_nums=[3, 3],
            crop_size=[9, 9])  # crop_size is the total size

        cls_prob = slim.softmax(cls_score, 'cls_prob')

        if self.is_training:
            # Classification accuracy summary of the R-FCN head.
            predicted_cls = tf.argmax(cls_prob, axis=1)
            cls_hits = tf.equal(predicted_cls, tf.to_int64(labels))
            rfcn_acc = tf.reduce_mean(tf.to_float(cls_hits))
            tf.summary.scalar('ACC/rfcn_acc', rfcn_acc)

            # The losses are only needed (and only buildable) in training.
            loss_dict = self.build_loss(rpn_box_pred=rpn_box_pred,
                                        rpn_bbox_targets=rpn_bbox_targets,
                                        rpn_cls_score=rpn_cls_score,
                                        rpn_labels=rpn_labels,
                                        bbox_pred=bbox_pred,
                                        bbox_targets=bbox_targets,
                                        cls_score=cls_score,
                                        labels=labels)

        # 6. final detections (built after the loss, as in training order)
        detections = self.postprocess_rfcn(rois=rois,
                                           bbox_ppred=bbox_pred,
                                           scores=cls_prob,
                                           img_shape=img_shape)
        if not self.is_training:
            return detections

        final_bbox, final_scores, final_category = detections
        return final_bbox, final_scores, final_category, loss_dict
    def build_whole_detection_network(self, input_img_batch, gtboxes_r_batch,
                                      gtboxes_h_batch, mask_batch):
        """Assemble the detection graph with two box heads (``h`` and ``r``).

        Graph pieces are created in this order: base network (optionally
        with an attention loss), RPN, anchors, RPN proposal post-processing,
        (training only) anchor and RoI target sampling, the Fast-RCNN heads
        and finally both detection post-processing steps.

        Args:
            input_img_batch: image tensor handed to ``build_base_network``.
            gtboxes_r_batch: ground truth for the ``r`` head; reshaped to
                ``[-1, 6]`` and cast to float32 in training (presumably
                rotated boxes plus a label -- confirm with the data reader).
            gtboxes_h_batch: ground truth for the ``h`` head; reshaped to
                ``[-1, 5]`` and cast to float32 in training. Also drives the
                RPN anchor targets.
            mask_batch: only read when ``cfgs.ADD_ATTENTION`` is set and the
                network is training, where it is passed to
                ``losses.build_attention_loss``.

        Returns:
            Inference: ``(final_boxes_h, final_scores_h, final_category_h,
            final_boxes_r, final_scores_r, final_category_r)``.
            Training: the same six values followed by ``loss_dict``, which
            additionally carries ``'attention_loss'`` when
            ``cfgs.ADD_ATTENTION`` is set.
        """
        if self.is_training:
            # gtboxes_r_batch -> [M, 6], gtboxes_h_batch -> [M, 5]
            gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6])
            gtboxes_h_batch = tf.reshape(gtboxes_h_batch, [-1, 5])
            gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32)
            gtboxes_h_batch = tf.cast(gtboxes_h_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. base network; with attention it also yields the C4 attention
        #    layer, which feeds an extra loss during training.
        if cfgs.ADD_ATTENTION:
            feature_to_cropped, C4_attention_layer = self.build_base_network(
                input_img_batch)
            if self.is_training:
                attention_regularizer = slim.l2_regularizer(cfgs.WEIGHT_DECAY)
                with tf.variable_scope('build_attention_loss',
                                       regularizer=attention_regularizer):
                    attention_loss = losses.build_attention_loss(
                        mask_batch, C4_attention_layer)
        else:
            feature_to_cropped = self.build_base_network(input_img_batch)

        # 2. RPN
        rpn_regularizer = slim.l2_regularizer(cfgs.WEIGHT_DECAY)
        with tf.variable_scope('build_rpn', regularizer=rpn_regularizer):
            rpn_cls_score, rpn_box_pred = build_rpn(
                feature_to_cropped,
                self.num_anchors_per_location,
                self.is_training)

            # One row per anchor: 4 regression values / 2 class scores.
            rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

        # 3. anchors laid out over the feature map
        fmap_h = tf.shape(feature_to_cropped)[1]
        fmap_w = tf.shape(feature_to_cropped)[2]
        fmap_h = tf.cast(fmap_h, tf.float32)
        fmap_w = tf.cast(fmap_w, tf.float32)

        anchors = anchor_utils.make_anchors(
            base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
            anchor_scales=cfgs.ANCHOR_SCALES,
            anchor_ratios=cfgs.ANCHOR_RATIOS,
            featuremap_height=fmap_h,
            featuremap_width=fmap_w,
            stride=cfgs.ANCHOR_STRIDE,
            name="make_anchors_forRPN")

        # 4. turn RPN outputs into proposals (decode, clip, NMS)
        with tf.variable_scope('postprocess_RPN'):
            # rois shape [-1, 4]
            rois, roi_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                img_shape=img_shape,
                anchors=anchors,
                is_training=self.is_training)

            if self.is_training:
                # Image summary with every proposal ...
                all_rois_img = show_box_in_tensor.draw_boxes_with_categories(
                    img_batch=input_img_batch, boxes=rois, scores=roi_scores)
                tf.summary.image('all_rpn_rois', all_rois_img)

                # ... and one restricted to proposals scoring >= 0.5.
                confident_mask = tf.greater_equal(roi_scores, 0.5)
                confident_idx = tf.reshape(tf.where(confident_mask), [-1])
                confident_rois = tf.gather(rois, confident_idx)
                confident_scores = tf.gather(roi_scores, confident_idx)
                confident_img = show_box_in_tensor.draw_boxes_with_categories(
                    img_batch=input_img_batch,
                    boxes=confident_rois,
                    scores=confident_scores)
                tf.summary.image('score_greater_05_rois', confident_img)

        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                rpn_labels, rpn_bbox_targets = tf.py_func(
                    anchor_target_layer,
                    [gtboxes_h_batch, img_shape, anchors],
                    [tf.float32, tf.float32])
                rpn_bbox_targets = tf.reshape(rpn_bbox_targets, [-1, 4])
                rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
                rpn_labels = tf.reshape(rpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels)

            # RPN accuracy summary, computed only over anchors whose label
            # is not -1.
            rpn_predicted = tf.argmax(rpn_cls_prob, axis=1)
            kept_idx = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)), [-1])
            rpn_predicted = tf.gather(rpn_predicted, kept_idx)
            kept_labels = tf.to_int64(tf.gather(rpn_labels, kept_idx))
            rpn_accuracy = tf.reduce_mean(
                tf.to_float(tf.equal(rpn_predicted, kept_labels)))
            tf.summary.scalar('ACC/rpn_accuracy', rpn_accuracy)

            # RoI sampling is sequenced after the anchor labels exist.
            with tf.control_dependencies([rpn_labels]), \
                    tf.variable_scope('sample_RCNN_minibatch'):
                rois, labels, bbox_targets_h, bbox_targets_r = tf.py_func(
                    proposal_target_layer,
                    [rois, gtboxes_h_batch, gtboxes_r_batch],
                    [tf.float32] * 4)

                rois = tf.reshape(rois, [-1, 4])
                labels = tf.to_int32(labels)
                labels = tf.reshape(labels, [-1])
                # 4 regression values per class for h, 5 per class for r.
                bbox_targets_h = tf.reshape(
                    bbox_targets_h, [-1, 4 * (cfgs.CLASS_NUM + 1)])
                bbox_targets_r = tf.reshape(
                    bbox_targets_r, [-1, 5 * (cfgs.CLASS_NUM + 1)])
                self.add_roi_batch_img_smry(input_img_batch, rois, labels)

        # 5. Fast-RCNN heads
        # bbox_pred shape: [-1, 4*(cls_num+1)], cls_score shape: [-1, cls_num+1]
        bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = build_fastrcnn(
            feature_to_cropped=feature_to_cropped,
            rois=rois,
            img_shape=img_shape,
            base_network_name=self.base_network_name,
            is_training=self.is_training)
        cls_prob_h = slim.softmax(cls_score_h, 'cls_prob_h')
        cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r')

        if self.is_training:
            # Per-head classification accuracy summaries.
            predicted_h = tf.argmax(cls_prob_h, axis=1)
            hits_h = tf.equal(predicted_h, tf.to_int64(labels))
            fast_acc_h = tf.reduce_mean(tf.to_float(hits_h))
            tf.summary.scalar('ACC/fast_acc_h', fast_acc_h)

            predicted_r = tf.argmax(cls_prob_r, axis=1)
            hits_r = tf.equal(predicted_r, tf.to_int64(labels))
            fast_acc_r = tf.reduce_mean(tf.to_float(hits_r))
            tf.summary.scalar('ACC/fast_acc_r', fast_acc_r)

            # The losses are only needed (and only buildable) in training.
            loss_dict = self.build_loss(rpn_box_pred=rpn_box_pred,
                                        rpn_bbox_targets=rpn_bbox_targets,
                                        rpn_cls_score=rpn_cls_score,
                                        rpn_labels=rpn_labels,
                                        bbox_pred_h=bbox_pred_h,
                                        bbox_targets_h=bbox_targets_h,
                                        cls_score_h=cls_score_h,
                                        bbox_pred_r=bbox_pred_r,
                                        bbox_targets_r=bbox_targets_r,
                                        cls_score_r=cls_score_r,
                                        labels=labels)

            if cfgs.ADD_ATTENTION:
                loss_dict['attention_loss'] = attention_loss

        # 6. final detections for both heads (h first, then r)
        final_boxes_h, final_scores_h, final_category_h = \
            self.postprocess_fastrcnn_h(rois=rois,
                                        bbox_ppred=bbox_pred_h,
                                        scores=cls_prob_h,
                                        img_shape=img_shape)
        final_boxes_r, final_scores_r, final_category_r = \
            self.postprocess_fastrcnn_r(rois=rois,
                                        bbox_ppred=bbox_pred_r,
                                        scores=cls_prob_r,
                                        img_shape=img_shape)

        detections = (final_boxes_h, final_scores_h, final_category_h,
                      final_boxes_r, final_scores_r, final_category_r)
        if self.is_training:
            return detections + (loss_dict,)
        return detections
Esempio n. 9
0
    def build_whole_detection_network(self, input_img_batch, gtboxes_r_batch,
                                      gtboxes_h_batch):

        if self.is_training:
            # ensure shape is [M, 5] and [M, 6]
            gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6])
            gtboxes_h_batch = tf.reshape(gtboxes_h_batch, [-1, 5])
            gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32)
            gtboxes_h_batch = tf.cast(gtboxes_h_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        C2_, C4 = self.build_base_network(input_img_batch)

        C2 = slim.conv2d(C2_,
                         num_outputs=1024,
                         kernel_size=[1, 1],
                         stride=1,
                         scope='build_C2_to_1024')

        self.feature_pyramid = {'C2': C2, 'C4': C4}

        # 2. build rpn

        rpn_all_encode_boxes = {}
        rpn_all_boxes_scores = {}
        rpn_all_cls_score = {}
        anchors = {}

        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):
            i = 0
            for level in self.level:
                rpn_conv3x3 = slim.conv2d(
                    self.feature_pyramid[level],
                    512, [3, 3],
                    trainable=self.is_training,
                    weights_initializer=cfgs.INITIALIZER,
                    activation_fn=tf.nn.relu,
                    scope='rpn_conv/3x3_{}'.format(level))
                rpn_cls_score = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location[i] * 2, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.INITIALIZER,
                    activation_fn=None,
                    scope='rpn_cls_score_{}'.format(level))
                rpn_box_pred = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location[i] * 4, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.BBOX_INITIALIZER,
                    activation_fn=None,
                    scope='rpn_bbox_pred_{}'.format(level))
                rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
                rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
                rpn_cls_prob = slim.softmax(
                    rpn_cls_score,
                    scope='rpn_cls_prob_{}'.format(level))  # do the softmax

                rpn_all_cls_score[level] = rpn_cls_score
                rpn_all_boxes_scores[level] = rpn_cls_prob  # do the softmax
                rpn_all_encode_boxes[level] = rpn_box_pred
                i += 1

        # 3. generate_anchors
        i = 0
        for level, base_anchor_size, stride in zip(self.level,
                                                   self.base_anchor_size_list,
                                                   self.stride):
            featuremap_height, featuremap_width = tf.shape(
                self.feature_pyramid[level])[1], tf.shape(
                    self.feature_pyramid[level])[2]

            featuremap_height = tf.cast(featuremap_height, tf.float32)
            featuremap_width = tf.cast(featuremap_width, tf.float32)

            #anchor_scale = tf.constant(self.anchor_scales[i], dtype=tf.float32)
            #)anchor_ratio = tf.constant(self.anchor_ratios[i], dtype=tf.float32)
            anchor_scale = self.anchor_scales[i]
            anchor_ratio = self.anchor_ratios[i]

            tmp_anchors = anchor_utils.make_anchors(
                base_anchor_size=base_anchor_size,
                anchor_scales=anchor_scale,
                anchor_ratios=anchor_ratio,
                featuremap_height=featuremap_height,
                featuremap_width=featuremap_width,
                stride=stride,
                name="make_anchors_forRPN_{}".format(level))
            tmp_anchors = tf.reshape(tmp_anchors, [-1, 4])
            anchors[level] = tmp_anchors
            i += 1

        # with tf.variable_scope('make_anchors'):
        #     anchors = anchor_utils.make_anchors(height=featuremap_height,
        #                                         width=featuremap_width,
        #                                         feat_stride=cfgs.ANCHOR_STRIDE[0],
        #                                         anchor_scales=cfgs.ANCHOR_SCALES,
        #                                         anchor_ratios=cfgs.ANCHOR_RATIOS, base_size=16
        #                                         )

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        rois = {}
        roi_scores = {}
        with tf.variable_scope('postprocess_RPN'):
            # rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2])
            # rpn_cls_prob = slim.softmax(rpn_cls_prob, scope='rpn_cls_prob')
            # rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            for level in self.level:
                rois_rpn, roi_scores_rpn = postprocess_rpn_proposals(
                    rpn_bbox_pred=rpn_all_encode_boxes[level],
                    rpn_cls_prob=rpn_all_boxes_scores[level],
                    img_shape=img_shape,
                    anchors=anchors[level],
                    is_training=self.is_training)
                # rois[level] = rois
                # roi_scores[level] = roi_scores
                # rois shape [-1, 4]
                # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++
                rois[level] = rois_rpn
                roi_scores[level] = roi_scores_rpn

                if self.is_training:
                    rois_in_img = show_box_in_tensor.draw_boxes_with_categories(
                        img_batch=input_img_batch,
                        boxes=rois_rpn,
                        scores=roi_scores_rpn)
                    tf.summary.image('all_rpn_rois_{}'.format(level),
                                     rois_in_img)

                    score_gre_05 = tf.reshape(
                        tf.where(tf.greater_equal(roi_scores_rpn, 0.5)), [-1])
                    score_gre_05_rois = tf.gather(rois_rpn, score_gre_05)
                    score_gre_05_score = tf.gather(roi_scores_rpn,
                                                   score_gre_05)
                    score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_categories(
                        img_batch=input_img_batch,
                        boxes=score_gre_05_rois,
                        scores=score_gre_05_score)
                    tf.summary.image('score_greater_05_rois_{}'.format(level),
                                     score_gre_05_in_img)
            # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

        rpn_labels = {}
        rpn_bbox_targets = {}
        labels_all = []
        labels = {}
        bbox_targets_h = {}
        bbox_targets_r = {}
        bbox_targets_all_h = []
        bbox_targets_all_r = []

        if self.is_training:
            for level in self.level:
                with tf.variable_scope(
                        'sample_anchors_minibatch_{}'.format(level)):
                    rpn_labels_one, rpn_bbox_targets_one = \
                        tf.py_func(
                            anchor_target_layer,
                            [gtboxes_h_batch, img_shape, anchors[level]],
                            [tf.float32, tf.float32])
                    rpn_bbox_targets_one = tf.reshape(rpn_bbox_targets_one,
                                                      [-1, 4])
                    rpn_labels_one = tf.to_int32(
                        rpn_labels_one, name="to_int32_{}".format(level))
                    rpn_labels_one = tf.reshape(rpn_labels_one, [-1])
                    self.add_anchor_img_smry(input_img_batch, anchors[level],
                                             rpn_labels_one)

                    # -----------------------------add to the dict-------------------------------------------------------------
                    rpn_labels[level] = rpn_labels_one
                    rpn_bbox_targets[level] = rpn_bbox_targets_one
                # --------------------------------------add smry-----------------------------------------------------------

                rpn_cls_category = tf.argmax(rpn_all_boxes_scores[level],
                                             axis=1)
                kept_rpppn = tf.reshape(
                    tf.where(tf.not_equal(rpn_labels_one, -1)), [-1])
                rpn_cls_category = tf.gather(rpn_cls_category,
                                             kept_rpppn)  # 预测
                acc = tf.reduce_mean(
                    tf.to_float(
                        tf.equal(
                            rpn_cls_category,
                            tf.to_int64(tf.gather(rpn_labels_one,
                                                  kept_rpppn)))))
                tf.summary.scalar('ACC/rpn_accuracy_{}'.format(level), acc)

                with tf.control_dependencies([rpn_labels[level]]):
                    with tf.variable_scope(
                            'sample_RCNN_minibatch_{}'.format(level)):
                        rois_, labels_, bbox_targets_h_, bbox_targets_r_ = \
                        tf.py_func(proposal_target_layer,
                                   [rois[level], gtboxes_h_batch, gtboxes_r_batch],
                                   [tf.float32, tf.float32, tf.float32, tf.float32])

                        rois_fast = tf.reshape(rois_, [-1, 4])
                        labels_fast = tf.to_int32(labels_)
                        labels_fast = tf.reshape(labels_fast, [-1])
                        bbox_targets_h_fast = tf.reshape(
                            bbox_targets_h_, [-1, 4 * (cfgs.CLASS_NUM + 1)])
                        bbox_targets_r_fast = tf.reshape(
                            bbox_targets_r_, [-1, 5 * (cfgs.CLASS_NUM + 1)])
                        self.add_roi_batch_img_smry(input_img_batch, rois_fast,
                                                    labels_fast)
                        #----------------------new_add----------------------
                        rois[level] = rois_fast
                        labels[level] = labels_fast
                        bbox_targets_h[level] = bbox_targets_h_fast
                        bbox_targets_r[level] = bbox_targets_r_fast
                        labels_all.append(labels_fast)
                        bbox_targets_all_h.append(bbox_targets_h_fast)
                        bbox_targets_all_r.append(bbox_targets_r_fast)

            fast_labels = tf.concat(labels_all, axis=0)
            fast_bbox_targets_h = tf.concat(bbox_targets_all_h, axis=0)
            fast_bbox_targets_r = tf.concat(bbox_targets_all_r, axis=0)
        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)

        bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = self.build_fastrcnn(
            feature_to_cropped=self.feature_pyramid,
            rois_all=rois,
            img_shape=img_shape)

        # 这里的feature_to_cropped是feature maps 特征图
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        cls_prob_h = slim.softmax(cls_score_h,
                                  'cls_prob_h')  # 根据代码可看到水平和旋转的处理过程是分开的
        cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r')

        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:

            cls_category_h = tf.argmax(cls_prob_h, axis=1)
            fast_acc_h = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category_h,
                                     tf.to_int64(fast_labels))))
            tf.summary.scalar('ACC/fast_acc_h', fast_acc_h)

            cls_category_r = tf.argmax(cls_prob_r, axis=1)
            fast_acc_r = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category_r,
                                     tf.to_int64(fast_labels))))
            tf.summary.scalar('ACC/fast_acc_r', fast_acc_r)

        #  6. postprocess_fastrcnn
        if not self.is_training:

            rois_all = []
            for level in self.level:
                rois_all.append(rois[level])
            rois = tf.concat(rois_all, axis=0)

            final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
                rois=rois,
                bbox_ppred=bbox_pred_h,
                scores=cls_prob_h,
                img_shape=img_shape)
            final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
                rois=rois,
                bbox_ppred=bbox_pred_r,
                scores=cls_prob_r,
                img_shape=img_shape)
            return final_boxes_h, final_scores_h, final_category_h, final_boxes_r, final_scores_r, final_category_r
        else:
            '''
            when trian. We need build Loss
            '''
            loss_dict = self.build_loss(rpn_box_pred=rpn_all_encode_boxes,
                                        rpn_bbox_targets=rpn_bbox_targets,
                                        rpn_cls_score=rpn_all_cls_score,
                                        rpn_labels=rpn_labels,
                                        bbox_pred_h=bbox_pred_h,
                                        bbox_targets_h=fast_bbox_targets_h,
                                        cls_score_h=cls_score_h,
                                        bbox_pred_r=bbox_pred_r,
                                        bbox_targets_r=fast_bbox_targets_r,
                                        cls_score_r=cls_score_r,
                                        labels=fast_labels)
            rois_all = []
            for level in self.level:
                rois_all.append(rois[level])
            rois = tf.concat(rois_all, axis=0)

            final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
                rois=rois,
                bbox_ppred=bbox_pred_h,
                scores=cls_prob_h,
                img_shape=img_shape)
            final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
                rois=rois,
                bbox_ppred=bbox_pred_r,
                scores=cls_prob_r,
                img_shape=img_shape)

            return final_boxes_h, final_scores_h, final_category_h, \
                   final_boxes_r, final_scores_r, final_category_r, loss_dict
    def build_whole_detection_network(self, input_img_batch, gtboxes_r_batch,
                                      gtboxes_h_batch):
        """Build the detection graph and return the final detections.

        This variant does not branch on ``self.is_training``: it builds no
        target-sampling, summary or loss ops, only the forward path
        base network -> RPN -> proposals -> Fast-RCNN heads -> postprocess.

        Args:
            input_img_batch: image tensor handed to ``build_base_network``.
            gtboxes_r_batch: accepted but never referenced in this body.
            gtboxes_h_batch: accepted but never referenced in this body.

        Returns:
            The 6-tuple ``(final_boxes_h, final_scores_h, final_category_h,
            final_boxes_r, final_scores_r, final_category_r)`` produced by
            ``postprocess_fastrcnn_h`` and ``postprocess_fastrcnn_r``.
        """
        img_shape = tf.shape(input_img_batch)

        # Step 1: backbone feature map, shared by the RPN and the RoI heads.
        feature_to_cropped = self.build_base_network(input_img_batch)

        # Step 2: RPN head -- one 3x3 conv feeding two sibling 1x1 convs.
        l2_weight_decay = slim.l2_regularizer(cfgs.WEIGHT_DECAY)
        with tf.variable_scope('build_rpn', regularizer=l2_weight_decay):
            rpn_shared = slim.conv2d(
                feature_to_cropped, 512, [3, 3],
                trainable=self.is_training,
                weights_initializer=cfgs.INITIALIZER,
                activation_fn=tf.nn.relu,
                scope='rpn_conv/3x3')

            # Options common to both linear (no activation) 1x1 sibling convs.
            sibling_opts = dict(stride=1,
                                trainable=self.is_training,
                                activation_fn=None)
            objectness_logits = slim.conv2d(
                rpn_shared, self.num_anchors_per_location * 2, [1, 1],
                weights_initializer=cfgs.INITIALIZER,
                scope='rpn_cls_score',
                **sibling_opts)
            box_deltas = slim.conv2d(
                rpn_shared, self.num_anchors_per_location * 4, [1, 1],
                weights_initializer=cfgs.BBOX_INITIALIZER,
                scope='rpn_bbox_pred',
                **sibling_opts)

            # Flatten to one row per anchor: 4 box values, 2 class logits.
            box_deltas = tf.reshape(box_deltas, [-1, 4])
            objectness_logits = tf.reshape(objectness_logits, [-1, 2])
            objectness_prob = slim.softmax(objectness_logits,
                                           scope='rpn_cls_prob')

        # Step 3: anchors laid out over the feature map. Axes 1 and 2 of the
        # feature tensor are read as height and width and cast to float32.
        raw_height = tf.shape(feature_to_cropped)[1]
        raw_width = tf.shape(feature_to_cropped)[2]
        featuremap_height = tf.cast(raw_height, tf.float32)
        featuremap_width = tf.cast(raw_width, tf.float32)

        anchors = anchor_utils.make_anchors(
            base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
            anchor_scales=cfgs.ANCHOR_SCALES,
            anchor_ratios=cfgs.ANCHOR_RATIOS,
            featuremap_height=featuremap_height,
            featuremap_width=featuremap_width,
            stride=cfgs.ANCHOR_STRIDE,
            name="make_anchors_forRPN")

        # Step 4: turn RPN outputs into proposals (the original comment
        # describes this as decode, clip, NMS and notes rois as [-1, 4]).
        # The proposal scores are not used any further in this variant.
        with tf.variable_scope('postprocess_RPN'):
            rois, _proposal_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=box_deltas,
                rpn_cls_prob=objectness_prob,
                img_shape=img_shape,
                anchors=anchors,
                is_training=self.is_training)

        # Step 5: Fast-RCNN heads; the "_h" and "_r" outputs are kept in
        # separate branches all the way to the return value.
        head_outputs = self.build_fastrcnn(
            feature_to_cropped=feature_to_cropped,
            rois=rois,
            img_shape=img_shape)
        bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = head_outputs

        cls_prob_h = slim.softmax(cls_score_h, 'cls_prob_h')
        cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r')

        # Step 6: postprocess each branch into final boxes/scores/categories.
        final_boxes_h, final_scores_h, final_category_h = \
            self.postprocess_fastrcnn_h(rois=rois,
                                        bbox_ppred=bbox_pred_h,
                                        scores=cls_prob_h,
                                        img_shape=img_shape)
        final_boxes_r, final_scores_r, final_category_r = \
            self.postprocess_fastrcnn_r(rois=rois,
                                        bbox_ppred=bbox_pred_r,
                                        scores=cls_prob_r,
                                        img_shape=img_shape)

        return (final_boxes_h, final_scores_h, final_category_h,
                final_boxes_r, final_scores_r, final_category_r)
# Example no. 11
# 0
    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_r_batch,
                                      gtboxes_h_batch,
                                      gtboxes_binary_map=None):
        """Build the detection graph, optionally with a PAN saliency branch.

        Stages, in order: base network, optional PAN branch (when
        ``cfgs.USE_PAN``), RPN head, anchor generation, RPN proposal
        postprocessing, (training only) anchor/RoI target sampling via
        ``tf.py_func``, Fast-RCNN heads, and final postprocessing.

        Side effects visible in this body: stores ``input_img_batch`` on
        ``self.input_img_batch`` and registers several ``tf.summary`` ops
        when ``self.is_training`` is true.

        Args:
            input_img_batch: image tensor handed to ``build_base_network``.
            gtboxes_r_batch: ground-truth boxes; in training reshaped to
                ``[-1, 6]`` and cast to float32.
            gtboxes_h_batch: ground-truth boxes; in training reshaped to
                ``[-1, 5]`` and cast to float32.
            gtboxes_binary_map: forwarded as ``binary_map`` to
                ``pan.build_pan_network`` when training with
                ``cfgs.USE_PAN``; not read otherwise.

        Returns:
            When ``self.is_training`` is false, the 6-tuple
            ``(final_boxes_h, final_scores_h, final_category_h,
            final_boxes_r, final_scores_r, final_category_r)``.
            When training, the same six values followed by ``loss_dict``
            (which also carries ``'pan_loss'`` if ``cfgs.USE_PAN``).
        """

        if self.is_training:
            # Force fixed column counts: rotated GT -> [M, 6], horizontal
            # GT -> [M, 5].
            # NOTE(review): presumably box parameters plus one label column;
            # the 6 is hard-coded while the targets below use cfgs.RBB_LEN --
            # confirm they agree.
            gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6])
            gtboxes_h_batch = tf.reshape(gtboxes_h_batch, [-1, 5])
            gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32)
            gtboxes_h_batch = tf.cast(gtboxes_h_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        feature_to_cropped = self.build_base_network(input_img_batch)
        self.input_img_batch = input_img_batch
        # add_heatmap is defined elsewhere; presumably it records a heatmap
        # summary under the given tag -- confirm.
        add_heatmap(feature_to_cropped, 'img/feature_map_before')

        # 1.5 build PAN network

        if cfgs.USE_PAN:
            pan = build_pan.PAN(base_network_name=cfgs.NET_NAME,
                                is_training=self.is_training,
                                is_only_pan=False)
            if self.is_training:
                # Training call returns three values, including the PAN loss.
                saliency_mask, pan_loss, binary_map_resize = pan.build_pan_network(
                    inputs=feature_to_cropped, binary_map=gtboxes_binary_map)
                # The halved map is only passed to add_heatmap below.
                binary_map_resize = binary_map_resize / 2
                add_heatmap(saliency_mask, 'img/saliency_mask')
                add_heatmap(binary_map_resize, 'img/binary_body_map_resize')
                # Weighted here; added to loss_dict at the end of the method.
                pan_loss = pan_loss * cfgs.PAN_LOSS_WEIGHT

            else:
                # Inference call returns only the saliency mask.
                saliency_mask = pan.build_pan_network(
                    inputs=feature_to_cropped)

            if cfgs.PAN_MULTIPLY:
                # Re-weight the features by the saliency mask; everything
                # downstream (RPN and Fast-RCNN) sees the re-weighted tensor.
                feature_to_cropped = feature_to_cropped * saliency_mask
            add_heatmap(feature_to_cropped, 'img/feature_map_after_pan')

        # 2. build rpn
        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):

            # Shared 3x3 conv feeding the two sibling 1x1 convs below.
            rpn_conv3x3 = slim.conv2d(feature_to_cropped,
                                      512, [3, 3],
                                      trainable=self.is_training,
                                      weights_initializer=cfgs.INITIALIZER,
                                      activation_fn=tf.nn.relu,
                                      scope='rpn_conv/3x3')
            # 2 logits per anchor.
            rpn_cls_score = slim.conv2d(rpn_conv3x3,
                                        self.num_anchors_per_location * 2,
                                        [1, 1],
                                        stride=1,
                                        trainable=self.is_training,
                                        weights_initializer=cfgs.INITIALIZER,
                                        activation_fn=None,
                                        scope='rpn_cls_score')
            # 4 regression values per anchor.
            rpn_box_pred = slim.conv2d(
                rpn_conv3x3,
                self.num_anchors_per_location * 4, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.BBOX_INITIALIZER,
                activation_fn=None,
                scope='rpn_bbox_pred')
            # Flatten to one row per anchor.
            rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

        # 3. generate_anchors
        # Axes 1 and 2 of the feature tensor are read as height and width.
        featuremap_height, featuremap_width = tf.shape(
            feature_to_cropped)[1], tf.shape(feature_to_cropped)[2]
        featuremap_height = tf.cast(featuremap_height, tf.float32)
        featuremap_width = tf.cast(featuremap_width, tf.float32)

        anchors = anchor_utils.make_anchors(
            base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
            anchor_scales=cfgs.ANCHOR_SCALES,
            anchor_ratios=cfgs.ANCHOR_RATIOS,
            featuremap_height=featuremap_height,
            featuremap_width=featuremap_width,
            stride=cfgs.ANCHOR_STRIDE,
            name="make_anchors_forRPN")

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        with tf.variable_scope('postprocess_RPN'):
            rois, roi_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                img_shape=img_shape,
                anchors=anchors,
                is_training=self.is_training)
            # rois shape [-1, 4]
            # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++

            if self.is_training:
                # Image summary of every proposal drawn on the input image.
                rois_in_img = show_box_in_tensor.draw_boxes_with_categories(
                    img_batch=input_img_batch, boxes=rois, scores=roi_scores)
                tf.summary.image('all_rpn_rois', rois_in_img)

                # Second summary restricted to proposals with score >= 0.5.
                score_gre_05 = tf.reshape(
                    tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
                score_gre_05_rois = tf.gather(rois, score_gre_05)
                score_gre_05_score = tf.gather(roi_scores, score_gre_05)
                score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_categories(
                    img_batch=input_img_batch,
                    boxes=score_gre_05_rois,
                    scores=score_gre_05_score)
                tf.summary.image('score_greater_05_rois', score_gre_05_in_img)
            # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                # RPN targets are produced by a Python function wrapped in
                # py_func, using the horizontal GT boxes only.
                rpn_labels, rpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gtboxes_h_batch, img_shape, anchors],
                        [tf.float32, tf.float32])
                # py_func outputs carry no static shape; reshape to pin it.
                rpn_bbox_targets = tf.reshape(rpn_bbox_targets, [-1, 4])
                rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
                rpn_labels = tf.reshape(rpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels)

            # --------------------------------------add smry-----------------------------------------------------------

            # RPN accuracy over anchors whose label is not -1.
            # NOTE(review): -1 presumably marks ignored anchors -- confirm
            # against anchor_target_layer.
            rpn_cls_category = tf.argmax(rpn_cls_prob, axis=1)
            kept_rpppn = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)),
                                    [-1])
            rpn_cls_category = tf.gather(rpn_cls_category, kept_rpppn)
            acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(rpn_cls_category,
                             tf.to_int64(tf.gather(rpn_labels, kept_rpppn)))))
            tf.summary.scalar('ACC/rpn_accuracy', acc)

            # Ops created in this context run only after rpn_labels has been
            # computed.
            with tf.control_dependencies([rpn_labels]):
                with tf.variable_scope('sample_RCNN_minibatch'):
                    # `rois` is rebound here: from this point on (training
                    # only) it holds the output of proposal_target_layer,
                    # not the raw RPN proposals.
                    rois, labels, bbox_targets_h, bbox_targets_r = \
                    tf.py_func(proposal_target_layer,
                               [rois, gtboxes_h_batch, gtboxes_r_batch],
                               [tf.float32, tf.float32, tf.float32, tf.float32])

                    rois = tf.reshape(rois, [-1, 4])
                    labels = tf.to_int32(labels)
                    labels = tf.reshape(labels, [-1])
                    # Targets are laid out per class (CLASS_NUM + 1 slots):
                    # 4 values per slot for "_h", cfgs.RBB_LEN for "_r".
                    bbox_targets_h = tf.reshape(bbox_targets_h,
                                                [-1, 4 * (cfgs.CLASS_NUM + 1)])
                    bbox_targets_r = tf.reshape(
                        bbox_targets_r,
                        [-1, cfgs.RBB_LEN * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(input_img_batch, rois, labels)

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)
        bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = self.build_fastrcnn(
            feature_to_cropped=feature_to_cropped,
            rois=rois,
            img_shape=img_shape)
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        cls_prob_h = slim.softmax(cls_score_h, 'cls_prob_h')
        cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r')

        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            # Classification accuracy of each head against the sampled labels.
            cls_category_h = tf.argmax(cls_prob_h, axis=1)
            fast_acc_h = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category_h, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc_h', fast_acc_h)

            cls_category_r = tf.argmax(cls_prob_r, axis=1)
            fast_acc_r = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category_r, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc_r', fast_acc_r)

        #  6. postprocess_fastrcnn
        if not self.is_training:
            # Inference: return detections only.
            final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
                rois=rois,
                bbox_ppred=bbox_pred_h,
                scores=cls_prob_h,
                img_shape=img_shape)
            final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
                rois=rois,
                bbox_ppred=bbox_pred_r,
                scores=cls_prob_r,
                img_shape=img_shape)
            return final_boxes_h, final_scores_h, final_category_h, final_boxes_r, final_scores_r, final_category_r
        else:
            # Training: build the losses first, then the same detections as
            # the inference branch, and return both.
            '''
            when trian. We need build Loss
            '''
            loss_dict = self.build_loss(rpn_box_pred=rpn_box_pred,
                                        rpn_bbox_targets=rpn_bbox_targets,
                                        rpn_cls_score=rpn_cls_score,
                                        rpn_labels=rpn_labels,
                                        bbox_pred_h=bbox_pred_h,
                                        bbox_targets_h=bbox_targets_h,
                                        cls_score_h=cls_score_h,
                                        bbox_pred_r=bbox_pred_r,
                                        bbox_targets_r=bbox_targets_r,
                                        cls_score_r=cls_score_r,
                                        labels=labels)
            if cfgs.USE_PAN:
                # pan_loss was bound in the USE_PAN training branch above.
                loss_dict['pan_loss'] = pan_loss

            final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
                rois=rois,
                bbox_ppred=bbox_pred_h,
                scores=cls_prob_h,
                img_shape=img_shape)
            final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
                rois=rois,
                bbox_ppred=bbox_pred_r,
                scores=cls_prob_r,
                img_shape=img_shape)

            return final_boxes_h, final_scores_h, final_category_h, \
                   final_boxes_r, final_scores_r, final_category_r, loss_dict