Code Example #1
    def build_whole_detection_network(self, input_img_batch, gtboxes_batch):

        if self.is_training:
            # ensure shape is [M, 5]
            gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        P_list = self.build_base_network(
            input_img_batch)  # [P2, P3, P4, P5, P6]

        # 2. build rpn
        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):

            fpn_cls_score = []
            fpn_box_pred = []
            for level_name, p in zip(cfgs.LEVLES, P_list):
                if cfgs.SHARE_HEADS:
                    reuse_flag = None if level_name == cfgs.LEVLES[0] else True
                    scope_list = [
                        'rpn_conv/3x3', 'rpn_cls_score', 'rpn_bbox_pred'
                    ]
                else:
                    reuse_flag = None
                    scope_list = [
                        'rpn_conv/3x3_%s' % level_name,
                        'rpn_cls_score_%s' % level_name,
                        'rpn_bbox_pred_%s' % level_name
                    ]
                rpn_conv3x3 = slim.conv2d(p,
                                          512, [3, 3],
                                          trainable=self.is_training,
                                          weights_initializer=cfgs.INITIALIZER,
                                          padding="SAME",
                                          activation_fn=tf.nn.relu,
                                          scope=scope_list[0],
                                          reuse=reuse_flag)
                rpn_cls_score = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location * 2, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.INITIALIZER,
                    activation_fn=None,
                    padding="VALID",
                    scope=scope_list[1],
                    reuse=reuse_flag)
                rpn_box_pred = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location * 4, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.BBOX_INITIALIZER,
                    activation_fn=None,
                    padding="VALID",
                    scope=scope_list[2],
                    reuse=reuse_flag)
                rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
                rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])

                fpn_cls_score.append(rpn_cls_score)
                fpn_box_pred.append(rpn_box_pred)

            fpn_cls_score = tf.concat(fpn_cls_score,
                                      axis=0,
                                      name='fpn_cls_score')
            fpn_box_pred = tf.concat(fpn_box_pred, axis=0, name='fpn_box_pred')
            fpn_cls_prob = slim.softmax(fpn_cls_score, scope='fpn_cls_prob')

        # 3. generate_anchors
        all_anchors = []
        for i in range(len(cfgs.LEVLES)):
            level_name, p = cfgs.LEVLES[i], P_list[i]

            p_h, p_w = tf.shape(p)[1], tf.shape(p)[2]
            featuremap_height = tf.cast(p_h, tf.float32)
            featuremap_width = tf.cast(p_w, tf.float32)
            anchors = anchor_utils.make_anchors(
                base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[i],
                anchor_scales=cfgs.ANCHOR_SCALES,
                anchor_ratios=cfgs.ANCHOR_RATIOS,
                featuremap_height=featuremap_height,
                featuremap_width=featuremap_width,
                stride=cfgs.ANCHOR_STRIDE_LIST[i],
                name="make_anchors_for%s" % level_name)
            all_anchors.append(anchors)
        all_anchors = tf.concat(all_anchors, axis=0, name='all_anchors_of_FPN')

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        with tf.variable_scope('postprocess_FPN'):
            rois, roi_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=fpn_box_pred,
                rpn_cls_prob=fpn_cls_prob,
                img_shape=img_shape,
                anchors=all_anchors,
                is_training=self.is_training)
            # rois shape [-1, 4]
            # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++

            if self.is_training:
                score_gre_05 = tf.reshape(
                    tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
                score_gre_05_rois = tf.gather(rois, score_gre_05)
                score_gre_05_score = tf.gather(roi_scores, score_gre_05)
                score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_scores(
                    img_batch=input_img_batch,
                    boxes=score_gre_05_rois,
                    scores=score_gre_05_score)
                tf.summary.image('score_greater_05_rois', score_gre_05_in_img)
            # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                fpn_labels, fpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gtboxes_batch, img_shape, all_anchors],
                        [tf.float32, tf.float32])
                fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
                fpn_labels = tf.to_int32(fpn_labels, name="to_int32")
                fpn_labels = tf.reshape(fpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, all_anchors,
                                         fpn_labels)

            # --------------------------------------add smry-----------------------------------------------------------

            fpn_cls_category = tf.argmax(fpn_cls_prob, axis=1)
            kept_rpppn = tf.reshape(tf.where(tf.not_equal(fpn_labels, -1)),
                                    [-1])
            fpn_cls_category = tf.gather(fpn_cls_category, kept_rpppn)
            acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(fpn_cls_category,
                             tf.to_int64(tf.gather(fpn_labels, kept_rpppn)))))
            tf.summary.scalar('ACC/fpn_accuracy', acc)

            with tf.control_dependencies([fpn_labels]):
                with tf.variable_scope('sample_RCNN_minibatch'):
                    rois, labels, bbox_targets = \
                        tf.py_func(proposal_target_layer,
                                   [rois, gtboxes_batch],
                                   [tf.float32, tf.float32, tf.float32])
                    rois = tf.reshape(rois, [-1, 4])
                    labels = tf.to_int32(labels)
                    labels = tf.reshape(labels, [-1])
                    bbox_targets = tf.reshape(bbox_targets,
                                              [-1, 4 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(input_img_batch, rois, labels)
        if self.is_training:
            rois_list, labels, bbox_targets = self.assign_levels(
                all_rois=rois, labels=labels, bbox_targets=bbox_targets)
        else:
            rois_list = self.assign_levels(
                all_rois=rois
            )  # rois_list: [P2_rois, P3_rois, P4_rois, P5_rois]

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)
        bbox_pred, cls_score = self.build_fastrcnn(P_list=P_list,
                                                   rois_list=rois_list,
                                                   img_shape=img_shape)
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        cls_prob = slim.softmax(cls_score, 'cls_prob')

        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            cls_category = tf.argmax(cls_prob, axis=1)
            fast_acc = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc', fast_acc)

        rois = tf.concat(rois_list, axis=0, name='concat_rois')
        #  6. postprocess_fastrcnn
        if not self.is_training:
            return self.postprocess_fastrcnn(rois=rois,
                                             bbox_ppred=bbox_pred,
                                             scores=cls_prob,
                                             img_shape=img_shape)
        else:
            '''
            When training, we also need to build the loss.
            '''

            # The GIoU loss needs decoded boxes, so decode the RPN box predictions first
            fpn_pred = encode_and_decode.decode_boxes(
                encoded_boxes=fpn_box_pred,
                reference_boxes=all_anchors,
                scale_factors=cfgs.ROI_SCALE_FACTORS)

            loss_dict = self.build_loss(rpn_box_pred=fpn_pred,
                                        rpn_bbox_targets=all_anchors,
                                        rpn_cls_score=fpn_cls_score,
                                        rpn_labels=fpn_labels,
                                        bbox_pred=bbox_pred,
                                        bbox_targets=bbox_targets,
                                        cls_score=cls_score,
                                        labels=labels)

            final_bbox, final_scores, final_category = self.postprocess_fastrcnn(
                rois=rois,
                bbox_ppred=bbox_pred,
                scores=cls_prob,
                img_shape=img_shape)
            return final_bbox, final_scores, final_category, loss_dict
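All of these examples feed NumPy sampling routines (anchor_target_layer, proposal_target_layer, ...) into the TF 1.x graph through tf.py_func. Below is a minimal, self-contained sketch of that pattern; the toy labelling function is a hypothetical stand-in, not the repository's anchor_target_layer.

import numpy as np
import tensorflow as tf

def toy_anchor_target_layer(gtboxes, img_shape, anchors):
    # Hypothetical stand-in, NOT the repository's anchor_target_layer:
    # label every anchor as background (0) with zero regression targets,
    # just to illustrate the py_func contract (NumPy arrays in, float32 out).
    num_anchors = anchors.shape[0]
    labels = np.zeros((num_anchors,), dtype=np.float32)
    bbox_targets = np.zeros((num_anchors, 4), dtype=np.float32)
    return labels, bbox_targets

anchors = tf.placeholder(tf.float32, [None, 4])
gtboxes = tf.placeholder(tf.float32, [None, 5])
img_shape = tf.placeholder(tf.int32, [4])

labels, bbox_targets = tf.py_func(toy_anchor_target_layer,
                                  [gtboxes, img_shape, anchors],
                                  [tf.float32, tf.float32])
# tf.py_func erases static shape information, which is why every call site
# in the examples immediately reshapes and casts its outputs.
labels = tf.reshape(tf.to_int32(labels), [-1])
bbox_targets = tf.reshape(bbox_targets, [-1, 4])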
Code Example #2
    def fpn(self, img_batch, gtboxes_batch):
        """
        Construct the FPN network.
        :param img_batch:
        :param gtboxes_batch:
        :return:
        """
        if self.is_training:
            # ensure shape is [M, 5]
            gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

        img_shape = tf.shape(img_batch)

        # step 1 build base network
        # get Pyramid feature list
        P_list = self.build_base_network(inputs_batch=img_batch)  # [P2, P3, P4, P5, P6]

        # step 2 build fpn
        fpn_cls_score, fpn_box_pred = self.build_rpn_network(P_list)
        fpn_cls_prob = slim.softmax(fpn_cls_score, scope='fpn_cls_prob')

        # step 3 generate anchor
        all_anchors = []
        for i in range(len(cfgs.LEVLES)):
            level_name, p = cfgs.LEVLES[i], P_list[i]
            # feature shape
            p_height, p_width = tf.shape(p)[1], tf.shape(p)[2]
            feature_height = tf.cast(p_height, dtype=tf.float32)
            feature_width = tf.cast(p_width, dtype=tf.float32)

            anchors = anchor_utils.make_anchors(base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[i],
                                                anchor_scales=cfgs.ANCHOR_SCALES,
                                                anchor_ratios=cfgs.ANCHOR_RATIOS,
                                                feature_height=feature_height,
                                                feature_width=feature_width,
                                                stride=cfgs.ANCHOR_STRIDE_LIST[i],
                                                name="make_anchors_for%s" % level_name)
            all_anchors.append(anchors)
        all_anchors = tf.concat(all_anchors, axis=0, name='all_anchors_of_FPN')

        # step 4 postprocess rpn proposals. such as: decode, clip and NMS
        with tf.variable_scope('postprocess_FPN'):
            rois, roi_scores = self.postprocess_rpn_proposals(
                rpn_bbox_pred=fpn_box_pred,
                rpn_cls_prob=fpn_cls_prob,
                img_shape=img_shape,
                anchors=all_anchors,
                is_training=self.is_training)
            if self.is_training:
                rois_in_img = show_box_in_tensor.draw_boxes_with_scores(img_batch=img_batch,
                                                                        boxes=rois,
                                                                        scores=roi_scores)
                tf.summary.image('all_rpn_rois', rois_in_img)

                score_gre_05 = tf.reshape(tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
                score_gre_05_rois = tf.gather(rois, score_gre_05)
                score_gre_05_score = tf.gather(roi_scores, score_gre_05)
                score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_scores(img_batch=img_batch,
                                                                                boxes=score_gre_05_rois,
                                                                                scores=score_gre_05_score)
                tf.summary.image('score_greater_05_rois', score_gre_05_in_img)
        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                fpn_labels, fpn_bbox_targets = tf.py_func(
                    anchor_target_layer,
                    [gtboxes_batch, img_shape, all_anchors],
                    [tf.float32, tf.float32])
                fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
                fpn_labels = tf.to_int32(fpn_labels, name='to_int32')
                fpn_labels = tf.reshape(fpn_labels, [-1])
                self.add_anchor_img_smry(img_batch, all_anchors, fpn_labels)

            #------------------------------------------add summary-----------------------------------------------------
            fpn_cls_category = tf.argmax(fpn_cls_prob, axis=1)
            kept_rpppn = tf.reshape(tf.where(tf.not_equal(fpn_labels, -1)), [-1])
            fpn_cls_category = tf.gather(fpn_cls_category, kept_rpppn)
            acc = tf.reduce_mean(tf.to_float(tf.equal(fpn_cls_category,
                                                      tf.to_int64(tf.gather(fpn_labels, kept_rpppn)))))
            tf.summary.scalar('ACC/fpn_accuracy', acc)

            with tf.control_dependencies([fpn_labels]):
                with tf.variable_scope('sample_RCNN_minibatch'):
                    rois, labels, bbox_targets = tf.py_func(proposal_target_layer,
                                                            [rois, gtboxes_batch],
                                                            [tf.float32, tf.float32, tf.float32])
                    rois = tf.reshape(rois, [-1, 4])
                    labels = tf.to_int32(labels)
                    labels = tf.reshape(labels, [-1])
                    bbox_targets = tf.reshape(bbox_targets, [-1, 4 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(img_batch, rois, labels)


        if self.is_training:
            rois_list, labels, bbox_targets = self.assign_levels(all_rois=rois,
                                                                 labels=labels,
                                                                 bbox_targets=bbox_targets)
        else:
            rois_list = self.assign_levels(all_rois=rois)  # rois_list: [P2_rois, P3_rois, P4_rois, P5_rois]

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)
        bbox_pred, cls_score = self.build_fastrcnn(P_list=P_list, rois_list=rois_list,
                                                   img_shape=img_shape)
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        cls_prob = slim.softmax(cls_score, 'cls_prob')


        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            cls_category = tf.argmax(cls_prob, axis=1)
            fast_acc = tf.reduce_mean(tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc', fast_acc)

        rois = tf.concat(rois_list, axis=0, name='concat_rois')
        #  6. postprocess_fastrcnn
        if not self.is_training:
            return self.postprocess_fastrcnn(rois=rois, bbox_ppred=bbox_pred, scores=cls_prob, img_shape=img_shape)
        else:
            '''
            When training, we also need to build the loss.
            '''
            self.loss_dict = self.build_loss(
                rpn_box_pred=fpn_box_pred,
                rpn_bbox_targets=fpn_bbox_targets,
                rpn_cls_score=fpn_cls_score,
                rpn_labels=fpn_labels,
                bbox_pred=bbox_pred,
                bbox_targets=bbox_targets,
                cls_score=cls_score,
                labels=labels)

            final_bbox, final_scores, final_category = self.postprocess_fastrcnn(rois=rois,
                                                                                 bbox_ppred=bbox_pred,
                                                                                 scores=cls_prob,
                                                                                 img_shape=img_shape)
            return final_bbox, final_scores, final_category
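Both variants route each RoI to a single pyramid level via assign_levels. The rule behind it is Eq. 1 of the FPN paper: k = floor(k0 + log2(sqrt(w*h)/224)), clipped to the available levels. A sketch under the paper's constants (k0 = 4, canonical scale 224; the repository presumably reads these from cfgs instead):

import tensorflow as tf

def assign_rois_to_levels(rois, k0=4, min_level=2, max_level=5):
    """rois: [N, 4] tensor of (xmin, ymin, xmax, ymax) in image coordinates."""
    w = rois[:, 2] - rois[:, 0]
    h = rois[:, 3] - rois[:, 1]
    scale = tf.sqrt(w * h)
    # log2(x) = ln(x) / ln(2); the epsilon guards degenerate zero-area RoIs
    k = tf.floor(k0 + tf.log(scale / 224.0 + 1e-8) / tf.log(2.0))
    return tf.clip_by_value(tf.to_int32(k), min_level, max_level)

The per-level RoI lists (P2_rois ... P5_rois) then fall out by gathering the indices where the returned level equals 2, 3, 4, and 5.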
Code Example #3
    def build_whole_detection_network(self, input_img_batch, gtboxes_batch,
                                      gtboxes_r_batch, gpu_id):

        if self.is_training:
            # ensure shape is [M, 5]
            gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

            gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6])
            gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        mask_list = []
        if cfgs.USE_SUPERVISED_MASK:
            P_list, mask_list = self.build_base_network(
                input_img_batch)  # [P2, P3, P4, P5, P6], [mask_p2, mask_p3]
        else:
            P_list = self.build_base_network(
                input_img_batch)  # [P2, P3, P4, P5, P6]

        # 2. build rpn
        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):

            fpn_cls_score = []
            fpn_box_pred = []
            for level_name, p in zip(cfgs.LEVLES, P_list):
                if cfgs.SHARE_HEADS:
                    reuse_flag = None if level_name == cfgs.LEVLES[0] else True
                    scope_list = [
                        'rpn_conv/3x3', 'rpn_cls_score', 'rpn_bbox_pred'
                    ]
                else:
                    reuse_flag = None
                    scope_list = [
                        'rpn_conv/3x3_%s' % level_name,
                        'rpn_cls_score_%s' % level_name,
                        'rpn_bbox_pred_%s' % level_name
                    ]
                rpn_conv3x3 = slim.conv2d(p,
                                          512, [3, 3],
                                          trainable=self.is_training,
                                          weights_initializer=cfgs.INITIALIZER,
                                          padding="SAME",
                                          activation_fn=tf.nn.relu,
                                          scope=scope_list[0],
                                          reuse=reuse_flag)
                rpn_cls_score = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location * 2, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.INITIALIZER,
                    activation_fn=None,
                    padding="VALID",
                    scope=scope_list[1],
                    reuse=reuse_flag)
                rpn_box_pred = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location * 4, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.BBOX_INITIALIZER,
                    activation_fn=None,
                    padding="VALID",
                    scope=scope_list[2],
                    reuse=reuse_flag)
                rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
                rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])

                fpn_cls_score.append(rpn_cls_score)
                fpn_box_pred.append(rpn_box_pred)

            fpn_cls_score = tf.concat(fpn_cls_score,
                                      axis=0,
                                      name='fpn_cls_score')
            fpn_box_pred = tf.concat(fpn_box_pred, axis=0, name='fpn_box_pred')
            fpn_cls_prob = slim.softmax(fpn_cls_score, scope='fpn_cls_prob')

        # 3. generate_anchors
        all_anchors = []
        mask_gt_list = []
        for i in range(len(cfgs.LEVLES)):
            level_name, p = cfgs.LEVLES[i], P_list[i]

            p_h, p_w = tf.shape(p)[1], tf.shape(p)[2]
            if cfgs.USE_SUPERVISED_MASK and i < len(
                    mask_list) and self.is_training:
                if cfgs.MASK_TYPE.strip() == 'h':
                    mask = tf.py_func(
                        mask_utils.make_gt_mask,
                        [p_h, p_w, img_shape[1], img_shape[2], gtboxes_batch],
                        Tout=tf.int32)
                elif cfgs.MASK_TYPE.strip() == 'r':
                    mask = tf.py_func(
                        mask_utils.make_r_gt_mask,
                        [p_h, p_w, img_shape[1], img_shape[2], gtboxes_r_batch],
                        Tout=tf.int32)
                else:
                    # guard against an unset `mask` (NameError) further down
                    raise ValueError("cfgs.MASK_TYPE must be 'h' or 'r'")
                if cfgs.BINARY_MASK:
                    mask = tf.where(tf.greater(mask, 0), tf.ones_like(mask),
                                    tf.zeros_like(mask))
                mask_gt_list.append(mask)
                mask_utils.vis_mask_tfsmry(mask, name="MASK/%s" % level_name)

            featuremap_height = tf.cast(p_h, tf.float32)
            featuremap_width = tf.cast(p_w, tf.float32)
            anchors = anchor_utils.make_anchors(
                base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[i],
                anchor_scales=cfgs.ANCHOR_SCALES,
                anchor_ratios=cfgs.ANCHOR_RATIOS,
                featuremap_height=featuremap_height,
                featuremap_width=featuremap_width,
                stride=cfgs.ANCHOR_STRIDE_LIST[i],
                name="make_anchors_for%s" % level_name)
            all_anchors.append(anchors)
        all_anchors = tf.concat(all_anchors, axis=0, name='all_anchors_of_FPN')

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        with tf.variable_scope('postprocess_FPN'):
            rois, roi_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=fpn_box_pred,
                rpn_cls_prob=fpn_cls_prob,
                img_shape=img_shape,
                anchors=all_anchors,
                is_training=self.is_training)

        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                fpn_labels, fpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gtboxes_batch, img_shape, all_anchors],
                        [tf.float32, tf.float32])
                fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
                fpn_labels = tf.to_int32(fpn_labels, name="to_int32")
                fpn_labels = tf.reshape(fpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch,
                                         all_anchors,
                                         fpn_labels,
                                         method=0)

            # --------------------------------------add smry-----------------------------------------------------------

            fpn_cls_category = tf.argmax(fpn_cls_prob, axis=1)
            kept_rpppn = tf.reshape(tf.where(tf.not_equal(fpn_labels, -1)),
                                    [-1])
            fpn_cls_category = tf.gather(fpn_cls_category, kept_rpppn)
            acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(fpn_cls_category,
                             tf.to_int64(tf.gather(fpn_labels, kept_rpppn)))))
            tf.summary.scalar('ACC/fpn_accuracy', acc)

            with tf.control_dependencies([fpn_labels]):
                with tf.variable_scope('sample_RCNN_minibatch'):
                    rois, labels, bbox_targets = \
                        tf.py_func(proposal_target_layer,
                                   [rois, gtboxes_batch, gtboxes_r_batch],
                                   [tf.float32, tf.float32, tf.float32])
                    rois = tf.reshape(rois, [-1, 4])
                    labels = tf.to_int32(labels)
                    labels = tf.reshape(labels, [-1])
                    bbox_targets = tf.reshape(bbox_targets,
                                              [-1, 5 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(input_img_batch,
                                                rois,
                                                labels,
                                                method=0)

        if not cfgs.USE_CONCAT:
            if self.is_training:
                rois_list, labels, bbox_targets = self.assign_levels(
                    all_rois=rois, labels=labels, bbox_targets=bbox_targets)
            else:
                rois_list = self.assign_levels(
                    all_rois=rois
                )  # rois_list: [P2_rois, P3_rois, P4_rois, P5_rois]

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN
        if not cfgs.USE_CONCAT:
            bbox_pred, cls_score = self.build_fastrcnn(P_list=P_list,
                                                       rois_list=rois_list,
                                                       img_shape=img_shape)
            rois = tf.concat(rois_list, axis=0, name='concat_rois')
        else:
            bbox_pred, cls_score = self.build_concat_fastrcnn(
                P_list=P_list, all_rois=rois, img_shape=img_shape)

        cls_prob = slim.softmax(cls_score, 'cls_prob')

        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            cls_category = tf.argmax(cls_prob, axis=1)
            fast_acc = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc', fast_acc)

        #  6. postprocess_fastrcnn
        if self.is_training:
            self.build_loss(
                rpn_box_pred=fpn_box_pred,
                rpn_bbox_targets=fpn_bbox_targets,
                rpn_cls_score=fpn_cls_score,
                rpn_labels=fpn_labels,
                bbox_pred=bbox_pred,
                bbox_targets=bbox_targets,
                cls_score=cls_score,
                labels=labels,
                mask_list=mask_list if cfgs.USE_SUPERVISED_MASK else None,
                mask_gt_list=mask_gt_list
                if cfgs.USE_SUPERVISED_MASK else None)

        final_bbox, final_scores, final_category = self.postprocess_fastrcnn(
            rois=rois, bbox_ppred=bbox_pred, scores=cls_prob, gpu_id=gpu_id)
        if self.is_training:
            return final_bbox, final_scores, final_category, self.loss_dict
        else:
            return final_bbox, final_scores, final_category
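Example #3 additionally supervises the early pyramid features against ground-truth masks produced inside tf.py_func. The repository's mask_utils.make_gt_mask is not shown above; the NumPy sketch below only illustrates what such a routine plausibly computes (rasterising horizontal gt boxes onto the feature grid), assuming (xmin, ymin, xmax, ymax, label) rows:

import numpy as np

def make_gt_mask_sketch(p_h, p_w, img_h, img_w, gtboxes):
    """Returns an int32 mask of shape (p_h, p_w) with class labels painted in."""
    mask = np.zeros((p_h, p_w), dtype=np.int32)
    x_scale = p_w / float(img_w)
    y_scale = p_h / float(img_h)
    for xmin, ymin, xmax, ymax, label in gtboxes:
        x0, y0 = int(np.floor(xmin * x_scale)), int(np.floor(ymin * y_scale))
        x1, y1 = int(np.ceil(xmax * x_scale)), int(np.ceil(ymax * y_scale))
        mask[y0:y1, x0:x1] = int(label)
    return mask

With cfgs.BINARY_MASK set, the tf.where call in the example then collapses these class labels into a 0/1 foreground mask.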
Code Example #4
    def build_whole_detection_network(self, input_img_batch, gtboxes_batch):

        if self.is_training:
            # ensure shape is [M, 5]
            gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        P_list = self.build_base_network(input_img_batch)

        # 2. build rpn
        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):

            fpn_cls_score = []
            fpn_box_pred = []
            for level_name, p in zip(cfgs.LEVLES, P_list):
                if cfgs.SHARE_HEADS:
                    reuse_flag = None if level_name == cfgs.LEVLES[0] else True
                    scope_list = [
                        'rpn_conv/3x3', 'rpn_cls_score', 'rpn_bbox_pred'
                    ]
                else:
                    reuse_flag = None
                    scope_list = [
                        'rpn_conv/3x3_%s' % level_name,
                        'rpn_cls_score_%s' % level_name,
                        'rpn_bbox_pred_%s' % level_name
                    ]
                rpn_conv3x3 = slim.conv2d(p,
                                          512, [3, 3],
                                          trainable=self.is_training,
                                          weights_initializer=cfgs.INITIALIZER,
                                          padding="SAME",
                                          activation_fn=tf.nn.relu,
                                          scope=scope_list[0],
                                          reuse=reuse_flag)
                rpn_cls_score = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location * 2, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.INITIALIZER,
                    activation_fn=None,
                    padding="VALID",
                    scope=scope_list[1],
                    reuse=reuse_flag)
                rpn_box_pred = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location * 4, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.BBOX_INITIALIZER,
                    activation_fn=None,
                    padding="VALID",
                    scope=scope_list[2],
                    reuse=reuse_flag)
                rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
                rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])

                fpn_cls_score.append(rpn_cls_score)
                fpn_box_pred.append(rpn_box_pred)

            fpn_cls_score = tf.concat(fpn_cls_score,
                                      axis=0,
                                      name='fpn_cls_score')
            fpn_box_pred = tf.concat(fpn_box_pred, axis=0, name='fpn_box_pred')
            fpn_cls_prob = slim.softmax(fpn_cls_score, scope='fpn_cls_prob')

        # 3. generate_anchors
        all_anchors = []
        for i in range(len(cfgs.LEVLES)):
            level_name, p = cfgs.LEVLES[i], P_list[i]

            p_h, p_w = tf.shape(p)[1], tf.shape(p)[2]

            featuremap_height = tf.cast(p_h, tf.float32)
            featuremap_width = tf.cast(p_w, tf.float32)
            anchors = anchor_utils.make_anchors(
                base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[i],
                anchor_scales=cfgs.ANCHOR_SCALES,
                anchor_ratios=cfgs.ANCHOR_RATIOS,
                featuremap_height=featuremap_height,
                featuremap_width=featuremap_width,
                stride=cfgs.ANCHOR_STRIDE_LIST[i],
                name="make_anchors_for%s" % level_name)
            all_anchors.append(anchors)
        all_anchors = tf.concat(all_anchors, axis=0, name='all_anchors_of_FPN')

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        with tf.variable_scope('postprocess_FPN'):
            rois, roi_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=fpn_box_pred,
                rpn_cls_prob=fpn_cls_prob,
                img_shape=img_shape,
                anchors=all_anchors,
                is_training=self.is_training)
            # rois shape [-1, 4]
            # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++

            if self.is_training:
                score_gre_05 = tf.reshape(
                    tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
                score_gre_05_rois = tf.gather(rois, score_gre_05)
                score_gre_05_score = tf.gather(roi_scores, score_gre_05)
                score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_scores(
                    img_batch=input_img_batch,
                    boxes=score_gre_05_rois,
                    scores=score_gre_05_score)
                tf.summary.image('score_greater_05_rois', score_gre_05_in_img)
            # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                fpn_labels, fpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gtboxes_batch, img_shape, all_anchors],
                        [tf.float32, tf.float32])
                fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
                fpn_labels = tf.to_int32(fpn_labels, name="to_int32")
                fpn_labels = tf.reshape(fpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, all_anchors,
                                         fpn_labels)

            # --------------------------------------add smry-----------------------------------------------------------

            fpn_cls_category = tf.argmax(fpn_cls_prob, axis=1)
            kept_rpppn = tf.reshape(tf.where(tf.not_equal(fpn_labels, -1)),
                                    [-1])
            fpn_cls_category = tf.gather(fpn_cls_category, kept_rpppn)
            acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(fpn_cls_category,
                             tf.to_int64(tf.gather(fpn_labels, kept_rpppn)))))
            tf.summary.scalar('ACC/fpn_accuracy', acc)

        # cascade rcnn
        total_loss_dict = {}
        cascade_bbox_pred = []
        cascade_cls_prob = []
        cascade_rois = []
        fg_thresholds = [0.5, 0.6, 0.7]
        for i in range(len(fg_thresholds)):
            if self.is_training:
                rois, bbox_pred, cls_prob, loss_dict = self.cascade_rcnn(
                    rois,
                    gtboxes_batch,
                    input_img_batch,
                    P_list,
                    img_shape,
                    fg_thresholds[i],
                    fpn_box_pred,
                    fpn_bbox_targets,
                    fpn_cls_score,
                    fpn_labels,
                    stage=i + 1)

                for k in loss_dict.keys():
                    if k not in total_loss_dict.keys():
                        total_loss_dict[k] = loss_dict[k]
                    else:
                        total_loss_dict[k] += loss_dict[k]
            else:
                rois, bbox_pred, cls_prob = self.cascade_rcnn(rois,
                                                              gtboxes_batch,
                                                              input_img_batch,
                                                              P_list,
                                                              img_shape,
                                                              fg_thresholds[i],
                                                              fpn_box_pred,
                                                              None,
                                                              fpn_cls_score,
                                                              None,
                                                              stage=i + 1)
            cascade_bbox_pred.append(bbox_pred)
            cascade_cls_prob.append(cls_prob)
            cascade_rois.append(rois)

        final_bbox, final_scores, final_category = self.postprocess_fastrcnn(
            rois=cascade_rois[-1],
            bbox_ppred=cascade_bbox_pred[-1],
            scores=cascade_cls_prob[-1],
            img_shape=img_shape)

        if self.is_training:
            return final_bbox, final_scores, final_category, total_loss_dict
        else:
            return final_bbox, final_scores, final_category
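Example #4 is the Cascade R-CNN recipe: three R-CNN stages resample the proposals at increasing foreground IoU thresholds (0.5, 0.6, 0.7), and the per-stage loss dictionaries are summed key by key. The accumulation loop can be factored into a small helper (an equivalent sketch, not the repository's code):

def merge_loss_dicts(total_losses, stage_losses):
    # Accumulate one cascade stage's loss tensors into the running totals.
    for key, value in stage_losses.items():
        total_losses[key] = total_losses.get(key, 0.0) + value
    return total_losses

# inside the cascade loop:
#     total_loss_dict = merge_loss_dicts(total_loss_dict, loss_dict)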
Code Example #5
    def build_whole_detection_network(self, input_img_batch, gtboxes_h_batch,
                                      gtboxes_r_batch):

        if self.is_training:
            # ensure shape is [M, 5]
            gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6])
            gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32)
            gtboxes_batch = tf.reshape(gtboxes_h_batch, [-1, 5])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        feature_to_cropped = self.build_base_network(input_img_batch)

        # 2. build rpn
        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):

            rpn_conv3x3 = slim.conv2d(feature_to_cropped,
                                      512, [3, 3],
                                      trainable=self.is_training,
                                      weights_initializer=cfgs.INITIALIZER,
                                      activation_fn=tf.nn.relu,
                                      scope='rpn_conv/3x3')
            rpn_cls_score = slim.conv2d(rpn_conv3x3,
                                        self.num_anchors_per_location * 2,
                                        [1, 1],
                                        stride=1,
                                        trainable=self.is_training,
                                        weights_initializer=cfgs.INITIALIZER,
                                        activation_fn=None,
                                        scope='rpn_cls_score')
            rpn_box_pred = slim.conv2d(
                rpn_conv3x3,
                self.num_anchors_per_location * 4, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.BBOX_INITIALIZER,
                activation_fn=None,
                scope='rpn_bbox_pred')
            rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

        # 3. generate_anchors
        featuremap_height, featuremap_width = tf.shape(
            feature_to_cropped)[1], tf.shape(feature_to_cropped)[2]
        featuremap_height = tf.cast(featuremap_height, tf.float32)
        featuremap_width = tf.cast(featuremap_width, tf.float32)

        anchors = anchor_utils.make_anchors(
            base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
            anchor_scales=cfgs.ANCHOR_SCALES,
            anchor_ratios=cfgs.ANCHOR_RATIOS,
            featuremap_height=featuremap_height,
            featuremap_width=featuremap_width,
            stride=cfgs.ANCHOR_STRIDE,
            name="make_anchors_forRPN")

        # with tf.variable_scope('make_anchors'):
        #     anchors = anchor_utils.make_anchors(height=featuremap_height,
        #                                         width=featuremap_width,
        #                                         feat_stride=cfgs.ANCHOR_STRIDE[0],
        #                                         anchor_scales=cfgs.ANCHOR_SCALES,
        #                                         anchor_ratios=cfgs.ANCHOR_RATIOS, base_size=16
        #                                         )

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        with tf.variable_scope('postprocess_RPN'):
            # rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2])
            # rpn_cls_prob = slim.softmax(rpn_cls_prob, scope='rpn_cls_prob')
            # rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rois, roi_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                img_shape=img_shape,
                anchors=anchors,
                is_training=self.is_training)
            # rois shape [-1, 4]
            # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++

            if self.is_training:
                rois_in_img = show_box_in_tensor.draw_boxes_with_scores(
                    img_batch=input_img_batch, boxes=rois, scores=roi_scores)
                tf.summary.image('all_rpn_rois', rois_in_img)

                score_gre_05 = tf.reshape(
                    tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
                score_gre_05_rois = tf.gather(rois, score_gre_05)
                score_gre_05_score = tf.gather(roi_scores, score_gre_05)
                score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_scores(
                    img_batch=input_img_batch,
                    boxes=score_gre_05_rois,
                    scores=score_gre_05_score)
                tf.summary.image('score_greater_05_rois', score_gre_05_in_img)
            # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                rpn_labels, rpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gtboxes_batch, img_shape, anchors],
                        [tf.float32, tf.float32])
                rpn_bbox_targets = tf.reshape(rpn_bbox_targets, [-1, 4])
                rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
                rpn_labels = tf.reshape(rpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels)

            # --------------------------------------add smry----------------------------------------------------------------

            rpn_cls_category = tf.argmax(rpn_cls_prob, axis=1)
            kept_rpppn = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)),
                                    [-1])
            rpn_cls_category = tf.gather(rpn_cls_category, kept_rpppn)
            acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(rpn_cls_category,
                             tf.to_int64(tf.gather(rpn_labels, kept_rpppn)))))
            tf.summary.scalar('ACC/rpn_accuracy', acc)

            with tf.control_dependencies([rpn_labels]):
                with tf.variable_scope('sample_RCNN_minibatch_stage1'):
                    stage1_rois, stage1_labels, stage1_bbox_targets = \
                        tf.py_func(proposal_target_layer_3,
                                   [rois, rois, gtboxes_batch, gtboxes_r_batch,
                                    cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD[0]],
                                   [tf.float32, tf.float32, tf.float32])
                    stage1_rois = tf.reshape(stage1_rois, [-1, 4])
                    stage1_labels = tf.to_int32(stage1_labels)
                    stage1_labels = tf.reshape(stage1_labels, [-1])
                    stage1_bbox_targets = tf.reshape(
                        stage1_bbox_targets, [-1, 5 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(input_img_batch, stage1_rois,
                                                stage1_labels, 'stage1')

                    #stage1_bbox_targets_h = boxes_utils.get_horizen_minAreaRectangle(stage1_bbox_targets, False)
        else:
            stage1_rois = rois

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN-before1                                                 #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN-before1
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)

        stage1_bbox_pred_fliter, stage1_bbox_pred, stage1_cls_score = self.build_fastrcnn(
            feature_to_cropped=feature_to_cropped,
            rois=stage1_rois,
            img_shape=img_shape,
            scope='stage1')
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        stage1_cls_prob = slim.softmax(stage1_cls_score, 'stage1_cls_prob')
        stage1_cls_category = tf.argmax(stage1_cls_prob, axis=1)
        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            stage1_fast_acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(stage1_cls_category, tf.to_int64(stage1_labels))))
            tf.summary.scalar('ACC/stage1_fast_acc', stage1_fast_acc)

        #  postprocess_fastrcnn_before1
        # return x,y,w,h,theta

        stage1_bbox = self.postprocess_cascade(
            rois=stage1_rois,
            bbox_ppred=stage1_bbox_pred_fliter,
            scope='stage1',
            five=False)

        #stage1_bbox_h = boxes_utils.get_horizen_minAreaRectangle(stage1_bbox, with_label=False)
        if self.is_training:

            overlaps = iou_rotate.iou_rotate_calculate(stage1_bbox,
                                                       gtboxes_r_batch[:, :-1],
                                                       use_gpu=True,
                                                       gpu_id=0)

        if self.is_training:
            with tf.control_dependencies([stage1_bbox]):
                with tf.variable_scope('sample_RCNN_minibatch_stage2'):
                    stage2_rois, stage2_labels, stage2_bbox_targets = \
                        tf.py_func(proposal_target_layer_r,
                                   [stage1_bbox, gtboxes_r_batch, overlaps,
                                    cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD[1]],
                                   [tf.float32, tf.float32, tf.float32])
                    stage2_rois = tf.reshape(stage2_rois, [-1, 5])  # rotated boxes (x, y, w, h, theta)
                    stage2_labels = tf.to_int32(stage2_labels)
                    stage2_labels = tf.reshape(stage2_labels, [-1])
                    stage2_bbox_targets = tf.reshape(
                        stage2_bbox_targets, [-1, 5 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry_rotate(input_img_batch,
                                                       stage2_rois,
                                                       stage2_labels, 'stage2')
        else:
            stage2_rois = stage1_bbox

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN-before2                                                 #
        # -------------------------------------------------------------------------------------------------------------#

        # 6. build Fast-RCNN-before2
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)
        # stage2_rois = tf.stop_gradient(stage2_rois)
        stage2_rois_h = boxes_utils.get_horizen_minAreaRectangle(
            stage2_rois, with_label=False)  # convert rotated boxes to horizontal boxes
        stage2_bbox_pred_fliter, stage2_bbox_pred, stage2_cls_score = self.build_fastrcnn(
            feature_to_cropped=feature_to_cropped,
            rois=stage2_rois_h,
            img_shape=img_shape,
            scope='stage2')
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        stage2_cls_prob = slim.softmax(stage2_cls_score, 'stage2_cls_prob')
        stage2_cls_category = tf.argmax(stage2_cls_prob, axis=1)
        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            stage2_fast_acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(stage2_cls_category, tf.to_int64(stage2_labels))))
            tf.summary.scalar('ACC/stage2_fast_acc', stage2_fast_acc)

        #  postprocess_fastrcnn_before2
        stage2_bbox = self.postprocess_cascade(
            rois=stage2_rois,
            bbox_ppred=stage2_bbox_pred_fliter,
            scope='stage2')
        #stage2_bbox_h = boxes_utils.get_horizen_minAreaRectangle(stage2_bbox, with_label=False)
        if self.is_training:
            overlaps = iou_rotate.iou_rotate_calculate(stage2_bbox,
                                                       gtboxes_r_batch[:, :-1],
                                                       use_gpu=True,
                                                       gpu_id=0)

        if self.is_training:
            with tf.control_dependencies([stage2_bbox]):
                with tf.variable_scope('sample_RCNN_minibatch_stage3'):
                    stage3_rois, stage3_labels, stage3_bbox_targets = \
                        tf.py_func(proposal_target_layer_r,
                                   [stage2_bbox, gtboxes_r_batch, overlaps,
                                    cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD[2]],
                                   [tf.float32, tf.float32, tf.float32])
                    stage3_rois = tf.reshape(stage3_rois, [-1, 5])
                    stage3_labels = tf.to_int32(stage3_labels)
                    stage3_labels = tf.reshape(stage3_labels, [-1])
                    stage3_bbox_targets = tf.reshape(
                        stage3_bbox_targets, [-1, 5 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry_rotate(input_img_batch,
                                                       stage3_rois,
                                                       stage3_labels, 'stage3')
        else:
            stage3_rois = stage2_bbox

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 7. build Fast-RCNN
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)
        # stage3_rois = tf.stop_gradient(stage3_rois)
        stage3_rois_h = boxes_utils.get_horizen_minAreaRectangle(
            stage3_rois, with_label=False)  # convert rotated boxes to horizontal boxes
        stage3_bbox_pred, stage3_cls_score = self.build_fastrcnn(
            feature_to_cropped=feature_to_cropped,
            rois=stage3_rois_h,
            img_shape=img_shape,
            scope='stage3')
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        stage3_cls_prob = slim.softmax(stage3_cls_score, 'stage3_cls_prob')
        stage3_cls_category = tf.argmax(stage3_cls_prob, axis=1)

        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            fast_acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(stage3_cls_category, tf.to_int64(stage3_labels))))
            tf.summary.scalar('ACC/fast_acc', fast_acc)

        #  postprocess_fastrcnn
        if not self.is_training:
            with slim.arg_scope([
                    slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose,
                    slim.separable_conv2d, slim.fully_connected
            ],
                                reuse=True):
                _, _, final_scores_stage2 = self.build_fastrcnn(
                    feature_to_cropped=feature_to_cropped,
                    rois=stage3_rois_h,
                    img_shape=img_shape,
                    scope='stage2')
                final_scores_stage2 = slim.softmax(final_scores_stage2,
                                                   'final_scores_stage2')

                _, _, final_scores_stage1 = self.build_fastrcnn(
                    feature_to_cropped=feature_to_cropped,
                    rois=stage3_rois_h,
                    img_shape=img_shape,
                    scope='stage1')
                final_scores_stage1 = slim.softmax(final_scores_stage1,
                                                   'final_scores_stage1')
                # choose which stage to export
                cls_prob = tf.add(final_scores_stage2, final_scores_stage1)
                cls_prob = tf.add(cls_prob, stage3_cls_prob) / 3
                return self.postprocess_fastrcnn_r(rois=stage3_rois,
                                                   bbox_ppred=stage3_bbox_pred,
                                                   scores=cls_prob,
                                                   img_shape=img_shape,
                                                   scope='stage3')
        else:
            '''
            When training, we also need to build the loss.
            '''
            loss_dict = self.build_loss(
                rpn_box_pred=rpn_box_pred,
                rpn_bbox_targets=rpn_bbox_targets,
                rpn_cls_score=rpn_cls_score,
                rpn_labels=rpn_labels,
                bbox_pred=stage3_bbox_pred,
                bbox_targets=stage3_bbox_targets,
                stage2_bbox_pred=stage2_bbox_pred,
                stage2_bbox_targets=stage2_bbox_targets,
                stage1_bbox_pred=stage1_bbox_pred,
                stage1_bbox_targets=stage1_bbox_targets,
                cls_score=stage3_cls_score,
                labels=stage3_labels,
                stage2_cls_score=stage2_cls_score,
                stage2_labels=stage2_labels,
                stage1_cls_score=stage1_cls_score,
                stage1_labels=stage1_labels)
            final_bbox, final_scores, final_category = self.postprocess_fastrcnn_r(
                rois=stage3_rois,
                bbox_ppred=stage3_bbox_pred,
                scores=stage3_cls_prob,
                img_shape=img_shape,
                scope='stage3')
            return final_bbox, final_scores, final_category, loss_dict
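Example #5's cascade alternates between rotated boxes (x_ctr, y_ctr, w, h, theta) and their axis-aligned hulls, since RoI cropping only handles horizontal rectangles. A NumPy sketch of the conversion behind get_horizen_minAreaRectangle, assuming theta is in degrees (the repository's angle convention may differ):

import numpy as np

def rotated_to_horizontal(boxes):
    """boxes: [N, 5] array of (x_ctr, y_ctr, w, h, theta_deg) rotated boxes."""
    x, y, w, h, theta = [boxes[:, i] for i in range(5)]
    rad = np.deg2rad(theta)
    cos, sin = np.abs(np.cos(rad)), np.abs(np.sin(rad))
    # half-extents of each rotated rectangle projected onto the image axes
    half_w = 0.5 * (w * cos + h * sin)
    half_h = 0.5 * (w * sin + h * cos)
    return np.stack([x - half_w, y - half_h, x + half_w, y + half_h], axis=1)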
Code Example #6
    def faster_rcnn(self, input_img_batch, gtboxes_batch):

        if self.is_training:
            # ensure shape is [M, 5]
            gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)
        # step 1 build base network
        feature_cropped = self.build_base_network(input_img_batch)
        # step 2 build rpn
        rpn_box_pred, rpn_cls_score = self.build_rpn_network(feature_cropped)
        rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')
        # step 3 make anchor
        feature_height = tf.cast(tf.shape(feature_cropped)[1],
                                 dtype=tf.float32)
        feature_width = tf.cast(tf.shape(feature_cropped)[2], dtype=tf.float32)
        # reference anchor coordinates,
        # shape: (feature_height * feature_width * num_anchors_per_location, 4)
        #++++++++++++++++++++++++++++++++++++generate anchors+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        anchors = make_anchors(base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
                               anchor_scales=cfgs.ANCHOR_SCALES,
                               anchor_ratios=cfgs.ANCHOR_RATIOS,
                               feature_height=feature_height,
                               feature_width=feature_width,
                               stride=cfgs.ANCHOR_STRIDE,
                               name='make_anchors_forRPN')
        # step 4: postprocess RPN proposals (decode, clip, NMS)
        with tf.variable_scope('postprocess_RPN'):
            rois, roi_scores = self.postprocess_rpn_proposals(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                img_shape=img_shape,
                anchors=anchors,
                is_training=self.is_training)
            # +++++++++++++++++++++++++++++++++++++add img summary++++++++++++++++++++++++++++++++++++++++++++++++++++
            if self.is_training:
                rois_in_img = show_box_in_tensor.draw_boxes_with_scores(
                    img_batch=input_img_batch, boxes=rois, scores=roi_scores)
                tf.summary.image('all_rpn_rois', rois_in_img)

                score_gre_05 = tf.reshape(
                    tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
                score_gre_05_rois = tf.gather(rois, score_gre_05)
                score_gre_05_score = tf.gather(roi_scores, score_gre_05)
                score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_scores(
                    img_batch=input_img_batch,
                    boxes=score_gre_05_rois,
                    scores=score_gre_05_score)
                tf.summary.image('score_greater_05_rois', score_gre_05_in_img)
        #++++++++++++++++++++++++++++++++++++++++ get rpn_labels and rpn_bbox_targets ++++++++++++++++++++++++++++++++++++
        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                rpn_labels, rpn_box_targets = tf.py_func(
                    anchor_target_layer, [gtboxes_batch, img_shape, anchors],
                    [tf.float32, tf.float32])
                rpn_bbox_targets = tf.reshape(rpn_box_targets, shape=(-1, 4))

                rpn_labels = tf.cast(rpn_labels,
                                     dtype=tf.int32,
                                     name='to_int32')
                rpn_labels = tf.reshape(rpn_labels, shape=[-1])
                self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels)

            #+++++++++++++++++++++++++++++++++++generate target boxes and labels++++++++++++++++++++++++++++++++++++++++
            rpn_cls_category = tf.argmax(rpn_cls_prob, axis=1)
            # keep positive and negative anchors; ignore those whose RPN label equals -1
            kept_rpn_indices = tf.reshape(tf.where(tf.not_equal(
                rpn_labels, -1)),
                                          shape=[-1])
            rpn_cls_category = tf.gather(rpn_cls_category,
                                         indices=kept_rpn_indices)
            rpn_cls_labels = tf.cast(tf.gather(rpn_labels,
                                               indices=kept_rpn_indices),
                                     dtype=tf.int64)
            # RPN classification accuracy (for the summary below)
            acc = tf.reduce_mean(
                tf.cast(tf.equal(rpn_cls_category, rpn_cls_labels),
                        dtype=tf.float32))
            tf.summary.scalar('ACC/rpn_accuracy', acc)

            with tf.control_dependencies([rpn_labels]):
                with tf.variable_scope('sample_RCNN_minibatch'):
                    rois, labels, bbox_targets = tf.py_func(
                        proposal_target_layer, [rois, gtboxes_batch],
                        [tf.float32, tf.float32, tf.float32])
                    rois = tf.reshape(rois, [-1, 4])
                    labels = tf.cast(labels, dtype=tf.int32)
                    labels = tf.reshape(labels, [-1])
                    bbox_targets = tf.reshape(bbox_targets,
                                              [-1, 4 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(input_img_batch, rois, labels)

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # step 5 build fast-RCNN
        bbox_pred, cls_score = self.build_fastrcnn(
            feature_crop=feature_cropped, rois=rois, img_shape=img_shape)

        cls_prob = slim.softmax(cls_score, 'cls_prob')

        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            cls_category = tf.argmax(cls_prob, axis=1)
            fast_acc = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc', fast_acc)

        #  6. postprocess_fastrcnn
        if not self.is_training:
            return self.postprocess_fastrcnn(rois=rois,
                                             bbox_ppred=bbox_pred,
                                             scores=cls_prob,
                                             img_shape=img_shape)
        else:
            '''
            when training, we need to build the loss
            '''
            self.loss_dict = self.build_loss(rpn_box_pred=rpn_box_pred,
                                             rpn_bbox_targets=rpn_bbox_targets,
                                             rpn_cls_score=rpn_cls_score,
                                             rpn_labels=rpn_labels,
                                             bbox_pred=bbox_pred,
                                             bbox_targets=bbox_targets,
                                             cls_score=cls_score,
                                             labels=labels)

            final_bbox, final_scores, final_category = self.postprocess_fastrcnn(
                rois=rois,
                bbox_ppred=bbox_pred,
                scores=cls_prob,
                img_shape=img_shape)
            return final_bbox, final_scores, final_category
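
The make_anchors call above only receives a base size, scales, ratios, a stride and the feature-map extent. A rough NumPy sketch of what such a routine typically computes: base boxes, one per (scale, ratio) pair, tiled over every feature-map cell. This is a generic reimplementation for illustration, not the repository's anchor_utils:

import numpy as np

def make_anchors_sketch(base_size, scales, ratios, feat_h, feat_w, stride):
    # 1) build base anchors centred at the origin, one per (scale, ratio) pair
    ws, hs = [], []
    for s in scales:
        for r in ratios:
            # keep the area at (base_size * s)^2 while changing the aspect ratio r = h / w
            ws.append(base_size * s * np.sqrt(1.0 / r))
            hs.append(base_size * s * np.sqrt(r))
    ws, hs = np.array(ws), np.array(hs)
    base = np.stack([-ws / 2, -hs / 2, ws / 2, hs / 2], axis=1)  # [A, 4]

    # 2) shift the base anchors to every feature-map cell centre
    shift_x = (np.arange(feat_w) + 0.5) * stride
    shift_y = (np.arange(feat_h) + 0.5) * stride
    sx, sy = np.meshgrid(shift_x, shift_y)
    shifts = np.stack([sx.ravel(), sy.ravel(), sx.ravel(), sy.ravel()], axis=1)

    # result: [feat_h * feat_w * A, 4] boxes as (x1, y1, x2, y2)
    return (shifts[:, None, :] + base[None, :, :]).reshape(-1, 4)

anchors = make_anchors_sketch(256, [0.5, 1.0, 2.0], [0.5, 1.0, 2.0], 38, 50, 16)
print(anchors.shape)  # (38 * 50 * 9, 4)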
Code Example #7
    def build_whole_detection_network(self, input_img_batch, gtboxes_r_batch,
                                      gtboxes_h_batch):

        if self.is_training:
            # ensure shapes are [M, 6] (rotated boxes) and [M, 5] (horizontal boxes)
            gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6])
            gtboxes_h_batch = tf.reshape(gtboxes_h_batch, [-1, 5])
            gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32)
            gtboxes_h_batch = tf.cast(gtboxes_h_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        feature_to_cropped = self.build_base_network(input_img_batch)

        # 2. build rpn
        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):

            rpn_conv3x3 = slim.conv2d(feature_to_cropped,
                                      512, [3, 3],
                                      trainable=self.is_training,
                                      weights_initializer=cfgs.INITIALIZER,
                                      activation_fn=tf.nn.relu,
                                      scope='rpn_conv/3x3')
            rpn_cls_score = slim.conv2d(rpn_conv3x3,
                                        self.num_anchors_per_location * 2,
                                        [1, 1],
                                        stride=1,
                                        trainable=self.is_training,
                                        weights_initializer=cfgs.INITIALIZER,
                                        activation_fn=None,
                                        scope='rpn_cls_score')
            rpn_box_pred = slim.conv2d(
                rpn_conv3x3,
                self.num_anchors_per_location * 4, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.BBOX_INITIALIZER,
                activation_fn=None,
                scope='rpn_bbox_pred')
            rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

        # 3. generate_anchors
        featuremap_height, featuremap_width = tf.shape(
            feature_to_cropped)[1], tf.shape(feature_to_cropped)[2]
        featuremap_height = tf.cast(featuremap_height, tf.float32)
        featuremap_width = tf.cast(featuremap_width, tf.float32)

        anchors = anchor_utils.make_anchors(
            base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
            anchor_scales=cfgs.ANCHOR_SCALES,
            anchor_ratios=cfgs.ANCHOR_RATIOS,
            featuremap_height=featuremap_height,
            featuremap_width=featuremap_width,
            stride=cfgs.ANCHOR_STRIDE,
            name="make_anchors_forRPN")

        # with tf.variable_scope('make_anchors'):
        #     anchors = anchor_utils.make_anchors(height=featuremap_height,
        #                                         width=featuremap_width,
        #                                         feat_stride=cfgs.ANCHOR_STRIDE[0],
        #                                         anchor_scales=cfgs.ANCHOR_SCALES,
        #                                         anchor_ratios=cfgs.ANCHOR_RATIOS, base_size=16
        #                                         )

        # 4. postprocess RPN proposals (decode, clip, NMS)
        with tf.variable_scope('postprocess_RPN'):
            # rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2])
            # rpn_cls_prob = slim.softmax(rpn_cls_prob, scope='rpn_cls_prob')
            # rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rois, roi_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                img_shape=img_shape,
                anchors=anchors,
                is_training=self.is_training)
            # rois shape [-1, 4]
            # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++

            if self.is_training:
                rois_in_img = show_box_in_tensor.draw_boxes_with_categories(
                    img_batch=input_img_batch, boxes=rois, scores=roi_scores)
                tf.summary.image('all_rpn_rois', rois_in_img)

                score_gre_05 = tf.reshape(
                    tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
                score_gre_05_rois = tf.gather(rois, score_gre_05)
                score_gre_05_score = tf.gather(roi_scores, score_gre_05)
                score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_categories(
                    img_batch=input_img_batch,
                    boxes=score_gre_05_rois,
                    scores=score_gre_05_score)
                tf.summary.image('score_greater_05_rois', score_gre_05_in_img)
            # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                rpn_labels, rpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gtboxes_h_batch, img_shape, anchors],
                        [tf.float32, tf.float32])
                rpn_bbox_targets = tf.reshape(rpn_bbox_targets, [-1, 4])
                rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
                rpn_labels = tf.reshape(rpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels)

            # --------------------------------------add smry-----------------------------------------------------------

            rpn_cls_category = tf.argmax(rpn_cls_prob, axis=1)
            kept_rpn_indices = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)),
                                          [-1])
            rpn_cls_category = tf.gather(rpn_cls_category, kept_rpn_indices)
            acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(rpn_cls_category,
                             tf.to_int64(tf.gather(rpn_labels,
                                                   kept_rpn_indices)))))
            tf.summary.scalar('ACC/rpn_accuracy', acc)

            with tf.control_dependencies([rpn_labels]):
                with tf.variable_scope('sample_RCNN_minibatch'):
                    rois, labels, bbox_targets_h, bbox_targets_r = \
                    tf.py_func(proposal_target_layer,
                               [rois, gtboxes_h_batch, gtboxes_r_batch],
                               [tf.float32, tf.float32, tf.float32, tf.float32])

                    rois = tf.reshape(rois, [-1, 4])
                    labels = tf.to_int32(labels)
                    labels = tf.reshape(labels, [-1])
                    bbox_targets_h = tf.reshape(bbox_targets_h,
                                                [-1, 4 * (cfgs.CLASS_NUM + 1)])
                    bbox_targets_r = tf.reshape(bbox_targets_r,
                                                [-1, 5 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(input_img_batch, rois, labels)

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)
        bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = self.build_fastrcnn(
            feature_to_cropped=feature_to_cropped,
            rois=rois,
            img_shape=img_shape)
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        cls_prob_h = slim.softmax(cls_score_h, 'cls_prob_h')
        cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r')

        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            cls_category_h = tf.argmax(cls_prob_h, axis=1)
            fast_acc_h = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category_h, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc_h', fast_acc_h)

            cls_category_r = tf.argmax(cls_prob_r, axis=1)
            fast_acc_r = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category_r, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc_r', fast_acc_r)

        #  6. postprocess_fastrcnn
        if not self.is_training:
            final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
                rois=rois,
                bbox_ppred=bbox_pred_h,
                scores=cls_prob_h,
                img_shape=img_shape)
            final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
                rois=rois,
                bbox_ppred=bbox_pred_r,
                scores=cls_prob_r,
                img_shape=img_shape)
            return final_boxes_h, final_scores_h, final_category_h, final_boxes_r, final_scores_r, final_category_r
        else:
            '''
            when training, we need to build the loss
            '''
            loss_dict = self.build_loss(rpn_box_pred=rpn_box_pred,
                                        rpn_bbox_targets=rpn_bbox_targets,
                                        rpn_cls_score=rpn_cls_score,
                                        rpn_labels=rpn_labels,
                                        bbox_pred_h=bbox_pred_h,
                                        bbox_targets_h=bbox_targets_h,
                                        cls_score_h=cls_score_h,
                                        bbox_pred_r=bbox_pred_r,
                                        bbox_targets_r=bbox_targets_r,
                                        cls_score_r=cls_score_r,
                                        labels=labels)

            final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
                rois=rois,
                bbox_ppred=bbox_pred_h,
                scores=cls_prob_h,
                img_shape=img_shape)
            final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
                rois=rois,
                bbox_ppred=bbox_pred_r,
                scores=cls_prob_r,
                img_shape=img_shape)

            return final_boxes_h, final_scores_h, final_category_h, \
                   final_boxes_r, final_scores_r, final_category_r, loss_dict
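
Both branches above rely on postprocess_rpn_proposals / postprocess_fastrcnn_* to decode the 4-d regression deltas against their reference boxes and clip the result to the image before NMS. A hedged NumPy sketch of the decode-and-clip step, assuming the usual (tx, ty, tw, th) parameterisation from Faster R-CNN (the function name is illustrative; some repositories also rescale the deltas by target stds first):

import numpy as np

def decode_and_clip(anchors, deltas, img_h, img_w):
    # anchors: [N, 4] as (x1, y1, x2, y2); deltas: [N, 4] as (tx, ty, tw, th)
    aw = anchors[:, 2] - anchors[:, 0]
    ah = anchors[:, 3] - anchors[:, 1]
    acx = anchors[:, 0] + 0.5 * aw
    acy = anchors[:, 1] + 0.5 * ah

    cx = deltas[:, 0] * aw + acx        # shift the anchor centre
    cy = deltas[:, 1] * ah + acy
    w = np.exp(deltas[:, 2]) * aw       # rescale width / height
    h = np.exp(deltas[:, 3]) * ah

    boxes = np.stack([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2], axis=1)
    boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, img_w - 1)  # clip x coords
    boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, img_h - 1)  # clip y coords
    return boxes

anchors = np.array([[0., 0., 16., 16.]])
deltas = np.array([[0.1, 0.1, 0.2, 0.2]])
print(decode_and_clip(anchors, deltas, img_h=100, img_w=100))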
Code Example #8
File: build_whole_network.py  Project: cugszu/yuncong
    def build_whole_detection_network(self, input_img_batch, gtboxes_batch):

        if self.is_training:
            # ensure shape is [M, 5]
            gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        feature_stride8, feature_stride16 = self.build_base_network(
            input_img_batch)

        # feature_stride8 = tf.image.resize_bilinear(feature_stride8, [tf.shape(feature_stride8)[1] * 2,
        #                                                              tf.shape(feature_stride8)[2] * 2],
        #                                            name='upsampling_stride8')

        # 2. build rpn
        with tf.variable_scope('build_ssh',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):

            ssh_max_pool = slim.max_pool2d(inputs=feature_stride16,
                                           kernel_size=[2, 2],
                                           scope='ssh_max_pool')

            cls_score_m3, box_pred_m3 = self.detection_module(
                ssh_max_pool, self.m3_num_anchors_per_location,
                'detection_module_m3')
            box_pred_m3 = tf.reshape(box_pred_m3,
                                     [-1, 4 * (cfgs.CLASS_NUM + 1)])
            cls_score_m3 = tf.reshape(cls_score_m3, [-1, (cfgs.CLASS_NUM + 1)])
            cls_prob_m3 = slim.softmax(cls_score_m3, scope='cls_prob_m3')

            cls_score_m2, box_pred_m2 = self.detection_module(
                feature_stride16, self.m2_num_anchors_per_location,
                'detection_module_m2')
            box_pred_m2 = tf.reshape(box_pred_m2,
                                     [-1, 4 * (cfgs.CLASS_NUM + 1)])
            cls_score_m2 = tf.reshape(cls_score_m2, [-1, (cfgs.CLASS_NUM + 1)])
            cls_prob_m2 = slim.softmax(cls_score_m2, scope='cls_prob_m2')

            channels_16 = feature_stride16.get_shape().as_list()[-1]
            channels_8 = feature_stride8.get_shape().as_list()[-1]
            feature8_shape = tf.shape(feature_stride8)
            conv1x1_1 = slim.conv2d(inputs=feature_stride16,
                                    num_outputs=channels_16 // 4,
                                    kernel_size=[1, 1],
                                    trainable=self.is_training,
                                    weights_initializer=cfgs.INITIALIZER,
                                    activation_fn=tf.nn.relu,
                                    scope='conv1x1_1')
            upsampling = tf.image.resize_bilinear(
                conv1x1_1, [feature8_shape[1], feature8_shape[2]],
                name='upsampling')

            conv1x1_2 = slim.conv2d(inputs=feature_stride8,
                                    num_outputs=channels_8 // 2,
                                    kernel_size=[1, 1],
                                    trainable=self.is_training,
                                    weights_initializer=cfgs.INITIALIZER,
                                    activation_fn=tf.nn.relu,
                                    scope='conv1x1_2')

            eltwise_sum = upsampling + conv1x1_2

            conv3x3 = slim.conv2d(inputs=eltwise_sum,
                                  num_outputs=channels_8 // 2,
                                  kernel_size=[3, 3],
                                  trainable=self.is_training,
                                  weights_initializer=cfgs.INITIALIZER,
                                  activation_fn=tf.nn.relu,
                                  scope='conv3x3')

            cls_score_m1, box_pred_m1 = self.detection_module(
                conv3x3, self.m1_num_anchors_per_location,
                'detection_module_m1')
            box_pred_m1 = tf.reshape(box_pred_m1,
                                     [-1, 4 * (cfgs.CLASS_NUM + 1)])
            cls_score_m1 = tf.reshape(cls_score_m1, [-1, (cfgs.CLASS_NUM + 1)])
            cls_prob_m1 = slim.softmax(cls_score_m1, scope='cls_prob_m1')

        # 3. generate_anchors
        featuremap_height_m1, featuremap_width_m1 = tf.shape(feature_stride8)[1], \
                                                    tf.shape(feature_stride8)[2]
        featuremap_height_m1 = tf.cast(featuremap_height_m1, tf.float32)
        featuremap_width_m1 = tf.cast(featuremap_width_m1, tf.float32)

        anchors_m1 = anchor_utils.make_anchors(
            base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
            anchor_scales=cfgs.M1_ANCHOR_SCALES,
            anchor_ratios=cfgs.ANCHOR_RATIOS,
            featuremap_height=featuremap_height_m1,
            featuremap_width=featuremap_width_m1,
            stride=[cfgs.ANCHOR_STRIDE[0]],
            name="make_anchors_for_m1")

        featuremap_height_m2, featuremap_width_m2 = tf.shape(feature_stride16)[1], \
                                                    tf.shape(feature_stride16)[2]
        featuremap_height_m2 = tf.cast(featuremap_height_m2, tf.float32)
        featuremap_width_m2 = tf.cast(featuremap_width_m2, tf.float32)

        anchors_m2 = anchor_utils.make_anchors(
            base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
            anchor_scales=cfgs.M2_ANCHOR_SCALES,
            anchor_ratios=cfgs.ANCHOR_RATIOS,
            featuremap_height=featuremap_height_m2,
            featuremap_width=featuremap_width_m2,
            stride=[cfgs.ANCHOR_STRIDE[1]],
            name="make_anchors_for_m2")

        featuremap_height_m3, featuremap_width_m3 = tf.shape(ssh_max_pool)[1], \
                                                    tf.shape(ssh_max_pool)[2]
        featuremap_height_m3 = tf.cast(featuremap_height_m3, tf.float32)
        featuremap_width_m3 = tf.cast(featuremap_width_m3, tf.float32)

        anchors_m3 = anchor_utils.make_anchors(
            base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
            anchor_scales=cfgs.M3_ANCHOR_SCALES,
            anchor_ratios=cfgs.ANCHOR_RATIOS,
            featuremap_height=featuremap_height_m3,
            featuremap_width=featuremap_width_m3,
            stride=[cfgs.ANCHOR_STRIDE[2]],
            name="make_anchors_for_m3")
        # refer to paper: Seeing Small Faces from Robust Anchor’s Perspective
        if cfgs.EXTRA_SHIFTED_ANCHOR:
            shift_anchors_m1 = anchor_utils.shift_anchor(
                anchors_m1, cfgs.ANCHOR_STRIDE[0])
            shift_anchors_m2 = anchor_utils.shift_anchor(
                anchors_m2, cfgs.ANCHOR_STRIDE[1])
            shift_anchors_m3 = anchor_utils.shift_anchor(
                anchors_m3, cfgs.ANCHOR_STRIDE[2])
        else:
            shift_anchors_m1, shift_anchors_m2, shift_anchors_m3 = [], [], []

        if cfgs.FACE_SHIFT_JITTER:
            jitter_anchors_m1 = anchor_utils.shift_jitter(
                anchors_m1, cfgs.ANCHOR_STRIDE[0])
            jitter_anchors_m2 = anchor_utils.shift_jitter(
                anchors_m2, cfgs.ANCHOR_STRIDE[1])
            jitter_anchors_m3 = anchor_utils.shift_jitter(
                anchors_m3, cfgs.ANCHOR_STRIDE[2])
        else:
            jitter_anchors_m1, jitter_anchors_m2, jitter_anchors_m3 = [], [], []

        anchors_m1 = [anchors_m1] + shift_anchors_m1 + jitter_anchors_m1
        anchors_m1 = tf.reshape(tf.stack(anchors_m1, axis=1), [-1, 4])

        anchors_m2 = [anchors_m2] + shift_anchors_m2 + jitter_anchors_m2
        anchors_m2 = tf.reshape(tf.stack(anchors_m2, axis=1), [-1, 4])

        anchors_m3 = [anchors_m3] + shift_anchors_m3 + jitter_anchors_m3
        anchors_m3 = tf.reshape(tf.stack(anchors_m3, axis=1), [-1, 4])

        if self.is_training:
            with tf.variable_scope('sample_ssh_minibatch_m1'):
                rois_m1, labels_m1, bbox_targets_m1, keep_inds_m1 = \
                    tf.py_func(proposal_target_layer,
                               [anchors_m1, gtboxes_batch, 'M1'],
                               [tf.float32, tf.float32, tf.float32, tf.int32])
                rois_m1 = tf.reshape(rois_m1, [-1, 4])
                labels_m1 = tf.to_int32(labels_m1)
                labels_m1 = tf.reshape(labels_m1, [-1])
                bbox_targets_m1 = tf.reshape(bbox_targets_m1,
                                             [-1, 4 * (cfgs.CLASS_NUM + 1)])
                self.add_roi_batch_img_smry(input_img_batch, rois_m1,
                                            labels_m1, 'm1')

            with tf.variable_scope('sample_ssh_minibatch_m2'):
                rois_m2, labels_m2, bbox_targets_m2, keep_inds_m2 = \
                    tf.py_func(proposal_target_layer,
                               [anchors_m2, gtboxes_batch, 'M2'],
                               [tf.float32, tf.float32, tf.float32, tf.int32])
                rois_m2 = tf.reshape(rois_m2, [-1, 4])
                labels_m2 = tf.to_int32(labels_m2)
                labels_m2 = tf.reshape(labels_m2, [-1])
                bbox_targets_m2 = tf.reshape(bbox_targets_m2,
                                             [-1, 4 * (cfgs.CLASS_NUM + 1)])
                self.add_roi_batch_img_smry(input_img_batch, rois_m2,
                                            labels_m2, 'm2')

            with tf.variable_scope('sample_ssh_minibatch_m3'):
                rois_m3, labels_m3, bbox_targets_m3, keep_inds_m3 = \
                    tf.py_func(proposal_target_layer,
                               [anchors_m3, gtboxes_batch, 'M3'],
                               [tf.float32, tf.float32, tf.float32, tf.int32])
                rois_m3 = tf.reshape(rois_m3, [-1, 4])
                labels_m3 = tf.to_int32(labels_m3)
                labels_m3 = tf.reshape(labels_m3, [-1])
                bbox_targets_m3 = tf.reshape(bbox_targets_m3,
                                             [-1, 4 * (cfgs.CLASS_NUM + 1)])
                self.add_roi_batch_img_smry(input_img_batch, rois_m3,
                                            labels_m3, 'm3')

        if not self.is_training:
            with tf.variable_scope('postprocess_ssh_m1'):
                final_bbox_m1, final_scores_m1, final_category_m1 = self.postprocess_ssh(
                    rois=anchors_m1,
                    bbox_ppred=box_pred_m1,
                    scores=cls_prob_m1,
                    img_shape=img_shape,
                    iou_threshold=cfgs.M1_NMS_IOU_THRESHOLD)

            with tf.variable_scope('postprocess_ssh_m2'):
                final_bbox_m2, final_scores_m2, final_category_m2 = self.postprocess_ssh(
                    rois=anchors_m2,
                    bbox_ppred=box_pred_m2,
                    scores=cls_prob_m2,
                    img_shape=img_shape,
                    iou_threshold=cfgs.M2_NMS_IOU_THRESHOLD)

            with tf.variable_scope('postprocess_ssh_m3'):
                final_bbox_m3, final_scores_m3, final_category_m3 = self.postprocess_ssh(
                    rois=anchors_m3,
                    bbox_ppred=box_pred_m3,
                    scores=cls_prob_m3,
                    img_shape=img_shape,
                    iou_threshold=cfgs.M3_NMS_IOU_THRESHOLD)

            result_dict = {
                'final_bbox_m1': final_bbox_m1,
                'final_scores_m1': final_scores_m1,
                'final_category_m1': final_category_m1,
                'final_bbox_m2': final_bbox_m2,
                'final_scores_m2': final_scores_m2,
                'final_category_m2': final_category_m2,
                'final_bbox_m3': final_bbox_m3,
                'final_scores_m3': final_scores_m3,
                'final_category_m3': final_category_m3
            }
            return result_dict

        else:
            with tf.variable_scope('ssh_loss_m1'):

                if cfgs.M1_MINIBATCH_SIZE != -1:

                    box_pred_m1 = tf.gather(box_pred_m1, keep_inds_m1)
                    cls_score_m1 = tf.gather(cls_score_m1, keep_inds_m1)
                    cls_prob_m1 = tf.reshape(
                        tf.gather(cls_prob_m1, keep_inds_m1),
                        [-1, (cfgs.CLASS_NUM + 1)])

                    bbox_loss_m1 = losses.smooth_l1_loss_rcnn(
                        bbox_pred=box_pred_m1,
                        bbox_targets=bbox_targets_m1,
                        label=labels_m1,
                        num_classes=cfgs.CLASS_NUM + 1,
                        sigma=cfgs.M1_SIGMA)

                    cls_loss_m1 = tf.reduce_mean(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=cls_score_m1, labels=labels_m1))

            with tf.variable_scope('postprocess_ssh_m1'):
                final_bbox_m1, final_scores_m1, final_category_m1 = self.postprocess_ssh(
                    rois=rois_m1,
                    bbox_ppred=box_pred_m1,
                    scores=cls_prob_m1,
                    img_shape=img_shape,
                    iou_threshold=cfgs.M1_NMS_IOU_THRESHOLD)

            with tf.variable_scope('ssh_loss_m2'):
                if cfgs.M2_MINIBATCH_SIZE != -1:

                    box_pred_m2 = tf.gather(box_pred_m2, keep_inds_m2)
                    cls_score_m2 = tf.gather(cls_score_m2, keep_inds_m2)
                    cls_prob_m2 = tf.reshape(
                        tf.gather(cls_prob_m2, keep_inds_m2),
                        [-1, (cfgs.CLASS_NUM + 1)])

                    bbox_loss_m2 = losses.smooth_l1_loss_rcnn(
                        bbox_pred=box_pred_m2,
                        bbox_targets=bbox_targets_m2,
                        label=labels_m2,
                        num_classes=cfgs.CLASS_NUM + 1,
                        sigma=cfgs.M2_SIGMA)

                    cls_loss_m2 = tf.reduce_mean(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=cls_score_m2, labels=labels_m2))

            with tf.variable_scope('postprocess_ssh_m2'):
                final_bbox_m2, final_scores_m2, final_category_m2 = self.postprocess_ssh(
                    rois=rois_m2,
                    bbox_ppred=box_pred_m2,
                    scores=cls_prob_m2,
                    img_shape=img_shape,
                    iou_threshold=cfgs.M2_NMS_IOU_THRESHOLD)

            with tf.variable_scope('ssh_loss_m3'):
                if cfgs.M3_MINIBATCH_SIZE != -1:

                    box_pred_m3 = tf.gather(box_pred_m3, keep_inds_m3)
                    cls_score_m3 = tf.gather(cls_score_m3, keep_inds_m3)
                    cls_prob_m3 = tf.reshape(
                        tf.gather(cls_prob_m3, keep_inds_m3),
                        [-1, (cfgs.CLASS_NUM + 1)])

                    bbox_loss_m3 = losses.smooth_l1_loss_rcnn(
                        bbox_pred=box_pred_m3,
                        bbox_targets=bbox_targets_m3,
                        label=labels_m3,
                        num_classes=cfgs.CLASS_NUM + 1,
                        sigma=cfgs.M3_SIGMA)

                    cls_loss_m3 = tf.reduce_mean(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=cls_score_m3, labels=labels_m3))

            with tf.variable_scope('postprocess_ssh_m3'):
                final_bbox_m3, final_scores_m3, final_category_m3 = self.postprocess_ssh(
                    rois=rois_m3,
                    bbox_ppred=box_pred_m3,
                    scores=cls_prob_m3,
                    img_shape=img_shape,
                    iou_threshold=cfgs.M3_NMS_IOU_THRESHOLD)

            result_dict = {
                'final_bbox_m1': final_bbox_m1,
                'final_scores_m1': final_scores_m1,
                'final_category_m1': final_category_m1,
                'final_bbox_m2': final_bbox_m2,
                'final_scores_m2': final_scores_m2,
                'final_category_m2': final_category_m2,
                'final_bbox_m3': final_bbox_m3,
                'final_scores_m3': final_scores_m3,
                'final_category_m3': final_category_m3
            }

            losses_dict = {
                'bbox_loss_m1': bbox_loss_m1,
                'cls_loss_m1': cls_loss_m1,
                'bbox_loss_m2': bbox_loss_m2,
                'cls_loss_m2': cls_loss_m2,
                'bbox_loss_m3': bbox_loss_m3,
                'cls_loss_m3': cls_loss_m3
            }

            return result_dict, losses_dict
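
Each ssh_loss_mX block above pairs a smooth L1 box term (losses.smooth_l1_loss_rcnn with a per-module sigma) with a softmax cross-entropy class term. A minimal sketch of the sigma-scaled smooth L1, assuming the standard Fast R-CNN formulation (the repository's version may differ in reduction and per-class masking):

import numpy as np

def smooth_l1(pred, target, sigma=1.0):
    # standard Fast R-CNN smooth L1 with a sigma knob:
    #   0.5 * (sigma * x)^2   if |x| < 1 / sigma^2
    #   |x| - 0.5 / sigma^2   otherwise
    diff = np.abs(pred - target)
    cutoff = 1.0 / sigma ** 2
    return np.where(diff < cutoff,
                    0.5 * (sigma * diff) ** 2,
                    diff - 0.5 * cutoff)

# small errors are penalised quadratically, large ones only linearly
print(smooth_l1(np.array([0.1, 2.0]), np.array([0.0, 0.0])))  # [0.005 1.5]

The linear tail is what makes the loss robust to outlier boxes; raising sigma narrows the quadratic zone.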
Code Example #9
    def build_whole_detection_network(self, input_img_batch, gtboxes_batch):

        if self.is_training:
            # ensure shape is [M, 5]
            gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        P_dict = self.build_base_network(input_img_batch)  # [P2, P3, P4, P5, P6]

        # 2. build FPN by building an RPN head for each level
        with tf.variable_scope("build_FPN", regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY)):
            fpn_box_delta = {}
            fpn_cls_score = {}
            fpn_cls_prob = {}
            for key in cfgs.LEVLES:
                if cfgs.SHARE_HEADS:
                    reuse_flag = None if key == cfgs.LEVLES[0] else True
                    scope_list = ['fpn_conv/3x3', 'fpn_cls_score', 'fpn_bbox_pred']
                else:
                    reuse_flag = None
                    scope_list = ['fpn_conv/3x3_%s' % key, 'fpn_cls_score_%s' % key, 'fpn_bbox_pred_%s' % key]
                rpn_conv3x3 = slim.conv2d(
                    P_dict[key], 512, [3, 3],
                    trainable=self.is_training, weights_initializer=cfgs.INITIALIZER, padding="SAME",
                    activation_fn=tf.nn.relu,
                    scope=scope_list[0],
                    reuse=reuse_flag)
                rpn_cls_score = slim.conv2d(rpn_conv3x3, self.num_anchors_per_location*2, [1, 1], stride=1,
                                            trainable=self.is_training, weights_initializer=cfgs.INITIALIZER,
                                            activation_fn=None, padding="VALID",
                                            scope=scope_list[1],
                                            reuse=reuse_flag)
                rpn_box_delta = slim.conv2d(rpn_conv3x3, self.num_anchors_per_location*4, [1, 1], stride=1,
                                            trainable=self.is_training, weights_initializer=cfgs.BBOX_INITIALIZER,
                                            activation_fn=None, padding="VALID",
                                            scope=scope_list[2],
                                            reuse=reuse_flag)
                fpn_box_delta[key] = tf.reshape(rpn_box_delta, [-1, 4])
                fpn_cls_score[key] = tf.reshape(rpn_cls_score, [-1, 2])
                fpn_cls_prob[key] = slim.softmax(fpn_cls_score[key])

        # 3. generate anchors for fpn. (by generate for each level)
        anchors_dict = {}
        anchor_list = []
        with tf.name_scope("generate_FPN_anchors"):
            for key in cfgs.LEVLES:
                p_h, p_w = tf.to_float(tf.shape(P_dict[key])[1]), tf.to_float(tf.shape(P_dict[key])[2])
                id_ = int(key[-1]) - int(cfgs.LEVLES[0][-1])  # level index, e.g. P2 -> 0, P3 -> 1
                tmp_anchors = anchor_utils.make_anchors(base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[id_],
                                                        anchor_scales=cfgs.ANCHOR_SCALES,
                                                        anchor_ratios=cfgs.ANCHOR_RATIOS,
                                                        stride=cfgs.ANCHOR_STRIDE_LIST[id_],
                                                        featuremap_height=p_h,
                                                        featuremap_width=p_w,
                                                        name='%s_make_anchors' % key)
                anchors_dict[key] = tmp_anchors
                anchor_list.append(tmp_anchors)
        all_anchors = tf.concat(anchor_list, axis=0)

        # 4. postprocess FPN proposals (decode, clip, NMS)
        #    Note: NMS is applied per level here, not over all anchors at once.
        rois_list = []
        rois_scores_list = []
        with tf.name_scope("postproces_fpn"):
            for key in cfgs.LEVLES:
                tmp_rois, tmp_roi_scores = postprocess_rpn_proposals(rpn_bbox_pred=fpn_box_delta[key],
                                                                     rpn_cls_prob=fpn_cls_prob[key],
                                                                     img_shape=img_shape,
                                                                     anchors=anchors_dict[key],
                                                                     is_training=self.is_training)
                rois_list.append(tmp_rois)
                rois_scores_list.append(tmp_roi_scores)
            allrois = tf.concat(rois_list, axis=0)
            allrois_scores = tf.concat(rois_scores_list, axis=0)
            fpn_topk = cfgs.FPN_TOP_K_PER_LEVEL_TRAIN if self.is_training else cfgs.FPN_TOP_K_PER_LEVEL_TEST
            topk = tf.minimum(fpn_topk, tf.shape(allrois)[0])

            rois_scores, topk_indices = tf.nn.top_k(allrois_scores, k=topk)

            rois = tf.stop_gradient(tf.gather(allrois, topk_indices))
            rois_scores = tf.stop_gradient(rois_scores)

            # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++
            if self.is_training:
                score_gre_05 = tf.reshape(tf.where(tf.greater_equal(rois_scores, 0.5)), [-1])
                score_gre_05_rois = tf.gather(rois, score_gre_05)
                score_gre_05_score = tf.gather(rois_scores, score_gre_05)
                score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_scores(img_batch=input_img_batch,
                                                                                boxes=score_gre_05_rois,
                                                                                scores=score_gre_05_score)
                tf.summary.image('score_greater_05_rois', score_gre_05_in_img)
            # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

        # sample for fpn. We should concat all the anchors
        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                fpn_labels, fpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gtboxes_batch, img_shape, all_anchors],
                        [tf.float32, tf.float32])
                fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
                fpn_labels = tf.to_int32(fpn_labels, name="to_int32")
                fpn_labels = tf.reshape(fpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, all_anchors, fpn_labels)

            with tf.control_dependencies([fpn_labels]):
                with tf.variable_scope('sample_RCNN_minibatch'):
                    rois, labels, bbox_targets = \
                    tf.py_func(proposal_target_layer,
                               [rois, gtboxes_batch],
                               [tf.float32, tf.float32, tf.float32])
                    rois = tf.reshape(rois, [-1, 4])
                    labels = tf.to_int32(labels)
                    labels = tf.reshape(labels, [-1])
                    bbox_targets = tf.reshape(bbox_targets, [-1, 4*(cfgs.CLASS_NUM+1)])
                    self.add_roi_batch_img_smry(input_img_batch, rois, labels)
        if self.is_training:
            rois_list, labels, bbox_targets = self.assign_levels(all_rois=rois,
                                                                 labels=labels,
                                                                 bbox_targets=bbox_targets)
        else:
            rois_list = self.assign_levels(all_rois=rois)  # rois_list: [P2_rois, P3_rois, P4_rois, P5_rois]

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)
        bbox_pred, cls_score = self.build_fastrcnn(P_list=[P_dict[key] for key in cfgs.LEVLES],
                                                   rois_list=rois_list,
                                                   img_shape=img_shape)
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        cls_prob = slim.softmax(cls_score, 'cls_prob')


        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            cls_category = tf.argmax(cls_prob, axis=1)
            fast_acc = tf.reduce_mean(tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc', fast_acc)

        rois = tf.concat(rois_list, axis=0, name='concat_rois')
        #  6. postprocess_fastrcnn
        if not self.is_training:
            return self.postprocess_fastrcnn(rois=rois, bbox_ppred=bbox_pred, scores=cls_prob, img_shape=img_shape)
        else:
            '''
            when training, we need to build the loss
            '''
            loss_dict = self.build_loss(rpn_box_pred=tf.concat([fpn_box_delta[key] for key in cfgs.LEVLES], axis=0),
                                        rpn_bbox_targets=fpn_bbox_targets,
                                        rpn_cls_score=tf.concat([fpn_cls_score[key] for key in cfgs.LEVLES], axis=0),
                                        rpn_labels=fpn_labels,
                                        bbox_pred=bbox_pred,
                                        bbox_targets=bbox_targets,
                                        cls_score=cls_score,
                                        labels=labels)

            final_bbox, final_scores, final_category = self.postprocess_fastrcnn(rois=rois,
                                                                                 bbox_ppred=bbox_pred,
                                                                                 scores=cls_prob,
                                                                                 img_shape=img_shape)
            return final_bbox, final_scores, final_category, loss_dict
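
assign_levels above routes each RoI to one pyramid level before RoI pooling. Assuming the rule from the FPN paper, k = floor(k0 + log2(sqrt(w*h) / 224)) clipped to [k_min, k_max], a small sketch looks like this (the repository's exact variant may differ):

import numpy as np

def assign_levels_sketch(rois, k_min=2, k_max=5, k0=4, canonical=224.0):
    # rois: [N, 4] as (x1, y1, x2, y2); returns a pyramid level per RoI
    w = rois[:, 2] - rois[:, 0]
    h = rois[:, 3] - rois[:, 1]
    k = np.floor(k0 + np.log2(np.sqrt(w * h) / canonical + 1e-8))
    return np.clip(k, k_min, k_max).astype(np.int32)

rois = np.array([[0, 0, 56, 56],      # small box -> P2
                 [0, 0, 224, 224],    # canonical -> P4
                 [0, 0, 896, 896]],   # large box -> P5 (clipped)
                dtype=np.float32)
print(assign_levels_sketch(rois))  # [2 4 5]

Small RoIs go to the high-resolution levels, large RoIs to the coarse ones, which is why rois_list is regrouped per level before build_fastrcnn.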
Code Example #10
    def build_whole_detection_network(self, input_img_batch, gtboxes_batch):

        if self.is_training:
            # ensure shape is [M, 5]
            gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1.0 build base network
        rpn_base = self.build_base_network(input_img_batch)

        # 1.1 build the head_base network
        if self.base_network_name.startswith('resnet_v1'):
            rfcn_base = resnet.restnet_head(rpn_base,
                                            scope_name=self.base_network_name,
                                            is_training=self.is_training)

        elif self.base_network_name.startswith('MobilenetV2'):
            rfcn_base = mobilenet_v2.mobilenetv2_head(
                rpn_base, is_training=self.is_training)

        else:
            raise ValueError('Sorry, we only support resnet or mobilenet_v2')

        # 2. build rpn head
        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):

            rpn_conv3x3 = slim.conv2d(rpn_base,
                                      128, [3, 3],
                                      trainable=self.is_training,
                                      weights_initializer=cfgs.INITIALIZER,
                                      activation_fn=tf.nn.relu,
                                      scope='rpn_conv/3x3')
            rpn_cls_score = slim.conv2d(rpn_conv3x3,
                                        self.num_anchors_per_location * 2,
                                        [1, 1],
                                        stride=1,
                                        trainable=self.is_training,
                                        weights_initializer=cfgs.INITIALIZER,
                                        activation_fn=None,
                                        scope='rpn_cls_score')
            rpn_box_pred = slim.conv2d(
                rpn_conv3x3,
                self.num_anchors_per_location * 4, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.BBOX_INITIALIZER,
                activation_fn=None,
                scope='rpn_bbox_pred')
            rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

        # 3. generate_anchors
        featuremap_height, featuremap_width = tf.shape(rpn_base)[1], tf.shape(
            rpn_base)[2]
        featuremap_height = tf.cast(featuremap_height, tf.float32)
        featuremap_width = tf.cast(featuremap_width, tf.float32)

        anchors = anchor_utils.make_anchors(
            base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
            anchor_scales=cfgs.ANCHOR_SCALES,
            anchor_ratios=cfgs.ANCHOR_RATIOS,
            featuremap_height=featuremap_height,
            featuremap_width=featuremap_width,
            stride=cfgs.ANCHOR_STRIDE,
            name="make_anchors_forRPN")

        # 4. postprocess RPN proposals (decode, clip, NMS)
        with tf.variable_scope('postprocess_RPN'):
            rois, roi_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                img_shape=img_shape,
                anchors=anchors,
                is_training=self.is_training)
            # rois shape [-1, 4]
            # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++

            if self.is_training:
                rois_in_img = show_box_in_tensor.draw_boxes_with_scores(
                    img_batch=input_img_batch, boxes=rois, scores=roi_scores)
                tf.summary.image('all_rpn_rois', rois_in_img)

                score_gre_05 = tf.reshape(
                    tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
                score_gre_05_rois = tf.gather(rois, score_gre_05)
                score_gre_05_score = tf.gather(roi_scores, score_gre_05)
                score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_scores(
                    img_batch=input_img_batch,
                    boxes=score_gre_05_rois,
                    scores=score_gre_05_score)
                tf.summary.image('score_greater_05_rois', score_gre_05_in_img)
            # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                rpn_labels, rpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gtboxes_batch, img_shape, anchors],
                        [tf.float32, tf.float32])
                rpn_bbox_targets = tf.reshape(rpn_bbox_targets, [-1, 4])
                rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
                rpn_labels = tf.reshape(rpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels)

            # --------------------------------------add smry----------------------------------------------------------

            rpn_cls_category = tf.argmax(rpn_cls_prob, axis=1)
            kept_rpn_indices = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)),
                                          [-1])
            rpn_cls_category = tf.gather(rpn_cls_category, kept_rpn_indices)
            acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(rpn_cls_category,
                             tf.to_int64(tf.gather(rpn_labels,
                                                   kept_rpn_indices)))))
            tf.summary.scalar('ACC/rpn_accuracy', acc)

            with tf.control_dependencies([rpn_labels]):
                with tf.variable_scope('sample_RFCN_minibatch'):
                    rois, labels, bbox_targets = \
                    tf.py_func(proposal_target_layer,
                               [rois, gtboxes_batch],
                               [tf.float32, tf.float32, tf.float32])
                    rois = tf.reshape(rois, [-1, 4])
                    labels = tf.to_int32(labels)
                    labels = tf.reshape(labels, [-1])
                    bbox_targets = tf.reshape(bbox_targets,
                                              [-1, 4 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(input_img_batch, rois, labels)

        # -------------------------------------------------------------------------------------------------------------#
        #                                                          RFCN                                                #
        # -------------------------------------------------------------------------------------------------------------#
        # 5. build rfcn head
        bbox_pred, cls_score = self.build_rfcn_head(
            rfcn_base=rfcn_base,
            rois=rois,
            img_shape=img_shape,
            bin_nums=[3, 3],
            crop_size=[9, 9])  # crop_size is the total size
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        cls_prob = slim.softmax(cls_score, 'cls_prob')

        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            cls_category = tf.argmax(cls_prob, axis=1)
            rfcn_acc = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
            tf.summary.scalar('ACC/rfcn_acc', rfcn_acc)

        #  6. postprocess_rfcn
        if not self.is_training:
            return self.postprocess_rfcn(rois=rois,
                                         bbox_ppred=bbox_pred,
                                         scores=cls_prob,
                                         img_shape=img_shape)
        else:
            '''
            when training, we need to build the loss
            '''
            loss_dict = self.build_loss(rpn_box_pred=rpn_box_pred,
                                        rpn_bbox_targets=rpn_bbox_targets,
                                        rpn_cls_score=rpn_cls_score,
                                        rpn_labels=rpn_labels,
                                        bbox_pred=bbox_pred,
                                        bbox_targets=bbox_targets,
                                        cls_score=cls_score,
                                        labels=labels)

            final_bbox, final_scores, final_category = self.postprocess_rfcn(
                rois=rois,
                bbox_ppred=bbox_pred,
                scores=cls_prob,
                img_shape=img_shape)
            return final_bbox, final_scores, final_category, loss_dict
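
build_rfcn_head above pools with bin_nums=[3, 3] over a 9x9 crop, i.e. position-sensitive RoI pooling: the RoI is split into a 3x3 grid, each bin reads only its own group of score-map channels, and the bins then vote. A toy NumPy sketch of that idea (illustrative only, not the repository's implementation):

import numpy as np

def ps_roi_pool_sketch(score_maps, roi, bins=3, num_classes=2):
    # score_maps: [H, W, bins * bins * num_classes]; roi: (x1, y1, x2, y2) in pixels
    x1, y1, x2, y2 = roi
    bin_w = (x2 - x1) / bins
    bin_h = (y2 - y1) / bins
    out = np.zeros((bins, bins, num_classes))
    for i in range(bins):        # row of the RoI grid
        for j in range(bins):    # column of the RoI grid
            ys = slice(int(y1 + i * bin_h), int(y1 + (i + 1) * bin_h))
            xs = slice(int(x1 + j * bin_w), int(x1 + (j + 1) * bin_w))
            c = (i * bins + j) * num_classes   # each bin reads its own channels
            out[i, j] = score_maps[ys, xs, c:c + num_classes].mean(axis=(0, 1))
    return out.mean(axis=(0, 1))  # average vote over the bins -> per-class score

maps = np.random.rand(48, 48, 3 * 3 * 2)
print(ps_roi_pool_sketch(maps, (6, 6, 42, 42)))

Because the class evidence lives in the score maps rather than in per-RoI fully connected layers, almost all computation is shared across RoIs, which is the point of R-FCN.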
Code Example #11
    def build_whole_detection_network(self, input_img_batch, gtboxes_r_batch,
                                      gtboxes_h_batch):

        if self.is_training:
            # ensure shapes are [M, 6] (rotated boxes) and [M, 5] (horizontal boxes)
            gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6])
            gtboxes_h_batch = tf.reshape(gtboxes_h_batch, [-1, 5])
            gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32)
            gtboxes_h_batch = tf.cast(gtboxes_h_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        C2_, C4 = self.build_base_network(input_img_batch)

        C2 = slim.conv2d(C2_,
                         num_outputs=1024,
                         kernel_size=[1, 1],
                         stride=1,
                         scope='build_C2_to_1024')

        self.feature_pyramid = {'C2': C2, 'C4': C4}

        # 2. build rpn

        rpn_all_encode_boxes = {}
        rpn_all_boxes_scores = {}
        rpn_all_cls_score = {}
        anchors = {}

        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):
            i = 0
            for level in self.level:
                rpn_conv3x3 = slim.conv2d(
                    self.feature_pyramid[level],
                    512, [3, 3],
                    trainable=self.is_training,
                    weights_initializer=cfgs.INITIALIZER,
                    activation_fn=tf.nn.relu,
                    scope='rpn_conv/3x3_{}'.format(level))
                rpn_cls_score = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location[i] * 2, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.INITIALIZER,
                    activation_fn=None,
                    scope='rpn_cls_score_{}'.format(level))
                rpn_box_pred = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location[i] * 4, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.BBOX_INITIALIZER,
                    activation_fn=None,
                    scope='rpn_bbox_pred_{}'.format(level))
                rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
                rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
                rpn_cls_prob = slim.softmax(
                    rpn_cls_score,
                    scope='rpn_cls_prob_{}'.format(level))

                rpn_all_cls_score[level] = rpn_cls_score
                rpn_all_boxes_scores[level] = rpn_cls_prob  # softmax probabilities
                rpn_all_encode_boxes[level] = rpn_box_pred
                i += 1

        # 3. generate_anchors
        i = 0
        for level, base_anchor_size, stride in zip(self.level,
                                                   self.base_anchor_size_list,
                                                   self.stride):
            featuremap_height, featuremap_width = tf.shape(
                self.feature_pyramid[level])[1], tf.shape(
                    self.feature_pyramid[level])[2]

            featuremap_height = tf.cast(featuremap_height, tf.float32)
            featuremap_width = tf.cast(featuremap_width, tf.float32)

            # anchor_scale = tf.constant(self.anchor_scales[i], dtype=tf.float32)
            # anchor_ratio = tf.constant(self.anchor_ratios[i], dtype=tf.float32)
            anchor_scale = self.anchor_scales[i]
            anchor_ratio = self.anchor_ratios[i]

            tmp_anchors = anchor_utils.make_anchors(
                base_anchor_size=base_anchor_size,
                anchor_scales=anchor_scale,
                anchor_ratios=anchor_ratio,
                featuremap_height=featuremap_height,
                featuremap_width=featuremap_width,
                stride=stride,
                name="make_anchors_forRPN_{}".format(level))
            tmp_anchors = tf.reshape(tmp_anchors, [-1, 4])
            anchors[level] = tmp_anchors

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
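        # Each level's predictions are decoded against that level's anchors and
        # NMS-ed independently (a hedged sketch of this decode/clip/NMS step is
        # given at the end of this listing).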
        rois = {}
        roi_scores = {}
        with tf.variable_scope('postprocess_RPN'):
            for level in self.level:
                rois_rpn, roi_scores_rpn = postprocess_rpn_proposals(
                    rpn_bbox_pred=rpn_all_encode_boxes[level],
                    rpn_cls_prob=rpn_all_boxes_scores[level],
                    img_shape=img_shape,
                    anchors=anchors[level],
                    is_training=self.is_training)
                # rois shape: [-1, 4]
                # ----------------------------------------add img smry----------------------------------------
                rois[level] = rois_rpn
                roi_scores[level] = roi_scores_rpn

                if self.is_training:
                    rois_in_img = show_box_in_tensor.draw_boxes_with_categories(
                        img_batch=input_img_batch,
                        boxes=rois_rpn,
                        scores=roi_scores_rpn)
                    tf.summary.image('all_rpn_rois_{}'.format(level),
                                     rois_in_img)

                    score_gre_05 = tf.reshape(
                        tf.where(tf.greater_equal(roi_scores_rpn, 0.5)), [-1])
                    score_gre_05_rois = tf.gather(rois_rpn, score_gre_05)
                    score_gre_05_score = tf.gather(roi_scores_rpn,
                                                   score_gre_05)
                    score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_categories(
                        img_batch=input_img_batch,
                        boxes=score_gre_05_rois,
                        scores=score_gre_05_score)
                    tf.summary.image('score_greater_05_rois_{}'.format(level),
                                     score_gre_05_in_img)
            # ---------------------------------------------------------------------------------------------

        rpn_labels = {}
        rpn_bbox_targets = {}
        labels_all = []
        labels = {}
        bbox_targets_h = {}
        bbox_targets_r = {}
        bbox_targets_all_h = []
        bbox_targets_all_r = []

        if self.is_training:
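            # Training only: tf.py_func wraps the NumPy target-assignment
            # routines, so RPN/RCNN label sampling runs outside the TF graph.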
            for level in self.level:
                with tf.variable_scope(
                        'sample_anchors_minibatch_{}'.format(level)):
                    rpn_labels_one, rpn_bbox_targets_one = \
                        tf.py_func(
                            anchor_target_layer,
                            [gtboxes_h_batch, img_shape, anchors[level]],
                            [tf.float32, tf.float32])
                    rpn_bbox_targets_one = tf.reshape(rpn_bbox_targets_one,
                                                      [-1, 4])
                    rpn_labels_one = tf.to_int32(
                        rpn_labels_one, name="to_int32_{}".format(level))
                    rpn_labels_one = tf.reshape(rpn_labels_one, [-1])
                    self.add_anchor_img_smry(input_img_batch, anchors[level],
                                             rpn_labels_one)

                    # -----------------------------add to the dict-------------------------------------------------------------
                    rpn_labels[level] = rpn_labels_one
                    rpn_bbox_targets[level] = rpn_bbox_targets_one
                # --------------------------------------add smry-----------------------------------------------------------

                rpn_cls_category = tf.argmax(rpn_all_boxes_scores[level],
                                             axis=1)
                kept_rpn = tf.reshape(
                    tf.where(tf.not_equal(rpn_labels_one, -1)), [-1])
                rpn_cls_category = tf.gather(rpn_cls_category,
                                             kept_rpn)  # predicted classes
                acc = tf.reduce_mean(
                    tf.to_float(
                        tf.equal(
                            rpn_cls_category,
                            tf.to_int64(tf.gather(rpn_labels_one,
                                                  kept_rpn)))))
                tf.summary.scalar('ACC/rpn_accuracy_{}'.format(level), acc)

                with tf.control_dependencies([rpn_labels[level]]):
                    with tf.variable_scope(
                            'sample_RCNN_minibatch_{}'.format(level)):
                        rois_, labels_, bbox_targets_h_, bbox_targets_r_ = \
                            tf.py_func(
                                proposal_target_layer,
                                [rois[level], gtboxes_h_batch, gtboxes_r_batch],
                                [tf.float32, tf.float32, tf.float32, tf.float32])

                        rois_fast = tf.reshape(rois_, [-1, 4])
                        labels_fast = tf.to_int32(labels_)
                        labels_fast = tf.reshape(labels_fast, [-1])
                        bbox_targets_h_fast = tf.reshape(
                            bbox_targets_h_, [-1, 4 * (cfgs.CLASS_NUM + 1)])
                        bbox_targets_r_fast = tf.reshape(
                            bbox_targets_r_, [-1, 5 * (cfgs.CLASS_NUM + 1)])
                        self.add_roi_batch_img_smry(input_img_batch, rois_fast,
                                                    labels_fast)
                        # ---------------------- store per-level results ----------------------
                        rois[level] = rois_fast
                        labels[level] = labels_fast
                        bbox_targets_h[level] = bbox_targets_h_fast
                        bbox_targets_r[level] = bbox_targets_r_fast
                        labels_all.append(labels_fast)
                        bbox_targets_all_h.append(bbox_targets_h_fast)
                        bbox_targets_all_r.append(bbox_targets_r_fast)

            fast_labels = tf.concat(labels_all, axis=0)
            fast_bbox_targets_h = tf.concat(bbox_targets_all_h, axis=0)
            fast_bbox_targets_r = tf.concat(bbox_targets_all_r, axis=0)
        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN
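        # The head predicts two box sets per RoI: horizontal (4 values per
        # class) and rotated (5 values per class), matching the 4*(CLASS_NUM+1)
        # and 5*(CLASS_NUM+1) target reshapes above.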
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)

        bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = self.build_fastrcnn(
            feature_to_cropped=self.feature_pyramid,
            rois_all=rois,
            img_shape=img_shape)

        # feature_to_cropped here refers to the feature maps (the pyramid dict).
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        cls_prob_h = slim.softmax(cls_score_h, 'cls_prob_h')
        # As the code shows, the horizontal and rotated branches are handled separately.
        cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r')

        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:

            cls_category_h = tf.argmax(cls_prob_h, axis=1)
            fast_acc_h = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category_h,
                                     tf.to_int64(fast_labels))))
            tf.summary.scalar('ACC/fast_acc_h', fast_acc_h)

            cls_category_r = tf.argmax(cls_prob_r, axis=1)
            fast_acc_r = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category_r,
                                     tf.to_int64(fast_labels))))
            tf.summary.scalar('ACC/fast_acc_r', fast_acc_r)

        #  6. postprocess_fastrcnn
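        # Inference: concatenate per-level RoIs and return final detections;
        # training: build the multi-task loss dict first, then also return it.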
        if not self.is_training:

            rois = tf.concat([rois[level] for level in self.level], axis=0)

            final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
                rois=rois,
                bbox_ppred=bbox_pred_h,
                scores=cls_prob_h,
                img_shape=img_shape)
            final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
                rois=rois,
                bbox_ppred=bbox_pred_r,
                scores=cls_prob_r,
                img_shape=img_shape)
            return final_boxes_h, final_scores_h, final_category_h, final_boxes_r, final_scores_r, final_category_r
        else:
            '''
            when training, we need to build the loss
            '''
            loss_dict = self.build_loss(rpn_box_pred=rpn_all_encode_boxes,
                                        rpn_bbox_targets=rpn_bbox_targets,
                                        rpn_cls_score=rpn_all_cls_score,
                                        rpn_labels=rpn_labels,
                                        bbox_pred_h=bbox_pred_h,
                                        bbox_targets_h=fast_bbox_targets_h,
                                        cls_score_h=cls_score_h,
                                        bbox_pred_r=bbox_pred_r,
                                        bbox_targets_r=fast_bbox_targets_r,
                                        cls_score_r=cls_score_r,
                                        labels=fast_labels)
            rois = tf.concat([rois[level] for level in self.level], axis=0)

            final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
                rois=rois,
                bbox_ppred=bbox_pred_h,
                scores=cls_prob_h,
                img_shape=img_shape)
            final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
                rois=rois,
                bbox_ppred=bbox_pred_r,
                scores=cls_prob_r,
                img_shape=img_shape)

            return final_boxes_h, final_scores_h, final_category_h, \
                   final_boxes_r, final_scores_r, final_category_r, loss_dict
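
The example above (and the single-level variant below) calls anchor_utils.make_anchors without showing its body. As a point of reference, here is a minimal, self-contained sketch of the standard grid-anchor scheme it presumably implements; the name make_anchors_sketch, the half-cell centering, and the ratio = h / w convention are illustrative assumptions, not this repo's exact code.

import numpy as np

def make_anchors_sketch(base_size, scales, ratios, feat_h, feat_w, stride):
    # Base anchors centered at the origin, shape [A, 4] as (x1, y1, x2, y2).
    ws, hs = [], []
    for r in ratios:
        for s in scales:
            size = base_size * s
            ws.append(size * np.sqrt(1.0 / r))  # assumption: ratio = h / w
            hs.append(size * np.sqrt(r))
    ws, hs = np.array(ws), np.array(hs)
    base = np.stack([-ws / 2, -hs / 2, ws / 2, hs / 2], axis=1)  # [A, 4]

    # Shift the base anchors to the center of every feature-map cell.
    shift_x = (np.arange(feat_w) + 0.5) * stride
    shift_y = (np.arange(feat_h) + 0.5) * stride
    sx, sy = np.meshgrid(shift_x, shift_y)
    shifts = np.stack([sx.ravel(), sy.ravel(), sx.ravel(), sy.ravel()], axis=1)

    # Broadcast [H*W, 1, 4] + [1, A, 4] -> [H*W*A, 4], the same flat layout
    # as tf.reshape(tmp_anchors, [-1, 4]) above.
    return (shifts[:, None, :] + base[None, :, :]).reshape(-1, 4)

# e.g. a 48x64 map with stride 8:
# a = make_anchors_sketch(128, (0.5, 1.0, 2.0), (0.5, 1.0, 2.0), 48, 64, 8)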
Code Example #12
0
    def build_whole_detection_network(self, input_img_batch, gtboxes_r_batch,
                                      gtboxes_h_batch):

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        feature_to_cropped = self.build_base_network(input_img_batch)

        # 2. build rpn
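        # Single-level variant: one feature map, one RPN head, so no per-level
        # scope suffixes are needed here.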
        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):

            rpn_conv3x3 = slim.conv2d(feature_to_cropped,
                                      512, [3, 3],
                                      trainable=self.is_training,
                                      weights_initializer=cfgs.INITIALIZER,
                                      activation_fn=tf.nn.relu,
                                      scope='rpn_conv/3x3')
            rpn_cls_score = slim.conv2d(rpn_conv3x3,
                                        self.num_anchors_per_location * 2,
                                        [1, 1],
                                        stride=1,
                                        trainable=self.is_training,
                                        weights_initializer=cfgs.INITIALIZER,
                                        activation_fn=None,
                                        scope='rpn_cls_score')
            rpn_box_pred = slim.conv2d(
                rpn_conv3x3,
                self.num_anchors_per_location * 4, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.BBOX_INITIALIZER,
                activation_fn=None,
                scope='rpn_bbox_pred')
            rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

        # 3. generate_anchors
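        # One anchor grid over the single feature map, using the first entry of
        # BASE_ANCHOR_SIZE_LIST and the global ANCHOR_STRIDE.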
        featuremap_height, featuremap_width = tf.shape(
            feature_to_cropped)[1], tf.shape(feature_to_cropped)[2]
        featuremap_height = tf.cast(featuremap_height, tf.float32)
        featuremap_width = tf.cast(featuremap_width, tf.float32)

        anchors = anchor_utils.make_anchors(
            base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
            anchor_scales=cfgs.ANCHOR_SCALES,
            anchor_ratios=cfgs.ANCHOR_RATIOS,
            featuremap_height=featuremap_height,
            featuremap_width=featuremap_width,
            stride=cfgs.ANCHOR_STRIDE,
            name="make_anchors_forRPN")

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
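        # Same decode/clip/NMS pipeline as in the pyramid variant, applied once
        # (see the sketch at the end of this listing).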
        with tf.variable_scope('postprocess_RPN'):
            rois, roi_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=rpn_box_pred,
                rpn_cls_prob=rpn_cls_prob,
                img_shape=img_shape,
                anchors=anchors,
                is_training=self.is_training)
            # rois shape [-1, 4]

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)
        bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = self.build_fastrcnn(
            feature_to_cropped=feature_to_cropped,
            rois=rois,
            img_shape=img_shape)
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        cls_prob_h = slim.softmax(cls_score_h, 'cls_prob_h')
        cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r')


        #  6. postprocess_fastrcnn
        final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
            rois=rois,
            bbox_ppred=bbox_pred_h,
            scores=cls_prob_h,
            img_shape=img_shape)
        final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
            rois=rois,
            bbox_ppred=bbox_pred_r,
            scores=cls_prob_r,
            img_shape=img_shape)
        return final_boxes_h, final_scores_h, final_category_h, final_boxes_r, final_scores_r, final_category_r
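
Both examples delegate step 4 to postprocess_rpn_proposals, whose body is not shown. Below is a hedged sketch of what that step typically does in Faster R-CNN-style code (decode deltas against anchors, clip to the image window, keep the top-scoring boxes, then NMS); the function name, the default 12000/2000 pre-/post-NMS budgets, the 0.7 IoU threshold, and the (dx, dy, dw, dh) parameterization are assumptions, not this repo's exact implementation.

import tensorflow as tf

def postprocess_rpn_sketch(rpn_bbox_pred, rpn_cls_prob, anchors, img_shape,
                           pre_nms_topk=12000, post_nms_topk=2000,
                           iou_threshold=0.7):
    # 1) Decode (dx, dy, dw, dh) deltas against (x1, y1, x2, y2) anchors.
    ax1, ay1, ax2, ay2 = tf.unstack(anchors, axis=1)
    aw, ah = ax2 - ax1, ay2 - ay1
    acx, acy = ax1 + 0.5 * aw, ay1 + 0.5 * ah
    dx, dy, dw, dh = tf.unstack(rpn_bbox_pred, axis=1)
    cx, cy = dx * aw + acx, dy * ah + acy
    w, h = tf.exp(dw) * aw, tf.exp(dh) * ah

    # 2) Clip the decoded boxes to the image window (img_shape is [N, H, W, C]).
    img_h = tf.cast(img_shape[1], tf.float32)
    img_w = tf.cast(img_shape[2], tf.float32)
    x1 = tf.clip_by_value(cx - 0.5 * w, 0.0, img_w - 1.0)
    y1 = tf.clip_by_value(cy - 0.5 * h, 0.0, img_h - 1.0)
    x2 = tf.clip_by_value(cx + 0.5 * w, 0.0, img_w - 1.0)
    y2 = tf.clip_by_value(cy + 0.5 * h, 0.0, img_h - 1.0)
    boxes = tf.stack([x1, y1, x2, y2], axis=1)

    # 3) Keep the top-k foreground scores, then non-max suppression.
    scores = rpn_cls_prob[:, 1]  # column 1 = foreground probability
    k = tf.minimum(pre_nms_topk, tf.shape(scores)[0])
    scores, top_idx = tf.nn.top_k(scores, k=k)
    boxes = tf.gather(boxes, top_idx)
    # tf.image.non_max_suppression documents [y1, x1, y2, x2] ordering; the
    # consistent swap below leaves IoU unchanged.
    nms_boxes = tf.stack([boxes[:, 1], boxes[:, 0], boxes[:, 3], boxes[:, 2]],
                         axis=1)
    keep = tf.image.non_max_suppression(nms_boxes, scores,
                                        max_output_size=post_nms_topk,
                                        iou_threshold=iou_threshold)
    return tf.gather(boxes, keep), tf.gather(scores, keep)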