Example 1
    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h,
                                      gtboxes_batch_r,
                                      gpu_id=0):

        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        feature_pyramid = self.build_base_network(input_img_batch)

        # 2. build rpn
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(
            feature_pyramid, 'rpn_net')

        # 3. generate_anchors
        anchor_list = self.make_anchors(feature_pyramid)

        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
        # rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)
        anchors = tf.concat(anchor_list, axis=0)

        if self.is_training:
            with tf.variable_scope('build_loss'):
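                # anchor_target_layer runs on the host via tf.py_func and
                # assigns every anchor a label, a regression target and an
                # anchor state (typically positive / negative / ignore).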
                labels, target_delta, anchor_states, target_boxes = tf.py_func(
                    func=anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 1)

                cls_loss = losses.focal_loss(labels, rpn_cls_score,
                                             anchor_states)
                if cfgs.USE_IOU_FACTOR:
                    reg_loss = losses.iou_smooth_l1_loss_(
                        target_delta, rpn_box_pred, anchor_states,
                        target_boxes, anchors)
                    # reg_loss = losses.adiou_smooth_l1_loss(target_delta, rpn_box_pred, anchor_states, target_boxes, anchors)
                else:
                    reg_loss = losses.smooth_l1_loss(target_delta,
                                                     rpn_box_pred,
                                                     anchor_states)

                self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

        box_pred_list, cls_prob_list, proposal_list = rpn_box_pred_list, rpn_cls_prob_list, anchor_list

        all_box_pred_list, all_cls_prob_list, all_proposal_list = [], [], []

        for i in range(cfgs.NUM_REFINE_STAGE):
            box_pred_list, cls_prob_list, proposal_list = self.refine_stage(
                input_img_batch,
                gtboxes_batch_r,
                box_pred_list,
                cls_prob_list,
                proposal_list,
                feature_pyramid,
                gpu_id,
                pos_threshold=cfgs.REFINE_IOU_POSITIVE_THRESHOLD[i],
                neg_threshold=cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[i],
                stage='' if i == 0 else '_stage{}'.format(i + 2),
                proposal_filter=(i == 0))

            if not self.is_training:
                all_box_pred_list.extend(box_pred_list)
                all_cls_prob_list.extend(cls_prob_list)
                all_proposal_list.extend(proposal_list)
            else:
                all_box_pred_list, all_cls_prob_list, all_proposal_list = box_pred_list, cls_prob_list, proposal_list

        with tf.variable_scope('postprocess_detctions'):
            box_pred = tf.concat(all_box_pred_list, axis=0)
            cls_prob = tf.concat(all_cls_prob_list, axis=0)
            proposal = tf.concat(all_proposal_list, axis=0)

            boxes, scores, category = postprocess_detctions(
                refine_bbox_pred=box_pred,
                refine_cls_prob=cls_prob,
                anchors=proposal,
                is_training=self.is_training)
            boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)

        if self.is_training:
            return boxes, scores, category, self.losses_dict
        else:
            return boxes, scores, category
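
The regression losses above are masked by anchor_states so that only positive anchors contribute. Below is a minimal, self-contained sketch of such a state-masked smooth L1 term; it is illustrative only and does not claim to match the exact signature or sigma handling of the repo's losses.smooth_l1_loss.

import tensorflow as tf

def masked_smooth_l1(target_delta, box_pred, anchor_states, sigma=3.0):
    # Smooth L1 over positive anchors only (anchor_states == 1), normalised
    # by the number of positives. Sketch for illustration, not the repo's
    # losses.smooth_l1_loss.
    positives = tf.cast(tf.equal(anchor_states, 1.0), tf.float32)
    sigma2 = sigma ** 2
    diff = tf.abs(box_pred - target_delta)
    per_coord = tf.where(diff < 1.0 / sigma2,
                         0.5 * sigma2 * tf.square(diff),
                         diff - 0.5 / sigma2)
    per_anchor = tf.reduce_sum(per_coord, axis=1) * positives
    num_pos = tf.maximum(tf.reduce_sum(positives), 1.0)
    return tf.reduce_sum(per_anchor) / num_pos

Normalising by the positive count keeps the loss scale independent of how many anchors end up positive in a given image.
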
Example 2
    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h,
                                      gtboxes_batch_r,
                                      gpu_id=0):

        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        # 1. build base network
        feature_pyramid = self.build_base_network(input_img_batch)

        # 2. build rpn
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(
            feature_pyramid, 'rpn_net')

        # 3. generate_anchors
        anchor_list = self.make_anchors(feature_pyramid)
        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
        # rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)
        anchors = tf.concat(anchor_list, axis=0)

        if self.is_training:
            with tf.variable_scope('build_loss'):
                labels, target_delta, anchor_states, target_boxes = tf.py_func(
                    func=anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 1)

                cls_loss = losses.focal_loss(labels, rpn_cls_score,
                                             anchor_states)
                if cfgs.USE_IOU_FACTOR:
                    reg_loss = losses.iou_smooth_l1_loss(
                        target_delta, rpn_box_pred, anchor_states,
                        target_boxes, anchors)
                else:
                    reg_loss = losses.smooth_l1_loss(target_delta,
                                                     rpn_box_pred,
                                                     anchor_states)

                self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

        with tf.variable_scope('refine_feature_pyramid'):
            refine_feature_pyramid = {}
            for level in cfgs.LEVEL:
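                # Approximate a 5x5 context convolution with a separable
                # 1x5 -> 5x1 pair, then add a 1x1 projection of the same
                # level as a shortcut.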
                feature_1x5 = slim.conv2d(
                    inputs=feature_pyramid[level],
                    num_outputs=256,
                    kernel_size=[1, 5],
                    weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                    biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                    stride=1,
                    activation_fn=None,
                    scope='refine_1x5_{}'.format(level))

                feature_5x1 = slim.conv2d(
                    inputs=feature_1x5,
                    num_outputs=256,
                    kernel_size=[5, 1],
                    weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                    biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                    stride=1,
                    activation_fn=None,
                    scope='refine_5x1_{}'.format(level))

                feature_1x1 = slim.conv2d(
                    inputs=feature_pyramid[level],
                    num_outputs=256,
                    kernel_size=[1, 1],
                    weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                    biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                    stride=1,
                    activation_fn=None,
                    scope='refine_1x1_{}'.format(level))
                refine_feature_pyramid[level] = feature_5x1 + feature_1x1

        refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list = self.refine_net(
            refine_feature_pyramid, 'refine_net')
        # refine_box_pred_list, refine_cls_score_list, refine_cls_prob_list = self.refine_net(feature_pyramid, 'refine_net')

        refine_box_pred = tf.concat(refine_box_pred_list, axis=0)
        refine_cls_score = tf.concat(refine_cls_score_list, axis=0)
        refine_cls_prob = tf.concat(refine_cls_prob_list, axis=0)
        # refine_boxes = tf.concat(refine_boxes_list, axis=0)

        if cfgs.METHOD == 'H':
            x_c = (anchors[:, 2] + anchors[:, 0]) / 2
            y_c = (anchors[:, 3] + anchors[:, 1]) / 2
            h = anchors[:, 2] - anchors[:, 0] + 1
            w = anchors[:, 3] - anchors[:, 1] + 1
            theta = -90 * tf.ones_like(x_c)
            anchors = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))

        refine_boxes = bbox_transform.rbbox_transform_inv(boxes=anchors,
                                                          deltas=rpn_box_pred)

        # 4. postprocess rpn proposals (decode, clip, filter)
        if not self.is_training:
            with tf.variable_scope('postprocess_detctions'):
                boxes, scores, category = postprocess_detctions(
                    refine_bbox_pred=refine_box_pred,
                    refine_cls_prob=refine_cls_prob,
                    anchors=refine_boxes,
                    is_training=self.is_training)
                return boxes, scores, category

        #  5. build loss
        else:
            with tf.variable_scope('build_refine_loss'):
                refine_labels, refine_target_delta, refine_box_states, refine_target_boxes = tf.py_func(
                    func=refinebox_target_layer,
                    inp=[
                        gtboxes_batch_r, refine_boxes,
                        cfgs.REFINE_IOU_POSITIVE_THRESHOLD[0],
                        cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[0], gpu_id
                    ],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                self.add_anchor_img_smry(input_img_batch, refine_boxes,
                                         refine_box_states, 1)

                refine_cls_loss = losses.focal_loss(refine_labels,
                                                    refine_cls_score,
                                                    refine_box_states)
                if cfgs.USE_IOU_FACTOR:
                    refine_reg_loss = losses.iou_smooth_l1_loss(
                        refine_target_delta, refine_box_pred,
                        refine_box_states, refine_target_boxes, refine_boxes)
                else:
                    refine_reg_loss = losses.smooth_l1_loss(
                        refine_target_delta, refine_box_pred,
                        refine_box_states)

                self.losses_dict['refine_cls_loss'] = \
                    refine_cls_loss * cfgs.CLS_WEIGHT
                self.losses_dict['refine_reg_loss'] = \
                    refine_reg_loss * cfgs.REG_WEIGHT

            with tf.variable_scope('postprocess_detctions'):
                boxes, scores, category = postprocess_detctions(
                    refine_bbox_pred=refine_box_pred,
                    refine_cls_prob=refine_cls_prob,
                    anchors=refine_boxes,
                    is_training=self.is_training)
                boxes = tf.stop_gradient(boxes)
                scores = tf.stop_gradient(scores)
                category = tf.stop_gradient(category)

                return boxes, scores, category, self.losses_dict
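
When cfgs.METHOD == 'H', the horizontal anchors are rewritten as rotated boxes before rbbox_transform_inv decodes the RPN deltas. Assuming the (x1, y1, x2, y2) anchor layout implied by the indexing, the conversion swaps the two extents and pins theta at -90; the NumPy sketch below (hypothetical helper name) mirrors that arithmetic, and the OpenCV-style angle convention it mentions is an assumption about the repo.

import numpy as np

def hbb_to_rbb(anchors):
    # (x1, y1, x2, y2) -> (x_c, y_c, w, h, theta) with theta fixed at -90.
    # The x-extent lands in h and the y-extent in w; with theta = -90 this
    # describes the same rectangle under an OpenCV-style angle convention.
    x_c = (anchors[:, 2] + anchors[:, 0]) / 2.0
    y_c = (anchors[:, 3] + anchors[:, 1]) / 2.0
    h = anchors[:, 2] - anchors[:, 0] + 1
    w = anchors[:, 3] - anchors[:, 1] + 1
    theta = -90.0 * np.ones_like(x_c)
    return np.stack([x_c, y_c, w, h, theta], axis=1)

# box (5, 5, 24, 14) -> centre (14.5, 9.5), w = 10, h = 20, theta = -90
print(hbb_to_rbb(np.array([[5., 5., 24., 14.]])))
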
Example 3
    def build_whole_detection_network(self,
                                      input_img_batch,
                                      gtboxes_batch_h,
                                      gtboxes_batch_r,
                                      gpu_id=0):

        if self.is_training:
            gtboxes_batch_h = tf.reshape(gtboxes_batch_h, [-1, 5])
            gtboxes_batch_h = tf.cast(gtboxes_batch_h, tf.float32)

            gtboxes_batch_r = tf.reshape(gtboxes_batch_r, [-1, 6])
            gtboxes_batch_r = tf.cast(gtboxes_batch_r, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        if cfgs.USE_SUPERVISED_MASK:
            feature_pyramid, mask_list, dot_layer_list = self.build_base_network(
                input_img_batch)
        else:
            feature_pyramid = self.build_base_network(input_img_batch)
            dot_layer_list = None
            mask_list = []

        # 2. build rpn
        # if cfgs.USE_SUPERVISED_MASK:
        #     for i, d in enumerate(dot_layer_list):
        #         feature_pyramid['P{}'.format(i + 3)] *= d
        rpn_box_pred_list, rpn_cls_score_list, rpn_cls_prob_list = self.rpn_net(
            feature_pyramid, 'rpn_net')

        # 3. generate anchors and mask
        anchor_list = self.make_anchors(feature_pyramid)

        if cfgs.USE_SUPERVISED_MASK:
            mask_gt_list = self.generate_mask(mask_list, img_shape,
                                              gtboxes_batch_h, gtboxes_batch_r,
                                              feature_pyramid)

        rpn_box_pred = tf.concat(rpn_box_pred_list, axis=0)
        rpn_cls_score = tf.concat(rpn_cls_score_list, axis=0)
        # rpn_cls_prob = tf.concat(rpn_cls_prob_list, axis=0)
        anchors = tf.concat(anchor_list, axis=0)

        if self.is_training:
            with tf.variable_scope('build_loss'):
                labels, target_delta, anchor_states, target_boxes = tf.py_func(
                    func=anchor_target_layer,
                    inp=[gtboxes_batch_h, gtboxes_batch_r, anchors],
                    Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

                if self.method == 'H':
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 0)
                else:
                    self.add_anchor_img_smry(input_img_batch, anchors,
                                             anchor_states, 1)

                cls_loss = losses.focal_loss(labels, rpn_cls_score,
                                             anchor_states)
                if cfgs.USE_IOU_FACTOR:
                    reg_loss = losses.iou_smooth_l1_loss_(
                        target_delta, rpn_box_pred, anchor_states,
                        target_boxes, anchors)
                else:
                    reg_loss = losses.smooth_l1_loss(target_delta,
                                                     rpn_box_pred,
                                                     anchor_states)

                if cfgs.USE_SUPERVISED_MASK:
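                    # Auxiliary mask supervision: each pyramid level's mask
                    # logits are trained with pixel-wise cross-entropy against
                    # the targets produced by generate_mask above.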
                    with tf.variable_scope("supervised_mask_loss"):
                        mask_loss = 0.0
                        for i in range(len(mask_list)):
                            a_mask, a_mask_gt = mask_list[i], mask_gt_list[i]
                            # b, h, w, c = a_mask.shape
                            last_dim = 2 if cfgs.BINARY_MASK else cfgs.CLASS_NUM + 1
                            a_mask = tf.reshape(a_mask, shape=[-1, last_dim])
                            a_mask_gt = tf.reshape(a_mask_gt, shape=[-1])
                            a_mask_loss = tf.reduce_mean(
                                tf.nn.sparse_softmax_cross_entropy_with_logits(
                                    logits=a_mask, labels=a_mask_gt))
                            mask_loss += a_mask_loss
                        self.losses_dict['mask_loss'] = (
                            mask_loss * cfgs.SUPERVISED_MASK_LOSS_WEIGHT /
                            float(len(mask_list)))

                self.losses_dict['cls_loss'] = cls_loss * cfgs.CLS_WEIGHT
                self.losses_dict['reg_loss'] = reg_loss * cfgs.REG_WEIGHT

        box_pred_list, cls_prob_list, proposal_list = rpn_box_pred_list, rpn_cls_prob_list, anchor_list

        all_box_pred_list, all_cls_prob_list, all_proposal_list = [], [], []

        for i in range(cfgs.NUM_REFINE_STAGE):
            box_pred_list, cls_prob_list, proposal_list = self.refine_stage(
                input_img_batch,
                gtboxes_batch_r,
                box_pred_list,
                cls_prob_list,
                proposal_list,
                feature_pyramid,
                dot_layer_list,
                gpu_id,
                pos_threshold=cfgs.REFINE_IOU_POSITIVE_THRESHOLD[i],
                neg_threshold=cfgs.REFINE_IOU_NEGATIVE_THRESHOLD[i],
                stage='' if i == 0 else '_stage{}'.format(i + 2),
                proposal_filter=(i == 0))

            if not self.is_training:
                all_box_pred_list.extend(box_pred_list)
                all_cls_prob_list.extend(cls_prob_list)
                all_proposal_list.extend(proposal_list)
            else:
                all_box_pred_list, all_cls_prob_list, all_proposal_list = box_pred_list, cls_prob_list, proposal_list

        with tf.variable_scope('postprocess_detctions'):
            box_pred = tf.concat(all_box_pred_list, axis=0)
            cls_prob = tf.concat(all_cls_prob_list, axis=0)
            proposal = tf.concat(all_proposal_list, axis=0)

            boxes, scores, category = postprocess_detctions(
                refine_bbox_pred=box_pred,
                refine_cls_prob=cls_prob,
                anchors=proposal,
                is_training=self.is_training,
                gpu_id=gpu_id)
            boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)

        if self.is_training:
            return boxes, scores, category, self.losses_dict
        else:
            return boxes, scores, category
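
The supervised-mask branch in the last variant averages a per-level, pixel-wise cross-entropy between each level's mask logits and the targets produced by generate_mask. A condensed sketch of that pattern follows; the helper name is hypothetical and the explicit int cast of the targets is an assumption (the real targets may already be integer-typed).

import tensorflow as tf

def supervised_mask_loss(mask_list, mask_gt_list, class_num, binary_mask=True):
    # Flatten each level's logits to (pixels, classes) and its targets to
    # (pixels,), take sparse softmax cross-entropy, then average over levels.
    last_dim = 2 if binary_mask else class_num + 1
    level_losses = []
    for logits, labels in zip(mask_list, mask_gt_list):
        logits = tf.reshape(logits, [-1, last_dim])
        labels = tf.cast(tf.reshape(labels, [-1]), tf.int32)
        level_losses.append(tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                           labels=labels)))
    return tf.add_n(level_losses) / float(len(level_losses))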