Exemple #1
0
def iou_loss(bbox_pred, bbox_targets, gtbox, label, num_classes):
    """
    Build an IoU-based loss from per-class encoded box predictions.

    :param bbox_pred: encoded predictions, [-1, (cfgs.CLS_NUM +1) * 4]
    :param bbox_targets: encoded targets, [-1, (cfgs.CLS_NUM +1) * 4]
    :param gtbox: boxes used as the decoding reference, [-1, 4]
    :param label: class index per row, [-1]
    :param num_classes: number of 4-value box slots per row
    :return: scalar tensor: 0.1 * IoU term + 0.0 * classification term
    """

    # One reference box per (row, class) pair, flattened to [-1, 4].
    reference = tf.reshape(tf.tile(gtbox, [1, num_classes]), [-1, 4])
    flat_pred = tf.reshape(bbox_pred, [-1, 4])
    flat_targets = tf.reshape(bbox_targets, [-1, 4])

    decoded_pred = encode_and_decode.decode_boxes(flat_pred, reference,
                                                  scale_factors=cfgs.ROI_SCALE_FACTORS)
    decoded_target = encode_and_decode.decode_boxes(flat_targets, reference,
                                                    scale_factors=cfgs.ROI_SCALE_FACTORS)

    # 1.0 in the slot of each row's labelled class, 0.0 in every other slot.
    class_slot_mask = tf.one_hot(tf.reshape(label, [-1, 1]),
                                 depth=num_classes, axis=1)
    class_slot_mask = tf.reshape(class_slot_mask, [-1])

    iou = iou_calculate(decoded_pred, decoded_target)

    # NOTE(review): masked-out slots still contribute the constant -log(1e-5)
    # to the mean; confirm that this is intended.
    masked_iou = iou * class_slot_mask
    regression_term = tf.reduce_mean(-tf.log(masked_iou + 1e-5))

    # Thresholded IoU used as logits; this term carries a zero weight below.
    hits = tf.reshape(tf.cast(tf.greater(iou, 0.5), tf.float32),
                      [-1, num_classes])
    classification_term = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=hits, labels=label))

    return regression_term * 0.1 + classification_term * 0.0
Exemple #2
0
    def rpn_proposals(self):
        """
        Decode the RPN regressions, keep the best-scoring candidates, run
        rotated NMS and pad the result when too few boxes survive.

        :return: (rpn_proposals_boxes, rpn_proposals_scores); when NMS keeps
                 fewer than self.max_proposals_num boxes the pair comes from
                 boxes_utils.padd_boxes_with_zeros
        """
        with tf.variable_scope('rpn_proposals'):
            rpn_decode_boxes = encode_and_decode.decode_boxes(
                encode_boxes=self.rpn_encode_boxes,
                reference_boxes=self.anchors,
                scale_factors=self.scale_factors)

            rpn_softmax_scores = slim.softmax(self.rpn_scores)
            # second column represent object
            rpn_object_score = rpn_softmax_scores[:, 1]

            if self.top_k_nms:
                # tf.nn.top_k raises when k exceeds the number of candidates,
                # so never ask for more entries than there are anchors.
                top_k = tf.minimum(self.top_k_nms,
                                   tf.shape(rpn_object_score)[0],
                                   name='clamp_top_k')
                rpn_object_score, top_k_indices = tf.nn.top_k(rpn_object_score,
                                                              k=top_k)
                rpn_decode_boxes = tf.gather(rpn_decode_boxes, top_k_indices)

            # NMS
            valid_indices = tf_wrapper.nms_rotate_tf(
                boxes_list=rpn_decode_boxes,
                scores=rpn_object_score,
                iou_threshold=self.rpn_nms_iou_threshold,
                max_output_size=self.max_proposals_num,
                use_gpu=cfgs.NMS_USE_GPU)

            valid_boxes = tf.gather(rpn_decode_boxes, valid_indices)
            valid_scores = tf.gather(rpn_object_score, valid_indices)

            # Pad only when NMS returned fewer boxes than requested.
            rpn_proposals_boxes, rpn_proposals_scores = tf.cond(
                tf.less(tf.shape(valid_boxes)[0], self.max_proposals_num),
                lambda: boxes_utils.padd_boxes_with_zeros(
                    valid_boxes, valid_scores, self.max_proposals_num),
                lambda: (valid_boxes, valid_scores))

            return rpn_proposals_boxes, rpn_proposals_scores
    def rpn_proposals(self):
        """
        Decode the RPN regressions into boxes, keep the best-scoring
        candidates, run NMS and pad the result when too few boxes survive.

        When self.is_training is false the decoded boxes are first clipped to
        the shape of self.img_batch.

        :return: (rpn_proposals_boxes, rpn_proposals_scores)
        """
        with tf.variable_scope('rpn_proposals'):
            rpn_decode_boxes = encode_and_decode.decode_boxes(encode_boxes=self.rpn_encode_boxes,
                                                              reference_boxes=self.anchors,
                                                              scale_factors=self.scale_factors)

            if not self.is_training:  # when test, clip proposals to img boundaries
                img_shape = tf.shape(self.img_batch)
                rpn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(rpn_decode_boxes, img_shape)

            rpn_softmax_scores = slim.softmax(self.rpn_scores)
            rpn_object_score = rpn_softmax_scores[:, 1]  # second column represent object

            if self.top_k_nms:
                # tf.nn.top_k raises when k exceeds the number of candidates,
                # so clamp k to the number of anchors actually available.
                top_k = tf.minimum(self.top_k_nms, tf.shape(rpn_object_score)[0],
                                   name='clamp_top_k')
                rpn_object_score, top_k_indices = tf.nn.top_k(rpn_object_score, k=top_k)
                rpn_decode_boxes = tf.gather(rpn_decode_boxes, top_k_indices)

            valid_indices = nms.non_maximal_suppression(boxes=rpn_decode_boxes,
                                                        scores=rpn_object_score,
                                                        max_output_size=self.max_proposals_num,
                                                        iou_threshold=self.rpn_nms_iou_threshold)

            valid_boxes = tf.gather(rpn_decode_boxes, valid_indices)
            valid_scores = tf.gather(rpn_object_score, valid_indices)

            # Pad only when NMS returned fewer boxes than requested.
            rpn_proposals_boxes, rpn_proposals_scores = tf.cond(
                tf.less(tf.shape(valid_boxes)[0], self.max_proposals_num),
                lambda: boxes_utils.padd_boxes_with_zeros(valid_boxes, valid_scores,
                                                          self.max_proposals_num),
                lambda: (valid_boxes, valid_scores))

            return rpn_proposals_boxes, rpn_proposals_scores
    def fast_rcnn_predict(self):
        """
        Decode the per-class Fast R-CNN regressions, clip them to the image
        and pass them with the softmax scores to self.fast_rcnn_proposals.

        :return: (boxes, scores, num_of_objects, detection_category) exactly as
                 returned by self.fast_rcnn_proposals
        """
        with tf.variable_scope('fast_rcnn_predict'):
            class_probs = slim.softmax(self.fast_rcnn_scores)

            # One encoded box per row.
            deltas = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 4])

            # Repeat each proposal once per class so rows line up with deltas.
            tiled_proposals = tf.tile(self.fast_rcnn_all_level_proposals,
                                      [1, self.num_classes])
            per_class_proposals = tf.reshape(tiled_proposals, [-1, 4])

            boxes = encode_and_decode.decode_boxes(
                encode_boxes=deltas,
                reference_boxes=per_class_proposals,
                scale_factors=self.scale_factors)
            boxes = boxes_utils.clip_boxes_to_img_boundaries(
                boxes, img_shape=self.img_shape)

            # Back to one row per proposal for the multi-class NMS step.
            boxes = tf.reshape(boxes, [-1, self.num_classes * 4])
            boxes, scores, num_of_objects, detection_category = \
                self.fast_rcnn_proposals(boxes, scores=class_probs)

            return boxes, scores, num_of_objects, detection_category
Exemple #5
0
    def fast_rcnn_predict(self):
        """
        Decode the per-class 5-value regressions against the rotate proposals
        and pass them with the softmax scores to self.fast_rcnn_proposals.

        :return: (boxes, scores, num_of_objects, detection_category) exactly as
                 returned by self.fast_rcnn_proposals
        """
        with tf.variable_scope('fast_rcnn_predict'):
            class_probs = slim.softmax(self.fast_rcnn_scores)

            # One 5-value encoded box per row.
            deltas = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 5])

            # Repeat each rotate proposal once per class, then flatten.
            tiled_proposals = tf.tile(self.fast_rcnn_all_level_rotate_proposals,
                                      [1, self.num_classes])
            per_class_proposals = tf.reshape(tiled_proposals, [-1, 5])

            boxes = encode_and_decode.decode_boxes(
                encode_boxes=deltas,
                reference_boxes=per_class_proposals,
                scale_factors=self.scale_factors)

            # Back to one row per proposal for the multi-class NMS step.
            boxes = tf.reshape(boxes, [-1, self.num_classes * 5])
            boxes, scores, num_of_objects, detection_category = \
                self.fast_rcnn_proposals(boxes, scores=class_probs)

            return boxes, scores, num_of_objects, detection_category
Exemple #6
0
    def postprocess_cascadercnn(self, rois, bbox_pred, cls_score, stage):
        '''
        For every row keep the regression of the best-scoring class and decode
        it against the matching roi.

        :param rois: reference boxes, [-1, 4]
        :param bbox_pred: encoded boxes; reshaped here to [-1, cfgs.CLASS_NUM + 1, 4]
        :param cls_score: class scores; the argmax is taken along axis 1
        :param stage: cascade stage; cfgs.ROI_SCALE_FACTORS[stage - 1] is used
        :return: decoded boxes, one row per input row
        '''
        per_class_deltas = tf.reshape(bbox_pred, [-1, cfgs.CLASS_NUM + 1, 4])

        # Index of the highest-scoring class for each row.
        best_class = tf.argmax(cls_score, axis=1)

        # Row numbers 0..N-1 in the same dtype as best_class.
        row_ids = tf.cumsum(tf.ones_like(best_class)) - 1

        # (row, class) pairs that select a single box per row.
        gather_index = tf.reshape(tf.stack([row_ids, best_class], axis=1),
                                  [-1, 2])
        best_deltas = tf.reshape(tf.gather_nd(per_class_deltas, gather_index),
                                 [-1, 4])

        scale_factors = cfgs.ROI_SCALE_FACTORS[stage - 1]
        return encode_and_decode.decode_boxes(encoded_boxes=best_deltas,
                                              reference_boxes=rois,
                                              scale_factors=scale_factors)
    def rpn_proposals(self):
        """
        Decode the RPN regressions, keep the best-scoring candidates, run
        rotated NMS and pad the result when too few boxes survive.

        :return: (rpn_proposals_boxes, rpn_proposals_scores)
        """
        with tf.variable_scope('rpn_proposals'):
            rpn_decode_boxes = encode_and_decode.decode_boxes(encode_boxes=self.rpn_encode_boxes,
                                                              reference_boxes=self.anchors,
                                                              scale_factors=self.scale_factors)

            rpn_softmax_scores = slim.softmax(self.rpn_scores)
            rpn_object_score = rpn_softmax_scores[:, 1]  # second column represent object

            if self.top_k_nms:
                # Clamp k: tf.nn.top_k raises when asked for more entries
                # than the input holds.
                available = tf.shape(rpn_object_score)[0]
                top_k = tf.minimum(self.top_k_nms, available, name='clamp_top_k')
                rpn_object_score, top_k_indices = tf.nn.top_k(rpn_object_score, k=top_k)
                rpn_decode_boxes = tf.gather(rpn_decode_boxes, top_k_indices)

            # NMS
            valid_indices = tf_wrapper.nms_rotate_tf(boxes_list=rpn_decode_boxes,
                                                     scores=rpn_object_score,
                                                     iou_threshold=self.rpn_nms_iou_threshold,
                                                     max_output_size=self.max_proposals_num,
                                                     use_gpu=cfgs.NMS_USE_GPU)

            valid_boxes = tf.gather(rpn_decode_boxes, valid_indices)
            valid_scores = tf.gather(rpn_object_score, valid_indices)

            # Pad only when NMS returned fewer boxes than requested.
            rpn_proposals_boxes, rpn_proposals_scores = tf.cond(
                tf.less(tf.shape(valid_boxes)[0], self.max_proposals_num),
                lambda: boxes_utils.padd_boxes_with_zeros(valid_boxes, valid_scores,
                                                          self.max_proposals_num),
                lambda: (valid_boxes, valid_scores))

            return rpn_proposals_boxes, rpn_proposals_scores
Exemple #8
0
    def rpn_proposals(self):
        '''
        Decode the RPN regressions, keep the best-scoring candidates and run
        NMS. No padding is applied to the NMS output.

        :param:self.anchors: shape:[-1, 4]->[ymin, xmin, ymax, xmax]
        :param:self.rpn_scores: shape:[-1, 2]->[backgroud, foreground]
        :param:self.rpn_encode_boxes: shape:[-1, 4]->[ycenter, xcenter, h, w]
        :return: valid_boxes, the boxes selected by NMS
                 (self.max_proposal_num is passed as the NMS output limit)
        :return: valid_scores, the object scores of those boxes
        '''
        with tf.variable_scope('rpn_proposals'):
            rpn_decode_boxes = encode_and_decode.decode_boxes(
                encode_boxes=self.rpn_encode_boxes,
                reference_boxes=self.anchors,
                scale_factors=self.scale_factors)
            if not self.is_training:
                # Outside training, clip the boxes to the image batch shape.
                image_shape = tf.shape(self.img_batch)
                rpn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                    rpn_decode_boxes, image_shape)

            rpn_softmax_scores = slim.softmax(self.rpn_scores)
            rpn_object_score = rpn_softmax_scores[:, 1]

            if self.top_k_nms:
                # tf.nn.top_k raises when k exceeds the number of candidates,
                # so clamp k to the number of anchors actually available.
                top_k = tf.minimum(self.top_k_nms,
                                   tf.shape(rpn_object_score)[0],
                                   name='clamp_top_k')
                rpn_object_score, top_k_indices = tf.nn.top_k(rpn_object_score,
                                                              k=top_k)
                rpn_decode_boxes = tf.gather(rpn_decode_boxes, top_k_indices)

            nms_indices = boxes_utils.non_maximal_suppression(
                rpn_decode_boxes, rpn_object_score, self.rpn_nms_iou_threshold,
                self.max_proposal_num)
            valid_scores = tf.gather(rpn_object_score, nms_indices)
            valid_boxes = tf.gather(rpn_decode_boxes, nms_indices)

        return valid_boxes, valid_scores
Exemple #9
0
    def rpn_losses(self):
        """
        Build the RPN location and classification losses on a sampled
        minibatch of anchors, and emit image summaries for debugging.

        :return: (location_loss, classification_loss)
        """
        with tf.variable_scope('rpn_losses'):
            # Sample a minibatch of anchors together with their matched
            # ground-truth boxes, an object mask and one-hot labels.
            minibatch_indices, minibatch_anchor_matched_gtboxes, object_mask, minibatch_labels_one_hot = \
                self.make_minibatch(self.anchors)

            # Restrict anchors and RPN outputs to the sampled rows.
            minibatch_anchors = tf.gather(self.anchors, minibatch_indices)
            minibatch_encode_boxes = tf.gather(self.rpn_encode_boxes,
                                               minibatch_indices)
            minibatch_boxes_scores = tf.gather(self.rpn_scores,
                                               minibatch_indices)

            # encode gtboxes
            minibatch_encode_gtboxes = encode_and_decode.encode_boxes(
                unencode_boxes=minibatch_anchor_matched_gtboxes,
                reference_boxes=minibatch_anchors,
                scale_factors=self.scale_factors)

            # Anchors whose mask entry is 0 are multiplied to all-zero boxes;
            # the text is the number of mask entries equal to 1.0.
            positive_anchors_in_img = draw_box_with_color(
                self.img_batch,
                minibatch_anchors * tf.expand_dims(object_mask, 1),
                text=tf.shape(tf.where(tf.equal(object_mask, 1.0)))[0])

            # Logical complement of object_mask, cast back to float.
            negative_mask = tf.cast(
                tf.logical_not(tf.cast(object_mask, tf.bool)), tf.float32)
            negative_anchors_in_img = draw_box_with_color(
                self.img_batch,
                minibatch_anchors * tf.expand_dims(negative_mask, 1),
                text=tf.shape(tf.where(tf.equal(object_mask, 0.0)))[0])

            # Decoded predictions are only used for the top-5 visualisation.
            minibatch_decode_boxes = encode_and_decode.decode_boxes(
                encode_boxes=minibatch_encode_boxes,
                reference_boxes=minibatch_anchors,
                scale_factors=self.scale_factors)

            tf.summary.image('/positive_anchors', positive_anchors_in_img)
            tf.summary.image('/negative_anchors', negative_anchors_in_img)
            # Ranking uses column 1 of the gathered self.rpn_scores (no softmax
            # is applied here).
            # NOTE(review): tf.nn.top_k needs at least 5 rows in the minibatch.
            top_k_scores, top_k_indices = tf.nn.top_k(
                minibatch_boxes_scores[:, 1], k=5)

            top_detections_in_img = draw_box_with_color(
                self.img_batch,
                tf.gather(minibatch_decode_boxes, top_k_indices),
                text=tf.shape(top_k_scores)[0])
            tf.summary.image('/top_5', top_detections_in_img)

            # losses
            with tf.variable_scope('rpn_location_loss'):
                # Smooth-L1 between predicted and target encodings, weighted
                # by object_mask.
                location_loss = losses.l1_smooth_losses(
                    predict_boxes=minibatch_encode_boxes,
                    gtboxes=minibatch_encode_gtboxes,
                    object_weights=object_mask)
                slim.losses.add_loss(
                    location_loss)  # add smooth l1 loss to losses collection

            with tf.variable_scope('rpn_classification_loss'):
                # NOTE(review): no explicit add_loss here; presumably
                # slim.losses.softmax_cross_entropy registers the loss itself
                # -- confirm.
                classification_loss = slim.losses.softmax_cross_entropy(
                    logits=minibatch_boxes_scores,
                    onehot_labels=minibatch_labels_one_hot)

            return location_loss, classification_loss
    def fast_rcnn_prediction(self):
        '''
        Decode the per-class regressions against self.rois_boxes, clip them to
        the image and run self.mutil_class_nms on the result.

        :param: self.fast_rcnn_cls_scores, [2000, num_cls+1], num_cls+background
        :param: self.fast_rcnn_encode_boxes, [2000, num_cls*4]
        :return: fast_rcnn_decode_boxes, [-1, 4]
        :return: fast_rcnn_category, [-1, ]
        :return: fast_rcnn_scores, [-1, ]
        :return: num_object, [-1, ]
        '''
        with tf.variable_scope('fast_rcnn_predict'):
            class_probs = slim.softmax(self.fast_rcnn_cls_scores)

            # One encoded box per row.
            deltas = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 4])

            # Repeat every roi once per class so rows line up with deltas.
            rois_per_class = tf.reshape(
                tf.tile(self.rois_boxes, [1, self.num_cls]), [-1, 4])

            boxes = decode_boxes(encode_boxes=deltas,
                                 reference_boxes=rois_per_class,
                                 scale_factors=self.scale_factors)
            boxes = boxes_utils.clip_boxes_to_img_boundaries(
                boxes=boxes, img_shape=self.img_shape)

            # Back to one row per roi for the multi-class NMS step.
            boxes = tf.reshape(boxes, [-1, 4 * self.num_cls])
            boxes, categories, scores, num_object = self.mutil_class_nms(
                boxes=boxes, scores=class_probs)
            return boxes, categories, scores, num_object
    def rpn_losses(self):
        """
        Build the RPN location and classification losses on a sampled
        minibatch of anchors, emit image summaries, and return the confident
        boxes among the 20 highest-scoring minibatch predictions.

        :return: (location_loss, classification_loss,
                  rpn_predict_boxes, rpn_predict_scores)
        """
        with tf.variable_scope('rpn_losses'):
            # Sample a minibatch of anchors together with their matched
            # ground-truth boxes, an object mask and one-hot labels.
            minibatch_indices, minibatch_anchor_matched_gtboxes, \
            object_mask, minibatch_labels_one_hot = self.make_minibatch(self.anchors)

            # Restrict anchors and RPN outputs to the sampled rows.
            minibatch_anchors = tf.gather(self.anchors, minibatch_indices)
            minibatch_encode_boxes = tf.gather(self.rpn_encode_boxes, minibatch_indices)
            minibatch_boxes_scores = tf.gather(self.rpn_scores, minibatch_indices)

            # encode gtboxes
            minibatch_encode_gtboxes = encode_and_decode.encode_boxes(unencode_boxes=minibatch_anchor_matched_gtboxes,
                                                                      reference_boxes=minibatch_anchors,
                                                                      scale_factors=self.scale_factors)

            # Anchors whose mask entry is 0 are multiplied to all-zero boxes;
            # the text is the number of mask entries equal to 1.0.
            positive_anchors_in_img = draw_box_with_color(self.img_batch,
                                                          minibatch_anchors * tf.expand_dims(object_mask, 1),
                                                          text=tf.shape(tf.where(tf.equal(object_mask, 1.0)))[0])

            # Logical complement of object_mask, cast back to float.
            negative_mask = tf.cast(tf.logical_not(tf.cast(object_mask, tf.bool)), tf.float32)
            negative_anchors_in_img = draw_box_with_color(self.img_batch,
                                                          minibatch_anchors * tf.expand_dims(negative_mask, 1),
                                                          text=tf.shape(tf.where(tf.equal(object_mask, 0.0)))[0])

            # Decoded predictions feed the visualisation and the returned boxes.
            minibatch_decode_boxes = encode_and_decode.decode_boxes(encode_boxes=minibatch_encode_boxes,
                                                                    reference_boxes=minibatch_anchors,
                                                                    scale_factors=self.scale_factors)

            tf.summary.image('/positive_anchors', positive_anchors_in_img)
            tf.summary.image('/negative_anchors', negative_anchors_in_img)

            # Softmax is taken over all of self.rpn_scores, then gathered down
            # to the minibatch; ranking uses column 1.
            # NOTE(review): tf.nn.top_k needs at least 20 rows in the minibatch.
            minibatch_boxes_softmax_scores = tf.gather(slim.softmax(self.rpn_scores), minibatch_indices)
            top_k_scores, top_k_indices = tf.nn.top_k(minibatch_boxes_softmax_scores[:, 1], k=20)

            top_k_boxes = tf.gather(minibatch_decode_boxes, top_k_indices)
            top_detections_in_img = draw_boxes_with_scores(self.img_batch,
                                                           boxes=top_k_boxes,
                                                           scores=top_k_scores)

            tf.summary.image('/top_20', top_detections_in_img)

            # Of the top 20, keep only those scoring above
            # cfgs.FINAL_SCORE_THRESHOLD.
            temp_indices = tf.reshape(tf.where(tf.greater(top_k_scores, cfgs.FINAL_SCORE_THRESHOLD)), [-1])
            rpn_predict_boxes = tf.gather(top_k_boxes, temp_indices)
            rpn_predict_scores = tf.gather(top_k_scores, temp_indices)

            # losses
            with tf.variable_scope('rpn_location_loss'):
                # Smooth-L1 between predicted and target encodings, weighted
                # by object_mask.
                location_loss = losses.l1_smooth_losses(predict_boxes=minibatch_encode_boxes,
                                                        gtboxes=minibatch_encode_gtboxes,
                                                        object_weights=object_mask)
                slim.losses.add_loss(location_loss)  # add smooth l1 loss to losses collection

            with tf.variable_scope('rpn_classification_loss'):
                # NOTE(review): no explicit add_loss here; presumably
                # slim.losses.softmax_cross_entropy registers the loss itself
                # -- confirm.
                classification_loss = slim.losses.softmax_cross_entropy(logits=minibatch_boxes_scores,
                                                                        onehot_labels=minibatch_labels_one_hot)

            return location_loss, classification_loss, rpn_predict_boxes, rpn_predict_scores
Exemple #12
0
def postprocess_rpn_proposals(rpn_bbox_pred, rpn_cls_prob, img_shape, anchors,
                              is_training):
    '''
    Turn RPN outputs into proposals: decode, clip, keep top-N, then NMS.

    :param rpn_bbox_pred: [-1, 4]
    :param rpn_cls_prob: [-1, 2]
    :param img_shape: passed to boxes_utils.clip_boxes_to_img_boundaries
    :param anchors:[-1, 4]
    :param is_training: selects the TRAIN or TEST limits from cfgs
    :return: (final_boxes, final_probs) for the boxes kept by NMS
    '''

    # The pre- and post-NMS limits depend on the mode; the IoU threshold
    # does not.
    pre_nms_topN = cfgs.RPN_TOP_K_NMS_TRAIN if is_training else cfgs.RPN_TOP_K_NMS_TEST
    post_nms_topN = (cfgs.RPN_MAXIMUM_PROPOSAL_TARIN if is_training
                     else cfgs.RPN_MAXIMUM_PROPOSAL_TEST)
    nms_thresh = cfgs.RPN_NMS_IOU_THRESHOLD

    # Column 1 holds the score that is ranked and suppressed on.
    scores = rpn_cls_prob[:, 1]

    # 1. decode boxes
    boxes = encode_and_decode.decode_boxes(
        encode_boxes=rpn_bbox_pred,
        reference_boxes=anchors,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)

    # 2. clip to img boundaries
    boxes = boxes_utils.clip_boxes_to_img_boundaries(
        decode_boxes=boxes, img_shape=img_shape)

    # 3. keep the top N candidates, never more than are available
    if pre_nms_topN > 0:
        candidate_count = tf.shape(boxes)[0]
        pre_nms_topN = tf.minimum(pre_nms_topN, candidate_count,
                                  name='avoid_unenough_boxes')
        scores, order = tf.nn.top_k(scores, k=pre_nms_topN)
        boxes = tf.gather(boxes, order)

    # 4. NMS
    keep = tf.image.non_max_suppression(boxes=boxes,
                                        scores=scores,
                                        max_output_size=post_nms_topN,
                                        iou_threshold=nms_thresh)

    return tf.gather(boxes, keep), tf.gather(scores, keep)
    def fast_rcnn_predict(self):
        """
        Class-agnostic prediction for the horizontal and the rotate head.

        Both heads decode against self.fast_rcnn_all_level_proposals, without
        tiling per class; only the horizontal boxes are clipped to the image.

        :return: the four outputs of self.fast_rcnn_proposals followed by the
                 five outputs of self.fast_rcnn_proposals_rotate
        """
        with tf.variable_scope('fast_rcnn_predict'):
            horizontal_probs = slim.softmax(self.fast_rcnn_scores)
            rotate_probs = slim.softmax(self.fast_rcnn_scores_rotate)

            horizontal_deltas = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 4])
            rotate_deltas = tf.reshape(self.fast_rcnn_encode_boxes_rotate,
                                       [-1, 5])

            # Class-agnostic regression: the proposals are used as they are.
            proposals = tf.reshape(self.fast_rcnn_all_level_proposals, [-1, 4])

            horizontal_boxes = encode_and_decode.decode_boxes(
                encode_boxes=horizontal_deltas,
                reference_boxes=proposals,
                scale_factors=self.scale_factors)
            rotate_boxes = encode_and_decode.decode_boxes_rotate(
                encode_boxes=rotate_deltas,
                reference_boxes=proposals,
                scale_factors=self.scale_factors)

            horizontal_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                horizontal_boxes, img_shape=self.img_shape)

            # No per-class reshape before NMS in the class-agnostic setup.
            horizontal_boxes, horizontal_scores, num_of_objects, detection_category = \
                self.fast_rcnn_proposals(horizontal_boxes, scores=horizontal_probs)

            rotate_boxes, rotate_scores, head_quadrant, \
                num_of_objects_rotate, detection_category_rotate = \
                self.fast_rcnn_proposals_rotate(rotate_boxes,
                                                scores=rotate_probs,
                                                head_quadrant=self.fast_rcnn_head_quadrant)

            return (horizontal_boxes, horizontal_scores, num_of_objects, detection_category,
                    rotate_boxes, rotate_scores, head_quadrant,
                    num_of_objects_rotate, detection_category_rotate)
Exemple #14
0
    def postprocess_rpn_proposals(self, rpn_bbox_pred, rpn_cls_prob, img_shape,
                                  anchors, is_training):
        """
        Turn RPN outputs into proposals: decode, clip, keep top-N, then NMS.

        :param rpn_bbox_pred: predicted encoded boxes
        :param rpn_cls_prob: RPN classification probabilities; column 1 is used
        :param img_shape: passed to boxes_utils.clip_boxes_to_img_boundaries
        :param anchors: reference boxes for decoding
        :param is_training: selects the TRAIN or TEST limits from cfgs
        :return: (final_boxes, final_probs) for the boxes kept by NMS
        """
        # The pre- and post-NMS limits depend on the mode; the IoU threshold
        # does not.
        if is_training:
            pre_nms_topN, post_nms_topN = (cfgs.RPN_TOP_K_NMS_TRAIN,
                                           cfgs.RPN_MAXIMUM_PROPOSAL_TARIN)
        else:
            pre_nms_topN, post_nms_topN = (cfgs.RPN_TOP_K_NMS_TEST,
                                           cfgs.RPN_MAXIMUM_PROPOSAL_TEST)
        nms_threshold = cfgs.RPN_NMS_IOU_THRESHOLD

        # (, 2) => (negative, positive): keep the positive column
        scores = rpn_cls_prob[:, 1]

        # step 1: decode boxes
        boxes = encode_and_decode.decode_boxes(
            encoded_boxes=rpn_bbox_pred,
            reference_boxes=anchors,
            scale_factors=cfgs.ANCHOR_SCALE_FACTORS)

        # step 2: clip to image boundaries
        boxes = boxes_utils.clip_boxes_to_img_boundaries(boxes,
                                                         img_shape=img_shape)

        # step 3: keep the top N candidates, never more than are available
        if pre_nms_topN > 0:
            candidate_count = tf.shape(boxes)[0]
            pre_nms_topN = tf.minimum(pre_nms_topN, candidate_count,
                                      name='minimum_boxes')
            scores, order = tf.nn.top_k(scores, k=pre_nms_topN)
            boxes = tf.gather(params=boxes, indices=order)

        # step 4: NMS (Non Max Suppression)
        keep = tf.image.non_max_suppression(boxes=boxes,
                                            scores=scores,
                                            max_output_size=post_nms_topN,
                                            iou_threshold=nms_threshold)
        return tf.gather(boxes, keep), tf.gather(scores, keep)
        def batch_slice_rpn_proposals(rpn_encode_boxes, rpn_scores, anchors,
                                      config, rpn_proposals_num):
            """
            Build RPN proposals for one slice: optional top-k pre-selection,
            decoding, NMS, clipping, and zero-padding up to rpn_proposals_num.

            :param rpn_encode_boxes: encoded RPN box regressions
            :param rpn_scores: RPN class scores; column 1 of their softmax is
                               used as the object score
            :param anchors: reference boxes for decoding
            :param config: supplies RPN_TOP_K_NMS, RPN_BBOX_STD_DEV,
                           RPN_NMS_IOU_THRESHOLD and TARGET_SIDE
            :param rpn_proposals_num: NMS output limit and padding target
            :return: (rpn_proposals_boxes, rpn_object_score)
            """
            with tf.variable_scope('rpn_proposals'):
                rpn_softmax_scores = slim.softmax(rpn_scores)
                # second column represent object
                rpn_object_score = rpn_softmax_scores[:, 1]
                if config.RPN_TOP_K_NMS:
                    # tf.nn.top_k raises when k exceeds the number of
                    # candidates, so clamp k to what is actually available.
                    top_k = tf.minimum(config.RPN_TOP_K_NMS,
                                       tf.shape(rpn_object_score)[0],
                                       name='clamp_top_k')
                    top_k_indices = tf.nn.top_k(rpn_object_score,
                                                k=top_k).indices
                    # Scores, regressions and anchors are narrowed together.
                    rpn_object_score = tf.gather(rpn_object_score,
                                                 top_k_indices)
                    rpn_encode_boxes = tf.gather(rpn_encode_boxes,
                                                 top_k_indices)
                    anchors = tf.gather(anchors, top_k_indices)

                rpn_decode_boxes = encode_and_decode.decode_boxes(
                    encode_boxes=rpn_encode_boxes,
                    reference_boxes=anchors,
                    dev_factors=config.RPN_BBOX_STD_DEV)

                valid_indices = boxes_utils.non_maximal_suppression(
                    boxes=rpn_decode_boxes,
                    scores=rpn_object_score,
                    max_output_size=rpn_proposals_num,
                    iou_threshold=config.RPN_NMS_IOU_THRESHOLD)
                rpn_decode_boxes = tf.gather(rpn_decode_boxes, valid_indices)
                rpn_object_score = tf.gather(rpn_object_score, valid_indices)
                # clip proposals to img boundaries (replace the out-of-boundary
                # coordinates with the image boundary)
                rpn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                    rpn_decode_boxes,
                    [0, 0, config.TARGET_SIDE - 1, config.TARGET_SIDE - 1])
                # Pad if needed: number of rows missing to reach
                # rpn_proposals_num, never negative.
                padding = tf.maximum(
                    rpn_proposals_num - tf.shape(rpn_decode_boxes)[0], 0)
                # Boxes are padded by concatenating zero rows rather than with
                # tf.pad; the scores use tf.pad.
                zeros_padding = tf.zeros((padding, 4), dtype=tf.float32)
                rpn_proposals_boxes = tf.concat(
                    [rpn_decode_boxes, zeros_padding], axis=0)
                rpn_object_score = tf.pad(rpn_object_score, [(0, padding)])

                return rpn_proposals_boxes, rpn_object_score
    def fast_rcnn_predict(self):
        """
        Per-class prediction for the horizontal and the rotate head, plus a
        reordered copy of the rotate boxes produced by read_reorder.

        :return: ten values: the four outputs of self.fast_rcnn_proposals,
                 the rotate boxes as returned by self.fast_rcnn_proposals_rotate,
                 the reordered rotate boxes (twice), then the rotate scores,
                 object count and categories
        """
        with tf.variable_scope('fast_rcnn_predict'):
            horizontal_probs = slim.softmax(self.fast_rcnn_scores)
            rotate_probs = slim.softmax(self.fast_rcnn_scores_rotate)

            horizontal_deltas = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 4])
            rotate_deltas = tf.reshape(self.fast_rcnn_encode_boxes_rotate, [-1, 5])

            # Repeat each proposal once per class, then flatten to [-1, 4].
            tiled_proposals = tf.tile(self.fast_rcnn_all_level_proposals,
                                      [1, self.num_classes])
            per_class_proposals = tf.reshape(tiled_proposals, [-1, 4])

            horizontal_boxes = encode_and_decode.decode_boxes(
                encode_boxes=horizontal_deltas,
                reference_boxes=per_class_proposals,
                scale_factors=self.scale_factors)
            rotate_boxes = encode_and_decode.decode_boxes_rotate(
                encode_boxes=rotate_deltas,
                reference_boxes=per_class_proposals,
                scale_factors=self.scale_factors)

            # Only the horizontal boxes are clipped to the image.
            horizontal_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                horizontal_boxes, img_shape=self.img_shape)

            # Back to one row per proposal for the multi-class NMS steps.
            horizontal_boxes = tf.reshape(horizontal_boxes,
                                          [-1, self.num_classes * 4])
            rotate_boxes = tf.reshape(rotate_boxes, [-1, self.num_classes * 5])

            horizontal_boxes, horizontal_scores, num_of_objects, detection_category = \
                self.fast_rcnn_proposals(horizontal_boxes, scores=horizontal_probs)

            rotate_boxes, rotate_scores, num_of_objects_rotate, detection_category_rotate = \
                self.fast_rcnn_proposals_rotate(rotate_boxes, scores=rotate_probs)

            reordered_boxes = tf.py_func(read_reorder,
                                         inp=[rotate_boxes],
                                         Tout=tf.float32)

            # The reordered tensor fills both the sixth and seventh slot.
            return (horizontal_boxes, horizontal_scores, num_of_objects, detection_category,
                    rotate_boxes, reordered_boxes, reordered_boxes,
                    rotate_scores, num_of_objects_rotate, detection_category_rotate)
    def fast_rcnn_predict(self):
        """Decode the Fast-RCNN box regressions and filter them per class.

        Returns the four values produced by ``self.fast_rcnn_proposals``:
        decoded boxes, scores, number of objects and detection categories.
        """
        with tf.variable_scope('fast_rcnn_predict'):
            # Class probabilities, [-1, num_classes+1]
            softmax_scores = slim.softmax(self.fast_rcnn_scores)

            # One 5-value box encoding per row.
            encoded_rows = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 5])

            # Repeat every rotated proposal once per class ([N, 5*num_classes]),
            # then flatten so each row lines up with one encoding
            # ([N*num_classes, 5]).
            tiled_proposals = tf.tile(self.fast_rcnn_all_level_rotate_proposals,
                                      [1, self.num_classes])
            per_class_references = tf.reshape(tiled_proposals, [-1, 5])

            # NOTE(review): 5-value boxes are decoded with decode_boxes rather
            # than decode_boxes_rotate -- confirm this is intended.
            decoded = encode_and_decode.decode_boxes(
                encode_boxes=encoded_rows,
                reference_boxes=per_class_references,
                scale_factors=self.scale_factors)

            # Multiclass NMS works on one row per proposal.
            decoded = tf.reshape(decoded, [-1, self.num_classes * 5])
            boxes, box_scores, object_count, categories = \
                self.fast_rcnn_proposals(decoded, scores=softmax_scores)

            return boxes, box_scores, object_count, categories
    def postprocess_cascade(self, rois, bbox_ppred, scope):
        '''
        Decode the box regressions of one cascade stage against its RoIs.

        :param rois: [-1, 4]
        :param bbox_ppred: [-1, 4]
        :param scope: value formatted into the name scope 'postprocess_cascade_{}'
        :return: the output of encode_and_decode.decode_boxes for these inputs

        '''
        scope_name = 'postprocess_cascade_{}'.format(scope)

        with tf.name_scope(scope_name):
            # No gradient flows back through either input.
            frozen_rois = tf.stop_gradient(rois)
            frozen_deltas = tf.stop_gradient(bbox_ppred)

            decoded_boxes = encode_and_decode.decode_boxes(
                encoded_boxes=frozen_deltas,
                reference_boxes=frozen_rois,
                scale_factors=cfgs.ROI_SCALE_FACTORS)

        return decoded_boxes
    def postprocess_fastrcnn(self, rois, bbox_ppred, scores, img_shape):
        '''
        Turn Fast-RCNN head outputs into detections: decode, clip and NMS
        each class separately, then concatenate the survivors.

        :param rois: [-1, 4]
        :param bbox_ppred: [-1, (cfgs.CLASS_NUM + 1) * 4]
        :param scores: [-1, cfgs.CLASS_NUM + 1]
        :param img_shape: forwarded to boxes_utils.clip_boxes_to_img_boundaries
        :return: (final_boxes, final_scores, final_category) over class
                 indices 1..cfgs.CLASS_NUM
        '''

        with tf.name_scope('postprocess_fastrcnn'):
            # Post-processing never back-propagates into the network.
            rois = tf.stop_gradient(rois)
            scores = tf.stop_gradient(scores)
            deltas_by_class = tf.stop_gradient(
                tf.reshape(bbox_ppred, [-1, cfgs.CLASS_NUM + 1, 4]))

            delta_columns = tf.unstack(deltas_by_class, axis=1)
            score_columns = tf.unstack(scores, axis=1)

            kept_boxes, kept_scores, kept_labels = [], [], []
            # Column 0 (presumably background) is skipped.
            for cls_id in range(1, cfgs.CLASS_NUM + 1):
                cls_deltas = delta_columns[cls_id]
                cls_scores = score_columns[cls_id]

                # 1. decode this class's regressions against the RoIs
                cls_boxes = encode_and_decode.decode_boxes(
                    encoded_boxes=cls_deltas,
                    reference_boxes=rois,
                    scale_factors=cfgs.ROI_SCALE_FACTORS)

                # 2. clip to the image boundaries
                cls_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                    decode_boxes=cls_boxes, img_shape=img_shape)

                # 3. per-class NMS
                nms_indices = tf.image.non_max_suppression(
                    boxes=cls_boxes,
                    scores=cls_scores,
                    max_output_size=cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
                    iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD)

                boxes_after_nms = tf.gather(cls_boxes, nms_indices)
                scores_after_nms = tf.gather(cls_scores, nms_indices)

                kept_boxes.append(boxes_after_nms)
                kept_scores.append(scores_after_nms)
                # Label every surviving box with its class index.
                kept_labels.append(tf.ones_like(scores_after_nms) * cls_id)

            final_boxes = tf.concat(kept_boxes, axis=0)
            final_scores = tf.concat(kept_scores, axis=0)
            final_category = tf.concat(kept_labels, axis=0)

            if self.is_training:
                # During training the detections are shown in TensorBoard, so
                # only those scoring at least cfgs.SHOW_SCORE_THRSHOLD are kept.
                passes_threshold = tf.greater_equal(final_scores,
                                                    cfgs.SHOW_SCORE_THRSHOLD)
                shown_indices = tf.reshape(tf.where(passes_threshold), [-1])

                final_boxes = tf.gather(final_boxes, shown_indices)
                final_scores = tf.gather(final_scores, shown_indices)
                final_category = tf.gather(final_category, shown_indices)

        return final_boxes, final_scores, final_category
    def build_whole_detection_network(self, input_img_batch, gtboxes_batch):
        '''
        Build the whole detection graph: base network, RPN heads on every
        pyramid level, anchors, RPN proposal post-processing, level
        assignment, the Fast-RCNN head and (when training) the losses.

        :param input_img_batch: image batch passed to self.build_base_network
        :param gtboxes_batch: ground-truth boxes; when training it is reshaped
                              to [-1, 5] and cast to float32
        :return: when not training, the result of self.postprocess_fastrcnn
                 (boxes, scores, category); when training,
                 (final_bbox, final_scores, final_category, loss_dict)
        '''

        if self.is_training:
            # ensure shape is [M, 5]
            gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        P_list = self.build_base_network(
            input_img_batch)  # [P2, P3, P4, P5, P6]

        # 2. build rpn
        with tf.variable_scope('build_rpn',
                               regularizer=slim.l2_regularizer(
                                   cfgs.WEIGHT_DECAY)):

            fpn_cls_score = []
            fpn_box_pred = []
            for level_name, p in zip(cfgs.LEVLES, P_list):
                if cfgs.SHARE_HEADS:
                    # Shared heads: the first level creates the variables and
                    # every later level reuses them under the same scope names.
                    reuse_flag = None if level_name == cfgs.LEVLES[0] else True
                    scope_list = [
                        'rpn_conv/3x3', 'rpn_cls_score', 'rpn_bbox_pred'
                    ]
                else:
                    # Separate heads: each level gets its own scope names.
                    reuse_flag = None
                    scope_list = [
                        'rpn_conv/3x3_%s' % level_name,
                        'rpn_cls_score_%s' % level_name,
                        'rpn_bbox_pred_%s' % level_name
                    ]
                rpn_conv3x3 = slim.conv2d(p,
                                          512, [3, 3],
                                          trainable=self.is_training,
                                          weights_initializer=cfgs.INITIALIZER,
                                          padding="SAME",
                                          activation_fn=tf.nn.relu,
                                          scope=scope_list[0],
                                          reuse=reuse_flag)
                # Two class scores per anchor at every location.
                rpn_cls_score = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location * 2, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.INITIALIZER,
                    activation_fn=None,
                    padding="VALID",
                    scope=scope_list[1],
                    reuse=reuse_flag)
                # Four box regression values per anchor at every location.
                rpn_box_pred = slim.conv2d(
                    rpn_conv3x3,
                    self.num_anchors_per_location * 4, [1, 1],
                    stride=1,
                    trainable=self.is_training,
                    weights_initializer=cfgs.BBOX_INITIALIZER,
                    activation_fn=None,
                    padding="VALID",
                    scope=scope_list[2],
                    reuse=reuse_flag)
                rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
                rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])

                fpn_cls_score.append(rpn_cls_score)
                fpn_box_pred.append(rpn_box_pred)

            # Stack the per-level predictions into single [-1, 2] / [-1, 4] tensors.
            fpn_cls_score = tf.concat(fpn_cls_score,
                                      axis=0,
                                      name='fpn_cls_score')
            fpn_box_pred = tf.concat(fpn_box_pred, axis=0, name='fpn_box_pred')
            fpn_cls_prob = slim.softmax(fpn_cls_score, scope='fpn_cls_prob')

        # 3. generate_anchors
        # Levels are visited in the same order as in step 2, so the
        # concatenated anchors line up row-for-row with the predictions.
        all_anchors = []
        for i in range(len(cfgs.LEVLES)):
            level_name, p = cfgs.LEVLES[i], P_list[i]

            p_h, p_w = tf.shape(p)[1], tf.shape(p)[2]
            featuremap_height = tf.cast(p_h, tf.float32)
            featuremap_width = tf.cast(p_w, tf.float32)
            anchors = anchor_utils.make_anchors(
                base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[i],
                anchor_scales=cfgs.ANCHOR_SCALES,
                anchor_ratios=cfgs.ANCHOR_RATIOS,
                featuremap_height=featuremap_height,
                featuremap_width=featuremap_width,
                stride=cfgs.ANCHOR_STRIDE_LIST[i],
                name="make_anchors_for%s" % level_name)
            all_anchors.append(anchors)
        all_anchors = tf.concat(all_anchors, axis=0, name='all_anchors_of_FPN')

        # 4. postprocess rpn proposals. such as: decode, clip, NMS
        with tf.variable_scope('postprocess_FPN'):
            rois, roi_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=fpn_box_pred,
                rpn_cls_prob=fpn_cls_prob,
                img_shape=img_shape,
                anchors=all_anchors,
                is_training=self.is_training)
            # rois shape [-1, 4]
            # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++

            if self.is_training:
                # Image summary of the proposals scoring at least 0.5.
                score_gre_05 = tf.reshape(
                    tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
                score_gre_05_rois = tf.gather(rois, score_gre_05)
                score_gre_05_score = tf.gather(roi_scores, score_gre_05)
                score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_scores(
                    img_batch=input_img_batch,
                    boxes=score_gre_05_rois,
                    scores=score_gre_05_score)
                tf.summary.image('score_greater_05_rois', score_gre_05_in_img)
            # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

        if self.is_training:
            with tf.variable_scope('sample_anchors_minibatch'):
                # anchor_target_layer runs as a Python function and yields
                # per-anchor labels and regression targets.
                fpn_labels, fpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gtboxes_batch, img_shape, all_anchors],
                        [tf.float32, tf.float32])
                # NOTE(review): fpn_bbox_targets is not used again in this
                # method -- confirm that is intended.
                fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
                fpn_labels = tf.to_int32(fpn_labels, name="to_int32")
                fpn_labels = tf.reshape(fpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, all_anchors,
                                         fpn_labels)

            # --------------------------------------add smry-----------------------------------------------------------

            # RPN accuracy over the anchors whose label is not -1.
            fpn_cls_category = tf.argmax(fpn_cls_prob, axis=1)
            kept_rpppn = tf.reshape(tf.where(tf.not_equal(fpn_labels, -1)),
                                    [-1])
            fpn_cls_category = tf.gather(fpn_cls_category, kept_rpppn)
            acc = tf.reduce_mean(
                tf.to_float(
                    tf.equal(fpn_cls_category,
                             tf.to_int64(tf.gather(fpn_labels, kept_rpppn)))))
            tf.summary.scalar('ACC/fpn_accuracy', acc)

            # The RoI sampling ops below only run after fpn_labels is computed.
            with tf.control_dependencies([fpn_labels]):
                with tf.variable_scope('sample_RCNN_minibatch'):
                    rois, labels, bbox_targets = \
                    tf.py_func(proposal_target_layer,
                               [rois, gtboxes_batch],
                               [tf.float32, tf.float32, tf.float32])
                    rois = tf.reshape(rois, [-1, 4])
                    labels = tf.to_int32(labels)
                    labels = tf.reshape(labels, [-1])
                    bbox_targets = tf.reshape(bbox_targets,
                                              [-1, 4 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(input_img_batch, rois, labels)
        # Split the RoIs across pyramid levels; when training, labels and
        # targets come back from assign_levels as well.
        if self.is_training:
            rois_list, labels, bbox_targets = self.assign_levels(
                all_rois=rois, labels=labels, bbox_targets=bbox_targets)
        else:
            rois_list = self.assign_levels(
                all_rois=rois
            )  # rois_list: [P2_rois, P3_rois, P4_rois, P5_rois]

        # -------------------------------------------------------------------------------------------------------------#
        #                                            Fast-RCNN                                                         #
        # -------------------------------------------------------------------------------------------------------------#

        # 5. build Fast-RCNN
        # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)
        bbox_pred, cls_score = self.build_fastrcnn(P_list=P_list,
                                                   rois_list=rois_list,
                                                   img_shape=img_shape)
        # bbox_pred shape: [-1, 4*(cls_num+1)].
        # cls_score shape: [-1, cls_num+1]

        cls_prob = slim.softmax(cls_score, 'cls_prob')

        # ----------------------------------------------add smry-------------------------------------------------------
        if self.is_training:
            cls_category = tf.argmax(cls_prob, axis=1)
            fast_acc = tf.reduce_mean(
                tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
            tf.summary.scalar('ACC/fast_acc', fast_acc)

        # Rebuild rois from the per-level lists so the rows follow the
        # order of rois_list.
        rois = tf.concat(rois_list, axis=0, name='concat_rois')
        #  6. postprocess_fastrcnn
        if not self.is_training:
            return self.postprocess_fastrcnn(rois=rois,
                                             bbox_ppred=bbox_pred,
                                             scores=cls_prob,
                                             img_shape=img_shape)
        else:
            '''
            when trian. We need build Loss
            '''

            # GIoU loss needs the boxes decoded first
            # NOTE(review): the RPN deltas are decoded with
            # cfgs.ROI_SCALE_FACTORS here -- confirm this matches the factors
            # used when the RPN proposals were decoded.
            fpn_pred = encode_and_decode.decode_boxes(
                encoded_boxes=fpn_box_pred,
                reference_boxes=all_anchors,
                scale_factors=cfgs.ROI_SCALE_FACTORS)

            # NOTE(review): all_anchors is passed as rpn_bbox_targets -- confirm
            # build_loss expects anchors rather than regression targets here.
            loss_dict = self.build_loss(rpn_box_pred=fpn_pred,
                                        rpn_bbox_targets=all_anchors,
                                        rpn_cls_score=fpn_cls_score,
                                        rpn_labels=fpn_labels,
                                        bbox_pred=bbox_pred,
                                        bbox_targets=bbox_targets,
                                        cls_score=cls_score,
                                        labels=labels)

            final_bbox, final_scores, final_category = self.postprocess_fastrcnn(
                rois=rois,
                bbox_ppred=bbox_pred,
                scores=cls_prob,
                img_shape=img_shape)
            return final_bbox, final_scores, final_category, loss_dict
Exemple #21
0
def postprocess_fastrcnn_proposals(bbox_ppred, scores, img_shape, rois,
                                   is_training):
    '''
    Decode, clip and NMS-filter the per-class Fast-RCNN boxes into proposals.

    For every class index 1..cfgs.CLASS_NUM the regressions are decoded
    against ``rois``, clipped to the image, reduced to the top-scoring
    ``pre_nms_topN`` boxes and passed through NMS. The survivors of all
    classes are concatenated.

    :param bbox_ppred: box regressions, reshaped to [-1, cfgs.CLASS_NUM + 1, 4]
    :param scores: per-class scores, unstacked along axis 1
    :param img_shape: forwarded to boxes_utils.clip_boxes_to_img_boundaries
    :param rois: reference boxes the regressions are decoded against
    :param is_training: selects the train or test top-k / NMS settings
    :return: (final_boxes, final_scores)
    '''

    if is_training:
        pre_nms_topN = 2000  # cfgs.RPN_TOP_K_NMS_TRAIN
        post_nms_topN = 500  # cfgs.RPN_MAXIMUM_PROPOSAL_TARIN
        nms_thresh = 0.8  # cfgs.RPN_NMS_IOU_THRESHOLD
    else:
        pre_nms_topN = 1500  # cfgs.RPN_TOP_K_NMS_TEST
        post_nms_topN = 300  # cfgs.RPN_MAXIMUM_PROPOSAL_TEST
        nms_thresh = 0.7  # cfgs.RPN_NMS_IOU_THRESHOLD

    # NOTE: no tf.stop_gradient is applied to rois, scores or bbox_ppred here.
    bbox_ppred = tf.reshape(bbox_ppred, [-1, cfgs.CLASS_NUM + 1, 4])

    bbox_pred_list = tf.unstack(bbox_ppred, axis=1)
    score_list = tf.unstack(scores, axis=1)

    allclasses_boxes = []
    allclasses_scores = []
    for i in range(1, cfgs.CLASS_NUM + 1):
        # 1. decode boxes in each class
        tmp_encoded_box = bbox_pred_list[i]
        tmp_score = score_list[i]
        tmp_decoded_boxes = encode_and_decode.decode_boxes(
            encode_boxes=tmp_encoded_box,
            reference_boxes=rois,
            scale_factors=cfgs.ROI_SCALE_FACTORS)

        # 2. clip to img boundaries
        tmp_decoded_boxes = boxes_utils.clip_boxes_to_img_boundaries(
            decode_boxes=tmp_decoded_boxes, img_shape=img_shape)

        # 3. keep the top-k scoring boxes; k is capped by the number of
        #    boxes available. A separate name is used so the Python int
        #    pre_nms_topN is not rebound to a tensor across iterations.
        top_k = tf.minimum(pre_nms_topN,
                           tf.shape(tmp_decoded_boxes)[0],
                           name='avoid_unenough_boxes')
        top_scores, top_k_indices = tf.nn.top_k(tmp_score, k=top_k)
        top_boxes = tf.gather(tmp_decoded_boxes, top_k_indices)

        # 4. NMS
        keep = tf.image.non_max_suppression(boxes=top_boxes,
                                            scores=top_scores,
                                            max_output_size=post_nms_topN,
                                            iou_threshold=nms_thresh)

        # `keep` indexes the top-k subset handed to NMS, so the survivors
        # must be gathered from that subset, not from the unsorted tensors.
        perclass_boxes = tf.gather(top_boxes, keep)
        perclass_scores = tf.gather(top_scores, keep)

        allclasses_boxes.append(perclass_boxes)
        allclasses_scores.append(perclass_scores)

    final_boxes = tf.concat(allclasses_boxes, axis=0)
    final_scores = tf.concat(allclasses_scores, axis=0)

    return final_boxes, final_scores
Exemple #22
0
    def rpn_proposals(self):
        """Decode the RPN outputs, keep the best boxes, run NMS and pad.

        Returns ``(boxes, scores)``. When NMS keeps fewer than
        ``self.max_proposals_num`` boxes, the pair is produced by
        ``boxes_utils.padd_boxes_with_zeros``.
        """
        with tf.variable_scope('rpn_proposals'):
            decoded = encode_and_decode.decode_boxes(
                encode_boxes=self.rpn_encode_boxes,
                reference_boxes=self.anchors,
                scale_factors=self.scale_factors)
            # The decoded boxes are not clipped to the image boundaries here.

            # The second softmax column is the object score.
            softmax_scores = slim.softmax(self.rpn_scores)
            object_scores = softmax_scores[:, 1]

            if self.top_k_nms:
                object_scores, best_indices = tf.nn.top_k(object_scores,
                                                          k=self.top_k_nms)
                decoded = tf.gather(decoded, best_indices)

            if cfgs.USE_HORIZONTAL_NMS:
                # Convert each box to 8 corner coordinates, then take the
                # axis-aligned min/max envelope for the standard NMS op.
                corners = tf.py_func(coordinate_convert.forward_convert,
                                     inp=[decoded],
                                     Tout=tf.float32)
                corners = tf.reshape(corners, [tf.shape(decoded)[0], 8])

                x1, y1, x2, y2, x3, y3, x4, y4 = tf.unstack(corners, axis=1)
                xs = tf.transpose(tf.stack([x1, x2, x3, x4]))
                ys = tf.transpose(tf.stack([y1, y2, y3, y4]))
                left = tf.reduce_min(xs, axis=1)
                right = tf.reduce_max(xs, axis=1)
                top = tf.reduce_min(ys, axis=1)
                bottom = tf.reduce_max(ys, axis=1)
                envelope = tf.transpose(tf.stack([left, top, right, bottom]))

                valid_indices = tf.image.non_max_suppression(
                    boxes=envelope,
                    scores=object_scores,
                    max_output_size=self.max_proposals_num,
                    iou_threshold=self.rpn_nms_iou_threshold,
                    name='rpn_horizontal_nms')
            else:
                valid_indices = nms_rotate.nms_rotate(
                    decode_boxes=decoded,
                    scores=object_scores,
                    iou_threshold=self.rpn_nms_iou_threshold,
                    max_output_size=self.max_proposals_num,
                    use_angle_condition=self.use_angles_condition,
                    angle_threshold=self.anchor_angle_threshold,
                    use_gpu=cfgs.NMS_USE_GPU)

            valid_boxes = tf.gather(decoded, valid_indices)
            valid_scores = tf.gather(object_scores, valid_indices)

            def _padded():
                return boxes_utils.padd_boxes_with_zeros(
                    valid_boxes, valid_scores, self.max_proposals_num)

            def _unchanged():
                return (valid_boxes, valid_scores)

            too_few = tf.less(tf.shape(valid_boxes)[0], self.max_proposals_num)
            rpn_proposals_boxes, rpn_proposals_scores = tf.cond(
                too_few, _padded, _unchanged)

            return rpn_proposals_boxes, rpn_proposals_scores
Exemple #23
0
    def rpn_loss(self):
        '''
        Build the RPN classification and localization losses on a sampled
        minibatch of anchors, and add image summaries of that minibatch.

        :param: self.gtboxes_and_label: [n, 5]->[ymin, xmin, ymax, xmax, cls]
        :param: self.anchors: [m, 4]-> [ymin, xmin, ymax, xmax]
        :param:self.rpn_encode_boxes: [m, 4]->[ycenter, xcenter, h, w]
        :return: (location_loss, classify_loss)
        '''
        with tf.variable_scope('rpn_loss'):
            (sample_indices, matched_gtboxes,
             object_mask, label_onehot) = self.make_minibatch()

            sample_anchors = tf.gather(self.anchors, sample_indices)
            sample_encode_boxes = tf.gather(self.rpn_encode_boxes,
                                            sample_indices)
            sample_scores = tf.gather(self.rpn_scores, sample_indices)

            # Regression targets for the sampled anchors.
            encode_targets = encode_and_decode.encode_boxes(
                sample_anchors, matched_gtboxes, self.scale_factors)

            def _draw_selected(boxes, mask):
                # Rows where mask is 0 are zeroed out; the text is the
                # number of mask entries equal to 1.
                return draw_box_with_tensor(
                    img_batch=self.img_batch,
                    boxes=boxes * tf.expand_dims(mask, 1),
                    text=tf.shape(tf.where(tf.equal(mask, 1)))[0])

            # summary
            positive_anchors_img = _draw_selected(sample_anchors, object_mask)
            negative_mask = tf.cast(
                tf.logical_not(tf.cast(object_mask, tf.bool)), tf.float32)
            negative_anchors_img = _draw_selected(sample_anchors,
                                                  negative_mask)

            decoded_anchors = encode_and_decode.decode_boxes(
                encode_boxes=sample_encode_boxes,
                reference_boxes=sample_anchors,
                scale_factors=self.scale_factors)
            # clip boxes into image shape
            decoded_anchors = boxes_utils.clip_boxes_to_img_boundaries(
                decoded_anchors, tf.shape(self.img_batch))
            positive_decoded_img = _draw_selected(decoded_anchors,
                                                  object_mask)

            tf.summary.image('images/rpn/losses/anchors_positive_minibatch',
                             positive_anchors_img)
            tf.summary.image('images/rpn/losses/anchors_negative_minibatch',
                             negative_anchors_img)
            tf.summary.image('images/rpn/losses/decode_anchor_positive',
                             positive_decoded_img)

            # losses
            with tf.variable_scope('rpn_localization_losses'):
                classify_loss = slim.losses.softmax_cross_entropy(
                    logits=sample_scores,
                    onehot_labels=label_onehot)

                location_loss = losses.l1_smooth_losses(
                    predict_boxes=sample_encode_boxes,
                    gtboxes=encode_targets,
                    object_weights=object_mask)
                # add location loss to losses collections
                slim.losses.add_loss(location_loss)

            return location_loss, classify_loss
def postprocess_rpn_proposals(rpn_bbox_pred, rpn_cls_prob, img_shape, anchors,
                              is_training):
    '''
    Turn raw RPN outputs into proposals: decode against the anchors, clip to
    the image, keep the top-N boxes by object probability, then apply NMS.

    No object class is assigned at this stage; the result is only the set of
    boxes most likely to contain some object.

    :param rpn_bbox_pred: [-1, 4]
    :param rpn_cls_prob: [-1, 2]
    :param img_shape: forwarded to boxes_utils.clip_boxes_to_img_boundaries
    :param anchors:[-1, 4]
    :param is_training: selects the TRAIN or TEST top-N settings from cfgs
    :return: (final_boxes, final_probs)
    '''

    # Default values quoted below come from the original author's notes.
    if is_training:
        top_n_before_nms = cfgs.RPN_TOP_K_NMS_TRAIN  # default 12000
        top_n_after_nms = cfgs.RPN_MAXIMUM_PROPOSAL_TARIN  # default 2000
    else:
        top_n_before_nms = cfgs.RPN_TOP_K_NMS_TEST  # default 6000
        top_n_after_nms = cfgs.RPN_MAXIMUM_PROPOSAL_TEST  # default 300
    iou_limit = cfgs.RPN_NMS_IOU_THRESHOLD  # default 0.7, same in both modes

    # Column 1 of the two-way probability is used as the ranking score.
    object_prob = rpn_cls_prob[:, 1]

    # 1. decode boxes
    boxes = encode_and_decode.decode_boxes(
        encoded_boxes=rpn_bbox_pred,
        reference_boxes=anchors,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)

    # 2. clip to img boundaries
    boxes = boxes_utils.clip_boxes_to_img_boundaries(
        decode_boxes=boxes, img_shape=img_shape)

    # 3. get top N to NMS
    if top_n_before_nms > 0:
        # There may be fewer boxes than the configured N, so take the
        # smaller of the two before calling top_k.
        top_n_before_nms = tf.minimum(top_n_before_nms,
                                      tf.shape(boxes)[0],
                                      name='avoid_unenough_boxes')
        object_prob, best_indices = tf.nn.top_k(object_prob,
                                                k=top_n_before_nms)
        boxes = tf.gather(boxes, best_indices)

    # 4. NMS
    survivors = tf.image.non_max_suppression(boxes=boxes,
                                             scores=object_prob,
                                             max_output_size=top_n_after_nms,
                                             iou_threshold=iou_limit)

    final_boxes = tf.gather(boxes, survivors)
    final_probs = tf.gather(object_prob, survivors)

    return final_boxes, final_probs
    def fast_rcnn_loss(self):
        with tf.variable_scope('fast_rcnn_loss'):
            minibatch_indices, minibatch_reference_boxes_mattached_gtboxes, minibatch_object_mask, \
            minibatch_label_one_hot = self.fast_rcnn_minibatch(self.fast_rcnn_all_level_proposals)

            minibatch_reference_boxes = tf.gather(
                self.fast_rcnn_all_level_proposals, minibatch_indices)

            minibatch_encode_boxes = tf.gather(
                self.fast_rcnn_encode_boxes,
                minibatch_indices)  # [minibatch_size, num_classes*4]
            minibatch_scores = tf.gather(self.fast_rcnn_scores,
                                         minibatch_indices)

            positive_proposals_in_img = draw_box_with_color(
                self.img_batch,
                minibatch_reference_boxes *
                tf.expand_dims(minibatch_object_mask, 1),
                text=tf.shape(tf.where(tf.equal(minibatch_object_mask,
                                                1.0)))[0])

            negative_mask = tf.cast(
                tf.logical_not(tf.cast(minibatch_object_mask, tf.bool)),
                tf.float32)
            negative_proposals_in_img = draw_box_with_color(
                self.img_batch,
                minibatch_reference_boxes * tf.expand_dims(negative_mask, 1),
                text=tf.shape(tf.where(tf.equal(minibatch_object_mask,
                                                0.0)))[0])

            tf.summary.image('/positive_proposals', positive_proposals_in_img)
            tf.summary.image('/negative_proposals', negative_proposals_in_img)

            if cfgs.CLASS_NUM == 1:
                minibatch_decode_boxes = encode_and_decode.decode_boxes(
                    encode_boxes=minibatch_encode_boxes,
                    reference_boxes=minibatch_reference_boxes,
                    scale_factors=self.scale_factors)

                minibatch_softmax_scores = tf.gather(
                    slim.softmax(self.fast_rcnn_scores), minibatch_indices)
                top_k_scores, top_k_indices = tf.nn.top_k(
                    minibatch_softmax_scores[:, 1], k=5)

                top_detections_in_img = draw_boxes_with_scores(
                    self.img_batch,
                    boxes=tf.gather(minibatch_decode_boxes, top_k_indices),
                    scores=top_k_scores)
                tf.summary.image('/top_5', top_detections_in_img)

            # encode gtboxes
            minibatch_encode_gtboxes = \
                encode_and_decode.encode_boxes(
                    unencode_boxes=minibatch_reference_boxes_mattached_gtboxes,
                    reference_boxes=minibatch_reference_boxes,
                    scale_factors=self.scale_factors
                )

            # [minibatch_size, num_classes*4]
            minibatch_encode_gtboxes = tf.tile(minibatch_encode_gtboxes,
                                               [1, self.num_classes])

            class_weights_list = []
            category_list = tf.unstack(minibatch_label_one_hot, axis=1)
            for i in range(1, self.num_classes + 1):
                tmp_class_weights = tf.ones(
                    shape=[tf.shape(minibatch_encode_boxes)[0], 4],
                    dtype=tf.float32)
                tmp_class_weights = tmp_class_weights * tf.expand_dims(
                    category_list[i], axis=1)
                class_weights_list.append(tmp_class_weights)
            class_weights = tf.concat(
                class_weights_list, axis=1)  # [minibatch_size, num_classes*4]

            # loss
            with tf.variable_scope('fast_rcnn_classification_loss'):

                logits = tf.cast(minibatch_scores, tf.float32)
                onehot_labels = tf.cast(minibatch_label_one_hot, tf.float32)
                one = tf.ones(shape=tf.shape(onehot_labels), dtype=tf.float32)
                predictions_pt = tf.where(tf.equal(onehot_labels, 1), logits,
                                          1 - logits)

                # add small value to avoid
                alpha_t = tf.scalar_mul(0.25, one)
                alpha_t = tf.where(tf.equal(onehot_labels, 1), alpha_t,
                                   1 - alpha_t)
                gamma = tf.scalar_mul(2, one)
                new_gamma = tf.where(tf.less(predictions_pt, 0.5), -gamma,
                                     gamma)
                ##PFL
                fast_rcnn_classification_loss = tf.multiply(
                    tf.multiply(
                        alpha_t,
                        slim.losses.softmax_cross_entropy(
                            logits=logits, onehot_labels=onehot_labels)),
                    tf.pow(1 - predictions_pt, new_gamma))
                ##FL
                # fast_rcnn_classification_loss = tf.multiply(tf.multiply(alpha_t, slim.losses.softmax_cross_entropy(logits=logits,
                #                                                    onehot_labels=onehot_labels)), tf.pow(1-predictions_pt, 2))

                #FL和PFL,不注释这句;CE注释这句
                fast_rcnn_classification_loss = tf.reduce_sum(
                    fast_rcnn_classification_loss[:, 0] +
                    fast_rcnn_classification_loss[:, 1])

                ##CE
                # fast_rcnn_classification_loss = slim.losses.softmax_cross_entropy(logits=tf.clip_by_value(minibatch_scores,1e-8,tf.reduce_max(minibatch_scores)),
                #                                                                   onehot_labels=minibatch_label_one_hot)
                # fast_rcnn_classification_loss = slim.losses.softmax_cross_entropy(
                #     logits=minibatch_scores,
                #     onehot_labels=minibatch_label_one_hot)

            with tf.variable_scope('fast_rcnn_location_loss'):
                fast_rcnn_location_loss = losses.l1_smooth_losses(
                    predict_boxes=minibatch_encode_boxes,
                    gtboxes=minibatch_encode_gtboxes,
                    object_weights=minibatch_object_mask,
                    classes_weights=class_weights)
                slim.losses.add_loss(fast_rcnn_location_loss)

            return fast_rcnn_location_loss, fast_rcnn_classification_loss
    def rpn_losses(self):
        """Build the RPN location and classification losses for one minibatch.

        Samples a minibatch of anchors through ``self.make_minibatch``, encodes
        the matched ground-truth boxes against the sampled anchors, writes
        image summaries (positive anchors, negative anchors and the single
        highest-scoring decoded box) and creates the two loss tensors.

        :return: ``(location_loss, classification_loss)``. The location loss
            comes from ``losses.l1_smooth_losses`` and is also registered with
            ``slim.losses.add_loss``; the classification loss is the result of
            ``slim.losses.softmax_cross_entropy``.
        """
        with tf.variable_scope('rpn_losses'):
            (sampled_indices, matched_gtboxes, object_mask,
             labels_one_hot) = self.make_minibatch(self.anchors)

            # Keep only the sampled rows of anchors, box outputs and scores.
            sampled_anchors = tf.gather(self.anchors, sampled_indices)
            predicted_deltas = tf.gather(self.rpn_encode_boxes,
                                         sampled_indices)
            sampled_scores = tf.gather(self.rpn_scores, sampled_indices)

            # Regression targets: matched ground-truth boxes encoded relative
            # to the sampled anchors.
            target_deltas = encode_and_decode.encode_boxes(
                unencode_boxes=matched_gtboxes,
                reference_boxes=sampled_anchors,
                scale_factors=self.scale_factors)

            # Rows whose mask entry is 0 become all-zero boxes before drawing;
            # the text is the number of mask entries equal to 1.
            positive_boxes = sampled_anchors * tf.expand_dims(object_mask, 1)
            positive_count = tf.shape(
                tf.where(tf.equal(object_mask, 1.0)))[0]
            positive_anchors_in_img = draw_box_with_color(
                self.img_batch, positive_boxes, text=positive_count)

            # The negative mask is the logical complement of the object mask.
            is_object = tf.cast(object_mask, tf.bool)
            negative_mask = tf.cast(tf.logical_not(is_object), tf.float32)
            negative_boxes = sampled_anchors * tf.expand_dims(negative_mask, 1)
            negative_count = tf.shape(
                tf.where(tf.equal(object_mask, 0.0)))[0]
            negative_anchors_in_img = draw_box_with_color(
                self.img_batch, negative_boxes, text=negative_count)

            decoded_boxes = encode_and_decode.decode_boxes(
                encode_boxes=predicted_deltas,
                reference_boxes=sampled_anchors,
                scale_factors=self.scale_factors)

            tf.summary.image('/positive_anchors', positive_anchors_in_img)
            tf.summary.image('/negative_anchors', negative_anchors_in_img)

            # Rank by column 1 of the raw scores (presumably the "object"
            # column -- confirm against the RPN head) and draw the best box.
            object_column = sampled_scores[:, 1]
            best_scores, best_indices = tf.nn.top_k(object_column, k=1)
            best_boxes = tf.gather(decoded_boxes, best_indices)
            top_detections_in_img = draw_box_with_color(
                self.img_batch, best_boxes, text=tf.shape(best_scores)[0])
            tf.summary.image('/top_1', top_detections_in_img)

            with tf.variable_scope('rpn_location_loss'):
                location_loss = losses.l1_smooth_losses(
                    predict_boxes=predicted_deltas,
                    gtboxes=target_deltas,
                    object_weights=object_mask)
                # Register the smooth-L1 loss in the losses collection.
                slim.losses.add_loss(location_loss)

            with tf.variable_scope('rpn_classification_loss'):
                # Plain softmax cross-entropy over the sampled anchors.
                classification_loss = slim.losses.softmax_cross_entropy(
                    logits=sampled_scores,
                    onehot_labels=labels_one_hot)
            return location_loss, classification_loss
    def fast_rcnn_loss(self):
        """Build the Fast R-CNN location and classification losses.

        Samples a minibatch of proposals with ``self.fast_rcnn_minibatch``,
        writes image summaries for the sampled proposals, encodes the matched
        ground-truth boxes against those proposals and creates both losses.

        :return: ``(fast_rcnn_location_loss, fast_rcnn_classification_loss)``.
            The location loss comes from ``losses.l1_smooth_losses`` and is
            also registered with ``tf.losses.add_loss``; the classification
            loss is the result of ``tf.losses.softmax_cross_entropy``.
        """
        with tf.variable_scope('fast_rcnn_loss'):
            minibatch_indices, minibatch_reference_boxes_mattached_gtboxes, minibatch_object_mask, \
                minibatch_label_one_hot = self.fast_rcnn_minibatch(self.fast_rcnn_all_level_proposals)

            # Keep only the sampled rows of proposals, box outputs and scores.
            minibatch_reference_boxes = tf.gather(
                self.fast_rcnn_all_level_proposals, minibatch_indices)

            minibatch_encode_boxes = tf.gather(
                self.fast_rcnn_encode_boxes,
                minibatch_indices)  # [minibatch_size, num_classes*4]
            minibatch_scores = tf.gather(self.fast_rcnn_scores,
                                         minibatch_indices)

            # Rows whose mask entry is 0 become all-zero boxes before drawing;
            # the text is the number of mask entries equal to 1.
            positive_proposals_in_img = draw_box_with_color(
                self.img_batch,
                minibatch_reference_boxes *
                tf.expand_dims(minibatch_object_mask, 1),
                text=tf.shape(tf.where(tf.equal(minibatch_object_mask,
                                                1.0)))[0])

            # The negative mask is the logical complement of the object mask.
            negative_mask = tf.cast(
                tf.logical_not(tf.cast(minibatch_object_mask, tf.bool)),
                tf.float32)
            negative_proposals_in_img = draw_box_with_color(
                self.img_batch,
                minibatch_reference_boxes * tf.expand_dims(negative_mask, 1),
                text=tf.shape(tf.where(tf.equal(minibatch_object_mask,
                                                0.0)))[0])

            tf.summary.image('/positive_proposals', positive_proposals_in_img)
            tf.summary.image('/negative_proposals', negative_proposals_in_img)

            # The top-detections summary is only built for the single-class
            # configuration.
            if cfgs.CLASS_NUM == 1:
                minibatch_decode_boxes = encode_and_decode.decode_boxes(
                    encode_boxes=minibatch_encode_boxes,
                    reference_boxes=minibatch_reference_boxes,
                    scale_factors=self.scale_factors)

                minibatch_softmax_scores = tf.gather(
                    slim.softmax(self.fast_rcnn_scores), minibatch_indices)
                # tf.nn.top_k fails at run time when k exceeds the number of
                # rows, so cap k at the actual minibatch size: summary-only
                # code must not abort a training step on a tiny minibatch.
                num_top = tf.minimum(5, tf.shape(minibatch_softmax_scores)[0])
                top_k_scores, top_k_indices = tf.nn.top_k(
                    minibatch_softmax_scores[:, 1], k=num_top)

                top_detections_in_img = draw_boxes_with_scores(
                    self.img_batch,
                    boxes=tf.gather(minibatch_decode_boxes, top_k_indices),
                    scores=top_k_scores)
                tf.summary.image('/top_5', top_detections_in_img)

            # Regression targets: matched ground-truth boxes encoded relative
            # to the sampled proposals.
            minibatch_encode_gtboxes = \
                encode_and_decode.encode_boxes(
                    unencode_boxes=minibatch_reference_boxes_mattached_gtboxes,
                    reference_boxes=minibatch_reference_boxes,
                    scale_factors=self.scale_factors
                )

            # Repeat the targets once per class: [minibatch_size, num_classes*4]
            minibatch_encode_gtboxes = tf.tile(minibatch_encode_gtboxes,
                                               [1, self.num_classes])

            # Per-class weights: each one-hot label column from index 1 to
            # num_classes is spread over that class's 4 box values. Column 0 is
            # skipped (presumably background -- confirm against
            # fast_rcnn_minibatch). The ones tensor is loop-invariant, so it
            # is built once.
            unit_weights = tf.ones(
                shape=[tf.shape(minibatch_encode_boxes)[0], 4],
                dtype=tf.float32)
            category_list = tf.unstack(minibatch_label_one_hot, axis=1)
            class_weights_list = [
                unit_weights * tf.expand_dims(category_list[i], axis=1)
                for i in range(1, self.num_classes + 1)
            ]
            class_weights = tf.concat(
                class_weights_list, axis=1)  # [minibatch_size, num_classes*4]

            # loss
            with tf.variable_scope('fast_rcnn_classification_loss'):
                fast_rcnn_classification_loss = tf.losses.softmax_cross_entropy(
                    logits=minibatch_scores,
                    onehot_labels=minibatch_label_one_hot)

            with tf.variable_scope('fast_rcnn_location_loss'):
                fast_rcnn_location_loss = losses.l1_smooth_losses(
                    predict_boxes=minibatch_encode_boxes,
                    gtboxes=minibatch_encode_gtboxes,
                    object_weights=minibatch_object_mask,
                    classes_weights=class_weights)
                # Register the smooth-L1 loss in the losses collection.
                tf.losses.add_loss(fast_rcnn_location_loss)

            return fast_rcnn_location_loss, fast_rcnn_classification_loss
    def fast_rcnn_loss(self):
        """Build the Fast R-CNN losses using the rotated proposals.

        Samples a minibatch from ``self.fast_rcnn_all_level_rotate_proposals``
        with ``self.fast_rcnn_minibatch``, writes image summaries for the
        sampled proposals and the top-scoring decoded boxes, encodes the
        matched ground-truth boxes against the proposals and creates both
        losses. Boxes carry 5 values each in this variant (the per-class
        weights below are 5 wide).

        :return: ``(fast_rcnn_location_loss, fast_rcnn_classification_loss)``.
            The location loss comes from ``losses.l1_smooth_losses`` and is
            also registered with ``slim.losses.add_loss``; the classification
            loss is the result of ``slim.losses.softmax_cross_entropy``.
        """
        with tf.variable_scope('fast_rcnn_loss'):
            minibatch_indices, minibatch_reference_boxes_mattached_gtboxes, minibatch_object_mask, \
                minibatch_label_one_hot = self.fast_rcnn_minibatch(self.fast_rcnn_all_level_rotate_proposals)

            # The rotated proposals serve as the reference boxes.
            minibatch_reference_boxes = tf.gather(self.fast_rcnn_all_level_rotate_proposals, minibatch_indices)

            minibatch_encode_boxes = tf.gather(self.fast_rcnn_encode_boxes,
                                               minibatch_indices)  # [minibatch_size, num_classes*5]

            minibatch_scores = tf.gather(self.fast_rcnn_scores, minibatch_indices)

            # Rows whose mask entry is 0 become all-zero boxes before drawing;
            # the text is the number of mask entries equal to 1.
            positive_proposals_in_img = draw_box_with_color(self.img_batch,
                                                            minibatch_reference_boxes * tf.expand_dims(
                                                                   minibatch_object_mask, 1),
                                                            text=tf.shape(tf.where(tf.equal(minibatch_object_mask, 1.0)))[0])

            # The negative mask is the logical complement of the object mask.
            negative_mask = tf.cast(tf.logical_not(tf.cast(minibatch_object_mask, tf.bool)), tf.float32)
            negative_proposals_in_img = draw_box_with_color(self.img_batch,
                                                            minibatch_reference_boxes * tf.expand_dims(negative_mask, 1),
                                                            text=tf.shape(tf.where(tf.equal(minibatch_object_mask, 0.0)))[0])

            tf.summary.image('/positive_proposals', positive_proposals_in_img)
            tf.summary.image('/negative_proposals', negative_proposals_in_img)

            minibatch_decode_boxes = encode_and_decode.decode_boxes(encode_boxes=minibatch_encode_boxes,
                                                                    reference_boxes=minibatch_reference_boxes,
                                                                    scale_factors=self.scale_factors)

            minibatch_softmax_scores = tf.gather(slim.softmax(self.fast_rcnn_scores), minibatch_indices)
            # tf.nn.top_k fails at run time when k exceeds the number of rows,
            # so cap k at the actual minibatch size: summary-only code must not
            # abort a training step on a tiny minibatch.
            num_top = tf.minimum(5, tf.shape(minibatch_softmax_scores)[0])
            top_k_scores, top_k_indices = tf.nn.top_k(minibatch_softmax_scores[:, 1], k=num_top)

            top_detections_in_img = draw_boxes_with_scores(self.img_batch,
                                                           boxes=tf.gather(minibatch_decode_boxes, top_k_indices),
                                                           scores=top_k_scores)
            tf.summary.image('/top_5', top_detections_in_img)

            # Regression targets: matched ground-truth boxes encoded relative
            # to the sampled proposals.
            minibatch_encode_gtboxes = \
                encode_and_decode.encode_boxes(
                    unencode_boxes=minibatch_reference_boxes_mattached_gtboxes,
                    reference_boxes=minibatch_reference_boxes,
                    scale_factors=self.scale_factors)

            # Repeat the targets once per class: [minibatch_size, num_classes*5]
            minibatch_encode_gtboxes = tf.tile(minibatch_encode_gtboxes, [1, self.num_classes])

            # Per-class weights: each one-hot label column from index 1 to
            # num_classes is spread over that class's 5 box values. Column 0 is
            # skipped (presumably background -- confirm against
            # fast_rcnn_minibatch). The ones tensor is loop-invariant, so it
            # is built once.
            unit_weights = tf.ones(shape=[tf.shape(minibatch_encode_boxes)[0], 5], dtype=tf.float32)
            category_list = tf.unstack(minibatch_label_one_hot, axis=1)
            class_weights_list = [
                unit_weights * tf.expand_dims(category_list[i], axis=1)
                for i in range(1, self.num_classes + 1)
            ]
            class_weights = tf.concat(class_weights_list, axis=1)  # [minibatch_size, num_classes*5]

            # loss
            with tf.variable_scope('fast_rcnn_classification_loss'):
                fast_rcnn_classification_loss = slim.losses.softmax_cross_entropy(logits=minibatch_scores,
                                                                                  onehot_labels=minibatch_label_one_hot)
            with tf.variable_scope('fast_rcnn_location_loss'):
                fast_rcnn_location_loss = losses.l1_smooth_losses(predict_boxes=minibatch_encode_boxes,
                                                                  gtboxes=minibatch_encode_gtboxes,
                                                                  object_weights=minibatch_object_mask,
                                                                  classes_weights=class_weights)
                # Register the smooth-L1 loss in the losses collection.
                slim.losses.add_loss(fast_rcnn_location_loss)

            return fast_rcnn_location_loss, fast_rcnn_classification_loss
Exemple #29
0
        def batch_slice_head_proposals(rpn_proposal_bbox, encode_boxes,
                                       categories, scores, image_height,
                                       image_width):
            """
            Turn the head outputs of one image into final detections using
            per-class NMS.

            :param rpn_proposal_bbox: (N, 4) proposal boxes; rows flagged by
                ``boxes_utils.trim_zeros_graph`` are dropped first
            :param encode_boxes: (N, 4) encoded box outputs, one row per proposal
            :param categories: (N, ) class id per proposal; ids <= 0 are
                discarded as background
            :param scores: (N, ) score per proposal
            :param image_height: forwarded to
                ``boxes_utils.clip_boxes_to_img_boundaries``
            :param image_width: forwarded to
                ``boxes_utils.clip_boxes_to_img_boundaries``
            :return:
            detections : (cfgs.DETECTION_MAX_INSTANCES, 6) rows laid out as
                [4 box values, class_id, score] (box order y1, x1, y2, x2
                according to the original notes), zero padded at the bottom
            """
            with tf.name_scope('head_proposals'):
                # Trim the proposals and apply the same row mask to every
                # per-proposal tensor.
                rpn_proposal_bbox, valid_rows = boxes_utils.trim_zeros_graph(
                    rpn_proposal_bbox, name="trim_proposals_detection")
                encode_boxes = tf.boolean_mask(encode_boxes, valid_rows)
                categories = tf.boolean_mask(categories, valid_rows)
                scores = tf.boolean_mask(scores, valid_rows)

                # Decode relative to the proposals, then clip to the image.
                decoded_boxes = encode_and_decode.decode_boxes(
                    encode_boxes=encode_boxes,
                    reference_boxes=rpn_proposal_bbox,
                    scale_factors=cfgs.BBOX_STD_DEV)
                decoded_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                    decoded_boxes, image_height, image_width)

                # Row indices of proposals with a foreground class (id > 0).
                foreground_rows = tf.where(categories > 0)[:, 0]
                candidate_ixs = tf.cast(foreground_rows, tf.int32)

                if cfgs.DEBUG:
                    # Print the (at most 100) best-scoring foreground rows.
                    debug_categories = tf.gather(categories, candidate_ixs)
                    debug_scores = tf.gather(scores, candidate_ixs)
                    debug_count = tf.minimum(tf.shape(debug_scores)[0], 100)
                    top_debug_scores, top_debug_ixs = tf.nn.top_k(
                        debug_scores, k=debug_count)
                    top_debug_categories = tf.gather(
                        debug_categories, top_debug_ixs)
                    boxes_utils.print_tensors(top_debug_categories,
                                              "categories")
                    boxes_utils.print_tensors(top_debug_scores, "scores")

                # Optionally drop low-confidence rows (original note: 0.7).
                if cfgs.FINAL_SCORE_THRESHOLD:
                    confident_rows = tf.where(
                        scores >= cfgs.FINAL_SCORE_THRESHOLD)[:, 0]
                    confident_ixs = tf.cast(confident_rows, tf.int32)
                    shared_ixs = tf.sets.set_intersection(
                        tf.expand_dims(candidate_ixs, 0),
                        tf.expand_dims(confident_ixs, 0))
                    candidate_ixs = tf.sparse_tensor_to_dense(shared_ixs)[0]

                pre_nms_class_ids = tf.gather(categories, candidate_ixs)
                pre_nms_scores = tf.gather(scores, candidate_ixs)
                pre_nms_rois = tf.gather(decoded_boxes, candidate_ixs)
                unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

                def nms_one_class(class_id):
                    """Run NMS on the candidates of one class; return row indices padded with -1."""
                    # Positions (within the candidates) that carry this class.
                    member_ixs = tf.where(
                        tf.equal(pre_nms_class_ids, class_id))[:, 0]
                    # Original notes: at most 200 boxes are kept; the IoU
                    # threshold is 0.3 ("too high and everything gets
                    # filtered out").
                    survivors = tf.image.non_max_suppression(
                        tf.gather(pre_nms_rois, member_ixs),
                        tf.gather(pre_nms_scores, member_ixs),
                        max_output_size=cfgs.DETECTION_MAX_INSTANCES,
                        iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD)
                    # Map NMS output back to row indices of the trimmed inputs.
                    survivors = tf.gather(candidate_ixs,
                                          tf.gather(member_ixs, survivors))
                    # Pad with -1 so every class yields the same length.
                    pad_len = cfgs.DETECTION_MAX_INSTANCES - tf.shape(
                        survivors)[0]
                    survivors = tf.pad(survivors, [(0, pad_len)],
                                       mode='CONSTANT',
                                       constant_values=-1)
                    # Fix the static shape so map_fn() can infer its result.
                    survivors.set_shape([cfgs.DETECTION_MAX_INSTANCES])
                    return survivors

                # Run NMS once per distinct class id, flatten the results and
                # strip the -1 padding.
                nms_ixs = tf.map_fn(nms_one_class,
                                    unique_pre_nms_class_ids,
                                    dtype=tf.int32)
                nms_ixs = tf.reshape(nms_ixs, [-1])
                nms_ixs = tf.gather(nms_ixs, tf.where(nms_ixs > -1)[:, 0])

                # Keep the rows present both before and after NMS.
                surviving = tf.sets.set_intersection(
                    tf.expand_dims(candidate_ixs, 0),
                    tf.expand_dims(nms_ixs, 0))
                final_ixs = tf.sparse_tensor_to_dense(surviving)[0]

                # Order by score and cap at DETECTION_MAX_INSTANCES rows.
                roi_count = cfgs.DETECTION_MAX_INSTANCES
                kept_scores = tf.gather(scores, final_ixs)
                num_keep = tf.minimum(tf.shape(kept_scores)[0], roi_count)
                score_order = tf.nn.top_k(kept_scores,
                                          k=num_keep,
                                          sorted=True)[1]
                final_ixs = tf.gather(final_ixs, score_order)

                # Assemble [N, (4 box values, class_id, score)].
                box_columns = tf.gather(decoded_boxes, final_ixs)
                class_column = tf.to_float(
                    tf.gather(categories, final_ixs))[..., tf.newaxis]
                score_column = tf.gather(scores, final_ixs)[..., tf.newaxis]
                detections = tf.concat(
                    [box_columns, class_column, score_column], axis=1)

                # Zero-pad up to DETECTION_MAX_INSTANCES rows.
                gap = cfgs.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
                detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")

                return detections