def fast_rcnn_predict(self):

        with tf.variable_scope('fast_rcnn_predict'):
            fast_rcnn_softmax_scores = slim.softmax(
                self.fast_rcnn_scores)  # [-1, num_classes+1]

            fast_rcnn_encode_boxes = tf.reshape(self.fast_rcnn_encode_boxes,
                                                [-1, 4])

            reference_boxes = tf.tile(
                self.fast_rcnn_all_level_proposals,
                [1, self.num_classes])  # [N, 4*num_classes]
            reference_boxes = tf.reshape(reference_boxes,
                                         [-1, 4])  # [N*num_classes, 4]
            fast_rcnn_decode_boxes = encode_and_decode.decode_boxes(
                encode_boxes=fast_rcnn_encode_boxes,
                reference_boxes=reference_boxes,
                scale_factors=self.scale_factors)

            fast_rcnn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                fast_rcnn_decode_boxes, img_shape=self.img_shape)

            # multiclass NMS
            fast_rcnn_decode_boxes = tf.reshape(fast_rcnn_decode_boxes,
                                                [-1, self.num_classes * 4])
            fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category = \
                self.fast_rcnn_proposals(fast_rcnn_decode_boxes, scores=fast_rcnn_softmax_scores)

            return fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category

    def fast_rcnn_prediction(self):
        '''
        :param: self.fast_rcnn_cls_scores, [2000, num_cls+1], num_cls+background
        :param: self.fast_rcnn_encode_boxes, [2000, num_cls*4]
        :return: fast_rcnn_decode_boxes, [-1, 4]
        :return: fast_rcnn_category, [-1, ]
        :return: fast_rcnn_scores, [-1, ]
        :return: num_object, [-1, ]
        '''
        with tf.variable_scope('fast_rcnn_predict'):
            fast_rcnn_softmax_score = slim.softmax(self.fast_rcnn_cls_scores)

            fast_rcnn_encode_boxes = tf.reshape(self.fast_rcnn_encode_boxes,
                                                [-1, 4])
            fast_rcnn_reference_boxes = tf.tile(self.rois_boxes,
                                                [1, self.num_cls])
            fast_rcnn_reference_boxes = tf.reshape(fast_rcnn_reference_boxes,
                                                   [-1, 4])

            # use the encoded offsets to decode boxes from the reference boxes
            fast_rcnn_decode_boxes = decode_boxes(
                encode_boxes=fast_rcnn_encode_boxes,
                reference_boxes=fast_rcnn_reference_boxes,
                scale_factors=self.scale_factors)
            # clip decode boxes to image shape
            fast_rcnn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                boxes=fast_rcnn_decode_boxes, img_shape=self.img_shape)

            # multi-class NMS
            fast_rcnn_decode_boxes = tf.reshape(fast_rcnn_decode_boxes,
                                                [-1, 4 * self.num_cls])
            fast_rcnn_decode_boxes, fast_rcnn_category, fast_rcnn_scores, num_object = \
                self.mutil_class_nms(boxes=fast_rcnn_decode_boxes,
                                     scores=fast_rcnn_softmax_score)
            return fast_rcnn_decode_boxes, fast_rcnn_category, fast_rcnn_scores, num_object
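
The decode step the methods above rely on is not shown in this listing. Below is a minimal sketch of the usual Faster R-CNN parameterization, assuming [ymin, xmin, ymax, xmax] reference boxes and [t_y, t_x, t_h, t_w] offsets; the repository's encode_and_decode.decode_boxes may differ in detail.

import tensorflow as tf

def decode_boxes_sketch(encode_boxes, reference_boxes, scale_factors=None):
    # undo the (optional) target normalization
    t_yc, t_xc, t_h, t_w = tf.unstack(encode_boxes, axis=1)
    if scale_factors:
        t_yc /= scale_factors[0]
        t_xc /= scale_factors[1]
        t_h /= scale_factors[2]
        t_w /= scale_factors[3]
    ymin, xmin, ymax, xmax = tf.unstack(reference_boxes, axis=1)
    ref_h, ref_w = ymax - ymin, xmax - xmin
    ref_yc, ref_xc = ymin + 0.5 * ref_h, xmin + 0.5 * ref_w
    # shift the reference center and rescale its size
    pred_yc, pred_xc = t_yc * ref_h + ref_yc, t_xc * ref_w + ref_xc
    pred_h, pred_w = tf.exp(t_h) * ref_h, tf.exp(t_w) * ref_w
    return tf.transpose(tf.stack([pred_yc - 0.5 * pred_h, pred_xc - 0.5 * pred_w,
                                  pred_yc + 0.5 * pred_h, pred_xc + 0.5 * pred_w]))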
Example #3
    def rpn_proposals(self):
        '''
        :param:self.anchors: shape:[-1, 4]->[ymin, xmin, ymax, xmax]
        :param:self.rpn_scores: shape:[-1, 2]->[background, foreground]
        :param:self.rpn_encode_boxes: shape:[-1, 4]->[ycenter, xcenter, h, w]
        :return: valid_boxes [2000, 4]
        :return: valid_scores [2000,]
        '''
        with tf.variable_scope('rpn_proposals'):
            rpn_decode_boxes = encode_and_decode.decode_boxes(
                encode_boxes=self.rpn_encode_boxes,
                reference_boxes=self.anchors,
                scale_factors=self.scale_factors)
            if not self.is_training:
                image_shape = tf.shape(self.img_batch)
                rpn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                    rpn_decode_boxes, image_shape)

            rpn_softmax_scores = slim.softmax(self.rpn_scores)
            rpn_object_score = rpn_softmax_scores[:, 1]

            if self.top_k_nms:
                rpn_object_score, top_k_indices = tf.nn.top_k(rpn_object_score,
                                                              k=self.top_k_nms)
                rpn_decode_boxes = tf.gather(rpn_decode_boxes, top_k_indices)

            nms_indices = boxes_utils.non_maximal_suppression(
                rpn_decode_boxes, rpn_object_score, self.rpn_nms_iou_threshold,
                self.max_proposal_num)
            valid_scores = tf.gather(rpn_object_score, nms_indices)
            valid_boxes = tf.gather(rpn_decode_boxes, nms_indices)

        return valid_boxes, valid_scores
Example #4
def postprocess_detctions(rpn_bbox_pred, rpn_cls_prob, img_shape, anchors,
                          is_training):

    # 1. decode boxes
    boxes_pred = bbox_transform.bbox_transform_inv(boxes=anchors,
                                                   deltas=rpn_bbox_pred)

    # 2. clip to img boundaries
    boxes_pred = boxes_utils.clip_boxes_to_img_boundaries(boxes=boxes_pred,
                                                          img_shape=img_shape)

    return_boxes_pred = []
    return_scores = []
    return_labels = []
    for j in range(0, cfgs.CLASS_NUM):
        indices = filter_detections(boxes_pred, rpn_cls_prob[:, j],
                                    is_training)
        tmp_boxes_pred = tf.reshape(tf.gather(boxes_pred, indices), [-1, 4])
        tmp_scores = tf.reshape(tf.gather(rpn_cls_prob[:, j], indices), [
            -1,
        ])

        return_boxes_pred.append(tmp_boxes_pred)
        return_scores.append(tmp_scores)
        return_labels.append(tf.ones_like(tmp_scores) * (j + 1))

    return_boxes_pred = tf.concat(return_boxes_pred, axis=0)
    return_scores = tf.concat(return_scores, axis=0)
    return_labels = tf.concat(return_labels, axis=0)

    return return_boxes_pred, return_scores, return_labels
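
filter_detections is referenced above but not defined in this listing. A plausible sketch, assuming it returns the indices of boxes whose per-class score clears a threshold and then survive per-class NMS (the threshold values here are illustrative, not the repository's cfgs):

import tensorflow as tf

def filter_detections_sketch(boxes, scores, is_training,
                             score_thresh=0.05, iou_thresh=0.5, max_dets=100):
    # keep boxes whose class score clears the threshold
    # (is_training could toggle the thresholds; unused in this sketch)
    indices = tf.reshape(tf.where(tf.greater(scores, score_thresh)), [-1])
    filtered_boxes = tf.gather(boxes, indices)
    filtered_scores = tf.gather(scores, indices)
    # suppress near-duplicates within the class
    nms_keep = tf.image.non_max_suppression(filtered_boxes, filtered_scores,
                                            max_output_size=max_dets,
                                            iou_threshold=iou_thresh)
    # map the NMS survivors back to indices into the original tensors
    return tf.gather(indices, nms_keep)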
    def rpn_proposals(self):
        with tf.variable_scope('rpn_proposals'):
            rpn_decode_boxes = encode_and_decode.decode_boxes(encode_boxes=self.rpn_encode_boxes,
                                                              reference_boxes=self.anchors,
                                                              scale_factors=self.scale_factors)

            if not self.is_training:  # at test time, clip proposals to image boundaries
                img_shape = tf.shape(self.img_batch)
                rpn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(rpn_decode_boxes, img_shape)

            rpn_softmax_scores = slim.softmax(self.rpn_scores)
            rpn_object_score = rpn_softmax_scores[:, 1]  # second column represents objectness

            if self.top_k_nms:
                rpn_object_score, top_k_indices = tf.nn.top_k(rpn_object_score, k=self.top_k_nms)
                rpn_decode_boxes = tf.gather(rpn_decode_boxes, top_k_indices)

            valid_indices = nms.non_maximal_suppression(boxes=rpn_decode_boxes,
                                                        scores=rpn_object_score,
                                                        max_output_size=self.max_proposals_num,
                                                        iou_threshold=self.rpn_nms_iou_threshold)

            valid_boxes = tf.gather(rpn_decode_boxes, valid_indices)
            valid_scores = tf.gather(rpn_object_score, valid_indices)
            rpn_proposals_boxes, rpn_proposals_scores = tf.cond(
                tf.less(tf.shape(valid_boxes)[0], self.max_proposals_num),
                lambda: boxes_utils.padd_boxes_with_zeros(valid_boxes, valid_scores,
                                                          self.max_proposals_num),
                lambda: (valid_boxes, valid_scores))

            return rpn_proposals_boxes, rpn_proposals_scores
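
boxes_utils.padd_boxes_with_zeros is assumed to zero-pad proposals up to max_proposals_num so downstream ops see a fixed first dimension, mirroring the inline padding in batch_slice_rpn_proposals further down this listing; a minimal sketch:

import tensorflow as tf

def padd_boxes_with_zeros_sketch(boxes, scores, max_num):
    # number of rows missing to reach the fixed proposal count
    padding = tf.maximum(max_num - tf.shape(boxes)[0], 0)
    padded_boxes = tf.concat([boxes, tf.zeros((padding, 4), tf.float32)], axis=0)
    padded_scores = tf.pad(scores, [(0, padding)])
    return padded_boxes, padded_scores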
def postprocess_detctions(rpn_bbox, rpn_cls_prob, img_shape):
    '''
    :param rpn_bbox: [-1, 4]
    :param rpn_cls_prob: [-1, NUM_CLASS]
    :param img_shape:
    :return:
    '''

    boxes = boxes_utils.clip_boxes_to_img_boundaries(boxes=rpn_bbox,
                                                     img_shape=img_shape)

    return_boxes = []
    return_scores = []
    return_labels = []
    for j in range(0, cfgs.CLASS_NUM):
        indices = filter_detections(boxes, rpn_cls_prob[:, j])
        tmp_boxes = tf.reshape(tf.gather(boxes, indices), [-1, 4])
        return_boxes.append(tmp_boxes)
        tmp_scores = tf.gather(rpn_cls_prob[:, j], indices)
        tmp_scores = tf.reshape(tmp_scores, [
            -1,
        ])
        return_scores.append(tmp_scores)
        return_labels.append(tf.ones_like(tmp_scores) * (j + 1))

    return_boxes = tf.concat(return_boxes, axis=0)
    return_scores = tf.concat(return_scores, axis=0)
    return_labels = tf.concat(return_labels, axis=0)

    return return_boxes, return_scores, return_labels
Example #7
def postprocess_rpn_proposals(rpn_bbox_pred, rpn_cls_prob, img_shape, anchors,
                              is_training):
    '''

    :param rpn_bbox_pred: [-1, 4]
    :param rpn_cls_prob: [-1, 2]
    :param img_shape:
    :param anchors:[-1, 4]
    :param is_training:
    :return:
    '''

    if is_training:
        pre_nms_topN = cfgs.RPN_TOP_K_NMS_TRAIN
        post_nms_topN = cfgs.RPN_MAXIMUM_PROPOSAL_TARIN
        nms_thresh = cfgs.RPN_NMS_IOU_THRESHOLD
    else:
        pre_nms_topN = cfgs.RPN_TOP_K_NMS_TEST
        post_nms_topN = cfgs.RPN_MAXIMUM_PROPOSAL_TEST
        nms_thresh = cfgs.RPN_NMS_IOU_THRESHOLD

    cls_prob = rpn_cls_prob[:, 1]

    # 1. decode boxes
    decode_boxes = encode_and_decode.decode_boxes(
        encode_boxes=rpn_bbox_pred,
        reference_boxes=anchors,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)

    # decode_boxes = encode_and_decode.decode_boxes(boxes=anchors,
    #                                               deltas=rpn_bbox_pred,
    #                                               scale_factor=None)

    # 2. clip to img boundaries
    decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
        decode_boxes=decode_boxes, img_shape=img_shape)

    # 3. get top N to NMS
    if pre_nms_topN > 0:
        pre_nms_topN = tf.minimum(pre_nms_topN,
                                  tf.shape(decode_boxes)[0],
                                  name='avoid_unenough_boxes')
        cls_prob, top_k_indices = tf.nn.top_k(cls_prob, k=pre_nms_topN)
        decode_boxes = tf.gather(decode_boxes, top_k_indices)

    # 4. NMS
    keep = tf.image.non_max_suppression(boxes=decode_boxes,
                                        scores=cls_prob,
                                        max_output_size=post_nms_topN,
                                        iou_threshold=nms_thresh)

    final_boxes = tf.gather(decode_boxes, keep)
    final_probs = tf.gather(cls_prob, keep)

    return final_boxes, final_probs
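
clip_boxes_to_img_boundaries appears in every example here. A minimal sketch, assuming [ymin, xmin, ymax, xmax] boxes and an img_shape of [batch, height, width, channels]; the actual helper may clamp to height/width rather than height-1/width-1.

import tensorflow as tf

def clip_boxes_to_img_boundaries_sketch(decode_boxes, img_shape):
    ymin, xmin, ymax, xmax = tf.unstack(decode_boxes, axis=1)
    img_h = tf.cast(img_shape[1], tf.float32)
    img_w = tf.cast(img_shape[2], tf.float32)
    # clamp each coordinate into the image rectangle
    ymin = tf.clip_by_value(ymin, 0., img_h - 1.)
    xmin = tf.clip_by_value(xmin, 0., img_w - 1.)
    ymax = tf.clip_by_value(ymax, 0., img_h - 1.)
    xmax = tf.clip_by_value(xmax, 0., img_w - 1.)
    return tf.transpose(tf.stack([ymin, xmin, ymax, xmax]))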
Example #8
    def fast_rcnn_proposals(self, decode_boxes, scores):
        '''
        multiclass NMS
        :param decode_boxes: [N, num_classes*4]
        :param scores: [N, num_classes+1]
        :return:
        detection_boxes: [-1, 4]
        scores: [-1, ]
        num_of_objects: scalar
        detection_category: [-1, ]
        '''

        with tf.variable_scope('fast_rcnn_proposals'):
            category = tf.argmax(scores, axis=1)

            object_mask = tf.cast(tf.not_equal(category, 0), tf.float32)

            decode_boxes = decode_boxes * tf.expand_dims(object_mask, axis=1)  # zero out background boxes: [0, 0, 0, 0]
            scores = scores * tf.expand_dims(object_mask, axis=1)

            decode_boxes = tf.reshape(decode_boxes, [-1, self.num_classes, 4])  # [N, num_classes, 4]

            decode_boxes_list = tf.unstack(decode_boxes, axis=1)
            score_list = tf.unstack(scores[:, 1:], axis=1)
            after_nms_boxes = []
            after_nms_scores = []
            category_list = []
            for per_class_decode_boxes, per_class_scores in zip(decode_boxes_list, score_list):

                valid_indices = boxes_utils.nms_boxes(per_class_decode_boxes, per_class_scores,
                                                      iou_threshold=self.fast_rcnn_nms_iou_threshold,
                                                      max_output_size=self.fast_rcnn_nms_max_boxes_per_class,
                                                      name='second_stage_NMS')

                after_nms_boxes.append(tf.gather(per_class_decode_boxes, valid_indices))
                after_nms_scores.append(tf.gather(per_class_scores, valid_indices))
                tmp_category = tf.gather(category, valid_indices)

                category_list.append(tmp_category)

            all_nms_boxes = tf.concat(after_nms_boxes, axis=0)
            all_nms_scores = tf.concat(after_nms_scores, axis=0)
            all_category = tf.concat(category_list, axis=0)

            all_nms_boxes = boxes_utils.clip_boxes_to_img_boundaries(all_nms_boxes,
                                                                     img_shape=self.img_shape)

            scores_large_than_threshold_indices = tf.reshape(tf.where(tf.greater(all_nms_scores,
                                                                                 self.show_detections_score_threshold)), [-1])

            all_nms_boxes = tf.gather(all_nms_boxes, scores_large_than_threshold_indices)
            all_nms_scores = tf.gather(all_nms_scores, scores_large_than_threshold_indices)
            all_category = tf.gather(all_category, scores_large_than_threshold_indices)

            return all_nms_boxes, all_nms_scores, tf.shape(all_nms_boxes)[0], all_category  # num of objects
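
boxes_utils.nms_boxes used in the per-class loop above looks like a thin wrapper over TensorFlow's built-in op; a plausible sketch, with the signature inferred from the call site:

import tensorflow as tf

def nms_boxes_sketch(boxes, scores, iou_threshold, max_output_size, name=None):
    with tf.name_scope(name, 'nms_boxes'):
        # returns indices of the kept boxes, highest scores first
        return tf.image.non_max_suppression(boxes=boxes, scores=scores,
                                            max_output_size=max_output_size,
                                            iou_threshold=iou_threshold)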
    def fast_rcnn_predict(self):

        with tf.variable_scope('fast_rcnn_predict'):
            fast_rcnn_softmax_scores = slim.softmax(
                self.fast_rcnn_scores)  # [-1, num_classes+1]
            fast_rcnn_softmax_scores_rotate = slim.softmax(
                self.fast_rcnn_scores_rotate)  # [-1, num_classes+1]

            fast_rcnn_encode_boxes = tf.reshape(self.fast_rcnn_encode_boxes,
                                                [-1, 4])
            fast_rcnn_encode_boxes_rotate = tf.reshape(
                self.fast_rcnn_encode_boxes_rotate, [-1, 5])

            # Class-agnostic regression: no per-class tiling needed
            # reference_boxes = tf.tile(self.fast_rcnn_all_level_proposals, [1, self.num_classes])  # [N, 4*num_classes]
            reference_boxes = self.fast_rcnn_all_level_proposals

            reference_boxes = tf.reshape(reference_boxes,
                                         [-1, 4])  # [N*num_classes, 4]
            fast_rcnn_decode_boxes = encode_and_decode.decode_boxes(
                encode_boxes=fast_rcnn_encode_boxes,
                reference_boxes=reference_boxes,
                scale_factors=self.scale_factors)

            fast_rcnn_decode_boxes_rotate = \
                encode_and_decode.decode_boxes_rotate(encode_boxes=fast_rcnn_encode_boxes_rotate,
                                                      reference_boxes=reference_boxes,
                                                      scale_factors=self.scale_factors)

            fast_rcnn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                fast_rcnn_decode_boxes, img_shape=self.img_shape)

            # multiclass NMS
            # Class-agnostic regression: no per-class reshape needed
            # fast_rcnn_decode_boxes = tf.reshape(fast_rcnn_decode_boxes, [-1, self.num_classes*4])
            # fast_rcnn_decode_boxes_rotate = tf.reshape(fast_rcnn_decode_boxes_rotate, [-1, self.num_classes * 5])

            fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category = \
                self.fast_rcnn_proposals(fast_rcnn_decode_boxes, scores=fast_rcnn_softmax_scores)

            fast_rcnn_decode_boxes_rotate, fast_rcnn_score_rotate, fast_rcnn_head_quadrant, \
            num_of_objects_rotate, detection_category_rotate = \
                self.fast_rcnn_proposals_rotate(fast_rcnn_decode_boxes_rotate,
                                                scores=fast_rcnn_softmax_scores_rotate,
                                                head_quadrant=self.fast_rcnn_head_quadrant)

            return fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category,\
                   fast_rcnn_decode_boxes_rotate, fast_rcnn_score_rotate, fast_rcnn_head_quadrant, \
                   num_of_objects_rotate, detection_category_rotate
Example #10
    def postprocess_rpn_proposals(self, rpn_bbox_pred, rpn_cls_prob, img_shape,
                                  anchors, is_training):
        """
        rpn proposals operation
        :param rpn_bbox_pred: predict bbox
        :param rpn_cls_prob: probability of rpn classification
        :param img_shape: image_shape
        :param anchors: all reference anchors
        :param is_training:
        :return:
        """
        if is_training:
            pre_nms_topN = cfgs.RPN_TOP_K_NMS_TRAIN
            post_nms_topN = cfgs.RPN_MAXIMUM_PROPOSAL_TARIN
            nms_threshold = cfgs.RPN_NMS_IOU_THRESHOLD
        else:
            pre_nms_topN = cfgs.RPN_TOP_K_NMS_TEST
            post_nms_topN = cfgs.RPN_MAXIMUM_PROPOSAL_TEST
            nms_threshold = cfgs.RPN_NMS_IOU_THRESHOLD

        cls_prob = rpn_cls_prob[:, 1]  # [-1, 2] => (negative, positive); keep the positive column

        # step 1  decode boxes
        decode_boxes = encode_and_decode.decode_boxes(
            encoded_boxes=rpn_bbox_pred,
            reference_boxes=anchors,
            scale_factors=cfgs.ANCHOR_SCALE_FACTORS)
        # step 2 clip to image boundaries
        decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
            decode_boxes, img_shape=img_shape)
        # step 3 get top N to NMS
        if pre_nms_topN > 0:
            pre_nms_topN = tf.minimum(pre_nms_topN,
                                      tf.shape(decode_boxes)[0],
                                      name='minimum_boxes')
            cls_prob, top_k_indices = tf.nn.top_k(cls_prob, k=pre_nms_topN)
            decode_boxes = tf.gather(params=decode_boxes,
                                     indices=top_k_indices)

        # step 4 NMS(Non Max Suppression)
        keep_indices = tf.image.non_max_suppression(
            boxes=decode_boxes,
            scores=cls_prob,
            max_output_size=post_nms_topN,
            iou_threshold=nms_threshold)
        final_boxes = tf.gather(decode_boxes, keep_indices)
        final_probs = tf.gather(cls_prob, keep_indices)
        return final_boxes, final_probs
        def batch_slice_rpn_proposals(rpn_encode_boxes, rpn_scores, anchors,
                                      config, rpn_proposals_num):

            with tf.variable_scope('rpn_proposals'):
                rpn_softmax_scores = slim.softmax(rpn_scores)
                rpn_object_score = rpn_softmax_scores[:, 1]  # second column represents objectness
                if config.RPN_TOP_K_NMS:
                    top_k_indices = tf.nn.top_k(rpn_object_score,
                                                k=config.RPN_TOP_K_NMS).indices
                    rpn_object_score = tf.gather(rpn_object_score,
                                                 top_k_indices)
                    rpn_encode_boxes = tf.gather(rpn_encode_boxes,
                                                 top_k_indices)
                    anchors = tf.gather(anchors, top_k_indices)

                rpn_decode_boxes = encode_and_decode.decode_boxes(
                    encode_boxes=rpn_encode_boxes,
                    reference_boxes=anchors,
                    dev_factors=config.RPN_BBOX_STD_DEV)

                valid_indices = boxes_utils.non_maximal_suppression(
                    boxes=rpn_decode_boxes,
                    scores=rpn_object_score,
                    max_output_size=rpn_proposals_num,
                    iou_threshold=config.RPN_NMS_IOU_THRESHOLD)
                rpn_decode_boxes = tf.gather(rpn_decode_boxes, valid_indices)
                rpn_object_score = tf.gather(rpn_object_score, valid_indices)
                # clip proposals to image boundaries (out-of-bound coordinates are replaced with the boundary value)
                rpn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                    rpn_decode_boxes,
                    [0, 0, config.TARGET_SIDE - 1, config.TARGET_SIDE - 1])
                # Pad if needed
                padding = tf.maximum(
                    rpn_proposals_num - tf.shape(rpn_decode_boxes)[0], 0)
                # note: tf.pad(boxes, [(0, padding), (0, 0)]) would also work; concat keeps the zero block explicit
                zeros_padding = tf.zeros((padding, 4), dtype=tf.float32)
                rpn_proposals_boxes = tf.concat(
                    [rpn_decode_boxes, zeros_padding], axis=0)
                rpn_object_score = tf.pad(rpn_object_score, [(0, padding)])

                return rpn_proposals_boxes, rpn_object_score
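
batch_slice_rpn_proposals processes one image at a time; a common way to apply it across a batch is tf.map_fn. A sketch, assuming per-image tensors stacked along axis 0 and that batch_slice_rpn_proposals is visible at this scope:

import tensorflow as tf

def map_rpn_proposals_sketch(batch_encode_boxes, batch_scores, batch_anchors,
                             config, rpn_proposals_num):
    # each element of the tuple is sliced along axis 0, one image per call
    return tf.map_fn(
        lambda args: batch_slice_rpn_proposals(args[0], args[1], args[2],
                                               config, rpn_proposals_num),
        (batch_encode_boxes, batch_scores, batch_anchors),
        dtype=(tf.float32, tf.float32))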
    def fast_rcnn_predict(self):

        with tf.variable_scope('fast_rcnn_predict'):
            fast_rcnn_softmax_scores = slim.softmax(self.fast_rcnn_scores)  # [-1, num_classes+1]
            fast_rcnn_softmax_scores_rotate = slim.softmax(self.fast_rcnn_scores_rotate)  # [-1, num_classes+1]

            fast_rcnn_encode_boxes = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 4])
            fast_rcnn_encode_boxes_rotate = tf.reshape(self.fast_rcnn_encode_boxes_rotate, [-1, 5])

            reference_boxes = tf.tile(self.fast_rcnn_all_level_proposals, [1, self.num_classes])  # [N, 4*num_classes]
            reference_boxes = tf.reshape(reference_boxes, [-1, 4])   # [N*num_classes, 4]
            fast_rcnn_decode_boxes = encode_and_decode.decode_boxes(encode_boxes=fast_rcnn_encode_boxes,
                                                                    reference_boxes=reference_boxes,
                                                                    scale_factors=self.scale_factors)

            fast_rcnn_decode_boxes_rotate = \
                encode_and_decode.decode_boxes_rotate(encode_boxes=fast_rcnn_encode_boxes_rotate,
                                                      reference_boxes=reference_boxes,
                                                      scale_factors=self.scale_factors)

            fast_rcnn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(fast_rcnn_decode_boxes,
                                                                              img_shape=self.img_shape)

            # mutilclass NMS
            fast_rcnn_decode_boxes = tf.reshape(fast_rcnn_decode_boxes, [-1, self.num_classes*4])
            fast_rcnn_decode_boxes_rotate = tf.reshape(fast_rcnn_decode_boxes_rotate, [-1, self.num_classes * 5])

            fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category = \
                self.fast_rcnn_proposals(fast_rcnn_decode_boxes, scores=fast_rcnn_softmax_scores)

            fast_rcnn_decode_boxes_rotate, fast_rcnn_score_rotate, num_of_objects_rotate, detection_category_rotate = \
                self.fast_rcnn_proposals_rotate(fast_rcnn_decode_boxes_rotate, scores=fast_rcnn_softmax_scores_rotate)

            fast_rcnn_decode_boxes_rotate_reorder = tf.py_func(read_reorder,
                                                               inp=[fast_rcnn_decode_boxes_rotate],
                                                               Tout=tf.float32)

            fast_rcnn_decode_boxes_rotate_original = fast_rcnn_decode_boxes_rotate
            fast_rcnn_decode_boxes_rotate = fast_rcnn_decode_boxes_rotate_reorder
            return fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category,\
                   fast_rcnn_decode_boxes_rotate_original, fast_rcnn_decode_boxes_rotate, fast_rcnn_decode_boxes_rotate_reorder, fast_rcnn_score_rotate, num_of_objects_rotate, detection_category_rotate
Example #13
    def rpn_loss(self):
        '''
        :param: self.gtboxes_and_label: [n, 5]->[ymin, xmin, ymax, xmax, cls]
        :param: self.anchors: [m, 4]-> [ymin, xmin, ymax, xmax]
        :param:self.rpn_encode_boxes: [m, 4]->[ycenter, xcenter, h, w]
        :return:
        '''
        with tf.variable_scope('rpn_loss'):
            minibatch_indices,\
            minibatch_anchor_matched_gtboxes,\
            object_mask,\
            minibatch_label_onehot = self.make_minibatch()

            minibatch_anchors = tf.gather(self.anchors, minibatch_indices)
            minibatch_rpn_encode_boxes = tf.gather(self.rpn_encode_boxes,
                                                   minibatch_indices)
            minibatch_rpn_scores = tf.gather(self.rpn_scores,
                                             minibatch_indices)

            minibatch_encode_boxes_label = encode_and_decode.encode_boxes(
                minibatch_anchors, minibatch_anchor_matched_gtboxes,
                self.scale_factors)
            # summary
            positive_anchors_in_img = draw_box_with_tensor(
                img_batch=self.img_batch,
                boxes=minibatch_anchors * tf.expand_dims(object_mask, 1),
                text=tf.shape(tf.where(tf.equal(object_mask, 1)))[0])
            negative_mask = tf.cast(
                tf.logical_not(tf.cast(object_mask, tf.bool)), tf.float32)
            negative_anchors_in_img = draw_box_with_tensor(
                img_batch=self.img_batch,
                boxes=minibatch_anchors * tf.expand_dims(negative_mask, 1),
                text=tf.shape(tf.where(tf.equal(negative_mask, 1)))[0])

            minibatch_decode_anchors = encode_and_decode.decode_boxes(
                encode_boxes=minibatch_rpn_encode_boxes,
                reference_boxes=minibatch_anchors,
                scale_factors=self.scale_factors)
            # clip boxes into image shape
            minibatch_decode_anchors = boxes_utils.clip_boxes_to_img_boundaries(
                minibatch_decode_anchors, tf.shape(self.img_batch))
            positive_decode_anchor_in_img = \
                draw_box_with_tensor(img_batch=self.img_batch,
                                     boxes=minibatch_decode_anchors*tf.expand_dims(object_mask, 1),
                                     text=tf.shape(tf.where(tf.equal(object_mask, 1)))[0]
                                     )

            tf.summary.image('images/rpn/losses/anchors_positive_minibatch',
                             positive_anchors_in_img)
            tf.summary.image('images/rpn/losses/anchors_negative_minibatch',
                             negative_anchors_in_img)
            tf.summary.image('images/rpn/losses/decode_anchor_positive',
                             positive_decode_anchor_in_img)

            # losses
            with tf.variable_scope('rpn_localization_losses'):
                classify_loss = slim.losses.softmax_cross_entropy(
                    logits=minibatch_rpn_scores,
                    onehot_labels=minibatch_label_onehot)

                location_loss = losses.l1_smooth_losses(
                    predict_boxes=minibatch_rpn_encode_boxes,
                    gtboxes=minibatch_encode_boxes_label,
                    object_weights=object_mask)
                slim.losses.add_loss(
                    location_loss)  # add location loss to losses collections

            return location_loss, classify_loss
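
losses.l1_smooth_losses used above is the standard smooth-L1 regression loss from Fast R-CNN; a minimal sketch of the usual definition (the repository's version may weight and reduce differently):

import tensorflow as tf

def l1_smooth_losses_sketch(predict_boxes, gtboxes, object_weights):
    diff = tf.abs(predict_boxes - gtboxes)
    # quadratic near zero, linear beyond |x| = 1
    per_coord = tf.where(tf.less(diff, 1.0), 0.5 * tf.square(diff), diff - 0.5)
    # only positive (object) samples contribute to the location loss
    per_box = tf.reduce_sum(per_coord, axis=1) * object_weights
    # normalize by the number of positive samples
    return tf.reduce_sum(per_box) / tf.maximum(tf.reduce_sum(object_weights), 1.0)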
def postprocess_rpn_proposals(rpn_bbox_pred, rpn_cls_prob, img_shape, anchors,
                              is_training):
    '''

    :param rpn_bbox_pred: [-1, 4]
    :param rpn_cls_prob: [-1, 2]
    :param img_shape:
    :param anchors:[-1, 4]
    :param is_training:
    :return:
    '''

    if is_training:
        pre_nms_topN = cfgs.RPN_TOP_K_NMS_TRAIN  # default 12000
        post_nms_topN = cfgs.RPN_MAXIMUM_PROPOSAL_TARIN  # default 2000
        nms_thresh = cfgs.RPN_NMS_IOU_THRESHOLD  # default 0.7
    else:
        pre_nms_topN = cfgs.RPN_TOP_K_NMS_TEST  # default 6000
        post_nms_topN = cfgs.RPN_MAXIMUM_PROPOSAL_TEST  # default 300
        nms_thresh = cfgs.RPN_NMS_IOU_THRESHOLD  # default 0.7

    cls_prob = rpn_cls_prob[:, 1]

    # 1. decode boxes
    # clw note: this function takes the RPN's predicted box offsets, the predicted
    #           class probabilities (two classes), the image shape, and the generated
    #           anchors as input. After decoding, we obtain the actual predicted box
    #           positions. Because there may be fewer predicted boxes than the
    #           configured top-N count, the minimum of the two is taken before
    #           tf.image.non_max_suppression selects the final top-K boxes. In the
    #           original paper there are 12000 boxes before NMS (cfgs.RPN_TOP_K_NMS_TRAIN
    #           above) and 2000 after NMS (cfgs.RPN_MAXIMUM_PROPOSAL_TARIN above).
    #           No per-category classification happens here; this only selects the
    #           top-K boxes most likely to contain an object.
    decode_boxes = encode_and_decode.decode_boxes(
        encoded_boxes=rpn_bbox_pred,
        reference_boxes=anchors,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)

    # decode_boxes = encode_and_decode.decode_boxes(boxes=anchors,
    #                                               deltas=rpn_bbox_pred,
    #                                               scale_factor=None)

    # 2. clip to img boundaries
    decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
        decode_boxes=decode_boxes, img_shape=img_shape)

    # 3. get top N to NMS
    if pre_nms_topN > 0:  # clw note: after the initial ~60*40*9=20k boxes are produced, anchors
        #           crossing the image boundary are removed during training, which greatly
        #           reduces the count; the top-K boxes are then kept before NMS (default 12k
        #           here, 6k in the paper), and NMS runs afterwards.
        pre_nms_topN = tf.minimum(pre_nms_topN,
                                  tf.shape(decode_boxes)[0],
                                  name='avoid_unenough_boxes')
        cls_prob, top_k_indices = tf.nn.top_k(cls_prob, k=pre_nms_topN)
        decode_boxes = tf.gather(decode_boxes, top_k_indices)

    # 4. NMS
    keep = tf.image.non_max_suppression(boxes=decode_boxes,
                                        scores=cls_prob,
                                        max_output_size=post_nms_topN,
                                        iou_threshold=nms_thresh)

    final_boxes = tf.gather(decode_boxes, keep)
    final_probs = tf.gather(cls_prob, keep)

    return final_boxes, final_probs
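
A concrete check of the decode arithmetic described in the comment above, assuming scale factors of [10., 10., 5., 5.] (illustrative, not necessarily cfgs.ANCHOR_SCALE_FACTORS): zero deltas decode an anchor back to itself, and t_h = 5 * ln 2 doubles its height.

import numpy as np

anchor = np.array([0., 0., 100., 100.])            # [ymin, xmin, ymax, xmax]
deltas = np.array([0., 0., 5. * np.log(2.), 0.])   # [t_y, t_x, t_h, t_w]
ty, tx, th, tw = deltas / np.array([10., 10., 5., 5.])
h, w = anchor[2] - anchor[0], anchor[3] - anchor[1]
yc, xc = anchor[0] + h / 2, anchor[1] + w / 2
pred_h, pred_w = np.exp(th) * h, np.exp(tw) * w    # height doubles to 200
pred_yc, pred_xc = ty * h + yc, tx * w + xc
print(pred_yc - pred_h / 2, pred_xc - pred_w / 2,
      pred_yc + pred_h / 2, pred_xc + pred_w / 2)  # -> -50.0 0.0 150.0 100.0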
Example #15
def postprocess_fastrcnn_proposals(bbox_ppred, scores, img_shape, rois,
                                   is_training):
    '''

    :param bbox_ppred: [-1, (cfgs.CLASS_NUM+1) * 4]
    :param scores: [-1, cfgs.CLASS_NUM + 1]
    :param img_shape:
    :param rois: [-1, 4]
    :param is_training:
    :return:
    '''

    if is_training:
        pre_nms_topN = 2000  #cfgs.RPN_TOP_K_NMS_TRAIN
        post_nms_topN = 500  #cfgs.RPN_MAXIMUM_PROPOSAL_TARIN
        nms_thresh = 0.8  #cfgs.RPN_NMS_IOU_THRESHOLD
    else:
        pre_nms_topN = 1500  #cfgs.RPN_TOP_K_NMS_TEST
        post_nms_topN = 300  #cfgs.RPN_MAXIMUM_PROPOSAL_TEST
        nms_thresh = 0.7  #cfgs.RPN_NMS_IOU_THRESHOLD

    #rois = tf.stop_gradient(rois)
    #scores = tf.stop_gradient(scores)
    bbox_ppred = tf.reshape(bbox_ppred, [-1, cfgs.CLASS_NUM + 1, 4])
    #bbox_ppred = tf.stop_gradient(bbox_ppred)

    bbox_pred_list = tf.unstack(bbox_ppred, axis=1)
    score_list = tf.unstack(scores, axis=1)

    allclasses_boxes = []
    allclasses_scores = []
    categories = []
    for i in range(1, cfgs.CLASS_NUM + 1):
        # 1. decode boxes in each class
        tmp_encoded_box = bbox_pred_list[i]
        tmp_score = score_list[i]
        tmp_decoded_boxes = encode_and_decode.decode_boxes(
            encode_boxes=tmp_encoded_box,
            reference_boxes=rois,
            scale_factors=cfgs.ROI_SCALE_FACTORS)
        # tmp_decoded_boxes = encode_and_decode.decode_boxes(boxes=rois,
        #                                                    deltas=tmp_encoded_box,
        #                                                    scale_factor=cfgs.ROI_SCALE_FACTORS)

        # 2. clip to img boundaries
        tmp_decoded_boxes = boxes_utils.clip_boxes_to_img_boundaries(
            decode_boxes=tmp_decoded_boxes, img_shape=img_shape)

        # 3. get top N before NMS

        pre_nms_topN = tf.minimum(pre_nms_topN,
                                  tf.shape(tmp_decoded_boxes)[0],
                                  name='avoid_unenough_boxes')
        cls_prob, top_k_indices = tf.nn.top_k(tmp_score, k=pre_nms_topN)
        decode_boxes = tf.gather(tmp_decoded_boxes, top_k_indices)  # gather the top-k boxes by index

        # 4. NMS
        keep = tf.image.non_max_suppression(boxes=decode_boxes,
                                            scores=cls_prob,
                                            max_output_size=post_nms_topN,
                                            iou_threshold=nms_thresh)

        perclass_boxes = tf.gather(tmp_decoded_boxes, keep)
        perclass_scores = tf.gather(tmp_score, keep)

        allclasses_boxes.append(perclass_boxes)
        allclasses_scores.append(perclass_scores)
        categories.append(tf.ones_like(perclass_scores) * i)

    final_boxes = tf.concat(allclasses_boxes, axis=0)
    final_scores = tf.concat(allclasses_scores, axis=0)
    final_category = tf.concat(categories, axis=0)

    return final_boxes, final_scores, final_category
Example #16
def train():
    with tf.Graph().as_default():

        ##############
        # input data #
        ##############

        with tf.name_scope('get_batch'):
            data = Read_tfrecord()
            iterator, img_name, img, gtboxes_label, num_gtbox = \
                data.get_batch_data()

        with tf.name_scope('draw_gtboxes'):
            gtboxes_in_img = draw_box_with_tensor(img,
                                                  tf.reshape(
                                                      gtboxes_label,
                                                      [-1, 5])[:, :-1],
                                                  text=img_name)
        # original_img = tf.squeeze(img, axis=0)+tf.constant(cfg.DEPTH_MEAN)
        # original_img = tf.reshape(original_img, shape=tf.shape(img))
        # tf.summary.image('images/original_images', original_img)

        ####################
        # backbone network #
        ####################

        _, end_point = get_network_byname(net_name=cfg.NETWORK_NAME,
                                          inputs=img,
                                          num_classes=None,
                                          is_training=True,
                                          global_pool=False,
                                          output_stride=None,
                                          spatial_squeeze=False)

        ###############
        # rpn network #
        ###############
        rpn_net = build_rpn.RPN(
            net_name=cfg.NETWORK_NAME,
            inputs=img,
            gtboxes_and_label=tf.squeeze(gtboxes_label, axis=0),
            is_training=True,
            end_point=end_point,
            anchor_scales=cfg.ANCHOR_SCALES,
            anchor_ratios=cfg.ANCHOR_RATIOS,
            scale_factors=cfg.SCALE_FACTOR,
            base_anchor_size_list=cfg.BASE_ANCHOR_SIZE_LIST,
            stride=cfg.STRIDE,
            level=cfg.LEVEL,
            top_k_nms=cfg.TOP_K_NMS,
            share_head=cfg.IS_SHARE_HEAD,
            rpn_nms_iou_threshold=cfg.RPN_NMS_IOU_THRESHOLD,
            max_proposal_num=cfg.MAX_PROPOSAL_NUM,
            rpn_iou_positive_threshold=cfg.RPN_IOU_POSITIVE_THRESHOLD,
            rpn_iou_negtive_threshold=cfg.RPN_IOU_NEGATIVE_THRESHOLD,
            rpn_mini_batchsize=cfg.RPN_MINI_BATCH_SIZE,
            rpn_positive_ratio=cfg.POSITIVE_RATIO,
            remove_outside_anchors=cfg.IS_FILTER_OUTSIDE_ANCHORS,
            rpn_weight_decay=cfg.RPN_WEIGHT_DECAY)
        rpn_proposals_boxes, rpn_proposals_scores = rpn_net.rpn_proposals()
        rpn_location_loss, rpn_classification_loss = rpn_net.rpn_loss()
        rpn_net_loss = rpn_location_loss + rpn_classification_loss

        with tf.name_scope('draw_proposals'):
            rpn_object_indices = tf.reshape(tf.where(
                tf.greater(rpn_proposals_scores, 0.5)),
                                            shape=[-1])

            # clip boxes into image shape
            clip_rpn_proposals_boxes = clip_boxes_to_img_boundaries(
                rpn_proposals_boxes, tf.shape(img))
            rpn_object_boxes = tf.gather(clip_rpn_proposals_boxes,
                                         indices=rpn_object_indices)
            rpn_object_boxes_in_img = draw_box_with_tensor(
                img_batch=img,
                boxes=rpn_object_boxes,
                text=tf.shape(rpn_object_boxes)[0])

            rpn_proposals_boxes_in_img = draw_box_with_tensor(
                img_batch=img,
                boxes=clip_rpn_proposals_boxes,
                text=tf.shape(rpn_proposals_boxes)[0])

        #############
        # fast-rcnn #
        #############
        fast_rcnn = build_fast_rcnn.FastRcnn(
            img_batch=img,
            feature_dict=rpn_net.feature_pyramid,
            rpn_proposal_boxes=rpn_proposals_boxes,
            rpn_proposal_scores=rpn_proposals_scores,
            gtboxes_and_label=tf.squeeze(gtboxes_label, axis=0),
            crop_size=cfg.CROP_SIZE,
            roi_pooling_kernel_size=cfg.ROI_POOLING_KERNEL_SIZE,
            levels=cfg.LEVEL,
            is_training=True,
            weights_regularizer=cfg.FAST_RCNN_WEIGHTS_DECAY,
            num_cls=cfg.NUM_CLASSES,
            scale_factors=cfg.SCALE_FACTOR,
            fast_rcnn_nms_iou_threshold=cfg.FAST_RCNN_NMS_IOU_THRESHOLD,
            max_num_per_class=cfg.MAX_NUM_PER_CLASS,
            fast_rcnn_score_threshold=cfg.FAST_RCNN_SCORE_THRESHOLD,
            fast_rcnn_positive_threshold_iou=cfg.
            FAST_RCNN_POSITIVE_THRESHOLD_IOU,
            fast_rcnn_minibatch_size=cfg.FAST_RCNN_MINIBATCH_SIZE,
            fast_rcnn_positive_ratio=cfg.FAST_RCNN_POSITIVE_RATIO)
        fast_rcnn_decode_boxes, fast_rcnn_category, fast_rcnn_scores, num_object = \
            fast_rcnn.fast_rcnn_prediction()
        fast_rcnn_boxes_loss, fast_rcnn_cls_loss = fast_rcnn.fast_rcnn_loss()
        fast_rcnn_total_loss = fast_rcnn_boxes_loss + fast_rcnn_cls_loss

        with tf.name_scope('fast_rcnn_prediction_boxes'):
            fast_rcnn_prediction_in_image = draw_boxes_with_category(
                img_batch=img,
                boxes=fast_rcnn_decode_boxes,
                category=fast_rcnn_category,
                scores=fast_rcnn_scores)

        #####################
        # optimization part #
        #####################
        # global_step = tf.train.get_or_create_global_step()
        # total_loss = slim.losses.get_losses()
        # total_loss = tf.reduce_sum(total_loss * tf.constant(cfg.LOSS_WEIGHT, dtype=tf.float32))
        #
        # lr = tf.train.piecewise_constant(global_step,
        #                                  [60000],
        #                                  [cfg.BASE_LEARNING_RATE, cfg.BASE_LEARNING_RATE/10])
        #
        # optimizer = slim.train.MomentumOptimizer(learning_rate=lr,
        #                                          momentum=cfg.MOMENTUM,)
        #
        # train_op = optimizer.minimize(total_loss, global_step)

        global_step = tf.train.get_or_create_global_step()
        total_loss = slim.losses.get_total_loss()

        lr = tf.train.piecewise_constant(
            global_step, [60000],
            [cfg.BASE_LEARNING_RATE, cfg.BASE_LEARNING_RATE / 10])

        optimizer = tf.train.MomentumOptimizer(learning_rate=lr,
                                               momentum=cfg.MOMENTUM)

        train_op = slim.learning.create_train_op(total_loss, optimizer,
                                                 global_step)
        ###########
        # summary #
        ###########
        # ground truth boxes
        tf.summary.image('images/gtboxes', gtboxes_in_img)

        # rpn net's proposals
        tf.summary.image('images/rpn/proposals', rpn_proposals_boxes_in_img)
        tf.summary.image('images/rpn/objects', rpn_object_boxes_in_img)

        # rpn loss scale
        tf.summary.scalar('losses/rpn/location_loss', rpn_location_loss)
        tf.summary.scalar('losses/rpn/cls_loss', rpn_classification_loss)
        tf.summary.scalar('losses/rpn/total_loss', rpn_net_loss)

        # fast rcnn prediction boxes
        tf.summary.image('images/fast_rcnn/prediction_boxes',
                         fast_rcnn_prediction_in_image)

        # fast loss part
        tf.summary.scalar('losses/fast_rcnn/location_loss',
                          fast_rcnn_boxes_loss)
        tf.summary.scalar('losses/fast_rcnn/cls_loss', fast_rcnn_cls_loss)
        tf.summary.scalar('losses/fast_rcnn/total_loss', fast_rcnn_total_loss)
        tf.summary.scalar('losses/total_loss', total_loss)
        tf.summary.scalar('learning_rate', lr)

        if debug:
            # backbone network
            for key in end_point.keys():
                tf.summary.histogram('value/' + key, end_point[key])
            # weights
            for weight in slim.get_model_variables():
                tf.summary.histogram('weight/' + weight.name, weight.value())
            # rpn anchor
            image_with_anchor_list = debug_rpn.debug_rpn(rpn_net, img)
            for i, image_with_anchor in enumerate(image_with_anchor_list):
                tf.summary.image('anchors/image_with_anchors_' + str(i),
                                 image_with_anchor[0])
            # fast rcnn prediction
            tf.summary.tensor_summary('image_shape', tf.shape(img))
            tf.summary.tensor_summary('fast_rcnn_prediction_boxes',
                                      fast_rcnn_decode_boxes)

        summary_op = tf.summary.merge_all()
        summary_path = cfg.SUMMARY_PATH
        check_and_create_paths([summary_path])
        ################
        # session part #
        ################

        init_op = tf.group(tf.local_variables_initializer(),
                           tf.global_variables_initializer())

        checkpoint_path, restorer = get_restorer()

        with tf.Session() as sess:

            # initial part
            sess.run(init_op)
            sess.run(iterator.initializer)
            summary_writer = tf.summary.FileWriter(summary_path,
                                                   graph=sess.graph)
            saver = tf.train.Saver()
            if checkpoint_path:
                restorer.restore(sess, checkpoint_path)
                print('restore is done!!!')
            step = 0
            while True:
                try:
                    if step >= 30:
                        break
                    training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                                  time.localtime(time.time()))
                    start_time = time.time()
                    _global_step,\
                        _img_name,\
                        _rpn_location_loss,\
                        _rpn_classification_loss,\
                        _rpn_net_loss,\
                        _fast_rcnn_boxes_loss,\
                        _fast_rcnn_cls_loss,\
                        _fast_rcnn_total_loss,\
                        _total_loss,\
                        _train_op,\
                        summary_str\
                        = sess.run([global_step,
                                    img_name,
                                    rpn_location_loss,
                                    rpn_classification_loss,
                                    rpn_net_loss,
                                    fast_rcnn_boxes_loss,
                                    fast_rcnn_cls_loss,
                                    fast_rcnn_total_loss,
                                    total_loss,
                                    train_op,
                                    summary_op])
                    end_time = time.time()

                    # print the result in screen
                    if 1:  # step % 10 == 0:
                        cost_time = end_time - start_time
                        print(
                            """-----time:%s---step:%d---image name:%s---cost_time:%.4fs-----\n
                        total_loss:%.4f\n
                        rpn_boxes_loss:%.4f         rpn_class_loss:%.4f         rpn_total_loss:%.4f\n
                        fast_rcnn_boxes_loss:%.4f   fast_rcnn_class_loss:%.4f   fast_rcnn_total_loss:%.4f"""
                            % (training_time, _global_step, str(_img_name),
                               cost_time, _total_loss, _rpn_location_loss,
                               _rpn_classification_loss, _rpn_net_loss,
                               _fast_rcnn_boxes_loss, _fast_rcnn_cls_loss,
                               _fast_rcnn_total_loss))
                    # add summary
                    if step % 10 == 0:
                        # summary_str = sess.run(summary_op)
                        summary_writer.add_summary(summary_str, step)
                        summary_writer.flush()
                    # save ckpt
                    if step % 10000 == 0 and step > 1:
                        check_and_create_paths([cfg.CKPT_PATH])
                        save_path = os.path.join(cfg.CKPT_PATH,
                                                 'model_weights')
                        saver.save(sess, save_path, global_step)
                    step += 1
                except tf.errors.OutOfRangeError:
                    break
            summary_writer.close()
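
get_restorer is not shown in this listing; a minimal sketch, assuming it looks for the latest checkpoint under cfg.CKPT_PATH and builds a matching Saver (train() above already guards against a missing checkpoint):

import tensorflow as tf

def get_restorer_sketch():
    checkpoint_path = tf.train.latest_checkpoint(cfg.CKPT_PATH)
    # the restorer is only used when a checkpoint actually exists
    restorer = tf.train.Saver() if checkpoint_path else None
    return checkpoint_path, restorer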
    def get_rois(self):
        '''
        1) crop RoIs from the feature maps
        2) RoI Align or RoI Pooling; RoI Align is used here
        :return:
        all_level_rois: [N, 7, 7, C]
        all_level_proposals: [N, 5]
        all_level_proposals is matched with all_level_rois

        Because producing the rois shuffles the order of self.first_stage_decode_boxes,
        while decoding later requires each roi to correspond to its correct reference
        box, a matching all_level_proposals is regenerated here.
        '''
        levels = self.assign_level()

        all_level_roi_list = []
        all_level_proposal_rotate_list = []
        all_level_proposal_horizontal_list = []

        with tf.variable_scope('crop_roi_and_roi_align'):
            for i in range(self.min_level, self.max_level + 1):
                level_i_proposal_indices = tf.reshape(tf.where(tf.equal(levels, i)), [-1])
                level_i_rotate_proposals = tf.gather(self.rpn_proposals_boxes, level_i_proposal_indices)

                level_i_rotate_proposals = tf.cond(
                    tf.equal(tf.shape(level_i_rotate_proposals)[0], 0),
                    lambda: tf.constant([[0, 0, 1, 1, -90]], dtype=tf.float32),
                    lambda: level_i_rotate_proposals
                )  # avoid an empty proposal batch at this level; an empty slice breaks gradient backprop

                all_level_proposal_rotate_list.append(level_i_rotate_proposals)

                level_i_horizon_proposals = get_horizon_minAreaRectangle(level_i_rotate_proposals, False)
                level_i_horizon_proposals = clip_boxes_to_img_boundaries(level_i_horizon_proposals,
                                                                         img_shape=self.img_shape)

                xmin, ymin, xmax, ymax = tf.unstack(level_i_horizon_proposals, axis=1)

                h = tf.maximum(ymax-ymin, 0)
                w = tf.maximum(xmax-xmin, 0)
                x_c = (xmax+xmin) // 2
                y_c = (ymax+ymin) // 2
                theta = tf.ones_like(h) * -90
                level_i_horizontal_proposals = tf.transpose(tf.stack([x_c, y_c, h, w, theta]))
                all_level_proposal_horizontal_list.append(level_i_horizontal_proposals)

                img_h, img_w = tf.cast(self.img_shape[1], tf.float32), tf.cast(self.img_shape[2], tf.float32)
                normalize_ymin = ymin / img_h
                normalize_xmin = xmin / img_w
                normalize_ymax = ymax / img_h
                normalize_xmax = xmax / img_w

                level_i_cropped_rois = tf.image.crop_and_resize(self.feature_pyramid['P%d' % i],
                                                                boxes=tf.transpose(tf.stack([normalize_ymin, normalize_xmin,
                                                                                             normalize_ymax, normalize_xmax])),
                                                                box_ind=tf.zeros(shape=[tf.shape(level_i_rotate_proposals)[0], ],
                                                                                 dtype=tf.int32),
                                                                crop_size=[self.roi_size, self.roi_size],
                                                                name='CROP_AND_RESIZE'
                                                                )
                if cfgs.USE_MASK:
                    '''
                    RRPN-style affine rotation, implemented here with a rotated mask.
                    '''
                    roi_mask = tf_wrapper.get_mask_tf(level_i_rotate_proposals, self.roi_size)  # [<300, 14, 14]

                    roi_mask = tf.stack([roi_mask for _ in range(256)], axis=3)
                    level_i_cropped_rois = level_i_cropped_rois * roi_mask

                level_i_rois = slim.max_pool2d(level_i_cropped_rois,
                                               [self.roi_pool_kernel_size, self.roi_pool_kernel_size],
                                               stride=self.roi_pool_kernel_size)
                all_level_roi_list.append(level_i_rois)

            all_level_rois = tf.concat(all_level_roi_list, axis=0)
            all_level_rotate_proposals = tf.concat(all_level_proposal_rotate_list, axis=0)
            all_level_horizontal_proposals = tf.concat(all_level_proposal_horizontal_list, axis=0)

            return all_level_rois, all_level_rotate_proposals, all_level_horizontal_proposals
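
assign_level used above follows the FPN heuristic (Lin et al., 2017): a proposal of area w*h is routed to level k = floor(k0 + log2(sqrt(w*h) / 224)), clipped to the available pyramid levels. A minimal sketch for horizontal [ymin, xmin, ymax, xmax] proposals; the rotated variant used here would first take the horizontal bounding rectangle:

import tensorflow as tf

def assign_level_sketch(proposals, min_level=2, max_level=5, k0=4):
    ymin, xmin, ymax, xmax = tf.unstack(proposals, axis=1)
    h = tf.maximum(ymax - ymin, 0.)
    w = tf.maximum(xmax - xmin, 0.)
    # FPN level assignment; the epsilon guards against log(0)
    levels = tf.floor(k0 + tf.log(tf.sqrt(h * w + 1e-8) / 224.) / tf.log(2.))
    levels = tf.clip_by_value(levels, float(min_level), float(max_level))
    return tf.cast(levels, tf.int32)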
    def postprocess_fastrcnn(self, rois, bbox_ppred, scores, img_shape):
        '''

        :param rois: [-1, 4]
        :param bbox_ppred: [-1, (cfgs.CLASS_NUM+1) * 4]
        :param scores: [-1, cfgs.CLASS_NUM + 1]
        :param img_shape:
        :return:
        '''

        with tf.name_scope('postprocess_fastrcnn'):
            rois = tf.stop_gradient(rois)
            scores = tf.stop_gradient(scores)
            bbox_ppred = tf.reshape(bbox_ppred, [-1, cfgs.CLASS_NUM + 1, 4])
            bbox_ppred = tf.stop_gradient(bbox_ppred)

            bbox_pred_list = tf.unstack(bbox_ppred, axis=1)
            score_list = tf.unstack(scores, axis=1)

            allclasses_boxes = []
            allclasses_scores = []
            categories = []
            for i in range(1, cfgs.CLASS_NUM + 1):

                # 1. decode boxes in each class
                tmp_encoded_box = bbox_pred_list[i]
                tmp_score = score_list[i]
                tmp_decoded_boxes = encode_and_decode.decode_boxes(
                    encoded_boxes=tmp_encoded_box,
                    reference_boxes=rois,
                    scale_factors=cfgs.ROI_SCALE_FACTORS)
                # tmp_decoded_boxes = encode_and_decode.decode_boxes(boxes=rois,
                #                                                    deltas=tmp_encoded_box,
                #                                                    scale_factor=cfgs.ROI_SCALE_FACTORS)

                # 2. clip to img boundaries
                tmp_decoded_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                    decode_boxes=tmp_decoded_boxes, img_shape=img_shape)

                # 3. NMS
                keep = tf.image.non_max_suppression(
                    boxes=tmp_decoded_boxes,
                    scores=tmp_score,
                    max_output_size=cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
                    iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD)

                perclass_boxes = tf.gather(tmp_decoded_boxes, keep)
                perclass_scores = tf.gather(tmp_score, keep)

                allclasses_boxes.append(perclass_boxes)
                allclasses_scores.append(perclass_scores)
                categories.append(tf.ones_like(perclass_scores) * i)

            final_boxes = tf.concat(allclasses_boxes, axis=0)
            final_scores = tf.concat(allclasses_scores, axis=0)
            final_category = tf.concat(categories, axis=0)

            if self.is_training:
                '''
                In training, show the detections in TensorBoard.
                '''
                kept_indices = tf.reshape(
                    tf.where(
                        tf.greater_equal(final_scores,
                                         cfgs.SHOW_SCORE_THRSHOLD)), [-1])

                final_boxes = tf.gather(final_boxes, kept_indices)
                final_scores = tf.gather(final_scores, kept_indices)
                final_category = tf.gather(final_category, kept_indices)

        return final_boxes, final_scores, final_category
    def fast_rcnn_loss(self):
        '''
        :return:
        '''
        with tf.variable_scope('fast_rcnn_loss'):
            minibatch_indices, minibatch_gtboxes, minibatch_onehot_label, minibatch_object_mask = self.make_minibatch(
            )

            minibatch_proposal_boxes = tf.gather(self.rois_boxes,
                                                 minibatch_indices)
            minibatch_predict_scores = tf.gather(self.fast_rcnn_cls_scores,
                                                 minibatch_indices)
            minibatch_predict_encode_boxes = tf.gather(
                self.fast_rcnn_encode_boxes, minibatch_indices)

            # encode minibatch_gtboxes
            minibatch_encode_gtboxes = encode_boxes(
                anchors=minibatch_proposal_boxes,
                gtboxes=minibatch_gtboxes,
                scale_factors=self.scale_factors)

            # [minibatch_size, 4]->[minibatch_size, num_cls*4]
            minibatch_encode_gtboxes = tf.tile(minibatch_encode_gtboxes,
                                               [1, self.num_cls])

            # class_weight_mask [minibatch_size, num_cls*4]
            class_weight_mask_list = []
            category_list = tf.unstack(minibatch_onehot_label, axis=1)
            for i in range(1, self.num_cls + 1):
                class_weight = tf.ones([self.fast_rcnn_minibatch_size, 4],
                                       dtype=tf.float32)
                class_weight = class_weight * tf.expand_dims(category_list[i],
                                                             axis=1)
                class_weight_mask_list.append(class_weight)

            class_weight_mask = tf.concat(class_weight_mask_list, axis=1)

            # cls loss
            with tf.variable_scope('fast_rcnn_cls_losses'):
                fast_rcnn_cls_loss = slim.losses.softmax_cross_entropy(
                    logits=minibatch_predict_scores,
                    onehot_labels=minibatch_onehot_label)

            # boxes loss
            with tf.variable_scope('fast_rcnn_boxes_losses'):
                fast_rcnn_boxes_loss = losses.l1_smooth_losses(
                    predict_boxes=minibatch_predict_encode_boxes,
                    gtboxes=minibatch_encode_gtboxes,
                    object_weights=minibatch_object_mask,
                    classes_weights=class_weight_mask)
                slim.losses.add_loss(fast_rcnn_boxes_loss)
            # check loss and decode boxes
            # summary positive proposals and negative proposals
            minibatch_proposal_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                minibatch_proposal_boxes, self.img_shape)
            minibatch_positive_proposals = \
                draw_box_with_tensor(img_batch=self.img_batch,
                                     boxes=minibatch_proposal_boxes*tf.expand_dims(tf.cast(minibatch_object_mask,
                                                                                           tf.float32),
                                                                                   1),
                                     text=tf.shape(tf.where(tf.equal(minibatch_object_mask, 1)))[0]
                                     )

            minibatch_negative_mask = tf.cast(
                tf.logical_not(tf.cast(minibatch_object_mask, tf.bool)),
                tf.float32)
            minibatch_negative_proposals = \
                draw_box_with_tensor(img_batch=self.img_batch,
                                     boxes=minibatch_proposal_boxes * tf.expand_dims(minibatch_negative_mask, 1),
                                     text=tf.shape(tf.where(tf.equal(minibatch_negative_mask, 1)))[0]
                                     )
            tf.summary.image('minibatch_positive_proposals',
                             minibatch_positive_proposals)
            tf.summary.image('minibatch_negative_proposal',
                             minibatch_negative_proposals)
            # check the cls tensor part
            tf.summary.tensor_summary('minibatch_object_mask',
                                      minibatch_object_mask)
            tf.summary.tensor_summary('class_weight_mask', class_weight_mask)
            tf.summary.tensor_summary('minibatch_predict_encode_boxes',
                                      minibatch_predict_encode_boxes)
            tf.summary.tensor_summary('minibatch_encode_gtboxes',
                                      minibatch_encode_gtboxes)
            tf.summary.tensor_summary('location_loss', fast_rcnn_boxes_loss)
            tf.summary.tensor_summary('logits', minibatch_predict_scores)
            tf.summary.tensor_summary('one_hot', minibatch_onehot_label)

        return fast_rcnn_boxes_loss, fast_rcnn_cls_loss
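
# Hedged sketch (not from the original repo): one common way to fold the two
# losses returned above into a single training objective together with the
# RPN losses. The standalone-function form and the default weight are
# assumptions; the repo itself registers losses via slim.losses.add_loss()
# and would typically sum them with slim.losses.get_total_loss().
def total_detection_loss(rpn_cls_loss, rpn_boxes_loss,
                         fast_rcnn_cls_loss, fast_rcnn_boxes_loss,
                         boxes_loss_weight=1.0):
    # simple weighted sum; regularization terms would be added on top
    return (rpn_cls_loss + rpn_boxes_loss +
            fast_rcnn_cls_loss + boxes_loss_weight * fast_rcnn_boxes_loss)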
Exemple #20
        def batch_slice_head_proposals(rpn_proposal_bbox, encode_boxes,
                                       categories, scores, image_height,
                                       image_width):
            """
            mutilclass NMS
            :param rpn_proposal_bbox: (N, 4)
            :param encode_boxes: (N, 4)
            :param categories:(N, )
            :param scores: (N, )
            :param image_window:(y1, x1, y2, x2) the boundary of image
            :return:
            detection_boxes_scores_labels : (-1, 6)[y1, x1, y2, x2, scores, labels]
            """
            with tf.name_scope('head_proposals'):
                # trim away zero-padded proposals
                rpn_proposal_bbox, non_zeros = boxes_utils.trim_zeros_graph(
                    rpn_proposal_bbox, name="trim_proposals_detection")
                encode_boxes = tf.boolean_mask(encode_boxes, non_zeros)
                categories = tf.boolean_mask(categories, non_zeros)
                scores = tf.boolean_mask(scores, non_zeros)
                fast_rcnn_decode_boxes = encode_and_decode.decode_boxes(
                    encode_boxes=encode_boxes,
                    reference_boxes=rpn_proposal_bbox,
                    scale_factors=cfgs.BBOX_STD_DEV)
                fast_rcnn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                    fast_rcnn_decode_boxes, image_height, image_width)

                # remove the background
                keep = tf.cast(tf.where(categories > 0)[:, 0], tf.int32)
                if cfgs.DEBUG:
                    print_categories = tf.gather(categories, keep)
                    print_scores = tf.gather(scores, keep)
                    num_item = tf.minimum(tf.shape(print_scores)[0], 100)
                    print_scores_vision, print_index = tf.nn.top_k(
                        print_scores, k=num_item)
                    print_categories_vision = tf.gather(
                        print_categories, print_index)
                    boxes_utils.print_tensors(print_categories_vision,
                                              "categories")
                    boxes_utils.print_tensors(print_scores_vision, "scores")
                # Filter out low confidence boxes
                if cfgs.FINAL_SCORE_THRESHOLD:  # 0.7
                    conf_keep = tf.cast(
                        tf.where(scores >= cfgs.FINAL_SCORE_THRESHOLD)[:, 0],
                        tf.int32)
                    keep = tf.sets.set_intersection(
                        tf.expand_dims(keep, 0), tf.expand_dims(conf_keep, 0))
                    keep = tf.sparse_tensor_to_dense(keep)[0]
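                # tf.sets.set_intersection on the rank-expanded index vectors
                # keeps only the indices that pass both the background filter
                # and the score threshold; sparse_tensor_to_dense drops the
                # sparse padding again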

                pre_nms_class_ids = tf.gather(categories, keep)
                pre_nms_scores = tf.gather(scores, keep)
                pre_nms_rois = tf.gather(fast_rcnn_decode_boxes, keep)
                unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

                def nms_keep_map(class_id):
                    """Apply Non-Maximum Suppression on ROIs of the given class."""
                    # Indices of ROIs of the given class
                    ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
                    # Apply NMS
                    class_keep = tf.image.non_max_suppression(
                        tf.gather(pre_nms_rois, ixs),
                        tf.gather(pre_nms_scores, ixs),
                        max_output_size=cfgs.DETECTION_MAX_INSTANCES,  # at most 200 boxes
                        iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD
                    )  # 0.3; if set too high, everything gets filtered out
                    # Map indices back into the original keep set
                    class_keep = tf.gather(keep, tf.gather(ixs, class_keep))
                    # Pad with -1 so returned tensors have the same shape
                    gap = cfgs.DETECTION_MAX_INSTANCES - tf.shape(
                        class_keep)[0]
                    class_keep = tf.pad(class_keep, [(0, gap)],
                                        mode='CONSTANT',
                                        constant_values=-1)
                    # Set shape so map_fn() can infer result shape
                    class_keep.set_shape([cfgs.DETECTION_MAX_INSTANCES])
                    return class_keep
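
                # nms_keep_map pads each per-class result with -1 so that
                # every slice has the same static shape, which tf.map_fn
                # below requires; the -1 entries are stripped after the merge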

                # 2. Map over class IDs
                nms_keep = tf.map_fn(nms_keep_map,
                                     unique_pre_nms_class_ids,
                                     dtype=tf.int32)
                # 3. Merge results into one list, and remove -1 padding
                nms_keep = tf.reshape(nms_keep, [-1])
                nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
                # 4. Compute intersection between keep and nms_keep
                keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                                tf.expand_dims(nms_keep, 0))
                keep = tf.sparse_tensor_to_dense(keep)[0]
                # Keep top detections
                roi_count = cfgs.DETECTION_MAX_INSTANCES
                class_scores_keep = tf.gather(scores, keep)
                num_keep = tf.minimum(
                    tf.shape(class_scores_keep)[0], roi_count)
                top_ids = tf.nn.top_k(class_scores_keep,
                                      k=num_keep,
                                      sorted=True)[1]
                keep = tf.gather(keep, top_ids)

                # Arrange output as [N, (y1, x1, y2, x2, class_id, score)]
                # Coordinates are normalized.
                detections = tf.concat(
                    [tf.gather(fast_rcnn_decode_boxes, keep),
                     tf.to_float(tf.gather(categories, keep))[..., tf.newaxis],
                     tf.gather(scores, keep)[..., tf.newaxis]],
                    axis=1)

                # Pad with zeros if detections < DETECTION_MAX_INSTANCES
                gap = cfgs.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
                detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")

                return detections
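
        # Hedged usage sketch (assumption; the calling code is not shown):
        # the per-image function above is typically mapped over the batch so
        # that every image yields a fixed-size [DETECTION_MAX_INSTANCES, 6]
        # tensor. All tensor names below are illustrative.
        #
        #   detections_batch = tf.map_fn(
        #       lambda args: batch_slice_head_proposals(
        #           args[0], args[1], args[2], args[3],
        #           image_height, image_width),
        #       (rpn_proposal_bboxes, encode_boxes_batch,
        #        categories_batch, scores_batch),
        #       dtype=tf.float32)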
Exemple #21
    def get_rois(self):
        '''
        1) get rois from the feature map
        2) roi align or roi pooling; roi align is used here
        :return:
        all_level_rois: [N, 7, 7, C]
        all_level_proposals: [N, 5]
        all_level_proposals is matched with all_level_rois

        Because generating the rois shuffles the order of
        self.first_stage_decode_boxes, while decoding must pair each roi with
        its correct reference box, a matched all_level_proposals has to be
        regenerated.
        '''
        levels = self.assign_level()

        all_level_roi_list = []
        all_level_proposal_rotate_list = []
        all_level_proposal_horizontal_list = []

        with tf.variable_scope('crop_roi_and_roi_align'):
            for i in range(self.min_level, self.max_level + 1):
                level_i_proposal_indices = tf.reshape(
                    tf.where(tf.equal(levels, i)), [-1])
                level_i_rotate_proposals = tf.gather(self.rpn_proposals_boxes,
                                                     level_i_proposal_indices)

                level_i_rotate_proposals = tf.cond(
                    tf.equal(tf.shape(level_i_rotate_proposals)[0], 0),
                    lambda: tf.constant([[0, 0, 1, 1, -90]], dtype=tf.float32),
                    lambda: level_i_rotate_proposals
                )  # guard against an empty set of level-i proposals, which would otherwise break gradient backpropagation

                all_level_proposal_rotate_list.append(level_i_rotate_proposals)

                level_i_horizon_proposals = get_horizon_minAreaRectangle(
                    level_i_rotate_proposals, False)
                level_i_horizon_proposals = clip_boxes_to_img_boundaries(
                    level_i_horizon_proposals, img_shape=self.img_shape)

                xmin, ymin, xmax, ymax = tf.unstack(level_i_horizon_proposals,
                                                    axis=1)

                h = tf.maximum(ymax - ymin, 0)
                w = tf.maximum(xmax - xmin, 0)
                x_c = (xmax + xmin) // 2
                y_c = (ymax + ymin) // 2
                theta = tf.ones_like(h) * -90
                level_i_horizontal_proposals = tf.transpose(
                    tf.stack([x_c, y_c, h, w, theta]))
                all_level_proposal_horizontal_list.append(
                    level_i_horizontal_proposals)
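                # e.g. (xmin, ymin, xmax, ymax) = (10, 20, 50, 60) becomes
                # (x_c, y_c, h, w, theta) = (30, 40, 40, 40, -90);
                # theta = -90 marks an axis-aligned box in the rotated format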

                img_h, img_w = tf.cast(self.img_shape[1], tf.float32), tf.cast(
                    self.img_shape[2], tf.float32)
                normalize_ymin = ymin / img_h
                normalize_xmin = xmin / img_w
                normalize_ymax = ymax / img_h
                normalize_xmax = xmax / img_w

                level_i_cropped_rois = tf.image.crop_and_resize(
                    self.feature_pyramid['P%d' % i],
                    boxes=tf.transpose(
                        tf.stack([
                            normalize_ymin, normalize_xmin, normalize_ymax,
                            normalize_xmax
                        ])),
                    box_ind=tf.zeros(shape=[
                        tf.shape(level_i_rotate_proposals)[0],
                    ],
                                     dtype=tf.int32),
                    crop_size=[self.roi_size, self.roi_size],
                    name='CROP_AND_RESIZE')
                if cfgs.USE_MASK:
                    '''
                    RRPN-style affine rotation, implemented here with a rotated mask.
                    '''
                    roi_mask = tf_wrapper.get_mask_tf(
                        level_i_rotate_proposals,
                        self.roi_size)  # [<300, 14, 14]
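
                    # replicate the [N, roi_size, roi_size] binary mask across
                    # all 256 feature channels so it can zero out features
                    # outside the rotated box (expand_dims + tf.tile would be
                    # an equivalent, cheaper alternative)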

                    roi_mask = tf.stack([roi_mask for _ in range(256)], axis=3)
                    level_i_cropped_rois = level_i_cropped_rois * roi_mask

                level_i_rois = slim.max_pool2d(
                    level_i_cropped_rois,
                    [self.roi_pool_kernel_size, self.roi_pool_kernel_size],
                    stride=self.roi_pool_kernel_size)
                all_level_roi_list.append(level_i_rois)

            all_level_rois = tf.concat(all_level_roi_list, axis=0)
            all_level_rotate_proposals = tf.concat(
                all_level_proposal_rotate_list, axis=0)
            all_level_horizontal_proposals = tf.concat(
                all_level_proposal_horizontal_list, axis=0)

            return all_level_rois, all_level_rotate_proposals, all_level_horizontal_proposals
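
    # Hedged sketch (assumption; assign_level itself is not shown in this
    # snippet): FPN implementations typically pick a pyramid level per
    # proposal with k = floor(k0 + log2(sqrt(w * h) / 224)) from the FPN
    # paper, clipped to [min_level, max_level]; k0 = 4 is an assumption.
    def assign_level_sketch(self):
        with tf.name_scope('assign_levels'):
            # rotated proposals are [x_c, y_c, h, w, theta]
            _, _, h, w, _ = tf.unstack(self.rpn_proposals_boxes, axis=1)
            levels = tf.floor(
                4.0 + tf.log(tf.sqrt(w * h + 1e-8) / 224.0) / tf.log(2.0))
            levels = tf.maximum(levels, tf.ones_like(levels) * self.min_level)
            levels = tf.minimum(levels, tf.ones_like(levels) * self.max_level)
            return tf.cast(levels, tf.int32)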