Example #1
0
    def rpn_losses(self):
        """Build the RPN losses for one sampled anchor minibatch.

        A minibatch is drawn through ``self.make_minibatch``; the matched
        ground-truth boxes are encoded against the sampled anchors and used as
        regression targets. Image summaries are written for the positive
        anchors, the negative anchors and the five highest-scoring decoded
        boxes.

        Returns:
            tuple: ``(location_loss, classification_loss)``.
        """
        with tf.variable_scope('rpn_losses'):
            (sample_ids, matched_gtboxes, object_mask,
             labels_one_hot) = self.make_minibatch(self.anchors)

            # Keep only the anchors and raw RPN outputs chosen for the minibatch.
            sampled_anchors = tf.gather(self.anchors, sample_ids)
            predicted_deltas = tf.gather(self.rpn_encode_boxes, sample_ids)
            predicted_scores = tf.gather(self.rpn_scores, sample_ids)

            # Regression targets: matched gt boxes encoded relative to anchors.
            target_deltas = encode_and_decode.encode_boxes(
                unencode_boxes=matched_gtboxes,
                reference_boxes=sampled_anchors,
                scale_factors=self.scale_factors)

            # Multiplying by the mask zeroes every anchor that is not positive.
            positive_boxes = sampled_anchors * tf.expand_dims(object_mask, 1)
            positive_count = tf.shape(tf.where(tf.equal(object_mask, 1.0)))[0]
            positive_anchors_in_img = draw_box_with_color(
                self.img_batch, positive_boxes, text=positive_count)

            # The negative mask is the logical complement of the object mask.
            negative_mask = tf.cast(
                tf.logical_not(tf.cast(object_mask, tf.bool)), tf.float32)
            negative_boxes = sampled_anchors * tf.expand_dims(negative_mask, 1)
            negative_count = tf.shape(tf.where(tf.equal(object_mask, 0.0)))[0]
            negative_anchors_in_img = draw_box_with_color(
                self.img_batch, negative_boxes, text=negative_count)

            # Decode the network's predictions back to boxes for visualisation.
            decoded_boxes = encode_and_decode.decode_boxes(
                encode_boxes=predicted_deltas,
                reference_boxes=sampled_anchors,
                scale_factors=self.scale_factors)

            tf.summary.image('/positive_anchors', positive_anchors_in_img)
            tf.summary.image('/negative_anchors', negative_anchors_in_img)

            # Rank by column 1 of the raw (un-softmaxed) scores.
            top_k_scores, top_k_indices = tf.nn.top_k(
                predicted_scores[:, 1], k=5)
            top_boxes = tf.gather(decoded_boxes, top_k_indices)
            top_count = tf.shape(top_k_scores)[0]
            top_detections_in_img = draw_box_with_color(
                self.img_batch, top_boxes, text=top_count)
            tf.summary.image('/top_5', top_detections_in_img)

            # losses
            with tf.variable_scope('rpn_location_loss'):
                location_loss = losses.l1_smooth_losses(
                    predict_boxes=predicted_deltas,
                    gtboxes=target_deltas,
                    object_weights=object_mask)
                # Register the smooth-L1 loss in the losses collection.
                slim.losses.add_loss(location_loss)

            with tf.variable_scope('rpn_classification_loss'):
                classification_loss = slim.losses.softmax_cross_entropy(
                    logits=predicted_scores,
                    onehot_labels=labels_one_hot)

            return location_loss, classification_loss
Example #2
0
def compute_targets(ex_rois, gt_rois):
    """Compute bounding-box regression targets for an image.

    The ground-truth boxes ``gt_rois`` are encoded relative to the reference
    boxes ``ex_rois`` using ``cfgs.ANCHOR_SCALE_FACTORS``.

    :param ex_rois: reference boxes the targets are expressed against.
    :param gt_rois: ground-truth boxes to encode.
    :return: the result of ``encode_and_decode.encode_boxes``.
    """
    return encode_and_decode.encode_boxes(
        unencode_boxes=gt_rois,
        reference_boxes=ex_rois,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)
Example #3
0
    def rpn_losses(self):
        """Build the RPN losses and collect confident RPN predictions.

        A minibatch is drawn through ``self.make_minibatch``. Besides the two
        losses, the 20 sampled boxes with the highest softmax score in column 1
        are drawn into a summary image, and those whose score exceeds
        ``cfgs.FINAL_SCORE_THRESHOLD`` are returned.

        Returns:
            tuple: ``(location_loss, classification_loss, rpn_predict_boxes,
            rpn_predict_scores)``.
        """
        with tf.variable_scope('rpn_losses'):
            (sample_ids, matched_gtboxes, object_mask,
             labels_one_hot) = self.make_minibatch(self.anchors)

            # Keep only the anchors and raw RPN outputs chosen for the minibatch.
            sampled_anchors = tf.gather(self.anchors, sample_ids)
            predicted_deltas = tf.gather(self.rpn_encode_boxes, sample_ids)
            predicted_scores = tf.gather(self.rpn_scores, sample_ids)

            # Regression targets: matched gt boxes encoded relative to anchors.
            target_deltas = encode_and_decode.encode_boxes(
                unencode_boxes=matched_gtboxes,
                reference_boxes=sampled_anchors,
                scale_factors=self.scale_factors)

            # Multiplying by the mask zeroes every anchor that is not positive.
            positive_boxes = sampled_anchors * tf.expand_dims(object_mask, 1)
            positive_count = tf.shape(tf.where(tf.equal(object_mask, 1.0)))[0]
            positive_anchors_in_img = draw_box_with_color(
                self.img_batch, positive_boxes, text=positive_count)

            # The negative mask is the logical complement of the object mask.
            negative_mask = tf.cast(
                tf.logical_not(tf.cast(object_mask, tf.bool)), tf.float32)
            negative_boxes = sampled_anchors * tf.expand_dims(negative_mask, 1)
            negative_count = tf.shape(tf.where(tf.equal(object_mask, 0.0)))[0]
            negative_anchors_in_img = draw_box_with_color(
                self.img_batch, negative_boxes, text=negative_count)

            # Decode the network's predictions back to boxes.
            decoded_boxes = encode_and_decode.decode_boxes(
                encode_boxes=predicted_deltas,
                reference_boxes=sampled_anchors,
                scale_factors=self.scale_factors)

            tf.summary.image('/positive_anchors', positive_anchors_in_img)
            tf.summary.image('/negative_anchors', negative_anchors_in_img)

            # Softmax is applied to all scores first, then the minibatch rows
            # are gathered and ranked by column 1.
            all_softmax_scores = slim.softmax(self.rpn_scores)
            sampled_softmax_scores = tf.gather(all_softmax_scores, sample_ids)
            top_k_scores, top_k_indices = tf.nn.top_k(
                sampled_softmax_scores[:, 1], k=20)

            top_k_boxes = tf.gather(decoded_boxes, top_k_indices)
            top_detections_in_img = draw_boxes_with_scores(
                self.img_batch, boxes=top_k_boxes, scores=top_k_scores)

            tf.summary.image('/top_20', top_detections_in_img)

            # Keep only the top-k entries scoring above the final threshold.
            above_threshold = tf.greater(
                top_k_scores, cfgs.FINAL_SCORE_THRESHOLD)
            kept_ids = tf.reshape(tf.where(above_threshold), [-1])
            rpn_predict_boxes = tf.gather(top_k_boxes, kept_ids)
            rpn_predict_scores = tf.gather(top_k_scores, kept_ids)

            # losses
            with tf.variable_scope('rpn_location_loss'):
                location_loss = losses.l1_smooth_losses(
                    predict_boxes=predicted_deltas,
                    gtboxes=target_deltas,
                    object_weights=object_mask)
                # Register the smooth-L1 loss in the losses collection.
                slim.losses.add_loss(location_loss)

            with tf.variable_scope('rpn_classification_loss'):
                classification_loss = slim.losses.softmax_cross_entropy(
                    logits=predicted_scores,
                    onehot_labels=labels_one_hot)

            return (location_loss, classification_loss,
                    rpn_predict_boxes, rpn_predict_scores)
    def fast_rcnn_loss(self):
        """Build the Fast R-CNN head losses for a sampled proposal minibatch.

        The matched ground-truth boxes are encoded against the sampled
        proposals and tiled ``self.num_classes`` times so they line up with the
        per-class box predictions. A per-class weight matrix built from the
        one-hot labels (columns 1..num_classes) is passed to the location loss.

        Returns:
            tuple: ``(fast_rcnn_location_loss, fast_rcnn_classification_loss)``.
        """
        with tf.variable_scope('fast_rcnn_loss'):
            (sample_ids, matched_gtboxes, object_mask,
             labels_one_hot) = self.fast_rcnn_minibatch(
                 self.fast_rcnn_all_level_proposals)

            sampled_proposals = tf.gather(
                self.fast_rcnn_all_level_proposals, sample_ids)

            # [minibatch_size, num_classes*4]
            predicted_deltas = tf.gather(
                self.fast_rcnn_encode_boxes, sample_ids)
            predicted_scores = tf.gather(self.fast_rcnn_scores, sample_ids)

            # Regression targets: matched gt boxes encoded relative to proposals.
            target_deltas = encode_and_decode.encode_boxes(
                unencode_boxes=matched_gtboxes,
                reference_boxes=sampled_proposals,
                scale_factors=self.scale_factors)

            # Repeat the targets once per class: [minibatch_size, num_classes*4]
            target_deltas = tf.tile(target_deltas, [1, self.num_classes])

            # One [minibatch_size, 4] block per class, filled with that class's
            # one-hot column; column 0 of the one-hot labels is skipped.
            one_hot_columns = tf.unstack(labels_one_hot, axis=1)
            class_weights = tf.concat(
                [
                    tf.ones(shape=[tf.shape(predicted_deltas)[0], 4],
                            dtype=tf.float32)
                    * tf.expand_dims(one_hot_columns[class_id], axis=1)
                    for class_id in range(1, self.num_classes + 1)
                ],
                axis=1)  # [minibatch_size, num_classes*4]

            # loss
            with tf.variable_scope('fast_rcnn_classification_loss'):
                fast_rcnn_classification_loss = \
                    slim.losses.softmax_cross_entropy(
                        logits=predicted_scores,
                        onehot_labels=labels_one_hot)
            with tf.variable_scope('fast_rcnn_location_loss'):
                fast_rcnn_location_loss = losses.l1_smooth_losses(
                    predict_boxes=predicted_deltas,
                    gtboxes=target_deltas,
                    object_weights=object_mask,
                    classes_weights=class_weights)
                slim.losses.add_loss(fast_rcnn_location_loss)

            return fast_rcnn_location_loss, fast_rcnn_classification_loss
Example #5
0
def compute_targets(ex_rois, gt_rois):
    """
    Compute bounding-box regression targets for an image.

    Both inputs must have the same number of rows and exactly four columns;
    this is checked with assertions before encoding.

    :param ex_rois: reference boxes, shape (K, 4)
    :param gt_rois: ground-truth boxes, shape (K, 4)
    :return: ``gt_rois`` encoded against ``ex_rois`` with
        ``cfgs.ANCHOR_SCALE_FACTORS``
    """
    # Row counts must match so boxes pair up one-to-one.
    assert ex_rois.shape[0] == gt_rois.shape[0]
    # Each box is described by four values.
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    return encode_and_decode.encode_boxes(
        unencode_boxes=gt_rois,
        reference_boxes=ex_rois,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)
def _compute_targets_h(ex_rois, gt_rois_h, labels):
    """Compute labelled bounding-box regression targets for an image.

    Each output row is ``[label, tx, ty, tw, th]``: the label placed in front
    of the four encoded box targets, cast to float32.

    :param ex_rois: reference boxes, shape (K, 4)
    :param gt_rois_h: ground-truth boxes, shape (K, 4)
    :param labels: one label per box; turned into a column before stacking
    :return: float32 array combining the labels and the encoded targets
    """
    # Row counts must match so boxes pair up one-to-one.
    assert ex_rois.shape[0] == gt_rois_h.shape[0]
    # Each box is described by four values.
    assert ex_rois.shape[1] == 4
    assert gt_rois_h.shape[1] == 4

    box_targets = encode_and_decode.encode_boxes(
        unencode_boxes=gt_rois_h,
        reference_boxes=ex_rois,
        scale_factors=cfgs.ROI_SCALE_FACTORS)

    label_column = labels[:, np.newaxis]
    labelled_targets = np.hstack((label_column, box_targets))
    # copy=False avoids a copy when the data is already float32.
    return labelled_targets.astype(np.float32, copy=False)
    def fast_rcnn_loss(self):
        """Build the Fast R-CNN head losses and their image summaries.

        Positive and negative proposals of the sampled minibatch are drawn into
        summary images. When ``cfgs.CLASS_NUM == 1`` the five decoded boxes
        with the highest softmax score in column 1 are drawn as well. The
        matched ground-truth boxes are encoded against the proposals, tiled
        ``self.num_classes`` times and compared with the per-class predictions.

        Returns:
            tuple: ``(fast_rcnn_location_loss, fast_rcnn_classification_loss)``.
        """
        with tf.variable_scope('fast_rcnn_loss'):
            (sample_ids, matched_gtboxes, object_mask,
             labels_one_hot) = self.fast_rcnn_minibatch(
                 self.fast_rcnn_all_level_proposals)

            sampled_proposals = tf.gather(
                self.fast_rcnn_all_level_proposals, sample_ids)

            # [minibatch_size, num_classes*4]
            predicted_deltas = tf.gather(
                self.fast_rcnn_encode_boxes, sample_ids)
            predicted_scores = tf.gather(self.fast_rcnn_scores, sample_ids)

            # Multiplying by the mask zeroes every proposal that is not positive.
            positive_boxes = sampled_proposals * tf.expand_dims(object_mask, 1)
            positive_count = tf.shape(
                tf.where(tf.equal(object_mask, 1.0)))[0]
            positive_proposals_in_img = draw_box_with_color(
                self.img_batch, positive_boxes, text=positive_count)

            # The negative mask is the logical complement of the object mask.
            negative_mask = tf.cast(
                tf.logical_not(tf.cast(object_mask, tf.bool)), tf.float32)
            negative_boxes = sampled_proposals * tf.expand_dims(
                negative_mask, 1)
            negative_count = tf.shape(
                tf.where(tf.equal(object_mask, 0.0)))[0]
            negative_proposals_in_img = draw_box_with_color(
                self.img_batch, negative_boxes, text=negative_count)

            tf.summary.image('/positive_proposals', positive_proposals_in_img)
            tf.summary.image('/negative_proposals', negative_proposals_in_img)

            # The top-5 visualisation is only built for the single-class setup.
            if cfgs.CLASS_NUM == 1:
                decoded_boxes = encode_and_decode.decode_boxes(
                    encode_boxes=predicted_deltas,
                    reference_boxes=sampled_proposals,
                    scale_factors=self.scale_factors)

                all_softmax_scores = slim.softmax(self.fast_rcnn_scores)
                sampled_softmax_scores = tf.gather(
                    all_softmax_scores, sample_ids)
                top_k_scores, top_k_indices = tf.nn.top_k(
                    sampled_softmax_scores[:, 1], k=5)

                top_boxes = tf.gather(decoded_boxes, top_k_indices)
                top_detections_in_img = draw_boxes_with_scores(
                    self.img_batch, boxes=top_boxes, scores=top_k_scores)
                tf.summary.image('/top_5', top_detections_in_img)

            # Regression targets: matched gt boxes encoded relative to proposals.
            target_deltas = encode_and_decode.encode_boxes(
                unencode_boxes=matched_gtboxes,
                reference_boxes=sampled_proposals,
                scale_factors=self.scale_factors)

            # Repeat the targets once per class: [minibatch_size, num_classes*4]
            target_deltas = tf.tile(target_deltas, [1, self.num_classes])

            # One [minibatch_size, 4] block per class, filled with that class's
            # one-hot column; column 0 of the one-hot labels is skipped.
            one_hot_columns = tf.unstack(labels_one_hot, axis=1)
            class_weights = tf.concat(
                [
                    tf.ones(shape=[tf.shape(predicted_deltas)[0], 4],
                            dtype=tf.float32)
                    * tf.expand_dims(one_hot_columns[class_id], axis=1)
                    for class_id in range(1, self.num_classes + 1)
                ],
                axis=1)  # [minibatch_size, num_classes*4]

            # loss
            with tf.variable_scope('fast_rcnn_classification_loss'):
                fast_rcnn_classification_loss = \
                    tf.losses.softmax_cross_entropy(
                        logits=predicted_scores,
                        onehot_labels=labels_one_hot)

            with tf.variable_scope('fast_rcnn_location_loss'):
                fast_rcnn_location_loss = losses.l1_smooth_losses(
                    predict_boxes=predicted_deltas,
                    gtboxes=target_deltas,
                    object_weights=object_mask,
                    classes_weights=class_weights)
                tf.losses.add_loss(fast_rcnn_location_loss)

            return fast_rcnn_location_loss, fast_rcnn_classification_loss
Example #8
0
def build_rpn_target(gt_boxes, anchors, config):
    """
    Label every anchor as object, background or ignored, then sample an RPN
    minibatch and encode its regression targets.

    :param anchors: (all_anchors, 4)[y1, x1, y2, x2]. use N to represent all_anchors
    :param gt_boxes: (M, 4). Cast to float32 inside this function.
    :param config: the config of making data. This function reads
        RPN_IOU_POSITIVE_THRESHOLD, RPN_IOU_NEGATIVE_THRESHOLD,
        RPN_MINIBATCH_SIZE, RPN_POSITIVE_RATE and RPN_BBOX_STD_DEV from it.

    :return: tuple (minibatch_indices, minibatch_encode_gtboxes, rpn_labels_one_hot):
        the indices of the sampled anchors, the matched ground-truth boxes of
        those anchors encoded against the anchors, and the sampled labels
        one-hot encoded with depth 2.
    """
    with tf.variable_scope('rpn_find_positive_negative_samples'):
        gt_boxes = tf.cast(gt_boxes, tf.float32)
        ious = iou_calculate(anchors, gt_boxes)  # (N, M)

        # an anchor that has an IoU overlap higher than 0.7 with any ground-truth box
        max_iou_each_row = tf.reduce_max(ious, axis=1)
        # every anchor starts out as "ignored"
        rpn_labels = tf.ones(shape=[
            tf.shape(anchors)[0],
        ], dtype=tf.float32) * (-1)  # [N, ] # ignored is -1
        # index of the best-overlapping ground-truth box for each anchor
        matchs = tf.cast(tf.argmax(ious, axis=1), tf.int32)
        positives1 = tf.greater_equal(max_iou_each_row,
                                      config.RPN_IOU_POSITIVE_THRESHOLD)

        # the anchor/anchors with the highest Intersection-over-Union (IoU) overlap with a ground-truth box
        max_iou_each_column = tf.reduce_max(ious, 0)  # (M, )
        # per anchor: number of ground-truth boxes for which it attains the column maximum
        positives2 = tf.reduce_sum(tf.cast(tf.equal(ious, max_iou_each_column),
                                           tf.float32),
                                   axis=1)

        # positive if either rule fires; -1 + 2 moves those anchors to 1.0
        positives = tf.logical_or(positives1, tf.cast(positives2, tf.bool))
        rpn_labels += 2 * tf.cast(positives, tf.float32)

        anchors_matched_gtboxes = tf.gather(gt_boxes, matchs)  # [N, 4]

        # Background anchors still receive a matched gtbox here; per the
        # original note this does not matter because it is masked out later.
        negatives = tf.less(max_iou_each_row,
                            config.RPN_IOU_NEGATIVE_THRESHOLD)
        rpn_labels = rpn_labels + tf.cast(
            negatives, tf.float32
        )  # [N, ] positive is >=1.0, negative is 0, ignored is -1.0
        '''
        Need to note: when positive, labels may >= 1.0.
        Because, when all the iou< 0.7, we set anchors having max iou each column as positive.
        these anchors may have iou < 0.3.
        In the begining, labels is [-1, -1, -1...-1]
        then anchors having iou<0.3 as well as are max iou each column will be +1.0.
        when decide negatives, because of iou<0.3, they add 1.0 again.
        So, the final result will be 2.0

        So, when opsitive, labels may in [1.0, 2.0]. that is labels >=1.0
        '''
        # Normalise the labels to exactly 1.0 (positive), 0.0 (negative) and
        # -1.0 (ignored).
        positives = tf.cast(tf.greater_equal(rpn_labels, 1.0), tf.float32)
        ignored = tf.cast(tf.equal(rpn_labels, -1.0), tf.float32) * -1

        rpn_labels = positives + ignored

    with tf.variable_scope('rpn_minibatch'):
        # randomly choose the positive objects
        positive_indices = tf.reshape(tf.where(tf.equal(
            rpn_labels, 1.0)), [-1])  # use labels is same as object_mask
        # cap the positives at RPN_MINIBATCH_SIZE * RPN_POSITIVE_RATE
        num_of_positives = tf.minimum(
            tf.shape(positive_indices)[0],
            tf.cast(config.RPN_MINIBATCH_SIZE * config.RPN_POSITIVE_RATE,
                    tf.int32))
        positive_indices = tf.random_shuffle(positive_indices)
        positive_indices = tf.slice(positive_indices,
                                    begin=[0],
                                    size=[num_of_positives])
        # randomly choose the negative objects to fill the rest of the minibatch
        negatives_indices = tf.reshape(tf.where(tf.equal(rpn_labels, 0.0)),
                                       [-1])
        num_of_negatives = tf.minimum(
            config.RPN_MINIBATCH_SIZE - num_of_positives,
            tf.shape(negatives_indices)[0])
        negatives_indices = tf.random_shuffle(negatives_indices)
        negatives_indices = tf.slice(negatives_indices,
                                     begin=[0],
                                     size=[num_of_negatives])

        minibatch_indices = tf.concat([positive_indices, negatives_indices],
                                      axis=0)

        # pad with re-drawn negatives if the minibatch is still short
        # NOTE(review): the slice below asks for `gap` entries from the already
        # selected negatives; this looks like it requires gap <= num_of_negatives
        # - confirm behaviour when too few negatives exist.
        gap = config.RPN_MINIBATCH_SIZE - tf.shape(minibatch_indices)[0]
        extract_indices = tf.random_shuffle(negatives_indices)
        extract_indices = tf.slice(extract_indices, begin=[0], size=[gap])
        minibatch_indices = tf.concat([minibatch_indices, extract_indices],
                                      axis=0)

        # mix positives and negatives before gathering
        minibatch_indices = tf.random_shuffle(minibatch_indices)
        # (config.RPN_MINI_BATCH_SIZE, 4)
        minibatch_anchor_matched_gtboxes = tf.gather(anchors_matched_gtboxes,
                                                     minibatch_indices)
        # sampled indices come only from labels 1.0 and 0.0, so the gathered
        # labels are 0 or 1
        rpn_labels = tf.cast(tf.gather(rpn_labels, minibatch_indices),
                             tf.int32)
        # encode gtboxes
        minibatch_anchors = tf.gather(anchors, minibatch_indices)
        minibatch_encode_gtboxes = encode_and_decode.encode_boxes(
            unencode_boxes=minibatch_anchor_matched_gtboxes,
            reference_boxes=minibatch_anchors,
            dev_factors=config.RPN_BBOX_STD_DEV)
        rpn_labels_one_hot = tf.one_hot(rpn_labels, 2, axis=-1)

    return minibatch_indices, minibatch_encode_gtboxes, rpn_labels_one_hot
    def fast_rcnn_loss(self):
        """Build the Fast R-CNN head losses for rotated proposals.

        The minibatch is sampled from ``self.fast_rcnn_all_level_rotate_proposals``.
        Positive proposals, negative proposals and the five decoded boxes with
        the highest softmax score in column 1 are drawn into summary images.
        Targets and class weights use five values per box per class.

        Returns:
            tuple: ``(fast_rcnn_location_loss, fast_rcnn_classification_loss)``.
        """
        with tf.variable_scope('fast_rcnn_loss'):
            (sample_ids, matched_gtboxes, object_mask,
             labels_one_hot) = self.fast_rcnn_minibatch(
                 self.fast_rcnn_all_level_rotate_proposals)

            # The rotated proposals (not the horizontal ones) are the reference.
            sampled_proposals = tf.gather(
                self.fast_rcnn_all_level_rotate_proposals, sample_ids)

            # [minibatch_size, num_classes*5]
            predicted_deltas = tf.gather(
                self.fast_rcnn_encode_boxes, sample_ids)

            predicted_scores = tf.gather(self.fast_rcnn_scores, sample_ids)

            # Multiplying by the mask zeroes every proposal that is not positive.
            positive_boxes = sampled_proposals * tf.expand_dims(object_mask, 1)
            positive_count = tf.shape(
                tf.where(tf.equal(object_mask, 1.0)))[0]
            positive_proposals_in_img = draw_box_with_color(
                self.img_batch, positive_boxes, text=positive_count)

            # The negative mask is the logical complement of the object mask.
            negative_mask = tf.cast(
                tf.logical_not(tf.cast(object_mask, tf.bool)), tf.float32)
            negative_boxes = sampled_proposals * tf.expand_dims(
                negative_mask, 1)
            negative_count = tf.shape(
                tf.where(tf.equal(object_mask, 0.0)))[0]
            negative_proposals_in_img = draw_box_with_color(
                self.img_batch, negative_boxes, text=negative_count)

            tf.summary.image('/positive_proposals', positive_proposals_in_img)
            tf.summary.image('/negative_proposals', negative_proposals_in_img)

            # Decode the predictions back to boxes for the top-5 visualisation.
            decoded_boxes = encode_and_decode.decode_boxes(
                encode_boxes=predicted_deltas,
                reference_boxes=sampled_proposals,
                scale_factors=self.scale_factors)

            all_softmax_scores = slim.softmax(self.fast_rcnn_scores)
            sampled_softmax_scores = tf.gather(all_softmax_scores, sample_ids)
            top_k_scores, top_k_indices = tf.nn.top_k(
                sampled_softmax_scores[:, 1], k=5)

            top_boxes = tf.gather(decoded_boxes, top_k_indices)
            top_detections_in_img = draw_boxes_with_scores(
                self.img_batch, boxes=top_boxes, scores=top_k_scores)
            tf.summary.image('/top_5', top_detections_in_img)

            # Regression targets: matched gt boxes encoded relative to proposals.
            target_deltas = encode_and_decode.encode_boxes(
                unencode_boxes=matched_gtboxes,
                reference_boxes=sampled_proposals,
                scale_factors=self.scale_factors)

            # Repeat the targets once per class: [minibatch_size, num_classes*5]
            target_deltas = tf.tile(target_deltas, [1, self.num_classes])

            # One [minibatch_size, 5] block per class, filled with that class's
            # one-hot column; column 0 of the one-hot labels is skipped.
            one_hot_columns = tf.unstack(labels_one_hot, axis=1)
            class_weights = tf.concat(
                [
                    tf.ones(shape=[tf.shape(predicted_deltas)[0], 5],
                            dtype=tf.float32)
                    * tf.expand_dims(one_hot_columns[class_id], axis=1)
                    for class_id in range(1, self.num_classes + 1)
                ],
                axis=1)  # [minibatch_size, num_classes*5]

            # loss
            with tf.variable_scope('fast_rcnn_classification_loss'):
                fast_rcnn_classification_loss = \
                    slim.losses.softmax_cross_entropy(
                        logits=predicted_scores,
                        onehot_labels=labels_one_hot)
            with tf.variable_scope('fast_rcnn_location_loss'):
                fast_rcnn_location_loss = losses.l1_smooth_losses(
                    predict_boxes=predicted_deltas,
                    gtboxes=target_deltas,
                    object_weights=object_mask,
                    classes_weights=class_weights)
                slim.losses.add_loss(fast_rcnn_location_loss)

            return fast_rcnn_location_loss, fast_rcnn_classification_loss
Example #10
0
    def rpn_loss(self):
        '''
        Build the RPN location and classification losses for a sampled
        minibatch and write summary images of the sampled anchors.

        Inputs read from the instance (shapes as noted by the original author):
        self.gtboxes_and_label: [n, 5] -> [ymin, xmin, ymax, xmax, cls]
        self.anchors: [m, 4] -> [ymin, xmin, ymax, xmax]
        self.rpn_encode_boxes: [m, 4] -> [ycenter, xcenter, h, w]

        :return: tuple (location_loss, classify_loss)
        '''
        with tf.variable_scope('rpn_loss'):
            (sample_ids, matched_gtboxes, object_mask,
             labels_one_hot) = self.make_minibatch()

            # Keep only the anchors and raw RPN outputs chosen for the minibatch.
            sampled_anchors = tf.gather(self.anchors, sample_ids)
            predicted_deltas = tf.gather(self.rpn_encode_boxes, sample_ids)
            predicted_scores = tf.gather(self.rpn_scores, sample_ids)

            # NOTE(review): arguments are positional here (anchors first, then
            # matched gt boxes) - confirm this matches encode_boxes' signature.
            target_deltas = encode_and_decode.encode_boxes(
                sampled_anchors, matched_gtboxes, self.scale_factors)

            # summary
            positive_boxes = sampled_anchors * tf.expand_dims(object_mask, 1)
            positive_count = tf.shape(tf.where(tf.equal(object_mask, 1)))[0]
            positive_anchors_in_img = draw_box_with_tensor(
                img_batch=self.img_batch,
                boxes=positive_boxes,
                text=positive_count)

            # The negative mask is the logical complement of the object mask.
            negative_mask = tf.cast(
                tf.logical_not(tf.cast(object_mask, tf.bool)), tf.float32)
            negative_boxes = sampled_anchors * tf.expand_dims(negative_mask, 1)
            negative_count = tf.shape(tf.where(tf.equal(negative_mask, 1)))[0]
            negative_anchors_in_img = draw_box_with_tensor(
                img_batch=self.img_batch,
                boxes=negative_boxes,
                text=negative_count)

            decoded_anchors = encode_and_decode.decode_boxes(
                encode_boxes=predicted_deltas,
                reference_boxes=sampled_anchors,
                scale_factors=self.scale_factors)
            # clip boxes into image shape
            decoded_anchors = boxes_utils.clip_boxes_to_img_boundaries(
                decoded_anchors, tf.shape(self.img_batch))

            positive_decoded_boxes = decoded_anchors * tf.expand_dims(
                object_mask, 1)
            positive_decoded_count = tf.shape(
                tf.where(tf.equal(object_mask, 1)))[0]
            positive_decode_anchor_in_img = draw_box_with_tensor(
                img_batch=self.img_batch,
                boxes=positive_decoded_boxes,
                text=positive_decoded_count)

            tf.summary.image('images/rpn/losses/anchors_positive_minibatch',
                             positive_anchors_in_img)
            tf.summary.image('images/rpn/losses/anchors_negative_minibatch',
                             negative_anchors_in_img)
            tf.summary.image('images/rpn/losses/decode_anchor_positive',
                             positive_decode_anchor_in_img)

            # losses (both are created inside the same variable scope)
            with tf.variable_scope('rpn_localization_losses'):
                classify_loss = slim.losses.softmax_cross_entropy(
                    logits=predicted_scores,
                    onehot_labels=labels_one_hot)

                location_loss = losses.l1_smooth_losses(
                    predict_boxes=predicted_deltas,
                    gtboxes=target_deltas,
                    object_weights=object_mask)
                # add location loss to losses collections
                slim.losses.add_loss(location_loss)

            return location_loss, classify_loss
Example #11
0
            def batch_slice_build_sample(gtboxes_and_label,
                                         rpn_proposals_boxes):
                """Sample a training minibatch of RPN proposals for one image.

                Proposals are matched to their best-overlapping ground-truth
                box, split into positives and negatives by IoU, randomly
                sampled, encoded, and finally zero-padded to
                ``cfgs.FAST_RCNN_MINIBATCH_SIZE`` rows.

                :param gtboxes_and_label: ground-truth rows; all columns but
                    the last are reshaped to 4 box values, the last is the
                    class id.
                :param rpn_proposals_boxes: proposal boxes; all-zero rows are
                    presumably padding removed by ``trim_zeros_graph`` - confirm.
                :return: tuple (minibatch_reference_proboxes,
                    minibatch_encode_gtboxes, object_mask, gt_class_ids).
                """

                with tf.name_scope('select_pos_neg_samples'):
                    # split the input into float32 boxes and int32 class ids
                    gtboxes = tf.cast(
                        tf.reshape(gtboxes_and_label[:, :-1], [-1, 4]),
                        tf.float32)
                    gt_class_ids = tf.cast(
                        tf.reshape(gtboxes_and_label[:, -1], [
                            -1,
                        ]), tf.int32)
                    # drop trimmed rows from the boxes and, with the same mask,
                    # from the class ids
                    gtboxes, non_zeros = boxes_utils.trim_zeros_graph(
                        gtboxes, name="trim_gt_box")  # [M, 4]
                    gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros)
                    rpn_proposals_boxes, _ = boxes_utils.trim_zeros_graph(
                        rpn_proposals_boxes, name="trim_rpn_proposal_train")

                    ious = iou.iou_calculate(rpn_proposals_boxes,
                                             gtboxes)  # [N, M]
                    # index of the best-overlapping ground-truth box per proposal
                    matchs = tf.cast(tf.argmax(ious, axis=1),
                                     tf.int32)  # [N, ]
                    max_iou_each_row = tf.reduce_max(ious, axis=1)
                    # 1 where the best IoU reaches the positive threshold, else 0
                    positives = tf.cast(
                        tf.greater_equal(
                            max_iou_each_row,
                            cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD), tf.int32)

                    reference_boxes_mattached_gtboxes = tf.gather(
                        gtboxes, matchs)  # [N, 4]
                    gt_class_ids = tf.gather(gt_class_ids, matchs)  # [N, ]
                    object_mask = tf.cast(positives, tf.float32)  # [N, ]
                    # when a box is background its class id is forced to 0, so
                    # it gets weight 0 and contributes no gradient
                    gt_class_ids = gt_class_ids * positives

                with tf.name_scope('head_train_minibatch'):
                    # choose the positive indices, capped at
                    # FAST_RCNN_MINIBATCH_SIZE * FAST_RCNN_POSITIVE_RATE
                    positive_indices = tf.reshape(
                        tf.where(tf.equal(object_mask, 1.)), [-1])
                    num_of_positives = tf.minimum(
                        tf.shape(positive_indices)[0],
                        tf.cast(
                            cfgs.FAST_RCNN_MINIBATCH_SIZE *
                            cfgs.FAST_RCNN_POSITIVE_RATE, tf.int32))
                    positive_indices = tf.random_shuffle(positive_indices)
                    positive_indices = tf.slice(positive_indices,
                                                begin=[0],
                                                size=[num_of_positives])
                    # choose the negative indices,
                    # Strictly keep the proportion of positive and negative at 1:3
                    # (int(1 / rate) * positives - positives; e.g. a rate of
                    # 0.25 gives three negatives per positive)
                    negative_indices = tf.reshape(
                        tf.where(tf.equal(object_mask, 0.)), [-1])
                    num_of_negatives = tf.cast(int(1. / cfgs.FAST_RCNN_POSITIVE_RATE) * num_of_positives, tf.int32)\
                                       - num_of_positives

                    # never request more negatives than are available
                    num_of_negatives = tf.minimum(
                        tf.shape(negative_indices)[0], num_of_negatives)
                    negative_indices = tf.random_shuffle(negative_indices)
                    negative_indices = tf.slice(negative_indices,
                                                begin=[0],
                                                size=[num_of_negatives])

                    # positives come first, negatives after; no final shuffle
                    minibatch_indices = tf.concat(
                        [positive_indices, negative_indices], axis=0)
                    minibatch_reference_gtboxes = tf.gather(
                        reference_boxes_mattached_gtboxes, minibatch_indices)
                    minibatch_reference_proboxes = tf.gather(
                        rpn_proposals_boxes, minibatch_indices)
                    # encode gtboxes
                    minibatch_encode_gtboxes = \
                        encode_and_decode.encode_boxes(
                            unencode_boxes=minibatch_reference_gtboxes,
                            reference_boxes=minibatch_reference_proboxes,
                            scale_factors=cfgs.BBOX_STD_DEV)
                    object_mask = tf.gather(object_mask, minibatch_indices)
                    gt_class_ids = tf.gather(gt_class_ids, minibatch_indices)

                    # padding if necessary: append `gap` all-zero rows/entries
                    # so every output has FAST_RCNN_MINIBATCH_SIZE rows
                    gap = tf.cast(cfgs.FAST_RCNN_MINIBATCH_SIZE -
                                  (num_of_positives + num_of_negatives),
                                  dtype=tf.int32)
                    bbox_padding = tf.zeros((gap, 4))
                    minibatch_reference_proboxes = tf.concat(
                        [minibatch_reference_proboxes, bbox_padding], axis=0)
                    minibatch_encode_gtboxes = tf.concat(
                        [minibatch_encode_gtboxes, bbox_padding], axis=0)
                    object_mask = tf.pad(object_mask, [(0, gap)])
                    gt_class_ids = tf.pad(gt_class_ids, [(0, gap)])

                return minibatch_reference_proboxes, minibatch_encode_gtboxes, object_mask, gt_class_ids
Example #12
0
    def rpn_losses(self):
        '''
        Build the RPN box-regression and classification losses for one
        sampled anchor minibatch.

        Besides the losses, image summaries are written for the positive
        anchors, the negative anchors and the single highest-scoring decoded
        box of the minibatch.

        :return: (location_loss, classification_loss)
        '''
        with tf.variable_scope('rpn_losses'):
            (sampled_indices, matched_gtboxes, object_mask,
             labels_one_hot) = self.make_minibatch(self.anchors)

            # Restrict the anchors and the RPN outputs to the sampled rows.
            sampled_anchors = tf.gather(self.anchors, sampled_indices)
            predicted_deltas = tf.gather(self.rpn_encode_boxes,
                                         sampled_indices)
            predicted_scores = tf.gather(self.rpn_scores, sampled_indices)

            # Regression targets: matched gt boxes encoded against anchors.
            target_deltas = encode_and_decode.encode_boxes(
                unencode_boxes=matched_gtboxes,
                reference_boxes=sampled_anchors,
                scale_factors=self.scale_factors)

            # Anchors are multiplied by the mask before drawing; the text is
            # the number of mask entries equal to 1.0.
            positive_boxes = sampled_anchors * tf.expand_dims(object_mask, 1)
            positive_count = tf.shape(
                tf.where(tf.equal(object_mask, 1.0)))[0]
            positive_anchors_in_img = draw_box_with_color(
                self.img_batch, positive_boxes, text=positive_count)

            # Logical complement of the object mask, back in float32.
            is_object = tf.cast(object_mask, tf.bool)
            negative_mask = tf.cast(tf.logical_not(is_object), tf.float32)
            negative_boxes = sampled_anchors * tf.expand_dims(negative_mask, 1)
            negative_count = tf.shape(
                tf.where(tf.equal(object_mask, 0.0)))[0]
            negative_anchors_in_img = draw_box_with_color(
                self.img_batch, negative_boxes, text=negative_count)

            # Predicted deltas decoded against the anchors, for visualisation.
            decoded_boxes = encode_and_decode.decode_boxes(
                encode_boxes=predicted_deltas,
                reference_boxes=sampled_anchors,
                scale_factors=self.scale_factors)

            tf.summary.image('/positive_anchors', positive_anchors_in_img)
            tf.summary.image('/negative_anchors', negative_anchors_in_img)

            # Column 1 of the scores is used to rank the minibatch entries.
            best_scores, best_indices = tf.nn.top_k(
                predicted_scores[:, 1], k=1)
            best_boxes = tf.gather(decoded_boxes, best_indices)
            top_detections_in_img = draw_box_with_color(
                self.img_batch, best_boxes, text=tf.shape(best_scores)[0])
            tf.summary.image('/top_1', top_detections_in_img)

            # losses
            with tf.variable_scope('rpn_location_loss'):
                location_loss = losses.l1_smooth_losses(
                    predict_boxes=predicted_deltas,
                    gtboxes=target_deltas,
                    object_weights=object_mask)
                # add smooth l1 loss to losses collection
                slim.losses.add_loss(location_loss)

            with tf.variable_scope('rpn_classification_loss'):
                # Plain softmax cross-entropy on the sampled scores.
                classification_loss = slim.losses.softmax_cross_entropy(
                    logits=predicted_scores,
                    onehot_labels=labels_one_hot)

            return location_loss, classification_loss
    def fast_rcnn_loss(self):
        '''
        Build the Fast R-CNN location loss and a focal-style classification
        loss ("PFL" in the comments below) for one sampled proposal minibatch.

        Steps visible in this method:
          * sample a minibatch with ``self.fast_rcnn_minibatch`` and gather
            the proposals, predicted box encodings and scores for it;
          * write image summaries of positive / negative proposals and, only
            when ``cfgs.CLASS_NUM == 1``, of the 5 best-scoring decoded boxes;
          * encode the matched gt boxes and tile them ``self.num_classes``
            times along axis 1;
          * build ``class_weights`` from columns 1..num_classes of the
            one-hot labels;
          * build the classification loss and the smooth-L1 location loss.

        Only the location loss is passed to ``slim.losses.add_loss`` here.

        :return: (fast_rcnn_location_loss, fast_rcnn_classification_loss)
        '''
        with tf.variable_scope('fast_rcnn_loss'):
            minibatch_indices, minibatch_reference_boxes_mattached_gtboxes, minibatch_object_mask, \
            minibatch_label_one_hot = self.fast_rcnn_minibatch(self.fast_rcnn_all_level_proposals)

            # Restrict proposals and head outputs to the sampled rows.
            minibatch_reference_boxes = tf.gather(
                self.fast_rcnn_all_level_proposals, minibatch_indices)

            minibatch_encode_boxes = tf.gather(
                self.fast_rcnn_encode_boxes,
                minibatch_indices)  # [minibatch_size, num_classes*4]
            minibatch_scores = tf.gather(self.fast_rcnn_scores,
                                         minibatch_indices)

            # Proposals are multiplied by the mask before drawing; the text is
            # the number of mask entries equal to 1.0.
            positive_proposals_in_img = draw_box_with_color(
                self.img_batch,
                minibatch_reference_boxes *
                tf.expand_dims(minibatch_object_mask, 1),
                text=tf.shape(tf.where(tf.equal(minibatch_object_mask,
                                                1.0)))[0])

            # Logical complement of the object mask, back in float32.
            negative_mask = tf.cast(
                tf.logical_not(tf.cast(minibatch_object_mask, tf.bool)),
                tf.float32)
            negative_proposals_in_img = draw_box_with_color(
                self.img_batch,
                minibatch_reference_boxes * tf.expand_dims(negative_mask, 1),
                text=tf.shape(tf.where(tf.equal(minibatch_object_mask,
                                                0.0)))[0])

            tf.summary.image('/positive_proposals', positive_proposals_in_img)
            tf.summary.image('/negative_proposals', negative_proposals_in_img)

            # The top-5 visualisation is only built in the single-class
            # configuration; it ranks by column 1 of the softmaxed scores.
            if cfgs.CLASS_NUM == 1:
                minibatch_decode_boxes = encode_and_decode.decode_boxes(
                    encode_boxes=minibatch_encode_boxes,
                    reference_boxes=minibatch_reference_boxes,
                    scale_factors=self.scale_factors)

                minibatch_softmax_scores = tf.gather(
                    slim.softmax(self.fast_rcnn_scores), minibatch_indices)
                top_k_scores, top_k_indices = tf.nn.top_k(
                    minibatch_softmax_scores[:, 1], k=5)

                top_detections_in_img = draw_boxes_with_scores(
                    self.img_batch,
                    boxes=tf.gather(minibatch_decode_boxes, top_k_indices),
                    scores=top_k_scores)
                tf.summary.image('/top_5', top_detections_in_img)

            # encode gtboxes
            minibatch_encode_gtboxes = \
                encode_and_decode.encode_boxes(
                    unencode_boxes=minibatch_reference_boxes_mattached_gtboxes,
                    reference_boxes=minibatch_reference_boxes,
                    scale_factors=self.scale_factors
                )

            # Repeat the targets once per class so they line up with the
            # per-class predictions: [minibatch_size, num_classes*4]
            minibatch_encode_gtboxes = tf.tile(minibatch_encode_gtboxes,
                                               [1, self.num_classes])

            # For each label column 1..num_classes (column 0 is skipped) build
            # a [rows, 4] block holding that column's value, then concatenate.
            class_weights_list = []
            category_list = tf.unstack(minibatch_label_one_hot, axis=1)
            for i in range(1, self.num_classes + 1):
                tmp_class_weights = tf.ones(
                    shape=[tf.shape(minibatch_encode_boxes)[0], 4],
                    dtype=tf.float32)
                tmp_class_weights = tmp_class_weights * tf.expand_dims(
                    category_list[i], axis=1)
                class_weights_list.append(tmp_class_weights)
            class_weights = tf.concat(
                class_weights_list, axis=1)  # [minibatch_size, num_classes*4]

            # loss
            with tf.variable_scope('fast_rcnn_classification_loss'):

                logits = tf.cast(minibatch_scores, tf.float32)
                onehot_labels = tf.cast(minibatch_label_one_hot, tf.float32)
                one = tf.ones(shape=tf.shape(onehot_labels), dtype=tf.float32)
                # NOTE(review): `logits` is used directly as p where the label
                # is 1 and as 1 - p elsewhere; no softmax is applied in this
                # block. Confirm whether self.fast_rcnn_scores already holds
                # probabilities.
                predictions_pt = tf.where(tf.equal(onehot_labels, 1), logits,
                                          1 - logits)

                # NOTE(review): the original comment here read "add small
                # value to avoid", but no epsilon is added below.
                # alpha_t: 0.25 where the label is 1, 0.75 elsewhere.
                alpha_t = tf.scalar_mul(0.25, one)
                alpha_t = tf.where(tf.equal(onehot_labels, 1), alpha_t,
                                   1 - alpha_t)
                # Exponent: -2 where predictions_pt < 0.5, +2 otherwise.
                gamma = tf.scalar_mul(2, one)
                new_gamma = tf.where(tf.less(predictions_pt, 0.5), -gamma,
                                     gamma)
                ##PFL
                # alpha_t * softmax-cross-entropy * (1 - predictions_pt) ** new_gamma
                # NOTE(review): the result is indexed per column below, so the
                # cross-entropy term is presumably broadcast against the
                # [rows, classes] factors; verify the shape it returns.
                fast_rcnn_classification_loss = tf.multiply(
                    tf.multiply(
                        alpha_t,
                        slim.losses.softmax_cross_entropy(
                            logits=logits, onehot_labels=onehot_labels)),
                    tf.pow(1 - predictions_pt, new_gamma))
                ##FL
                # fast_rcnn_classification_loss = tf.multiply(tf.multiply(alpha_t, slim.losses.softmax_cross_entropy(logits=logits,
                #                                                    onehot_labels=onehot_labels)), tf.pow(1-predictions_pt, 2))

                # For FL and PFL keep the following statement; for CE comment it out.
                # NOTE(review): only columns 0 and 1 are summed, which
                # presumably assumes exactly two label columns; confirm
                # before using this with more than one foreground class.
                fast_rcnn_classification_loss = tf.reduce_sum(
                    fast_rcnn_classification_loss[:, 0] +
                    fast_rcnn_classification_loss[:, 1])

                ##CE
                # fast_rcnn_classification_loss = slim.losses.softmax_cross_entropy(logits=tf.clip_by_value(minibatch_scores,1e-8,tf.reduce_max(minibatch_scores)),
                #                                                                   onehot_labels=minibatch_label_one_hot)
                # fast_rcnn_classification_loss = slim.losses.softmax_cross_entropy(
                #     logits=minibatch_scores,
                #     onehot_labels=minibatch_label_one_hot)

            with tf.variable_scope('fast_rcnn_location_loss'):
                # Smooth-L1 on the box encodings, weighted by the object mask
                # and the per-class weights built above.
                fast_rcnn_location_loss = losses.l1_smooth_losses(
                    predict_boxes=minibatch_encode_boxes,
                    gtboxes=minibatch_encode_gtboxes,
                    object_weights=minibatch_object_mask,
                    classes_weights=class_weights)
                slim.losses.add_loss(fast_rcnn_location_loss)

            return fast_rcnn_location_loss, fast_rcnn_classification_loss
    def fast_rcnn_loss(self):
        '''
        Build the Fast R-CNN losses for the horizontal-box, rotated-box and
        head-quadrant branches on one sampled proposal minibatch.

        Box and head-quadrant regression are class-agnostic: the targets are
        not tiled per class and ``classes_weights=None`` is passed to
        ``losses.l1_smooth_losses``. For that reason no per-class weight
        masks are built here; they would only add unused ops to the graph.
        Both classification branches use ``losses.focal_loss`` against the
        same one-hot labels. Every loss is registered through
        ``slim.losses.add_loss``; the head-quadrant loss is scaled by 10 both
        where it is registered and where it is returned.

        :return: (fast_rcnn_location_loss, fast_rcnn_classification_loss,
                  fast_rcnn_location_rotate_loss,
                  fast_rcnn_classification_rotate_loss,
                  fast_rcnn_head_quadrant_loss * 10)
        '''
        with tf.variable_scope('fast_rcnn_loss'):
            (minibatch_indices,
             minibatch_reference_boxes_mattached_gtboxes,
             minibatch_reference_boxes_mattached_gtboxes_rotate,
             minibatch_reference_boxes_mattached_head_quadrant,
             minibatch_object_mask,
             minibatch_label_one_hot) = self.fast_rcnn_minibatch(
                 self.fast_rcnn_all_level_proposals)

            # Restrict proposals and head outputs to the sampled rows.
            minibatch_reference_boxes = tf.gather(
                self.fast_rcnn_all_level_proposals, minibatch_indices)
            minibatch_encode_boxes = tf.gather(
                self.fast_rcnn_encode_boxes, minibatch_indices)
            minibatch_encode_boxes_rotate = tf.gather(
                self.fast_rcnn_encode_boxes_rotate, minibatch_indices)
            minibatch_head_quadrant = tf.gather(
                self.fast_rcnn_head_quadrant, minibatch_indices)
            minibatch_scores = tf.gather(
                self.fast_rcnn_scores, minibatch_indices)
            minibatch_scores_rotate = tf.gather(
                self.fast_rcnn_scores_rotate, minibatch_indices)

            # Regression targets: matched gt boxes encoded against the
            # proposals, once for the horizontal and once for the rotated
            # representation.
            minibatch_encode_gtboxes = encode_and_decode.encode_boxes(
                unencode_boxes=minibatch_reference_boxes_mattached_gtboxes,
                reference_boxes=minibatch_reference_boxes,
                scale_factors=self.scale_factors)
            minibatch_encode_gtboxes_rotate = \
                encode_and_decode.encode_boxes_rotate(
                    unencode_boxes=
                    minibatch_reference_boxes_mattached_gtboxes_rotate,
                    reference_boxes=minibatch_reference_boxes,
                    scale_factors=self.scale_factors)

            # Class-agnostic: the head-quadrant target is used as matched,
            # without tiling per class.
            minibatch_gt_head_quadrant = \
                minibatch_reference_boxes_mattached_head_quadrant

            # loss
            with tf.variable_scope('fast_rcnn_classification_loss'):
                fast_rcnn_classification_loss = losses.focal_loss(
                    prediction_tensor=minibatch_scores,
                    target_tensor=minibatch_label_one_hot)
                slim.losses.add_loss(fast_rcnn_classification_loss)

            with tf.variable_scope('fast_rcnn_location_loss'):
                # Class-agnostic regression
                fast_rcnn_location_loss = losses.l1_smooth_losses(
                    predict_boxes=minibatch_encode_boxes,
                    gtboxes=minibatch_encode_gtboxes,
                    object_weights=minibatch_object_mask,
                    classes_weights=None)
                slim.losses.add_loss(fast_rcnn_location_loss)

            with tf.variable_scope('fast_rcnn_classification_rotate_loss'):
                fast_rcnn_classification_rotate_loss = losses.focal_loss(
                    prediction_tensor=minibatch_scores_rotate,
                    target_tensor=minibatch_label_one_hot)
                slim.losses.add_loss(fast_rcnn_classification_rotate_loss)

            with tf.variable_scope('fast_rcnn_location_rotate_loss'):
                # Class-agnostic regression
                fast_rcnn_location_rotate_loss = losses.l1_smooth_losses(
                    predict_boxes=minibatch_encode_boxes_rotate,
                    gtboxes=minibatch_encode_gtboxes_rotate,
                    object_weights=minibatch_object_mask,
                    classes_weights=None)
                slim.losses.add_loss(fast_rcnn_location_rotate_loss)

            with tf.variable_scope('fast_rcnn_head_quadrant_loss'):
                # Class-agnostic regression
                fast_rcnn_head_quadrant_loss = losses.l1_smooth_losses(
                    predict_boxes=minibatch_head_quadrant,
                    gtboxes=minibatch_gt_head_quadrant,
                    object_weights=minibatch_object_mask,
                    classes_weights=None)
                # More importance by the bigger weight. Scaled once so the
                # registered loss and the returned loss are the same tensor.
                weighted_head_quadrant_loss = fast_rcnn_head_quadrant_loss * 10
                slim.losses.add_loss(weighted_head_quadrant_loss)

            return fast_rcnn_location_loss, fast_rcnn_classification_loss, \
                   fast_rcnn_location_rotate_loss, fast_rcnn_classification_rotate_loss, weighted_head_quadrant_loss
    def fast_rcnn_loss(self):
        '''
        Build the Fast R-CNN classification and box-regression losses for one
        sampled RoI minibatch, and write image / tensor summaries for
        inspecting the minibatch.

        The box loss is registered through ``slim.losses.add_loss``.

        :return: (fast_rcnn_boxes_loss, fast_rcnn_cls_loss)
        '''
        with tf.variable_scope('fast_rcnn_loss'):
            (sampled_indices, sampled_gtboxes, onehot_labels,
             object_mask) = self.make_minibatch()

            # Restrict RoIs and head outputs to the sampled rows.
            rois = tf.gather(self.rois_boxes, sampled_indices)
            predicted_scores = tf.gather(self.fast_rcnn_cls_scores,
                                         sampled_indices)
            predicted_deltas = tf.gather(self.fast_rcnn_encode_boxes,
                                         sampled_indices)

            # Regression targets: gt boxes encoded against the RoIs, then
            # repeated per class: [minibatch_size, 4] -> [minibatch_size, num_cls*4]
            target_deltas = encode_boxes(
                anchors=rois,
                gtboxes=sampled_gtboxes,
                scale_factors=self.scale_factors)
            target_deltas = tf.tile(target_deltas, [1, self.num_cls])

            # class_weight_mask [minibatch_size, num_cls*4]: one 4-wide block
            # per label column 1..num_cls (column 0 is skipped), each holding
            # that column's value.
            label_columns = tf.unstack(onehot_labels, axis=1)
            per_class_masks = [
                tf.ones([self.fast_rcnn_minibatch_size, 4], dtype=tf.float32)
                * tf.expand_dims(label_columns[cls_idx], axis=1)
                for cls_idx in range(1, self.num_cls + 1)
            ]
            class_weight_mask = tf.concat(per_class_masks, axis=1)

            # cls loss
            with tf.variable_scope('fast_rcnn_cls_losses'):
                fast_rcnn_cls_loss = slim.losses.softmax_cross_entropy(
                    logits=predicted_scores,
                    onehot_labels=onehot_labels)

            # boxes loss
            with tf.variable_scope('fast_rcnn_boxes_losses'):
                fast_rcnn_boxes_loss = losses.l1_smooth_losses(
                    predict_boxes=predicted_deltas,
                    gtboxes=target_deltas,
                    object_weights=object_mask,
                    classes_weights=class_weight_mask)
                slim.losses.add_loss(fast_rcnn_boxes_loss)

            # Visual checks: clip the RoIs to the image, then draw the
            # positive and the negative part of the minibatch separately.
            rois = boxes_utils.clip_boxes_to_img_boundaries(
                rois, self.img_shape)

            positive_boxes = rois * tf.expand_dims(
                tf.cast(object_mask, tf.float32), 1)
            positive_count = tf.shape(tf.where(tf.equal(object_mask, 1)))[0]
            positive_proposals_img = draw_box_with_tensor(
                img_batch=self.img_batch,
                boxes=positive_boxes,
                text=positive_count)

            # Logical complement of the object mask, in float32.
            negative_mask = tf.cast(
                tf.logical_not(tf.cast(object_mask, tf.bool)), tf.float32)
            negative_boxes = rois * tf.expand_dims(negative_mask, 1)
            negative_count = tf.shape(tf.where(tf.equal(negative_mask, 1)))[0]
            negative_proposals_img = draw_box_with_tensor(
                img_batch=self.img_batch,
                boxes=negative_boxes,
                text=negative_count)

            tf.summary.image('minibatch_positive_proposals',
                             positive_proposals_img)
            tf.summary.image('minibatch_negative_proposal',
                             negative_proposals_img)

            # Raw tensors dumped for debugging the classification and
            # regression inputs.
            debug_tensors = (
                ('minibatch_object_mask', object_mask),
                ('class_weight_mask', class_weight_mask),
                ('minibatch_predict_encode_boxes', predicted_deltas),
                ('minibatch_encode_gtboxes', target_deltas),
                ('location_loss', fast_rcnn_boxes_loss),
                ('logits', predicted_scores),
                ('one_hot', onehot_labels),
            )
            for summary_name, tensor in debug_tensors:
                tf.summary.tensor_summary(summary_name, tensor)

        return fast_rcnn_boxes_loss, fast_rcnn_cls_loss