Ejemplo n.º 1
0
    def fast_rcnn_find_positive_negative_samples(self, reference_boxes):
        '''
        Assign every reference box its best-overlapping ground-truth entry
        and decide whether it is an object or background (second stage).

        Matching uses the IoU between the reference boxes and the horizontal
        ground-truth boxes taken from self.gtboxes_and_label_minAreaRectangle.
        A box is positive when its highest IoU is
        >= self.fast_rcnn_positives_iou_threshold, otherwise it is background.

        :param reference_boxes: boxes to label, [num_of_input_boxes, 4]
        :return: a 5-tuple
        matched horizontal gtboxes: the gtbox with the highest IoU for each reference box, [N, 4]
        matched rotated gtboxes: the same match looked up in self.gtboxes_and_label, [N, 5]
        matched head quadrant: the same match looked up in self.head_quadrant, [N, 4]
        object_mask: tf.float32, 1.0 where the box is positive, 0.0 where it is background, [N, ]
        label: tf.int32 class id of the matched gtbox, forced to 0 for background, [N, ]
        '''

        with tf.variable_scope('fast_rcnn_find_positive_negative_samples'):
            # Everything except the last column is box geometry; the last
            # column (read further down) carries the class label.
            horizontal_gt = self.gtboxes_and_label_minAreaRectangle[:, :-1]
            horizontal_gt = tf.reshape(horizontal_gt, [-1, 4])
            horizontal_gt = tf.cast(horizontal_gt, tf.float32)  # [M, 4]

            rotated_gt = self.gtboxes_and_label[:, :-1]
            rotated_gt = tf.reshape(rotated_gt, [-1, 5])
            rotated_gt = tf.cast(rotated_gt, tf.float32)  # [M, 5]

            quadrants = tf.reshape(self.head_quadrant, [-1, 4])
            quadrants = tf.cast(quadrants, tf.float32)  # [M, 4]

            # Pairwise overlaps, expected as [N, M].
            overlaps = iou.iou_calculate(reference_boxes, horizontal_gt)

            # Index of, and overlap with, the best gtbox of every reference box.
            best_gt_index = tf.argmax(overlaps, axis=1)
            best_gt_index = tf.cast(best_gt_index, tf.int32)  # [N, ]
            best_overlap = tf.reduce_max(overlaps, axis=1)  # [N, ]

            is_object = tf.greater_equal(
                best_overlap, self.fast_rcnn_positives_iou_threshold)
            is_object = tf.cast(is_object, tf.int32)  # [N, ]

            # Background rows still receive their argmax gtbox here; callers
            # are expected to mask them out with object_mask.
            matched_horizontal = tf.gather(horizontal_gt, best_gt_index)
            matched_rotated = tf.gather(rotated_gt, best_gt_index)
            matched_quadrant = tf.gather(quadrants, best_gt_index)

            object_mask = tf.cast(is_object, tf.float32)  # [N, ]

            gt_classes = self.gtboxes_and_label_minAreaRectangle[:, -1]
            label = tf.gather(gt_classes, best_gt_index)  # [N, ]
            # Multiplying by the 0/1 indicator turns background labels into 0.
            label = tf.cast(label, tf.int32) * is_object

            return (matched_horizontal, matched_rotated, matched_quadrant,
                    object_mask, label)
    def fast_rcnn_find_positive_negative_samples(self, reference_boxes):
        '''
        Label each reference box as object or background for the second
        stage and attach the ground-truth box it overlaps most.

        A box is positive when its highest IoU against the ground-truth boxes
        is >= self.fast_rcnn_positives_iou_threshold, otherwise background.

        :param reference_boxes: boxes to label, [num_of_input_boxes, 4]
        :return: a 3-tuple
        matched gtboxes: the gtbox with the highest IoU for each reference box, [N, 4]
        object_mask: tf.float32, 1.0 where the box is positive, 0.0 where it is background, [N, ]
        label: tf.int32 class id of the matched gtbox, forced to 0 for background, [N, ]
        '''

        with tf.variable_scope('fast_rcnn_find_positive_negative_samples'):
            # All columns but the last hold box coordinates.
            gt_coords = self.gtboxes_and_label_minAreaRectangle[:, :-1]
            gt_coords = tf.reshape(gt_coords, [-1, 4])
            gt_coords = tf.cast(gt_coords, tf.float32)  # [M, 4]

            # Pairwise overlaps, expected as [N, M].
            overlaps = iou.iou_calculate(reference_boxes, gt_coords)

            best_gt_index = tf.argmax(overlaps, axis=1)
            best_gt_index = tf.cast(best_gt_index, tf.int32)  # [N, ]
            best_overlap = tf.reduce_max(overlaps, axis=1)  # [N, ]

            is_object = tf.greater_equal(
                best_overlap, self.fast_rcnn_positives_iou_threshold)
            is_object = tf.cast(is_object, tf.int32)  # [N, ]

            # Background rows keep whatever gtbox argmax selected; the mask
            # below is what tells consumers to ignore them.
            matched_gtboxes = tf.gather(gt_coords, best_gt_index)  # [N, 4]

            # Float copy of the indicator, usable as a per-box weight.
            object_mask = tf.cast(is_object, tf.float32)  # [N, ]

            # The last column is the class id of each gtbox.
            gt_classes = self.gtboxes_and_label_minAreaRectangle[:, -1]
            label = tf.gather(gt_classes, best_gt_index)  # [N, ]
            # Multiplying by the 0/1 indicator turns background labels into 0.
            label = tf.cast(label, tf.int32) * is_object

            return matched_gtboxes, object_mask, label
Ejemplo n.º 3
0
def iou_smooth_l1_loss_1(preds,
                         anchor_state,
                         target_boxes,
                         anchors,
                         sigma=3.0,
                         alpha=1.0,
                         beta=1.0,
                         is_refine=False):
    '''
    IoU-weighted regression loss computed over the positive anchors.

    Only rows whose anchor_state equals 1 take part. The predicted deltas are
    decoded against the anchors, an approximate loss (smooth-L1 of the angle
    difference minus the IoU of the (x, y, w, h) parts) is formed, and that
    loss is rescaled by a gradient-stopped factor
    (exp(alpha * (1 - overlap) ** beta) - 1) / (approximate loss + EPSILON),
    where overlap comes from iou_rotate_calculate2. The sum is divided by the
    number of positive anchors (at least 1).

    :param preds: regression deltas, decoded with rbbox_transform_inv
    :param anchor_state: per-anchor state; entries equal to 1 are used
    :param target_boxes: target boxes, reshaped to [-1, 6]; the last column is dropped
    :param anchors: anchors; with cfgs.METHOD == 'H' and is_refine False they
        are first converted from four columns to five-parameter boxes
    :param sigma: smooth-L1 transition parameter (switch point is 1 / sigma^2)
    :param alpha: multiplier inside the exponential of the IoU factor
    :param beta: exponent applied to (1 - overlap) in the IoU factor
    :param is_refine: when True the anchors are used as given
    :return: scalar loss tensor
    '''
    if cfgs.METHOD == 'H' and not is_refine:
        # Presumably the anchors arrive as (x1, y1, x2, y2) -- confirm with
        # the caller. Centres come from columns 0/2 and 1/3.
        center_x = (anchors[:, 2] + anchors[:, 0]) / 2
        center_y = (anchors[:, 3] + anchors[:, 1]) / 2
        # The extent along columns 0/2 fills the fourth slot and the extent
        # along columns 1/3 fills the third slot, paired with a fixed -90 angle.
        extent_cols_0_2 = anchors[:, 2] - anchors[:, 0] + 1
        extent_cols_1_3 = anchors[:, 3] - anchors[:, 1] + 1
        angle = -90 * tf.ones_like(center_x)
        anchors = tf.transpose(
            tf.stack([center_x, center_y, extent_cols_1_3, extent_cols_0_2,
                      angle]))

    sigma_squared = sigma**2

    # Flat row indices of the positive anchors.
    positive_rows = tf.where(tf.equal(anchor_state, 1))
    positive_rows = tf.reshape(positive_rows, [-1, ])

    preds = tf.gather(preds, positive_rows)
    target_boxes = tf.gather(target_boxes, positive_rows)
    anchors = tf.gather(anchors, positive_rows)

    # Decode the deltas into boxes.
    boxes_pred = bbox_transform.rbbox_transform_inv(
        boxes=anchors, deltas=preds, scale_factors=cfgs.ANCHOR_SCALE_FACTORS)

    boxes_pred = tf.reshape(boxes_pred, [-1, 5])
    target_boxes = tf.reshape(target_boxes, [-1, 6])
    pred_x, pred_y, pred_w, pred_h, pred_theta = tf.unstack(boxes_pred,
                                                            axis=-1)
    # The sixth target column is not used by this loss.
    gt_x, gt_y, gt_w, gt_h, gt_theta, _ = tf.unstack(target_boxes, axis=-1)

    # Smooth L1 on the angle difference:
    #   f(x) = 0.5 * (sigma * x)^2        if |x| < 1 / sigma^2
    #          |x| - 0.5 / sigma^2        otherwise
    angle_gap = tf.abs(pred_theta - gt_theta)
    in_quadratic_zone = tf.less(angle_gap, 1.0 / sigma_squared)
    quadratic_branch = 0.5 * sigma_squared * tf.pow(angle_gap, 2)
    linear_branch = angle_gap - 0.5 / sigma_squared
    angle_loss = tf.where(in_quadratic_zone, quadratic_branch, linear_branch)

    # IoU helper applied to the four non-angle components of both box sets.
    pred_xywh = tf.transpose(tf.stack([pred_x, pred_y, pred_w, pred_h]))
    gt_xywh = tf.transpose(tf.stack([gt_x, gt_y, gt_w, gt_h]))
    plain_iou = iou_calculate(pred_xywh, gt_xywh)

    iou_loss_appro = angle_loss - plain_iou

    # Overlap from iou_rotate_calculate2, evaluated outside the graph on the
    # five-parameter boxes.
    overlaps = tf.py_func(iou_rotate_calculate2,
                          inp=[
                              tf.reshape(boxes_pred, [-1, 5]),
                              tf.reshape(target_boxes[:, :-1], [-1, 5])
                          ],
                          Tout=[tf.float32])

    overlaps = tf.reshape(overlaps, [-1, 1])
    iou_loss_appro = tf.reshape(iou_loss_appro, [-1, 1])

    # exp(alpha * (1 - overlap)^beta) - 1, divided by the approximate loss.
    # Both parts are gradient-stopped, so the factor only rescales the
    # magnitude; gradients flow through iou_loss_appro alone.
    factor_numerator = tf.stop_gradient(
        tf.exp(alpha * (1 - overlaps)**beta) - 1)
    factor_denominator = tf.stop_gradient(iou_loss_appro) + cfgs.EPSILON
    iou_factor = factor_numerator / factor_denominator

    # Normalise by the number of positive anchors, never less than 1.
    positive_count = tf.stop_gradient(tf.where(tf.equal(anchor_state, 1)))
    positive_count = tf.cast(tf.shape(positive_count)[0], tf.float32)
    positive_count = tf.maximum(1.0, positive_count)

    return tf.reduce_sum(iou_loss_appro * iou_factor) / positive_count
Ejemplo n.º 4
0
    def rpn_find_positive_negative_samples(self, anchors):
        '''
        Assign an RPN target to every anchor: object, background or ignored.

        An anchor is positive when its best IoU is
        >= self.rpn_iou_positive_threshold, or when it attains the maximum IoU
        of some ground-truth box. It is negative when its best IoU lies in
        [0.1, self.rpn_iou_negative_threshold). Positive wins over negative;
        every remaining anchor is ignored.

        :param anchors: [valid_num_of_anchors, 4]; N denotes valid_num_of_anchors
        :return: a 3-tuple
        labels: tf.float32, 1 for positive, 0 for negative, -1 for ignored, [N, ]
        anchors_matched_gtboxes: the gtbox with the highest IoU for each anchor
            (only meaningful for positive anchors), [N, 4]
        object_mask: tf.float32, 1.0 for positive anchors, 0.0 otherwise, [N, ]
        '''
        with tf.variable_scope('rpn_find_positive_negative_samples'):
            # All columns but the last hold the box coordinates.
            gt_coords = tf.reshape(self.gtboxes_and_label[:, :-1], [-1, 4])
            gt_coords = tf.cast(gt_coords, tf.float32)

            # Pairwise overlaps, expected as [N, M].
            overlaps = iou.iou_calculate(anchors, gt_coords)

            best_overlap = tf.reduce_max(overlaps, axis=1)

            # Every anchor starts out as ignored (-1).
            anchor_count = tf.shape(anchors)[0]
            labels = tf.ones(shape=[anchor_count, ], dtype=tf.float32) * (-1)

            best_gt_index = tf.cast(tf.argmax(overlaps, axis=1), tf.int32)

            # Rule 1: the best overlap reaches the positive threshold.
            above_threshold = tf.greater_equal(
                best_overlap, self.rpn_iou_positive_threshold)

            # Rule 2: the anchor ties for the highest overlap of some gtbox,
            # so each gtbox gets a positive even if nothing passes rule 1.
            # NOTE(review): if a gtbox column has IoU 0 with every anchor,
            # the equality below matches all anchors -- confirm that cannot
            # occur (e.g. with zero-padded gtboxes).
            best_per_gtbox = tf.reduce_max(overlaps, 0)
            ties_per_anchor = tf.reduce_sum(
                tf.cast(tf.equal(overlaps, best_per_gtbox), tf.float32),
                axis=1)

            is_positive = tf.logical_or(above_threshold,
                                        tf.cast(ties_per_anchor, tf.bool))

            # Positives move from -1 to 1; everything else stays at -1.
            labels = labels + 2 * tf.cast(is_positive, tf.float32)

            # Non-positive anchors also get their argmax gtbox here; it is
            # irrelevant because object_mask excludes them.
            anchors_matched_gtboxes = tf.gather(gt_coords,
                                                best_gt_index)  # [N, 4]

            # Negatives: best overlap below the negative threshold, but not
            # below 0.1.
            below_negative = tf.less(best_overlap,
                                     self.rpn_iou_negative_threshold)
            is_negative = tf.logical_and(
                below_negative, tf.greater_equal(best_overlap, 0.1))

            # Now: positive is >= 1.0, negative is 0, ignored is -1.0.
            labels = labels + tf.cast(is_negative, tf.float32)

            # Why a positive can exceed 1.0: an anchor made positive by
            # rule 2 may still have a low best overlap. It went from -1 to 1
            # above, and if it also satisfies the negative test it receives
            # another +1 and ends at 2.0. Positive labels therefore lie in
            # [1.0, 2.0], which is why the test below is >= 1.0.
            positive_flag = tf.cast(tf.greater_equal(labels, 1.0), tf.float32)
            ignored_flag = tf.cast(tf.equal(labels, -1.0), tf.float32) * -1

            # Collapse to the final encoding: 1 / 0 / -1.
            labels = positive_flag + ignored_flag
            object_mask = tf.cast(positive_flag, tf.float32)

            return labels, anchors_matched_gtboxes, object_mask
Ejemplo n.º 5
0
            def batch_slice_build_sample(gtboxes_and_label,
                                         rpn_proposals_boxes):
                '''
                Build one fixed-size training minibatch for the detection
                head from a set of ground-truth boxes and RPN proposals.

                Zero rows are trimmed from both inputs, every proposal is
                matched to the gtbox it overlaps most, then positives and
                negatives are randomly sampled. All outputs are zero-padded
                to cfgs.FAST_RCNN_MINIBATCH_SIZE rows.

                :param gtboxes_and_label: four box columns followed by a class id column
                :param rpn_proposals_boxes: proposal boxes, four columns
                :return: a 4-tuple
                sampled proposals, padded with zero rows
                encoded regression targets of the sampled proposals, padded with zero rows
                object_mask: 1.0 for sampled positives, 0.0 for negatives and padding
                gt_class_ids: class id for positives, 0 for negatives and padding
                '''
                batch_size = cfgs.FAST_RCNN_MINIBATCH_SIZE
                positive_rate = cfgs.FAST_RCNN_POSITIVE_RATE

                with tf.name_scope('select_pos_neg_samples'):
                    gt_coords = tf.reshape(gtboxes_and_label[:, :-1], [-1, 4])
                    gt_coords = tf.cast(gt_coords, tf.float32)
                    class_ids = tf.reshape(gtboxes_and_label[:, -1], [-1, ])
                    class_ids = tf.cast(class_ids, tf.int32)

                    # Drop the all-zero rows and keep the class ids aligned
                    # with the gtboxes that survive.
                    gt_coords, kept_rows = boxes_utils.trim_zeros_graph(
                        gt_coords, name="trim_gt_box")  # [M, 4]
                    class_ids = tf.boolean_mask(class_ids, kept_rows)
                    proposals, _ = boxes_utils.trim_zeros_graph(
                        rpn_proposals_boxes, name="trim_rpn_proposal_train")

                    # Pairwise overlaps, expected as [N, M].
                    overlaps = iou.iou_calculate(proposals, gt_coords)
                    best_gt_index = tf.argmax(overlaps, axis=1)
                    best_gt_index = tf.cast(best_gt_index, tf.int32)  # [N, ]
                    best_overlap = tf.reduce_max(overlaps, axis=1)
                    is_object = tf.greater_equal(
                        best_overlap, cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD)
                    is_object = tf.cast(is_object, tf.int32)

                    matched_gtboxes = tf.gather(gt_coords,
                                                best_gt_index)  # [N, 4]
                    class_ids = tf.gather(class_ids, best_gt_index)  # [N, ]
                    # Float indicator; background rows get weight 0 so they
                    # can be excluded from the box regression.
                    object_mask = tf.cast(is_object, tf.float32)  # [N, ]
                    # Background proposals get class id 0.
                    class_ids = class_ids * is_object

                with tf.name_scope('head_train_minibatch'):
                    # Positives: at most batch_size * positive_rate of them,
                    # picked at random.
                    pos_rows = tf.where(tf.equal(object_mask, 1.))
                    pos_rows = tf.reshape(pos_rows, [-1])
                    pos_available = tf.shape(pos_rows)[0]
                    pos_quota = tf.cast(batch_size * positive_rate, tf.int32)
                    num_pos = tf.minimum(pos_available, pos_quota)
                    pos_rows = tf.random_shuffle(pos_rows)
                    pos_rows = tf.slice(pos_rows, [0], [num_pos])

                    # Negatives: (int(1 / positive_rate) - 1) per sampled
                    # positive (1:3 when the rate is 0.25), limited by how
                    # many negatives exist.
                    neg_rows = tf.where(tf.equal(object_mask, 0.))
                    neg_rows = tf.reshape(neg_rows, [-1])
                    batch_per_positive = int(1. / positive_rate)
                    neg_quota = tf.cast(batch_per_positive * num_pos,
                                        tf.int32) - num_pos

                    num_neg = tf.minimum(tf.shape(neg_rows)[0], neg_quota)
                    neg_rows = tf.random_shuffle(neg_rows)
                    neg_rows = tf.slice(neg_rows, [0], [num_neg])

                    # Positives first, negatives after.
                    sampled_rows = tf.concat([pos_rows, neg_rows], axis=0)
                    sampled_gtboxes = tf.gather(matched_gtboxes, sampled_rows)
                    sampled_proposals = tf.gather(proposals, sampled_rows)

                    # Regression targets: gtboxes encoded against proposals.
                    encoded_targets = encode_and_decode.encode_boxes(
                        unencode_boxes=sampled_gtboxes,
                        reference_boxes=sampled_proposals,
                        scale_factors=cfgs.BBOX_STD_DEV)
                    object_mask = tf.gather(object_mask, sampled_rows)
                    class_ids = tf.gather(class_ids, sampled_rows)

                    # Pad every output with zeros up to the minibatch size.
                    gap = tf.cast(batch_size - (num_pos + num_neg),
                                  dtype=tf.int32)
                    zero_boxes = tf.zeros((gap, 4))
                    sampled_proposals = tf.concat(
                        [sampled_proposals, zero_boxes], axis=0)
                    encoded_targets = tf.concat(
                        [encoded_targets, zero_boxes], axis=0)
                    object_mask = tf.pad(object_mask, [(0, gap)])
                    class_ids = tf.pad(class_ids, [(0, gap)])

                return sampled_proposals, encoded_targets, object_mask, class_ids