def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
    """
    Choose a training batch of RoIs out of all proposals.
    Ground truth boxes are appended to the proposals before sampling,
    so #fg is guaranteed to be > 0.

    Args:
        boxes: nx4 region proposals, floatbox
        gt_boxes: mx4, floatbox
        gt_labels: m, int32
            NOTE(review): the gathered labels are concatenated with int64
            zeros below, which suggests they are really int64 -- confirm
            against the caller.

    Returns:
        A BoxProposals instance built from:
        sampled_boxes: tx4 floatbox, the rois
        sampled_labels: t int64 labels, in [0, #class). Positive means foreground.
        fg_inds_wrt_gt: #fg indices, each in range [0, m-1].
            It contains the matching GT of each foreground roi.
    """
    proposal_iou = pairwise_iou(boxes, gt_boxes)     # nxm
    proposal_metrics(proposal_iou)

    # Append the ground truth boxes to the proposals. The identity matrix
    # gives every appended box an overlap of 1 with its own ground truth.
    candidates = tf.concat([boxes, gt_boxes], axis=0)    # (n+m) x 4
    num_gt = tf.shape(gt_boxes)[0]
    gt_self_iou = tf.eye(num_gt)
    iou = tf.concat([proposal_iou, gt_self_iou], axis=0)   # (n+m) x m
    # From here on there are n+m candidates.

    # A candidate is foreground when its best overlap reaches the threshold.
    is_fg = tf.reduce_max(iou, axis=1) >= cfg.FRCNN.FG_THRESH

    # Foreground: a random subset, capped at the configured share of the batch.
    fg_candidates = tf.reshape(tf.where(is_fg), [-1])
    fg_quota = int(cfg.FRCNN.BATCH_PER_IM * cfg.FRCNN.FG_RATIO)
    num_fg = tf.minimum(fg_quota, tf.size(fg_candidates), name='num_fg')
    fg_inds = tf.random_shuffle(fg_candidates)[:num_fg]

    # Background: a random subset filling whatever is left of the batch.
    bg_candidates = tf.reshape(tf.where(tf.logical_not(is_fg)), [-1])
    bg_quota = cfg.FRCNN.BATCH_PER_IM - num_fg
    num_bg = tf.minimum(bg_quota, tf.size(bg_candidates), name='num_bg')
    bg_inds = tf.random_shuffle(bg_candidates)[:num_bg]

    add_moving_summary(num_fg, num_bg)

    # Index of the best-overlapping ground truth per candidate, each in 0~m-1.
    matched_gt = tf.argmax(iou, axis=1)
    fg_inds_wrt_gt = tf.gather(matched_gt, fg_inds)   # num_fg

    # Sampled rois: foreground first, then background (indices w.r.t. n+m).
    sampled_inds = tf.concat([fg_inds, bg_inds], axis=0)
    sampled_boxes = tf.gather(candidates, sampled_inds)

    fg_labels = tf.gather(gt_labels, fg_inds_wrt_gt)
    bg_labels = tf.zeros_like(bg_inds, dtype=tf.int64)
    sampled_labels = tf.concat([fg_labels, bg_labels], axis=0)

    # These are training targets, so no gradient may flow through them.
    return BoxProposals(
        tf.stop_gradient(sampled_boxes, name='sampled_proposal_boxes'),
        tf.stop_gradient(sampled_labels, name='sampled_labels'),
        tf.stop_gradient(fg_inds_wrt_gt))
Exemple #2
0
 def match_box_with_gt(self, boxes, iou_threshold):
     """
     Match every box to the ground truth it overlaps most.

     Args:
         boxes: Nx4
         iou_threshold: a box counts as foreground when its best IoU is
             >= this value
     Returns:
         BoxProposals. In training it is built from the boxes, their labels
         (0 for every non-foreground box) and the matched ground-truth index
         of each foreground box; otherwise only from the boxes.
     """
     if not self.is_training:
         return BoxProposals(boxes)

     with tf.name_scope('match_box_with_gt_{}'.format(iou_threshold)):
         # NxM: IoU between every box and every ground truth
         overlaps = pairwise_iou(boxes, self.gt_boxes)
         # N: the largest IoU of each box
         best_overlap = tf.reduce_max(overlaps, axis=1)
         # N: position of that largest IoU, i.e. the matched ground truth
         best_gt = tf.argmax(overlaps, axis=1)
         # label of the matched ground truth for each box
         best_labels = tf.gather(self.gt_labels, best_gt)
         # boxes whose best IoU reaches the threshold are foreground
         is_fg = best_overlap >= iou_threshold
         # keep the matched ground-truth index of the foreground boxes only
         fg_inds_wrt_gt = tf.boolean_mask(best_gt, is_fg)
         # multiplying by the 0/1 mask zeroes the label of non-foreground
         # boxes; the result is a target, so gradients are stopped
         fg_indicator = tf.cast(is_fg, tf.int64)
         labels_per_box = tf.stop_gradient(best_labels * fg_indicator)
         return BoxProposals(boxes, labels_per_box, fg_inds_wrt_gt)
Exemple #3
0
def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
    """
    Draw a batch of RoIs from the proposals for training.
    Ground truth boxes are added as RoIs as well, which guarantees #fg > 0.

    Args:
        boxes: nx4 region proposals, floatbox
        gt_boxes: mx4, floatbox
        gt_labels: m, int32

    Returns:
        sampled_boxes: tx4 floatbox, the rois
        sampled_labels: t labels, in [0, #class-1]. Positive means foreground.
        fg_inds_wrt_gt: #fg indices, each in range [0, m-1].
            It contains the matching GT of each foreground roi.
    """
    overlaps = pairwise_iou(boxes, gt_boxes)     # nxm
    proposal_metrics(overlaps)

    # Ground truth boxes become extra proposals; the identity block gives
    # each of them an overlap of 1 with itself.
    rois = tf.concat([boxes, gt_boxes], axis=0)    # (n+m) x 4
    gt_count = tf.shape(gt_boxes)[0]
    overlaps = tf.concat([overlaps, tf.eye(gt_count)], axis=0)   # (n+m) x m
    # n+m proposals from this point on

    def _split_fg_bg(roi_overlaps):
        """Randomly pick foreground and background indices w.r.t. the rois."""
        above_thresh = tf.reduce_max(roi_overlaps, axis=1) >= cfg.FRCNN.FG_THRESH

        # foreground is limited to a fixed share of the batch
        fg_pool = tf.reshape(tf.where(above_thresh), [-1])
        fg_limit = int(cfg.FRCNN.BATCH_PER_IM * cfg.FRCNN.FG_RATIO)
        num_fg = tf.minimum(fg_limit, tf.size(fg_pool), name='num_fg')
        picked_fg = tf.random_shuffle(fg_pool)[:num_fg]

        # background takes the remaining slots
        bg_pool = tf.reshape(tf.where(tf.logical_not(above_thresh)), [-1])
        bg_limit = cfg.FRCNN.BATCH_PER_IM - num_fg
        num_bg = tf.minimum(bg_limit, tf.size(bg_pool), name='num_bg')
        picked_bg = tf.random_shuffle(bg_pool)[:num_bg]

        add_moving_summary(num_fg, num_bg)
        return picked_fg, picked_bg

    fg_inds, bg_inds = _split_fg_bg(overlaps)

    # best ground truth of every proposal, each in 0~m-1
    nearest_gt = tf.argmax(overlaps, axis=1)
    fg_inds_wrt_gt = tf.gather(nearest_gt, fg_inds)   # num_fg

    # indices w.r.t. all n+m proposal boxes, foreground first
    chosen = tf.concat([fg_inds, bg_inds], axis=0)
    ret_boxes = tf.gather(rois, chosen)

    labels_fg = tf.gather(gt_labels, fg_inds_wrt_gt)
    labels_bg = tf.zeros_like(bg_inds, dtype=tf.int64)
    ret_labels = tf.concat([labels_fg, labels_bg], axis=0)

    # training targets: block the gradient
    out_boxes = tf.stop_gradient(ret_boxes, name='sampled_proposal_boxes')
    out_labels = tf.stop_gradient(ret_labels, name='sampled_labels')
    out_fg_inds = tf.stop_gradient(fg_inds_wrt_gt)
    return out_boxes, out_labels, out_fg_inds
 def match_box_with_gt(self, boxes, gt_boxes, gt_labels, iou_threshold):
     """
     Match every box to the ground truth it overlaps most.

     Args:
         boxes: Nx4
         gt_boxes: Mx4 ground-truth boxes
         gt_labels: M ground-truth labels
         iou_threshold: a box counts as foreground when its best IoU is
             >= this value
     Returns:
         A 5-element list
         [boxes, labels_per_box, fg_inds_wrt_gt, gt_boxes, gt_labels].
         labels_per_box is 0 for every non-foreground box and
         fg_inds_wrt_gt holds the matched ground-truth index of each
         foreground box. When not training, every entry except boxes is None.
     """
     from utils.box_ops import pairwise_iou
     if self.is_training:
         with tf.name_scope('match_box_with_gt_{}'.format(iou_threshold)):
             iou = pairwise_iou(boxes, gt_boxes)  # NxM
             max_iou_per_box = tf.reduce_max(iou, axis=1)  # N
             best_iou_ind = tf.argmax(iou, axis=1)  # N
             labels_per_box = tf.gather(gt_labels, best_iou_ind)
             fg_mask = max_iou_per_box >= iou_threshold
             fg_inds_wrt_gt = tf.boolean_mask(best_iou_ind, fg_mask)
             # tf.cast replaces the deprecated tf.to_int64. Multiplying by
             # the 0/1 mask zeroes the label of every non-foreground box.
             labels_per_box = tf.stop_gradient(
                 labels_per_box * tf.cast(fg_mask, tf.int64))
             return [
                 boxes, labels_per_box, fg_inds_wrt_gt, gt_boxes, gt_labels
             ]
     else:
         return [boxes, None, None, None, None]
 def match_box_with_gt(self, boxes, iou_threshold):
     """
     Label every box with the class of the ground truth it overlaps most.

     Args:
         boxes: Nx4
         iou_threshold: a box is foreground when its best IoU is >= this value
     Returns:
         BoxProposals. In training it is built from the boxes, their labels
         (0 for non-foreground boxes) and the matched ground-truth index of
         each foreground box; otherwise only from the boxes.
     """
     if not self.is_training:
         return BoxProposals(boxes)

     scope_name = "match_box_with_gt_{}".format(iou_threshold)
     with tf.name_scope(scope_name):
         overlaps = pairwise_iou(boxes, self.gt_boxes)  # NxM
         top_overlap = tf.reduce_max(overlaps, axis=1)  # N
         top_gt = tf.argmax(overlaps, axis=1)  # N
         top_labels = tf.gather(self.gt_labels, top_gt)
         is_fg = top_overlap >= iou_threshold
         fg_inds_wrt_gt = tf.boolean_mask(top_gt, is_fg)
         # The 0/1 mask zeroes the label of every non-foreground box.
         fg_indicator = tf.cast(is_fg, tf.int64)
         labels_per_box = tf.stop_gradient(top_labels * fg_indicator)
         return BoxProposals(boxes, labels_per_box, fg_inds_wrt_gt)
 def match_box_with_gt(self, boxes, iou_threshold):
     """
     Match every box to the ground truth it overlaps most.

     Args:
         boxes: Nx4
         iou_threshold: a box counts as foreground when its best IoU is
             >= this value
     Returns:
         BoxProposals. In training it is built from the boxes, their labels
         (0 for every non-foreground box), the matched ground-truth index of
         each foreground box, and self.gt_boxes / self.gt_labels; otherwise
         only from the boxes.
     """
     if self.is_training:
         with tf.name_scope('match_box_with_gt_{}'.format(iou_threshold)):
             iou = pairwise_iou(boxes, self.gt_boxes)  # NxM
             max_iou_per_box = tf.reduce_max(iou, axis=1)  # N
             best_iou_ind = tf.argmax(iou, axis=1)  # N
             labels_per_box = tf.gather(self.gt_labels, best_iou_ind)
             fg_mask = max_iou_per_box >= iou_threshold
             fg_inds_wrt_gt = tf.boolean_mask(best_iou_ind, fg_mask)
             # tf.cast replaces the deprecated tf.to_int64. Multiplying by
             # the 0/1 mask zeroes the label of every non-foreground box.
             labels_per_box = tf.stop_gradient(
                 labels_per_box * tf.cast(fg_mask, tf.int64))
             return BoxProposals(
                 boxes, labels_per_box, fg_inds_wrt_gt, self.gt_boxes, self.gt_labels)
     else:
         return BoxProposals(boxes)
 def match_box_with_gt(self, boxes, iou_threshold):
     """
     Match every box to the ground truth it overlaps most, without calling
     argmax when there is no ground truth at all.

     Args:
         boxes: Nx4
         iou_threshold: a box is foreground when its best IoU is >= this value
     Returns:
         BoxProposals. In training it is built from the boxes, their labels
         (0 for non-foreground boxes) and the matched ground-truth index of
         each foreground box; otherwise only from the boxes.
     """
     if not self.training:
         return BoxProposals(boxes)

     with tf.name_scope('match_box_with_gt_{}'.format(iou_threshold)):
         overlaps = pairwise_iou(boxes, self.gt_boxes)  # NxM
         best_overlap = tf.reduce_max(overlaps, axis=1)  # N

         def _argmax_over_gt():
             # #proposal, each in 0~m-1
             return tf.argmax(overlaps, axis=1)

         def _all_zero_indices():
             # used when M == 0: one index 0 per box
             return tf.zeros([tf.shape(overlaps)[0]], dtype=tf.int64)

         has_gt = tf.shape(overlaps)[1] > 0
         matched_gt = tf.cond(has_gt, _argmax_over_gt, _all_zero_indices)

         matched_labels = tf.gather(self.gt_labels, matched_gt)
         is_fg = best_overlap >= iou_threshold
         fg_inds_wrt_gt = tf.boolean_mask(matched_gt, is_fg)
         # The 0/1 mask zeroes the label of every non-foreground box.
         fg_indicator = tf.cast(is_fg, tf.int64)
         labels_per_box = tf.stop_gradient(matched_labels * fg_indicator)
         return BoxProposals(boxes, labels_per_box, fg_inds_wrt_gt)
    def losses(self):
        """
        Assemble the inputs of fastrcnn_losses_iou and return its result.

        Besides the usual label and box-regression inputs, an IoU target is
        computed for every decoded output box: its largest IoU with any
        ground-truth box.
        """
        # Box regression targets of the foreground proposals, scaled by the
        # regression weights.
        raw_box_targets = encode_bbox_target(
            self.proposals.matched_gt_boxes(),
            self.proposals.fg_boxes())
        box_targets = raw_box_targets * self.bbox_regression_weights

        # IoU target: best overlap of each decoded box with the ground truth.
        flat_decoded = tf.reshape(self.decoded_output_boxes(), [-1, 4])
        flat_gt = tf.reshape(self.proposals.gt_boxes, [-1, 4])
        overlaps = pairwise_iou(flat_decoded, flat_gt)
        best_overlap = tf.reduce_max(overlaps, axis=1)

        # When no proposal is labelled foreground, every IoU target is
        # forced to 0.
        is_positive = tf.stop_gradient(tf.not_equal(self.proposals.labels, 0))
        num_positive = tf.identity(
            tf.count_nonzero(is_positive, dtype=tf.int32))
        no_positive = tf.equal(num_positive, 0)
        best_overlap = tf.where(
            no_positive, tf.zeros_like(best_overlap), best_overlap)
        iou_targets = tf.stop_gradient(tf.reshape(best_overlap, [-1]))

        labels = self.proposals.labels
        label_logits = self.label_logits
        flat_iou_logits = tf.reshape(self.iou_logits, [-1])
        fg_box_logits = self.fg_box_logits()
        return fastrcnn_losses_iou(
            labels, label_logits, iou_targets,
            flat_iou_logits, box_targets, fg_box_logits)
Exemple #9
0
def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
    """
    Sample RoIs for training and compute their box regression targets.

    Args:
        boxes: nx4 region proposals, floatbox
        gt_boxes: mx4, floatbox
        gt_labels: m, int32

    Returns:
        sampled_boxes: tx4 floatbox, the rois
        target_boxes: tx4 encoded box, the regression target
        labels: t labels
    """
    @under_name_scope()
    def assign_class_to_roi(iou, gt_boxes, gt_labels):
        """
        Match every roi to the ground truth it overlaps most.

        Args:
            iou: nxm (nr_proposal x nr_gt)
        Returns:
            fg_mask: n boolean, whether each roibox is fg
            roi_labels: n int32, best label for each roi box
            best_gt_boxes: nx4
        """
        nearest_gt = tf.argmax(iou, axis=1)   # n, index of the best gt
        nearest_iou = tf.reduce_max(iou, axis=1)   # n
        matched_boxes = tf.gather(gt_boxes, nearest_gt)   # nx4
        matched_labels = tf.gather(gt_labels, nearest_gt)     # n

        is_fg = nearest_iou >= config.FASTRCNN_FG_THRESH
        return is_fg, matched_labels, matched_boxes

    iou = pairwise_iou(boxes, gt_boxes)     # nxm

    with tf.name_scope('proposal_metrics'):
        # For summaries only: how well is each gt covered by its best roi?
        coverage = tf.reduce_max(iou, axis=0)
        mean_coverage = tf.reduce_mean(coverage, name='best_iou_per_gt')
        summaries = [mean_coverage]
        with tf.device('/cpu:0'):
            for th in [0.3, 0.5]:
                hits = tf.count_nonzero(coverage >= th)
                total = tf.size(coverage, out_type=tf.int64)
                recall = tf.truediv(
                    hits, total, name='recall_iou{}'.format(th))
                summaries.append(recall)
        add_moving_summary(*summaries)

    # n, n, nx4
    fg_mask, roi_labels, best_gt_boxes = assign_class_to_roi(iou, gt_boxes, gt_labels)

    # Foreground pool: proposals above the threshold plus every gt box.
    # The gt boxes sit after the n proposals, hence the offset by n.
    proposal_fg = tf.reshape(tf.where(fg_mask), [-1])
    gt_positions = tf.range(tf.size(gt_labels)) + tf.shape(boxes)[0]
    fg_pool = tf.concat([proposal_fg, tf.cast(gt_positions, tf.int64)], 0)
    fg_available = tf.size(fg_pool)
    fg_quota = int(config.FASTRCNN_BATCH_PER_IM * config.FASTRCNN_FG_RATIO)
    num_fg = tf.minimum(fg_quota, fg_available, name='num_fg')
    fg_inds = tf.slice(tf.random_shuffle(fg_pool), [0], [num_fg])

    # Background pool: proposals below the threshold fill the rest of the batch.
    bg_pool = tf.where(tf.logical_not(fg_mask))[:, 0]
    bg_available = tf.size(bg_pool)
    num_bg = tf.minimum(
        config.FASTRCNN_BATCH_PER_IM - num_fg, bg_available, name='num_bg')
    bg_inds = tf.slice(tf.random_shuffle(bg_pool), [0], [num_bg])

    add_moving_summary(num_fg, num_bg)

    # Lay everything out over all n+m boxes; a gt box is matched to itself.
    candidate_boxes = tf.concat([boxes, gt_boxes], axis=0)
    candidate_targets = tf.concat([best_gt_boxes, gt_boxes], axis=0)
    candidate_labels = tf.concat([roi_labels, gt_labels], axis=0)

    sampled = tf.concat([fg_inds, bg_inds], axis=0)   # ind in all n+m boxes
    ret_boxes = tf.gather(candidate_boxes, sampled, name='sampled_boxes')
    sampled_targets = tf.gather(candidate_targets, sampled)
    encoded = encode_bbox_target(sampled_targets, ret_boxes)
    reg_weights = tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS)
    ret_encoded_boxes = encoded * reg_weights
    # bg boxes will not be trained on

    fg_labels = tf.gather(candidate_labels, fg_inds)
    bg_labels = tf.zeros_like(bg_inds, dtype=tf.int64)
    ret_labels = tf.concat(
        [fg_labels, bg_labels], axis=0, name='sampled_labels')
    return ret_boxes, tf.stop_gradient(ret_encoded_boxes), tf.stop_gradient(ret_labels)
Exemple #10
0
def sample_sniper_targets(boxes, gt_boxes, gt_labels):
    """
    Sample some ROIs from all proposals for training.
    #fg is guaranteed to be > 0, because ground truth boxes are added as RoIs.

    NOTE: no filtering of proposals by scale range happens here. A numpy
    prototype of such a filter (based on cfg.SNIPER.VALID_RANGES) used to be
    kept in this function as commented-out code and was never enabled.

    Args:
        boxes: nx4 region proposals, floatbox
        gt_boxes: mx4, floatbox
        gt_labels: m, int32

    Returns:
        sampled_boxes: tx4 floatbox, the rois
        sampled_labels: t labels, in [0, #class-1]. Positive means foreground.
        fg_inds_wrt_gt: #fg indices, each in range [0, m-1].
            It contains the matching GT of each foreground roi.
    """
    proposal_iou = pairwise_iou(boxes, gt_boxes)  # nxm
    proposal_metrics(proposal_iou)

    # Ground truth boxes join the proposals; the identity block gives each
    # of them an overlap of 1 with itself.
    all_boxes = tf.concat([boxes, gt_boxes], axis=0)  # (n+m) x 4
    num_gt = tf.shape(gt_boxes)[0]
    iou = tf.concat([proposal_iou, tf.eye(num_gt)], axis=0)  # (n+m) x m
    # n+m proposals from now on

    # Foreground = best overlap reaches the threshold.
    fg_mask = tf.reduce_max(iou, axis=1) >= cfg.FRCNN.FG_THRESH

    # Random foreground subset, capped at its share of the batch.
    fg_all = tf.reshape(tf.where(fg_mask), [-1])
    max_fg = int(cfg.FRCNN.BATCH_PER_IM * cfg.FRCNN.FG_RATIO)
    num_fg = tf.minimum(max_fg, tf.size(fg_all), name='num_fg')
    fg_inds = tf.random_shuffle(fg_all)[:num_fg]

    # Random background subset for the remaining slots.
    bg_all = tf.reshape(tf.where(tf.logical_not(fg_mask)), [-1])
    max_bg = cfg.FRCNN.BATCH_PER_IM - num_fg
    num_bg = tf.minimum(max_bg, tf.size(bg_all), name='num_bg')
    bg_inds = tf.random_shuffle(bg_all)[:num_bg]

    add_moving_summary(num_fg, num_bg)

    # Matched ground truth of every proposal, each in 0~m-1.
    gt_of_proposal = tf.argmax(iou, axis=1)
    fg_inds_wrt_gt = tf.gather(gt_of_proposal, fg_inds)  # num_fg

    # Indices w.r.t. all n+m proposal boxes, foreground first.
    picked = tf.concat([fg_inds, bg_inds], axis=0)
    ret_boxes = tf.gather(all_boxes, picked)

    fg_labels = tf.gather(gt_labels, fg_inds_wrt_gt)
    bg_labels = tf.zeros_like(bg_inds, dtype=tf.int64)
    ret_labels = tf.concat([fg_labels, bg_labels], axis=0)

    # Training targets: stop the gradient.
    sampled_boxes = tf.stop_gradient(ret_boxes, name='sampled_proposal_boxes')
    sampled_labels = tf.stop_gradient(ret_labels, name='sampled_labels')
    return sampled_boxes, sampled_labels, tf.stop_gradient(fg_inds_wrt_gt)
Exemple #11
0
    def build_graph(self, *inputs):
        """
        Build the detection + re-identification graph on a ResNet C4 backbone.

        Args:
            inputs: seven tensors, unpacked in this order: image,
                anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_ids,
                orig_shape.

        Returns:
            In training: the 'total_cost' tensor, i.e. the sum of the RPN
            losses, the Fast R-CNN losses, the normalized re-id loss and the
            weight decay cost.
            In inference: nothing is returned. The results are exposed as the
            named tensors 'rescaled_final_boxes', 'feature_vector' and
            're_id_probs'.
        """
        is_training = get_current_tower_context().is_training
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_ids, orig_shape = inputs
        image = self.preprocess(image)  # 1CHW

        featuremap = resnet_c4_backbone(image,
                                        cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap,
                                                    cfg.RPN.HEAD_DIM,
                                                    cfg.RPN.NUM_ANCHOR)

        anchors = RPNAnchors(get_all_anchors(), anchor_labels, anchor_boxes)
        anchors = anchors.narrow_to(featuremap)

        image_shape2d = tf.shape(image)[2:]  # h,w
        # decode into actual image coordinates
        pred_boxes_decoded = anchors.decode_logits(
            rpn_box_logits)  # fHxfWxNAx4, floatbox
        # the pre-/post-NMS top-k limits differ between training and inference
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            tf.reshape(pred_boxes_decoded, [-1, 4]),
            tf.reshape(rpn_label_logits,
                       [-1]), image_shape2d, cfg.RPN.TRAIN_PRE_NMS_TOPK
            if is_training else cfg.RPN.TEST_PRE_NMS_TOPK,
            cfg.RPN.TRAIN_POST_NMS_TOPK
            if is_training else cfg.RPN.TEST_POST_NMS_TOPK)

        if is_training:
            # sample proposal boxes in training
            rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
        else:
            # The boxes to be used to crop RoIs.
            # Use all proposal boxes in inference
            rcnn_boxes = proposal_boxes

        # map image coordinates onto the feature map
        boxes_on_featuremap = rcnn_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
        # size? #proposals*h*w*c?
        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        feature_fastrcnn = resnet_conv5(
            roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
        # Keep C5 feature to be shared with mask branch
        feature_gap = GlobalAvgPooling('gap',
                                       feature_fastrcnn,
                                       data_format='channels_first')
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
            'fastrcnn', feature_gap, cfg.DATA.NUM_CLASS)

        if is_training:
            # rpn loss
            rpn_label_loss, rpn_box_loss = rpn_losses(
                anchors.gt_labels, anchors.encoded_gt_boxes(),
                rpn_label_logits, rpn_box_logits)

            # fastrcnn loss
            matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

            fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0),
                                            [-1])  # fg inds w.r.t all samples
            # outputs from fg proposals
            fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
            fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits,
                                               fg_inds_wrt_sample)

            # rcnn_labels: the labels of the proposals
            # fg_sampled_boxes: fg proposals
            # matched_gt_boxes: just like RPN, the gt boxes
            #                   that match the corresponding fg proposals
            fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
                image, rcnn_labels, fg_sampled_boxes, matched_gt_boxes,
                fastrcnn_label_logits, fg_fastrcnn_box_logits)

            # acquire pred for re-id training
            # turning NMS off gives re-id branch more training samples
            if cfg.RE_ID.NMS:
                boxes, final_labels, final_probs = self.fastrcnn_inference(
                    image_shape2d, rcnn_boxes, fastrcnn_label_logits,
                    fastrcnn_box_logits)
            else:
                boxes, final_labels, final_probs = self.fastrcnn_inference_id(
                    image_shape2d, rcnn_boxes, fastrcnn_label_logits,
                    fastrcnn_box_logits)
            # scale = tf.sqrt(tf.cast(image_shape2d[0], tf.float32) / tf.cast(orig_shape[0], tf.float32) *
            #                 tf.cast(image_shape2d[1], tf.float32) / tf.cast(orig_shape[1], tf.float32))
            # final_boxes = boxes / scale
            # # boxes are already clipped inside the graph, but after the floating point scaling, this may not be true any more.
            # final_boxes = tf_clip_boxes(final_boxes, orig_shape)

            # IOU, discard bad dets, assign re-id labels
            # the results are already NMS so no need to NMS again
            # crop from conv4 with dets (maybe plus gts)
            # feedforward re-id branch
            # resizing during ROIalign?
            iou = pairwise_iou(boxes, gt_boxes)  # are the gt boxes resized?
            # keep only the detections whose best IoU reaches the re-id threshold
            tp_mask = tf.reduce_max(iou, axis=1) >= cfg.RE_ID.IOU_THRESH
            iou = tf.boolean_mask(iou, tp_mask)

            # return iou to debug

            def re_id_loss(pred_boxes, pred_matching_gt_ids, featuremap):
                """
                Run the re-id head on pred_boxes and return
                (mean cross-entropy against pred_matching_gt_ids,
                updated running count of samples used).
                """
                with tf.variable_scope('id_head'):
                    # non-trainable counter, increased by the number of boxes
                    num_of_samples_used = tf.get_variable(
                        'num_of_samples_used', initializer=0, trainable=False)
                    num_of_samples_used = num_of_samples_used.assign_add(
                        tf.shape(pred_boxes)[0])

                    boxes_on_featuremap = pred_boxes * (1.0 /
                                                        cfg.RPN.ANCHOR_STRIDE)
                    # name scope?
                    # stop gradient
                    roi_resized = roi_align(featuremap, boxes_on_featuremap,
                                            14)
                    feature_idhead = resnet_conv5(
                        roi_resized,
                        cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
                    feature_gap = GlobalAvgPooling(
                        'gap', feature_idhead, data_format='channels_first')

                    init = tf.variance_scaling_initializer()
                    hidden = FullyConnected('fc6',
                                            feature_gap,
                                            1024,
                                            kernel_initializer=init,
                                            activation=tf.nn.relu)
                    hidden = FullyConnected('fc7',
                                            hidden,
                                            1024,
                                            kernel_initializer=init,
                                            activation=tf.nn.relu)
                    hidden = FullyConnected('fc8',
                                            hidden,
                                            256,
                                            kernel_initializer=init,
                                            activation=tf.nn.relu)
                    id_logits = FullyConnected(
                        'class',
                        hidden,
                        cfg.DATA.NUM_ID,
                        kernel_initializer=tf.random_normal_initializer(
                            stddev=0.01))

                label_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=pred_matching_gt_ids, logits=id_logits)
                label_loss = tf.reduce_mean(label_loss, name='label_loss')

                return label_loss, num_of_samples_used

            def check_unid_pedes(iou, gt_ids, boxes, tp_mask, featuremap):
                """
                Drop the detections matched to id 1, then compute the re-id
                loss on the rest, or the constant fallback if none is left.
                """
                pred_gt_ind = tf.argmax(iou, axis=1)
                # output following tensors
                # pick out the -2 class here
                pred_matching_gt_ids = tf.gather(gt_ids, pred_gt_ind)
                pred_boxes = tf.boolean_mask(boxes, tp_mask)
                # label 1 corresponds to unid pedes
                # NOTE: despite its name, unid_ind is True for the boxes whose
                # matched id is NOT 1, i.e. the boxes that are kept.
                unid_ind = tf.not_equal(pred_matching_gt_ids, 1)
                pred_matching_gt_ids = tf.boolean_mask(pred_matching_gt_ids,
                                                       unid_ind)
                pred_boxes = tf.boolean_mask(pred_boxes, unid_ind)

                # re_id_loss here still refers to the function defined above:
                # this lambda runs while tf.cond is being built, before the
                # name is rebound to a tensor further down.
                ret = tf.cond(
                    tf.equal(tf.size(pred_boxes), 0), lambda:
                    (tf.constant(cfg.RE_ID.STABLE_LOSS), tf.constant(0)),
                    lambda: re_id_loss(pred_boxes, pred_matching_gt_ids,
                                       featuremap))
                return ret

            with tf.name_scope('id_head'):
                # no detection reaches cfg.RE_ID.IOU_THRESH: fall back to the
                # constant cfg.RE_ID.STABLE_LOSS and a sample count of 0
                # NOTE: from here on the name re_id_loss is a tensor and no
                # longer the helper function defined above.
                re_id_loss, num_of_samples_used = tf.cond(
                    tf.equal(tf.size(iou), 0), lambda:
                    (tf.constant(cfg.RE_ID.STABLE_LOSS), tf.constant(0)),
                    lambda: check_unid_pedes(iou, gt_ids, boxes, tp_mask,
                                             featuremap))
                add_tensor_summary(num_of_samples_used, ['scalar'],
                                   name='num_of_samples_used')
            # for debug, use tensor name to take out the handle
            # return re_id_loss

            # pred_gt_ind = tf.argmax(iou, axis=1)
            # # output following tensors
            # # pick out the -2 class here
            # pred_gt_ids = tf.gather(gt_ids, pred_gt_ind)
            # pred_boxes = tf.boolean_mask(boxes, tp_mask)
            # unid_ind = pred_gt_ids != 1

            # return unid_ind

            # return tf.shape(boxes)[0]

            # keep the raw loss for the summaries, then normalize it
            unnormed_id_loss = tf.identity(re_id_loss, name='unnormed_id_loss')
            re_id_loss = tf.divide(re_id_loss, cfg.RE_ID.LOSS_NORMALIZATION,
                                   're_id_loss')
            add_moving_summary(unnormed_id_loss)
            add_moving_summary(re_id_loss)

            wd_cost = regularize_cost('.*/W',
                                      l2_regularizer(cfg.TRAIN.WEIGHT_DECAY),
                                      name='wd_cost')

            # weights on the losses?
            total_cost = tf.add_n([
                rpn_label_loss, rpn_box_loss, fastrcnn_label_loss,
                fastrcnn_box_loss, re_id_loss, wd_cost
            ], 'total_cost')

            add_moving_summary(total_cost, wd_cost)
            return total_cost
        else:
            if cfg.RE_ID.QUERY_EVAL:
                # resize the gt_boxes in dataflow
                final_boxes = gt_boxes
            else:
                final_boxes, final_labels, _ = self.fastrcnn_inference(
                    image_shape2d, rcnn_boxes, fastrcnn_label_logits,
                    fastrcnn_box_logits)

            # same layer names as the training-time re-id head, but without
            # the explicit variance-scaling initializer on fc6/fc7/fc8
            with tf.variable_scope('id_head'):
                preds_on_featuremap = final_boxes * (1.0 /
                                                     cfg.RPN.ANCHOR_STRIDE)
                # name scope?
                # stop gradient
                roi_resized = roi_align(featuremap, preds_on_featuremap, 14)
                feature_idhead = resnet_conv5(
                    roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
                feature_gap = GlobalAvgPooling('gap',
                                               feature_idhead,
                                               data_format='channels_first')

                hidden = FullyConnected('fc6',
                                        feature_gap,
                                        1024,
                                        activation=tf.nn.relu)
                hidden = FullyConnected('fc7',
                                        hidden,
                                        1024,
                                        activation=tf.nn.relu)
                fv = FullyConnected('fc8', hidden, 256, activation=tf.nn.relu)
                id_logits = FullyConnected(
                    'class',
                    fv,
                    cfg.DATA.NUM_ID,
                    kernel_initializer=tf.random_normal_initializer(
                        stddev=0.01))

            # geometric mean of the height and width ratios between the
            # network input and the original image
            scale = tf.sqrt(
                tf.cast(image_shape2d[0], tf.float32) /
                tf.cast(orig_shape[0], tf.float32) *
                tf.cast(image_shape2d[1], tf.float32) /
                tf.cast(orig_shape[1], tf.float32))
            rescaled_final_boxes = final_boxes / scale
            # boxes are already clipped inside the graph, but after the floating point scaling, this may not be true any more.
            # rescaled_final_boxes_pre_clip = tf.identity(rescaled_final_boxes, name='re_boxes_pre_clip')
            rescaled_final_boxes = tf_clip_boxes(rescaled_final_boxes,
                                                 orig_shape)
            rescaled_final_boxes = tf.identity(rescaled_final_boxes,
                                               'rescaled_final_boxes')

            # named outputs; nothing is returned in inference mode
            fv = tf.identity(fv, name='feature_vector')
            prob = tf.nn.softmax(id_logits, name='re_id_probs')
Exemple #12
0
def sample_cascade_rcnn_targets(boxes, gt_boxes, gt_labels, stage_num):
    """
    Sample some ROIs from all proposals for training one cascade stage.
    #fg is guaranteed to be > 0, because ground truth boxes are added as RoIs.

    Args:
        boxes: nx4 region proposals, floatbox
        gt_boxes: mx4, floatbox
        gt_labels: m, int32
        stage_num: cascade stage, one of 1, 2 or 3. It selects the foreground
            IoU threshold (cfg.CASCADERCNN.FG_THRESH_1ST / _2ND / _3RD) and
            the suffix ('_1st' / '_2nd' / '_3rd') appended to the tensor names.

    Returns:
        sampled_boxes: tx4 floatbox, the rois
        sampled_labels: t labels, in [0, #class-1]. Positive means foreground.
        fg_inds_wrt_gt: #fg indices, each in range [0, m-1].
            It contains the matching GT of each foreground roi.

    Raises:
        ValueError: if stage_num is not 1, 2 or 3.
    """
    # Validate the stage first: an unsupported stage used to leave fg_thresh
    # unbound and only failed later, deep inside the graph construction.
    if stage_num == 1:
        prefix = '_1st'
        fg_thresh = cfg.CASCADERCNN.FG_THRESH_1ST
    elif stage_num == 2:
        prefix = '_2nd'
        fg_thresh = cfg.CASCADERCNN.FG_THRESH_2ND
    elif stage_num == 3:
        prefix = '_3rd'
        fg_thresh = cfg.CASCADERCNN.FG_THRESH_3RD
    else:
        raise ValueError(
            'stage_num must be 1, 2 or 3, got {!r}'.format(stage_num))

    iou = pairwise_iou(boxes, gt_boxes)  # nxm
    proposal_metrics_cascade(iou, stage_num)

    # add ground truth as proposals as well; the identity block gives each
    # ground truth box an overlap of 1 with itself
    boxes = tf.concat([boxes, gt_boxes], axis=0)  # (n+m) x 4
    iou = tf.concat([iou, tf.eye(tf.shape(gt_boxes)[0])], axis=0)  # (n+m) x m

    # #proposal=n+m from now on

    def sample_fg_bg(iou, prefix):
        """Randomly pick fg and bg indices w.r.t. the n+m proposals."""
        fg_mask = tf.reduce_max(iou, axis=1) >= fg_thresh

        # foreground is capped at its configured share of the batch
        fg_inds = tf.reshape(tf.where(fg_mask), [-1])
        num_fg = tf.minimum(int(cfg.FRCNN.BATCH_PER_IM * cfg.FRCNN.FG_RATIO),
                            tf.size(fg_inds),
                            name='num_fg' + prefix)
        fg_inds = tf.random_shuffle(fg_inds)[:num_fg]

        # background fills the remaining slots
        bg_inds = tf.reshape(tf.where(tf.logical_not(fg_mask)), [-1])
        num_bg = tf.minimum(cfg.FRCNN.BATCH_PER_IM - num_fg,
                            tf.size(bg_inds),
                            name='num_bg' + prefix)
        bg_inds = tf.random_shuffle(bg_inds)[:num_bg]

        add_moving_summary(num_fg, num_bg)
        return fg_inds, bg_inds

    fg_inds, bg_inds = sample_fg_bg(iou, prefix)
    # fg,bg indices w.r.t proposals

    best_iou_ind = tf.argmax(iou, axis=1)  # #proposal, each in 0~m-1
    fg_inds_wrt_gt = tf.gather(best_iou_ind, fg_inds)  # num_fg

    all_indices = tf.concat([fg_inds, bg_inds],
                            axis=0)  # indices w.r.t all n+m proposal boxes
    ret_boxes = tf.gather(boxes, all_indices)

    ret_labels = tf.concat([
        tf.gather(gt_labels, fg_inds_wrt_gt),
        tf.zeros_like(bg_inds, dtype=tf.int64)
    ], axis=0)
    # stop the gradient -- they are meant to be training targets
    return tf.stop_gradient(ret_boxes, name='sampled_proposal_boxes' + prefix), \
        tf.stop_gradient(ret_labels, name='sampled_labels' + prefix), \
        tf.stop_gradient(fg_inds_wrt_gt)
Exemple #13
0
def _mean_of_hardest(losses, max_selected):
    """
    Average the largest entries of a 1-D loss tensor (hard example mining).

    Args:
        losses: 1-D float tensor of per-element losses. May be empty.
        max_selected: int, upper bound on how many of the largest losses to keep.

    Returns:
        Scalar float tensor: the mean of every element whose loss is >= the
        k-th largest loss, with k = min(max_selected, size(losses)). Ties at
        the threshold are all kept, so more than k elements may contribute.
        Evaluates to 0 when nothing is selected (e.g. `losses` is empty).
    """
    # top_k fails when k exceeds the number of elements, so clamp it.
    k = tf.minimum(tf.cast(max_selected, tf.int32), tf.size(losses))
    top_vals, _ = tf.nn.top_k(losses, k=k)
    # top_k returns values in descending order, so their minimum is the k-th
    # largest loss. Unlike `top_vals[-1]`, reduce_min does not fail at run
    # time when `top_vals` is empty.
    threshold = tf.reduce_min(top_vals)
    weights = tf.cast(losses >= threshold, tf.float32)
    # For non-empty input at least one element passes the threshold, so the
    # clamp only matters for the empty case, where it turns 0/0 (NaN) into 0.
    num_selected = tf.maximum(tf.reduce_sum(weights), 1.)
    return tf.reduce_sum(losses * weights) / num_selected


def rpn_losses_iou(anchor_labels, anchor_boxes, gt_boxes, rpn_boxes,
                   label_logits, box_logits, iou_logits):
    """
    Compute the RPN objectness, box-regression and IoU-prediction losses.

    The label and IoU losses are averaged over the highest-loss elements only
    (at most cfg.FRCNN.BATCH_PER_IM of them, plus ties). Precision/recall of
    the objectness prediction at several thresholds and the three losses are
    registered as moving summaries.

    Args:
        anchor_labels: fHxfWxNA. -1 marks anchors that are excluded from the
            label loss, 1 marks positive anchors.
        anchor_boxes: fHxfWxNAx4, encoded
        gt_boxes: ground-truth boxes; reshaped to [-1, 4] here.
        rpn_boxes: decoded RPN boxes; reshaped to [-1, 4] here
            (presumably fHxfWxNAx4 -- confirm against the caller).
        label_logits:  fHxfWxNA
        box_logits: fHxfWxNAx4
        iou_logits:  fHxfWxNA

    Returns:
        label_loss, box_loss, iou_loss
    """
    with tf.device('/cpu:0'):
        valid_mask = tf.stop_gradient(tf.not_equal(anchor_labels, -1))
        pos_mask = tf.stop_gradient(tf.equal(anchor_labels, 1))
        nr_valid = tf.stop_gradient(tf.count_nonzero(valid_mask,
                                                     dtype=tf.int32),
                                    name='num_valid_anchor')
        nr_pos = tf.identity(tf.count_nonzero(pos_mask, dtype=tf.int32),
                             name='num_pos_anchor')
        # nr_pos is guaranteed >0 in C4. But in FPN. even nr_valid could be 0.

        valid_anchor_labels = tf.boolean_mask(anchor_labels, valid_mask)
    valid_label_logits = tf.boolean_mask(label_logits, valid_mask)

    with tf.name_scope('label_metrics'):
        valid_label_prob = tf.nn.sigmoid(valid_label_logits)
        summaries = []
        # Reported instead of 0/0 when a metric is undefined.
        # A small value will make summaries appear lower.
        metric_placeholder = 0.5
        with tf.device('/cpu:0'):
            for th in [0.5, 0.2, 0.1]:
                valid_prediction = tf.cast(valid_label_prob > th, tf.int32)
                nr_pos_prediction = tf.reduce_sum(valid_prediction,
                                                  name='num_pos_prediction')
                # Anchors predicted positive whose prediction matches the label.
                pos_prediction_corr = tf.count_nonzero(
                    tf.logical_and(
                        valid_label_prob > th,
                        tf.equal(valid_prediction, valid_anchor_labels)),
                    dtype=tf.int32)
                recall = tf.to_float(tf.truediv(pos_prediction_corr, nr_pos))
                recall = tf.where(tf.equal(nr_pos, 0),
                                  metric_placeholder,
                                  recall,
                                  name='recall_th{}'.format(th))
                precision = tf.to_float(
                    tf.truediv(pos_prediction_corr, nr_pos_prediction))
                precision = tf.where(tf.equal(nr_pos_prediction, 0),
                                     metric_placeholder,
                                     precision,
                                     name='precision_th{}'.format(th))
                summaries.extend([precision, recall])
        add_moving_summary(*summaries)

    # Per-level loss summaries in FPN may appear lower due to the use of a small placeholder.
    # But the total RPN loss will be fine.  TODO make the summary op smarter
    loss_placeholder = 0.

    # Label loss: sigmoid cross entropy, averaged over the most wrongly
    # classified anchors only.
    # NOTE(review): the mining budget reads cfg.FRCNN.BATCH_PER_IM while the
    # box loss below is normalised with cfg.RPN.BATCH_PER_IM -- confirm that
    # mixing the two settings is intended.
    ce_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits)
    label_loss = _mean_of_hardest(ce_loss, cfg.FRCNN.BATCH_PER_IM)
    label_loss = tf.where(tf.equal(nr_valid, 0),
                          loss_placeholder,
                          label_loss,
                          name='label_loss')

    # Box loss: Huber loss (default delta) summed over the positive anchors.
    pos_anchor_boxes = tf.boolean_mask(anchor_boxes, pos_mask)
    pos_box_logits = tf.boolean_mask(box_logits, pos_mask)
    box_loss = tf.losses.huber_loss(pos_anchor_boxes,
                                    pos_box_logits,
                                    reduction=tf.losses.Reduction.SUM)
    box_loss = box_loss * (50. / cfg.RPN.BATCH_PER_IM)
    box_loss = tf.where(tf.equal(nr_pos, 0),
                        loss_placeholder,
                        box_loss,
                        name='box_loss')

    # IoU loss: regress sigmoid(iou_logits) towards the best IoU between each
    # decoded RPN box and any ground-truth box.
    rpn_boxes = tf.reshape(rpn_boxes, [-1, 4])
    gt_boxes = tf.reshape(gt_boxes, [-1, 4])
    iou = pairwise_iou(rpn_boxes, gt_boxes)  # nxm
    max_iou = tf.reduce_max(iou, axis=1)
    # if only bg gt_boxes, all ious are 0.
    max_iou = tf.where(tf.equal(nr_pos, 0), tf.zeros_like(max_iou), max_iou)
    # The IoU is a regression target, so no gradient flows through it.
    max_iou = tf.stop_gradient(tf.reshape(max_iou, [-1]),
                               name='rpn_box_gt_iou')

    iou_prob = tf.reshape(tf.nn.sigmoid(iou_logits), [-1])
    iou_loss = tf.losses.huber_loss(max_iou, iou_prob, reduction='none')
    iou_loss = tf.identity(
        _mean_of_hardest(iou_loss, cfg.FRCNN.BATCH_PER_IM), name='iou_loss')

    add_moving_summary(label_loss, box_loss, iou_loss, nr_valid, nr_pos)
    return label_loss, box_loss, iou_loss
Exemple #14
0
def get_mask_single_iou(curr_damage_anchors_batch, house_bboxes, iou_thr):
    """
    Flag the anchors that overlap at least one house box strongly enough.

    Args:
        curr_damage_anchors_batch: boxes passed as the first argument of
            pairwise_iou (one row of the IoU matrix per box).
        house_bboxes: boxes passed as the second argument of pairwise_iou
            (one column of the IoU matrix per box).
        iou_thr: IoU threshold, converted to a float32 constant.

    Returns:
        Boolean tensor with one entry per anchor, True where the anchor's
        best IoU over all house boxes is strictly greater than `iou_thr`.
    """
    overlaps = pairwise_iou(curr_damage_anchors_batch, house_bboxes)
    # Best overlap of each anchor with any house box.
    best_overlap = tf.math.reduce_max(overlaps, axis=1)
    return tf.greater(best_overlap, tf.constant(iou_thr, dtype=tf.float32))