Ejemplo n.º 1
0
    def im_detect(self, im):
        """Convert the outputs stored on this object into scores and boxes.

        Reads ``self.cls_prob``, ``self.bbox_pred``, ``self.rois`` and
        ``self.im_scale`` (presumably filled in by an earlier forward pass --
        confirm against the rest of the class).

        Parameters
        ----------
        im : object exposing ``shape``; only ``im.shape`` is used, as the
            clipping bounds for the predicted boxes.

        Returns
        -------
        scores : ``self.cls_prob`` reshaped to (R, N_CLASSES + 1)
        pred_boxes : result of applying the regression deltas to the rois
            and clipping to ``im.shape``
        """
        raw_rois = self.rois
        if isinstance(raw_rois, tuple):
            # Only the first element of a tuple-valued rois is used.
            raw_rois = raw_rois[0]

        n_cols = cfg.ZLRM.N_CLASSES + 1
        scores = np.reshape(self.cls_prob, [-1, n_cols])          # (R, C+1)
        deltas = np.reshape(self.bbox_pred, [-1, n_cols * 4])     # (R, (C+1)x4)

        # Columns 1..4 hold the box coordinates; divide by the image scale
        # (presumably mapping back to original-image coordinates).
        proposals = np.array(raw_rois)[:, 1:5] / self.im_scale

        # Apply bounding-box regression deltas, then clip to the image.
        refined = bbox_transform_inv(proposals, deltas)
        return scores, clip_boxes(refined, im.shape)
Ejemplo n.º 2
0
def im_detect(sess, net, im):
    """Run the network on one image and return class scores and boxes.

    Parameters
    ----------
    sess : object whose ``run(fetches, feed_dict=...)`` evaluates the network
    net : network object exposing ``data`` / ``im_info`` inputs and
        ``get_output(name)`` for 'cls_prob', 'ave_bbox_pred_rois' and 'rois'
    im : input image; passed to ``_get_blobs`` and its ``shape`` is used as
        the clipping bounds

    Returns
    -------
    scores : class probabilities reshaped to (R, N_CLASSES + 1)
    pred_boxes : (R, 4 * (N_CLASSES + 1)) boxes. With
        ``cfg.ZLRM.TEST.BBOX_REG`` set these are the regressed and clipped
        boxes; otherwise the unrefined rois repeated once per class.

    Raises
    ------
    AssertionError
        If ``_get_blobs`` reports anything other than exactly one scale.
    """
    blobs, im_scales = _get_blobs(im)
    # Fail fast: everything below indexes im_scales[0].
    assert len(im_scales) == 1, "Only single-image batch implemented"

    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)

    # Forward pass.
    feed_dict = {net.data: blobs['data'], net.im_info: blobs['im_info']}
    cls_prob, bbox_pred, rois = sess.run(
        [net.get_output('cls_prob'),
         net.get_output('ave_bbox_pred_rois'),
         net.get_output('rois')],
        feed_dict=feed_dict)

    if isinstance(rois, tuple):
        rois = rois[0]

    cls_prob = np.reshape(cls_prob, [-1, cfg.ZLRM.N_CLASSES + 1])  # (R, C+1)
    bbox_pred = np.reshape(bbox_pred,
                           [-1, (cfg.ZLRM.N_CLASSES + 1) * 4])  # (R, (C+1)x4)
    rois = np.array(rois)
    # Columns 1..4 are the box coordinates; undo the input scaling.
    boxes = rois[:, 1:5] / im_scales[0]

    scores = cls_prob

    if cfg.ZLRM.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        pred_boxes = bbox_transform_inv(boxes, bbox_pred)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Previously pred_boxes was left unbound on this path, so the return
        # below raised UnboundLocalError. Simply repeat the boxes, once for
        # each class, so the output layout matches the regression branch.
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    return scores, pred_boxes
Ejemplo n.º 3
0
def _process_boxes_scores(cls_prob, bbox_pred, rois, im_scale, im_shape):
    """Derive the final boxes and scores from the network output tensors.

    Parameters
    ----------
    cls_prob : class scores; returned unchanged as the second result
    bbox_pred : regression deltas, one row per roi
    rois : array whose columns 1..4 hold the box coordinates
    im_scale : accepted for interface compatibility; not used here
    im_shape : bounds passed to ``clip_boxes``

    Returns
    -------
    (pred_boxes, scores). With ``cfg.ZLRM.TEST.BBOX_REG`` set, the boxes are
    the rois refined by ``bbox_pred``; otherwise they are the raw rois (not
    repeated per class). Both variants are clipped to ``im_shape``.
    """
    assert rois.shape[0] == bbox_pred.shape[0], (
        'rois and bbox_pred must have the same shape')

    candidates = rois[:, 1:5]
    if cfg.ZLRM.TEST.BBOX_REG:
        candidates = bbox_transform_inv(candidates, deltas=bbox_pred)

    return clip_boxes(candidates, im_shape), cls_prob
Ejemplo n.º 4
0
def proposal_layer(rpn_cls_prob_reshape_P2, rpn_bbox_pred_P2,
                   rpn_cls_prob_reshape_P3, rpn_bbox_pred_P3,
                   rpn_cls_prob_reshape_P4, rpn_bbox_pred_P4,
                   rpn_cls_prob_reshape_P5, rpn_bbox_pred_P5,
                   rpn_cls_prob_reshape_P6, rpn_bbox_pred_P6,
                   im_info, cfg_train_key=True,
                   _feat_strides=cfg.ZLRM.FPN_FEAT_STRIDE[2:],
                   anchor_sizes=cfg.ZLRM.FPN_ANCHOR_SIZE[2:]):
    """Generate RPN proposals from the five pyramid levels P2..P6.

    Parameters
    ----------
    rpn_cls_prob_reshape_P* : (1, H(P), W(P), A(P)x2) RPN bg/fg probabilities
        on pyramid level P
    rpn_bbox_pred_P* : (1, H(P), W(P), A(P)x4) RPN box regression output on
        pyramid level P
    im_info : sequence whose first element is
        [image_height, image_width, scale_ratio]
    cfg_train_key : ``True`` selects the ``cfg.ZLRM.TRAIN`` RPN settings, any
        other value the ``cfg.ZLRM.TEST`` ones. Only the exact value ``False``
        additionally enables the per-level split of the result.
    _feat_strides : downsampling ratio of each pyramid level relative to the
        input image (five entries are used)
    anchor_sizes : absolute anchor size on each pyramid level

    Returns
    -------
    If ``cfg_train_key == False``: a 6-tuple
    ``(rois_P2, rois_P3, rois_P4, rois_P5, rois_P6, rpn_rois)`` where
    ``rpn_rois`` is the concatenation of the five per-level arrays.
    Otherwise: ``rpn_rois`` alone, shape (N, 5), rows ``[0, x1, y1, x2, y2]``.

    Algorithm
    ---------
    for each (H, W) location i on each level
      generate A anchor boxes centered on cell i
      apply predicted bbox deltas at cell i to each of the A anchors
    clip predicted boxes to image
    remove predicted boxes with either height or width < threshold
    sort all (proposal, score) pairs by score from highest to lowest
    take top pre_nms_topN proposals before NMS
    apply NMS to remaining proposals
    take post_nms_topN proposals after NMS
    (test mode) assign every proposal to a pyramid level P2..P6
    """
    num_levels = 5  # P2 .. P6, fixed by the signature
    anchor_scales = np.array(anchor_sizes) / np.array(_feat_strides)

    # One anchor set per level: a single scale combined with every ratio.
    _anchors = [generate_anchors(base_size=_feat_strides[i],
                                 ratios=cfg.ZLRM.ANCHOR_RATIO,
                                 scales=np.array([anchor_scales[i]]))
                for i in range(num_levels)]
    _num_anchors = [anchor.shape[0] for anchor in _anchors]

    im_info = im_info[0]

    # NOTE: the explicit comparisons with True/False are kept on purpose;
    # switching to truthiness would change which branch non-bool callers
    # (e.g. a string flag) end up in.
    if cfg_train_key == True:
        pre_nms_topN = cfg.ZLRM.TRAIN.RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg.ZLRM.TRAIN.RPN_POST_NMS_TOP_N
        nms_thresh = cfg.ZLRM.TRAIN.RPN_NMS_THRESH
        min_size = cfg.ZLRM.TRAIN.RPN_MIN_SIZE
    else:
        pre_nms_topN = cfg.ZLRM.TEST.RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg.ZLRM.TEST.RPN_POST_NMS_TOP_N
        nms_thresh = cfg.ZLRM.TEST.RPN_NMS_THRESH
        min_size = cfg.ZLRM.TEST.RPN_MIN_SIZE

    rpn_cls_prob_reshapes = [rpn_cls_prob_reshape_P2, rpn_cls_prob_reshape_P3,
                             rpn_cls_prob_reshape_P4, rpn_cls_prob_reshape_P5,
                             rpn_cls_prob_reshape_P6]
    bbox_deltas = [rpn_bbox_pred_P2, rpn_bbox_pred_P3, rpn_bbox_pred_P4,
                   rpn_bbox_pred_P5, rpn_bbox_pred_P6]

    heights = [prob.shape[1] for prob in rpn_cls_prob_reshapes]
    widths = [prob.shape[2] for prob in rpn_cls_prob_reshapes]

    # View the last axis as (A, 2) and keep index 1 of each pair (the fg
    # probability), then flatten every level to (H * W * A, 1) with rows
    # ordered by (h, w, a).
    scores = [np.reshape(np.reshape(prob, [1, height, width, num_anchor, 2])[:, :, :, :, 1],
                         [-1, 1])
              for height, width, prob, num_anchor in
              zip(heights, widths, rpn_cls_prob_reshapes, _num_anchors)]
    # Stack the levels: (sum(H * W * A), 1)
    scores = np.concatenate(scores, axis=0)

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    def gen_shift(height, width, _feat_stride):
        # Enumerate the (x, y, x, y) offset of every feature-map cell.
        shift_x = np.arange(0, width) * _feat_stride
        shift_y = np.arange(0, height) * _feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shift = np.vstack((shift_x.ravel(), shift_y.ravel(),
                           shift_x.ravel(), shift_y.ravel())).transpose()
        return shift

    shifts = [gen_shift(height, width, _feat_stride)
              for height, width, _feat_stride in zip(heights, widths, _feat_strides)]

    # Enumerate all shifted anchors, per level:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors, then stack the levels
    As = _num_anchors
    Ks = [shift.shape[0] for shift in shifts]
    anchors = [_anchor.reshape((1, A, 4)) +
               shift.reshape((1, K, 4)).transpose((1, 0, 2))
               for A, K, _anchor, shift in zip(As, Ks, _anchors, shifts)]
    anchors = [anchor.reshape((K * A, 4))
               for anchor, A, K in zip(anchors, As, Ks)]
    anchors = np.concatenate(anchors, axis=0)

    # With the documented (1, H, W, A*4) layout a plain reshape yields
    # (H * W * A, 4) rows ordered by (h, w, a), matching the anchors.
    bbox_deltas = [bbox_delta.reshape((-1, 4)) for bbox_delta in bbox_deltas]
    bbox_deltas = np.concatenate(bbox_deltas, axis=0)

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms
    # 7. take post_nms_topN
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    if cfg_train_key == False:
        # Assign rois to level Pk (P2 ~ P6):
        #   k = clamp(floor(4 + log2(sqrt(w * h) / 224)), 2, 6)
        # roi layout: [0, x0, y0, x1, y1]
        roi_w = rpn_rois[:, 3] - rpn_rois[:, 1]
        roi_h = rpn_rois[:, 4] - rpn_rois[:, 2]
        # A zero-area box gives log2(0) = -inf and a negative area gives NaN;
        # the old per-roi int() conversion raised on both. Silence the
        # warnings and send such boxes to the finest level instead.
        with np.errstate(divide='ignore', invalid='ignore'):
            raw_levels = 4 + np.log2(np.sqrt(roi_w * roi_h) / 224)
        raw_levels = np.where(np.isnan(raw_levels), 2, raw_levels)
        # floor and int() truncation agree once clamped to [2, 6].
        levels = np.clip(np.floor(raw_levels), 2, 6).astype(np.int64)

        # Boolean masks keep the score ordering inside every level.
        leveled_rois = [rpn_rois[levels == level] for level in range(2, 7)]
        rpn_rois = np.concatenate(leveled_rois, axis=0)

        return (leveled_rois[0], leveled_rois[1], leveled_rois[2],
                leveled_rois[3], leveled_rois[4], rpn_rois)

    return rpn_rois
Ejemplo n.º 5
0
def proposal_layer(rpn_cls_prob_reshape,
                   rpn_bbox_pred,
                   im_info,
                   cfg_key=True,
                   _feat_stride=cfg.ZLRM.RESNET_50_FEAT_STRIDE,
                   anchor_scales=cfg.ZLRM.ANCHOR_SCALE):
    """Turn single-level RPN outputs into scored, NMS-filtered proposals.

    Parameters
    ----------
    rpn_cls_prob_reshape : (1, H, W, Ax2) RPN output, bg/fg probability per
        anchor, laid out as (A, 2) pairs on the last axis
    rpn_bbox_pred : (1, H, W, Ax4) RPN box regression output
    im_info : sequence whose first element is
        [image_height, image_width, scale_ratio]
    cfg_key : ``True`` selects the ``cfg.ZLRM.TRAIN`` RPN settings, any other
        value the ``cfg.ZLRM.TEST`` ones
    _feat_stride : downsampling ratio of the feature map relative to the
        input image
    anchor_scales : scales handed to ``generate_anchors``

    Returns
    -------
    rois : (N, 5) float32 rows ``[0, x1, y1, x2, y2]``
    scores : (N, 1) foreground scores of the kept proposals

    Steps
    -----
    1. shift the base anchors to every (H, W) cell and apply the deltas
    2. clip the resulting boxes to the image
    3. drop boxes smaller than the configured minimum size
    4. sort by score, highest first, and keep the top pre_nms_topN
    5. run NMS and keep the top post_nms_topN
    """
    base_anchors = generate_anchors(scales=np.array(anchor_scales))
    num_anchors = base_anchors.shape[0]
    info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    # The comparison with True (rather than a truthiness test) is intentional:
    # it mirrors how callers' non-bool flags have always been routed.
    phase_cfg = cfg.ZLRM.TRAIN if cfg_key == True else cfg.ZLRM.TEST
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE

    height, width = rpn_cls_prob_reshape.shape[1:3]

    # Index 1 of every (bg, fg) pair is the foreground score -> (1, H, W, A).
    fg_scores = np.reshape(
        rpn_cls_prob_reshape,
        [1, height, width, num_anchors, 2])[..., 1]

    if DEBUG:
        print('im_size: ({}, {})'.format(info[0], info[1]))
        print('scale: {}'.format(info[2]))
        print('score map size: {}'.format(fg_scores.shape))

    # Offsets (x, y, x, y) of every feature-map cell in input-image pixels.
    xs = np.arange(0, width) * _feat_stride
    ys = np.arange(0, height) * _feat_stride
    grid_x, grid_y = np.meshgrid(xs, ys)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    # Broadcast (1, A, 4) anchors against (K, 1, 4) shifts -> (K, A, 4),
    # then flatten to (K*A, 4) with rows ordered by (cell, anchor).
    num_cells = shifts.shape[0]
    anchors = (base_anchors.reshape((1, num_anchors, 4)) +
               shifts.reshape((num_cells, 1, 4)))
    anchors = anchors.reshape((num_cells * num_anchors, 4))

    # Flatten deltas and scores to the same (h, w, a) row order.
    deltas = rpn_bbox_pred.reshape((-1, 4))
    scores = fg_scores.reshape((-1, 1))

    # Decode proposals and clip them to the image.
    proposals = clip_boxes(bbox_transform_inv(anchors, deltas), info[:2])

    # Drop boxes below the minimum size (expressed at the input image scale
    # stored in info[2]).
    valid = _filter_boxes(proposals, min_size * info[2])
    proposals, scores = proposals[valid, :], scores[valid]

    # Highest score first; a non-positive pre_nms_topN disables the cut.
    ranking = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        ranking = ranking[:pre_nms_topN]
    proposals, scores = proposals[ranking, :], scores[ranking]

    # NMS, then the post-NMS cut (again disabled when non-positive).
    survivors = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        survivors = survivors[:post_nms_topN]
    proposals, scores = proposals[survivors, :], scores[survivors]

    # Only a single input image is supported, so every batch index is 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return rois, scores