Example #1
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im = readimage(image_name)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    CONF_THRESH = 0.7
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis(im, image_name, cls, dets, thresh=CONF_THRESH)
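Every example on this page calls an `nms` helper shipped with the respective repo (typically a compiled Cython or CUDA kernel). For reference, a minimal pure-NumPy sketch with the same interface, assuming `dets` is an (N, 5) array of (x1, y1, x2, y2, score) rows and the return value is the list of kept row indices:

import numpy as np

def nms_numpy(dets, thresh):
    """Greedy NMS sketch; returns indices of kept boxes, highest score first."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the top-scoring box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # Drop boxes overlapping the kept box above the threshold
        order = order[1:][iou <= thresh]
    return keep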
Example #2
    def detect(self, image):
        """Detect object classes in an image using pre-computed object proposals."""

        # Load the demo image
        # Detect all object classes and regress object bounds
        image = image_transform_1_3(image)
        timer = Timer()
        timer.tic()
        scores, boxes = self.im_detect(image)
        timer.toc()
        print('Detection took {:.3f}s for '
              '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

        CONF_THRESH = 0.7
        NMS_THRESH = 0.1
        for cls_ind, cls in enumerate(self.classes_detect[1:]):
            cls_ind += 1  # because we skipped background
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]
            inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
            dets = dets[inds, :]
        # NOTE: as written, only the detections of the last class in the loop are returned
        return dets
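The `Timer` used by these snippets is assumed to be the Fast R-CNN utility; a minimal stand-in exposing only the `tic()`/`toc()`/`total_time` surface the examples touch (the original also tracks call counts and averages):

import time

class Timer:
    """Minimal tic/toc timer sketch exposing total_time as used above."""
    def __init__(self):
        self.total_time = 0.0
        self.start_time = 0.0

    def tic(self):
        self.start_time = time.time()

    def toc(self):
        self.total_time = time.time() - self.start_time
        return self.total_time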
Example #3
    def detect(self, image):
        """Detect object classes in an image using pre-computed object proposals."""

        # Load the demo image
        # Detect all object classes and regress object bounds
        image = image_transform_1_3(image)
        timer = Timer()
        timer.tic()
        scores, boxes = self.im_detect(image)
        timer.toc()
        # print('rois--------------', scores)
        print('Detection took {:.3f}s for '
              '{:d} object proposals'.format(timer.total_time, len(boxes)))

        CONF_THRESH = 0.3
        NMS_THRESH = 0.5
        dets = []
        for i in range(len(boxes)):
            cls_boxes = boxes[i]
            cls_scores = scores[i]
            # Build the (x1, y1, x2, y2, score) rows once; cls_scores is assumed (N, 1)
            dets_i = np.hstack([cls_boxes[:, 0:4], cls_scores]).astype(np.float32)
            keep = nms(dets_i, NMS_THRESH)
            dets_i = dets_i[keep, :]
            inds = np.where(dets_i[:, -1] >= CONF_THRESH)[0]
            dets_i = dets_i[inds, :]
            dets.append(dets_i)
        return dets
Example #4
    def detect(self, image):
        """Detect object classes in an image using pre-computed object proposals."""

        # Load the demo image
        # Detect all object classes and regress object bounds
        image = image_transform_1_3(image)
        timer = Timer()
        timer.tic()
        scores, boxes = self.im_detect(image)
        timer.toc()
        # print('kkk', np.argmax(scores, axis=1))
        # print('lll', scores[np.argmax(scores, axis=1) == 4, 4])
        print('Detection took {:.3f}s for '
              '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

        CONF_THRESH = 0.3
        NMS_THRESH = 0.5
        dets_list = []
        for cls_ind, cls in enumerate(self.classes_detect[1:]):
            cls_ind += 1  # because we skipped background
            inds = np.where(scores[:, cls_ind] > CONF_THRESH)[0]
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            dets = dets[inds, :]
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]
            inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
            cls_ind_list = np.empty((len(inds), 1), np.int32)
            cls_ind_list.fill(cls_ind)
            dets = np.hstack((dets[inds, :-1], cls_ind_list))
            dets_list.append(dets)
        dets = np.vstack(dets_list)
        # print('jjj', dets)
        return dets
Example #5
def conduct_nms(class_ids, refined_rois, class_scores, keep, config):
    """per SAMPLE operation; no batch size dim!
    Args:
        class_ids       [say 1000]
        refined_rois    [1000 4]
        class_scores    [1000]
        keep            [True, False, ...] altogether 1000
        config          config
    Returns:
        detection:      [DET_MAX_INSTANCES, (y1, x1, y2, x2, class_id, class_score)]
    """
    _indx = torch.nonzero(keep).squeeze()
    pre_nms_class_ids = class_ids[keep]
    pre_nms_scores = class_scores[keep]
    pre_nms_rois = refined_rois[_indx, :]

    # conduct nms per CLASS
    for i, class_id in enumerate(unique1d(pre_nms_class_ids)):

        # Pick detections of this class
        ixs = torch.nonzero(class_id == pre_nms_class_ids).squeeze()

        ix_scores = pre_nms_scores[ixs]
        ix_rois = pre_nms_rois[ixs, :]

        # Sort
        ix_scores, order = ix_scores.sort(descending=True)
        ix_rois = ix_rois[order, :]

        class_keep = nms(
            torch.cat((ix_rois, ix_scores.unsqueeze(1)),
                      dim=1).unsqueeze(0).data,
            config.TEST.DET_NMS_THRESHOLD)[0]

        # Map indices
        class_keep = _indx[ixs[order[class_keep.tolist()]]]

        if i == 0:
            nms_keep = class_keep
        else:
            nms_keep = unique1d(torch.cat((nms_keep, class_keep)))

    nms_indx = intersect1d(_indx, nms_keep)

    # Keep top detections
    roi_count = config.TEST.DET_MAX_INSTANCES
    top_ids = class_scores[nms_indx].sort(descending=True)[1][:roi_count]
    # final_index is the true index among the input samples (say 1000)
    final_index = nms_indx[top_ids].squeeze()

    # Arrange output as [DET_MAX_INSTANCES, (y1, x1, y2, x2, class_id, score)]
    # Coordinates are in image domain.
    detections = torch.cat((refined_rois[final_index],
                            class_ids[final_index].unsqueeze(1).float(),
                            class_scores[final_index].unsqueeze(1)),
                           dim=1)
    return detections, final_index
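`unique1d` and `intersect1d` are hand-rolled helpers from the Mask R-CNN PyTorch ports this snippet comes from; on a recent PyTorch they can be sketched directly on top of `torch.unique` (an assumption: the originals predate that API and were written for old versions without it):

import torch

def unique1d(tensor):
    # Unique values of a 1-D tensor; torch.unique sorts by default.
    return torch.unique(tensor)

def intersect1d(tensor1, tensor2):
    # Values present in both 1-D tensors.
    combined = torch.cat((torch.unique(tensor1), torch.unique(tensor2)))
    values, counts = torch.unique(combined, return_counts=True)
    return values[counts > 1]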
Example #6
    def detect(self, text_proposals, scores, size):
        # Drop proposals whose score is too low
        keep_inds = np.where(
            scores > cfg["TEXT"]["TEXT_PROPOSALS_MIN_SCORE"])[0]
        text_proposals, scores = text_proposals[keep_inds], scores[keep_inds]

        # Sort by score
        sorted_indices = np.argsort(scores.ravel())[::-1]
        text_proposals, scores = text_proposals[sorted_indices], scores[
            sorted_indices]

        # Run NMS on the proposals

        keep_inds = nms(np.hstack((text_proposals, scores)),
                        cfg["TEXT"]["TEXT_PROPOSALS_NMS_THRESH"])
        # keep_inds = soft_nms(np.hstack((text_proposals, scores)),threshold=TextLineCfg.TEXT_PROPOSALS_NMS_THRESH)
        text_proposals, scores = text_proposals[keep_inds], scores[keep_inds]
        # Collect the detection results
        text_recs = self.text_proposal_connector.get_text_lines(
            text_proposals, scores, size)
        keep_inds = self.filter_boxes(text_recs)
        return text_proposals, scores, text_recs[keep_inds]
Example #7
def detect(sess, net, image):

    image = image_transform_1_3(image)
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, image)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    CONF_THRESH = 0.7
    NMS_THRESH = 0.1
    for cls_ind, cls in enumerate(CLASSES_DEFECT[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
        dets = dets[inds, :]
    # NOTE: as written, only the detections of the last class in CLASSES_DEFECT are returned
    return dets
Example #8
def proposal_layer(inputs, proposal_count, nms_threshold, priors, config=None):
    """Receives anchor scores and selects a subset to pass as proposals
    to the second stage. Filtering is done based on anchor scores and
    non-max suppression to remove overlaps. It also applies bounding
    box refinement details to anchors.
    Args:
        inputs
            [0] rpn_probs:  [batch, anchors, (bg prob, fg prob)]
            [1] rpn_bbox:   [batch, anchors, (dy, dx, log(dh), log(dw))]
        proposal_count:     maximum output
        nms_threshold:      for proposal
        priors:             anchors
        config:             configuration
    Returns:
        Proposals in normalized coordinates [batch, rois, (y1, x1, y2, x2)]
    """
    anchors = Variable(priors.cuda(), requires_grad=False)
    bs, prior_num = inputs[0].size(0), anchors.size(0)
    # Box Scores. Use the foreground class confidence. [Batch, num_rois, 1]
    scores = inputs[0][:, :, 1]

    # Box deltas [batch, num_rois, 4]
    deltas = inputs[1]
    std_dev = Variable(torch.from_numpy(np.reshape(config.DATA.BBOX_STD_DEV, [1, 1, 4])).float(),
                       requires_grad=False).cuda()
    deltas = deltas * std_dev

    anchors = anchors.expand(bs, anchors.size(0), anchors.size(1))

    # Improve performance by trimming to top anchors by score
    # and doing the rest on the smaller subset.
    pre_nms_limit = min(config.RPN.PRE_NMS_LIMIT, prior_num)
    scores, order = scores.sort(descending=True)
    scores = scores[:, :pre_nms_limit]
    order = order[:, :pre_nms_limit]

    deltas_trim = Variable(torch.FloatTensor(bs, pre_nms_limit, 4).cuda())
    anchors_trim = Variable(torch.FloatTensor(bs, pre_nms_limit, 4).cuda())
    # index two-dim (out_of_mem if directly index order.data)
    for i in range(bs):
        deltas_trim[i] = deltas[i][order.data[i], :]
        anchors_trim[i] = anchors[i][order.data[i], :]

    # Apply deltas to anchors to get refined anchors.
    # [batch, N, (y1, x1, y2, x2)]
    # TODO (mid): nan or inf in initial iter
    boxes = apply_box_deltas(anchors_trim, deltas_trim)

    # Clip to image boundaries. [batch, N, (y1, x1, y2, x2)]
    height, width = config.DATA.IMAGE_SHAPE[:2]
    window = np.array([0, 0, height, width]).astype(np.float32)
    window = Variable(torch.from_numpy(window).cuda(), requires_grad=False)
    boxes = clip_boxes(boxes, window)

    # Filter out small boxes
    # According to Xinlei Chen's paper, this reduces detection accuracy
    # for small objects, so we're skipping it.

    # Non-max suppression
    keep = nms(torch.cat((boxes, scores.unsqueeze(2)), 2).data, nms_threshold)
    keep = keep[:, :proposal_count]
    boxes_keep = Variable(torch.FloatTensor(bs, keep.shape[1], 4).cuda())  # bs, proposal_count(1000), 4
    for i in range(bs):
        boxes_keep[i] = boxes[i][keep[i], :]

    # Normalize dimensions to range of 0 to 1.
    norm = Variable(torch.from_numpy(np.array([height, width, height, width])).float(), requires_grad=False).cuda()
    normalized_boxes = boxes_keep / norm

    return normalized_boxes   # proposals
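A sketch of the `clip_boxes` this layer calls, assuming the (y1, x1, y2, x2) box layout and the (y1, x1, y2, x2) `window` tensor built above; this is an illustration under those assumptions, not the project's exact implementation:

import torch

def clip_boxes(boxes, window):
    """Clamp [batch, N, (y1, x1, y2, x2)] boxes into a (y1, x1, y2, x2) window."""
    y1 = boxes[..., 0].clamp(float(window[0]), float(window[2]))
    x1 = boxes[..., 1].clamp(float(window[1]), float(window[3]))
    y2 = boxes[..., 2].clamp(float(window[0]), float(window[2]))
    x2 = boxes[..., 3].clamp(float(window[1]), float(window[3]))
    return torch.stack([y1, x1, y2, x2], dim=-1)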
Example #9
def proposal_layer(rpn_cls_prob_reshape_P2, rpn_bbox_pred_P2, \
                   rpn_cls_prob_reshape_P3, rpn_bbox_pred_P3, \
                   rpn_cls_prob_reshape_P4, rpn_bbox_pred_P4, \
                   rpn_cls_prob_reshape_P5, rpn_bbox_pred_P5, \
                   rpn_cls_prob_reshape_P6, rpn_bbox_pred_P6, \
                   im_info, cfg_train_key = True, _feat_strides = cfg.ZLRM.FPN_FEAT_STRIDE[2:], \
                   anchor_sizes = cfg.ZLRM.FPN_ANCHOR_SIZE[2:]): # anchor_scales = [8, 8, 8, 8, 8]
    """
    Parameters
    ----------
    rpn_cls_prob_reshape_P: (1 , H(P), W(P), A(P)x2) outputs of RPN, prob of bg or fg on pyramid layer P
    rpn_bbox_pred_P: (1 , H(P), W(P), A(P)x4), regressed boxes output of RPN on pyramid layer P
    im_info: a list of [image_height, image_width, scale_ratios]
    cfg_train_key: True to use the 'TRAIN' settings, False for 'TEST'
    _feat_strides: the downsampling ratio of feature map to the original input image on each pyramid layer
    anchor_sizes: the absolute anchor sizes on each pyramid layer
    ----------
    Returns
    ----------
    rpn_rois : (sum(H x W x A), 5) e.g. [0, x1, y1, x2, y2]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    """
    anchor_scales = np.array(anchor_sizes) / np.array(_feat_strides)

    _anchors = [generate_anchors(base_size=_feat_stride,
                                 ratios=cfg.ZLRM.ANCHOR_RATIO,
                                 scales=np.array([anchor_scale]))
                for _feat_stride, anchor_scale in zip(_feat_strides, anchor_scales)]

    _num_anchors = [anchor.shape[0] for anchor in _anchors]

    im_info = im_info[0]

    if cfg_train_key:
        pre_nms_topN = cfg.ZLRM.TRAIN.RPN_PRE_NMS_TOP_N  # 12000
        post_nms_topN = cfg.ZLRM.TRAIN.RPN_POST_NMS_TOP_N  # 2000
        nms_thresh = cfg.ZLRM.TRAIN.RPN_NMS_THRESH  # 0.7
        min_size = cfg.ZLRM.TRAIN.RPN_MIN_SIZE  # 16
    else:
        pre_nms_topN = cfg.ZLRM.TEST.RPN_PRE_NMS_TOP_N  # 6000
        post_nms_topN = cfg.ZLRM.TEST.RPN_POST_NMS_TOP_N  # 300
        nms_thresh = cfg.ZLRM.TEST.RPN_NMS_THRESH  # 0.7
        min_size = cfg.ZLRM.TEST.RPN_MIN_SIZE  # 16

    rpn_cls_prob_reshapes = [rpn_cls_prob_reshape_P2, rpn_cls_prob_reshape_P3, rpn_cls_prob_reshape_P4, rpn_cls_prob_reshape_P5, rpn_cls_prob_reshape_P6]
    bbox_deltas = [rpn_bbox_pred_P2, rpn_bbox_pred_P3, rpn_bbox_pred_P4, rpn_bbox_pred_P5, rpn_bbox_pred_P6]

    heights = [rpn_cls_prob_reshape.shape[1] for rpn_cls_prob_reshape in rpn_cls_prob_reshapes]
    widths = [rpn_cls_prob_reshape.shape[2] for rpn_cls_prob_reshape in rpn_cls_prob_reshapes]

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    # (4, 1, H, W, A(x))  --> (1, H, W, stack(A))
    scores = [np.reshape(np.reshape(rpn_cls_prob_reshape, [1, height, width, _num_anchor, 2])[:,:,:,:,1],
                [-1, 1])
                for height, width, rpn_cls_prob_reshape, _num_anchor in
                zip(heights, widths, rpn_cls_prob_reshapes, _num_anchors)]

    # scores are (1 * H(P) * W(P) * A(P), 1) format
    # reshape to (sum(1 * H * W * A), 1) where rows are ordered by (h, w, a)
    scores = np.concatenate(scores, axis=0)

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    def gen_shift(height, width, _feat_stride):
        # Enumerate all shifts
        shift_x = np.arange(0, width) * _feat_stride
        shift_y = np.arange(0, height) * _feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shift = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        return shift

    shifts = [gen_shift(height, width, _feat_stride)
              for height, width, _feat_stride in zip(heights, widths, _feat_strides)]

    # Enumerate all shifted anchors:
    #
    # add A anchors (4, 1, A(x), 4) to
    # cell K shifts (4, K, 1, 4) to get
    # shift anchors (4, K, A(x), 4)
    # reshape to (K*stack(A), 4) shifted anchors
    As = _num_anchors
    Ks = [shift.shape[0] for shift in shifts]
    anchors = [_anchor.reshape((1, A, 4)) +
               shift.reshape((1, K, 4)).transpose((1, 0, 2))
               for A, K, _anchor, shift in zip(As, Ks, _anchors, shifts)]
    anchors = [anchor.reshape((K * A, 4))
               for anchor, A, K in zip(anchors, As, Ks)]
    anchors = np.concatenate(anchors, axis=0)

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A(x), H, W) format
    # transpose to (1, H, W, 4 * A(x))
    # reshape to (1 * H * W * A(x), 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order

    #bbox_deltas = bbox_deltas.reshape((-1, 4)) #(HxWxA, 4)

    bbox_deltas = [bbox_delta.reshape((-1, 4)) for bbox_delta in bbox_deltas]
    bbox_deltas = np.concatenate(bbox_deltas, axis=0)

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    rpn_rois = blob

    if not cfg_train_key:
        # assign rois to level Pk    (P2 ~ P6)
        def calc_level(width, height):
            return min(6, max(2, int(4 + np.log2(np.sqrt(width * height) / 224))))

        level = lambda roi : calc_level(roi[3] - roi[1], roi[4] - roi[2])   # roi: [0, x0, y0, x1, y1]

        leveled_rois = [None] * 5
        leveled_idxs = [[], [], [], [], []]
        for idx, roi in enumerate(rpn_rois):
            level_idx = level(roi) - 2
            leveled_idxs[level_idx].append(idx)

        for level_idx in range(0, 5):
            leveled_rois[level_idx] = rpn_rois[leveled_idxs[level_idx]]

        rpn_rois = np.concatenate(leveled_rois, axis=0)

        return leveled_rois[0], leveled_rois[1], leveled_rois[2], leveled_rois[3], leveled_rois[4], rpn_rois

    return rpn_rois
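The `calc_level` rule above is the FPN paper's RoI-to-pyramid assignment, k = 4 + log2(sqrt(w*h)/224) truncated to an integer and clamped to [2, 6]. A quick sanity check with illustrative sizes:

import numpy as np

def calc_level(width, height):
    return min(6, max(2, int(4 + np.log2(np.sqrt(width * height) / 224))))

print(calc_level(224, 224))  # 4: the canonical 224x224 RoI maps to P4
print(calc_level(112, 112))  # 3: half the side length drops one level
print(calc_level(896, 896))  # 6: clamped at the coarsest level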
Example #10
def proposal_layer(inputs, proposal_count, nms_threshold, anchors, config=None):
    """Receives anchor scores and selects a subset to pass as proposals
    to the second stage. Filtering is done based on anchor scores and
    non-max suppression to remove overlaps. It also applies bounding
    box refinement details to anchors.

    Inputs:
        rpn_probs: [batch, anchors, (bg prob, fg prob)]
        rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))]

    Returns:
        Proposals in normalized coordinates [batch, rois, (y1, x1, y2, x2)]
    """

    # Currently only supports batchsize 1
    inputs[0] = inputs[0].squeeze(0)
    inputs[1] = inputs[1].squeeze(0)

    # Box Scores. Use the foreground class confidence. [Batch, num_rois, 1]
    scores = inputs[0][:, 1]

    # Box deltas [batch, num_rois, 4]
    deltas = inputs[1]
    std_dev = Variable(torch.from_numpy(np.reshape(config.RPN.BBOX_STD_DEV, [1, 4])).float(), requires_grad=False)
    if config.GPU_COUNT:
        std_dev = std_dev.cuda()
    deltas = deltas * std_dev

    # Improve performance by trimming to top anchors by score
    # and doing the rest on the smaller subset.
    pre_nms_limit = min(6000, anchors.size()[0])
    scores, order = scores.sort(descending=True)
    order = order[:pre_nms_limit]
    scores = scores[:pre_nms_limit]
    deltas = deltas[order.data, :] # TODO: Support batch size > 1 ff.
    anchors = anchors[order.data, :]

    # Apply deltas to anchors to get refined anchors.
    # [batch, N, (y1, x1, y2, x2)]
    boxes = apply_box_deltas(anchors, deltas)

    # Clip to image boundaries. [batch, N, (y1, x1, y2, x2)]
    height, width = config.TRAIN.IMAGE_SHAPE[:2]
    window = np.array([0, 0, height, width]).astype(np.float32)
    boxes = clip_boxes(boxes, window)

    # Filter out small boxes
    # According to Xinlei Chen's paper, this reduces detection accuracy
    # for small objects, so we're skipping it.

    # Non-max suppression
    keep = nms(torch.cat((boxes, scores.unsqueeze(1)), 1).data, nms_threshold)
    keep = keep[:proposal_count]
    boxes = boxes[keep, :]

    # Normalize dimensions to range of 0 to 1.
    norm = Variable(torch.from_numpy(np.array([height, width, height, width])).float(), requires_grad=False)
    if config.GPU_COUNT:
        norm = norm.cuda()
    normalized_boxes = boxes / norm

    # Add back batch dimension
    normalized_boxes = normalized_boxes.unsqueeze(0)

    return normalized_boxes
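`apply_box_deltas` comes from the same family of Mask R-CNN ports; a sketch of the usual (y1, x1, y2, x2) variant for 2-D inputs (the batched version used in Example #8 broadcasts the same arithmetic over the batch axis):

import torch

def apply_box_deltas(boxes, deltas):
    """boxes: [N, 4] (y1, x1, y2, x2); deltas: [N, 4] (dy, dx, log(dh), log(dw))."""
    height = boxes[:, 2] - boxes[:, 0]
    width = boxes[:, 3] - boxes[:, 1]
    center_y = boxes[:, 0] + 0.5 * height
    center_x = boxes[:, 1] + 0.5 * width
    # Shift the center and rescale the sides
    center_y = center_y + deltas[:, 0] * height
    center_x = center_x + deltas[:, 1] * width
    height = height * torch.exp(deltas[:, 2])
    width = width * torch.exp(deltas[:, 3])
    # Back to corner coordinates
    y1 = center_y - 0.5 * height
    x1 = center_x - 0.5 * width
    return torch.stack([y1, x1, y1 + height, x1 + width], dim=1)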
Example #11
        if vis:
            im2show = np.copy(im)
        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
Example #12
def _proposal_layer_py(rpn_bbox_cls_prob, rpn_bbox_pred, im_dims, cfg_key,
                       _feat_stride, anchor_scales):
    '''
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    '''
    _anchors = generate_anchors(
        scales=np.array(anchor_scales))  #anchor_scales(8,16,32)
    _num_anchors = _anchors.shape[0]
    rpn_bbox_cls_prob = np.transpose(rpn_bbox_cls_prob,
                                     [0, 3, 1, 2])  #(n,18,H,W)
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])  #(n,36,H,W)

    # Only minibatch of 1 supported
    assert rpn_bbox_cls_prob.shape[0] == 1, \
        'Only single item batches are supported'

    if cfg_key == 'TRAIN':
        pre_nms_topN = cfg.TRAIN.RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg.TRAIN.RPN_POST_NMS_TOP_N
        nms_thresh = cfg.TRAIN.RPN_NMS_THRESH
        min_size = cfg.TRAIN.RPN_MIN_SIZE
    else:  # cfg_key == 'TEST':
        pre_nms_topN = cfg.TEST.RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg.TEST.RPN_POST_NMS_TOP_N
        nms_thresh = cfg.TEST.RPN_NMS_THRESH
        min_size = cfg.TEST.RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = rpn_bbox_cls_prob[:, _num_anchors:, :, :]  #(n,9,H,W)
    bbox_deltas = rpn_bbox_pred  #(n,36,H,W)

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))  #(1*h*w*a,1)

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)  #(1*h*w*a,4)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_dims)  #(1*h*w*a,4)

    # 3. remove predicted boxes with either height or width < threshold
    keep = _filter_boxes(proposals, min_size)
    proposals = proposals[keep, :]  #(-1,4)
    scores = scores[keep]  #(-1,1)

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                   copy=False)))  #(n,5)
    return blob
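`bbox_transform_inv` and `_filter_boxes` are the standard py-faster-rcnn NumPy utilities; for reference, a sketch matching the (x1, y1, x2, y2) convention used throughout these proposal layers:

import numpy as np

def bbox_transform_inv(boxes, deltas):
    """Apply (dx, dy, dw, dh) deltas to (x1, y1, x2, y2) boxes."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    dx, dy = deltas[:, 0::4], deltas[:, 1::4]
    dw, dh = deltas[:, 2::4], deltas[:, 3::4]
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]
    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes

def _filter_boxes(boxes, min_size):
    """Keep boxes whose width and height are both >= min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    return np.where((ws >= min_size) & (hs >= min_size))[0]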
Example #13
        for j in range(1, len(pascal_classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets,
                           cfg.TEST.NMS,
                           force_cpu=not cfg.USE_GPU_NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, pascal_classes[j],
                                             cls_dets.cpu().numpy(), 0.5)

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        if webcam_num == -1:
            sys.stdout.write(
                'im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r'.format(
                    num_images + 1, len(imglist), detect_time, nms_time))
            sys.stdout.flush()
Example #14
def proposal_layer(rpn_cls_prob_reshape,
                   rpn_bbox_pred,
                   im_info,
                   cfg_key=True,
                   _feat_stride=cfg.ZLRM.RESNET_50_FEAT_STRIDE,
                   anchor_scales=cfg.ZLRM.ANCHOR_SCALE):
    """
    Parameters
    ----------
    rpn_cls_prob_reshape: (1 , H , W , Ax2) outputs of RPN, prob of bg or fg
                         NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
    rpn_bbox_pred: (1 , H , W , Ax4), regressed boxes output of RPN
    im_info: a list of [image_height, image_width, scale_ratios]
    cfg_key: True to use the 'TRAIN' settings, False for 'TEST'
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    Returns
    ----------
    rpn_rois : (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    """
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    if cfg_key:
        pre_nms_topN = cfg.ZLRM.TRAIN.RPN_PRE_NMS_TOP_N  # 12000
        post_nms_topN = cfg.ZLRM.TRAIN.RPN_POST_NMS_TOP_N  # 2000
        nms_thresh = cfg.ZLRM.TRAIN.RPN_NMS_THRESH  # 0.7
        min_size = cfg.ZLRM.TRAIN.RPN_MIN_SIZE  # 16
    else:
        pre_nms_topN = cfg.ZLRM.TEST.RPN_PRE_NMS_TOP_N  # 6000
        post_nms_topN = cfg.ZLRM.TEST.RPN_POST_NMS_TOP_N  # 300
        nms_thresh = cfg.ZLRM.TEST.RPN_NMS_THRESH  # 0.7
        min_size = cfg.ZLRM.TEST.RPN_MIN_SIZE  # 16

    height, width = rpn_cls_prob_reshape.shape[1:3]

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    # (1, H, W, A)
    scores = np.reshape(
        np.reshape(rpn_cls_prob_reshape,
                   [1, height, width, _num_anchors, 2])[:, :, :, :, 1],
        [1, height, width, _num_anchors])

    # TODO: NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
    # TODO: if you use the old trained model, VGGnet_fast_rcnn_iter_70000.ckpt, uncomment this line
    # scores = rpn_cls_prob_reshape[:,:,:,_num_anchors:]

    bbox_deltas = rpn_bbox_pred

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.reshape((-1, 4))  #(HxWxA, 4)

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # # remove irregular boxes, too fat too tall
    # keep = _filter_irregular_boxes(proposals)
    # proposals = proposals[keep, :]
    # scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
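`clip_boxes` in the NumPy proposal layers is the standard py-faster-rcnn version that clamps (x1, y1, x2, y2) boxes to the image; a sketch for reference:

import numpy as np

def clip_boxes(boxes, im_shape):
    """Clip (x1, y1, x2, y2) boxes to lie inside a (height, width) image."""
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)  # x1
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)  # y1
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)  # x2
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)  # y2
    return boxes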
Example #15
def proposal_layer(rpn_cls_prob_reshape,
                   rpn_bbox_pred,
                   im_info,
                   _feat_stride=[
                       cfg["ANCHOR_WIDTH"],
                   ]):
    """
    Parameters
    ----------
    rpn_cls_prob_reshape: (1 , H , W , Ax2) outputs of RPN, prob of bg or fg
    rpn_bbox_pred: (1 , H , W , Ax4), rgs boxes output of RPN
    im_info: a list of [image_height, image_width, scale_ratios]
    _feat_stride: the downsampling ratio of feature map to the original input image
    ----------
    Returns
    ----------
    rpn_rois : (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    """
    _anchors = generate_anchors()  # generate the 10 base anchors
    _num_anchors = _anchors.shape[0]  # number of base anchors (10)

    im_info = im_info[0]  # original image height/width and the scale ratio

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    pre_nms_topN = cfg["TEST"][
        "RPN_PRE_NMS_TOP_N"]  # 12000,在做nms之前,最多保留的候选box数目
    post_nms_topN = cfg["TEST"][
        "RPN_POST_NMS_TOP_N"]  # 2000,做完nms之后,最多保留的box的数目
    nms_thresh = cfg["TEST"]["RPN_NMS_THRESH"]  # nms用参数,阈值是0.7
    min_size = cfg["TEST"]["RPN_MIN_SIZE"]  # 候选box的最小尺寸,目前是16,高宽均要大于16

    height, width = rpn_cls_prob_reshape.shape[1:3]  # feature map height/width
    width = width // 10  # the width axis packs the 10 anchors side by side

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    # (1, H, W, A)
    # pull out the foreground probabilities
    scores = np.reshape(
        np.reshape(rpn_cls_prob_reshape,
                   [1, height, width, _num_anchors, 2])[:, :, :, :, 1],
        [1, height, width, _num_anchors])
    # keep the object (fg) scores; the non-object ones are discarded,
    # reshaped to 1*H*W*10

    bbox_deltas = rpn_bbox_pred  # the predicted deltas are relative and must still be mapped to image coordinates

    # Enumerate all shifts
    # As in anchor-target-layer-tf, generate the anchor shifts to obtain all anchors over the whole image
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride

    # shift_x shape = [height, width]
    # build two matrices of the same shape
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    # print("shift_x", shift_x.shape)
    # print("shift_y", shift_y.shape)
    # shifts shape = [height*width,4]
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors  # 10
    K = shifts.shape[0]  # height*width,[height*width,4]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))  # all anchors over the entire image

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.reshape((-1, 4))  # (HxWxA, 4)

    # Same story for the scores:
    scores = scores.reshape((-1, 1))

    # TODO: needs changing if only 2 values are regressed
    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)  # inverse transform: real box coordinates on the image
    # TODO: needs changing if only 2 values are regressed
    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals,
                           im_info[:2])  # trim the proposals; parts outside the image are clipped away

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size)  # drop proposals below the minimum size
    proposals = proposals[keep, :]  # keep the remainder
    scores = scores[keep]
    bbox_deltas = bbox_deltas[keep, :]
    # print('proposals1', proposals.shape)
    score_filter = np.where(scores > 0.0)[0]
    proposals = proposals[score_filter, :]
    scores = scores[score_filter]
    bbox_deltas = bbox_deltas[score_filter, :]
    # print('proposals2', proposals.shape)
    # remove irregular boxes, too fat too tall
    # keep = _filter_irregular_boxes(proposals)
    # proposals = proposals[keep, :]
    # scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]  # sort by score, highest first
    if pre_nms_topN > 0:  # keep at most pre_nms_topN proposals for NMS
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    bbox_deltas = bbox_deltas[order, :]
    # print('proposals3', proposals.shape)
    s = time.time()

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)

    keep = nms(np.hstack((proposals, scores)),
               nms_thresh)  # run NMS; at most post_nms_topN proposals survive
    print('nms took {:.3f}s'.format(time.time() - s))
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    bbox_deltas = bbox_deltas[keep, :]

    # Output rois blob. Note: unlike the other proposal layers here, the
    # first column of this blob holds the score rather than a batch index.
    blob = np.hstack(
        (scores.astype(np.float32,
                       copy=False), proposals.astype(np.float32, copy=False)))

    return blob, bbox_deltas