Example #1
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride,
                   anchors, num_anchors, is_tfchannel=False):
    """A simplified version compared to fast/er RCNN
       For details please see the technical report
    """
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    im_info = im_info[0]
    # Get the scores and bounding boxes
    if is_tfchannel:
        scores = rpn_cls_prob.reshape(-1, 2)
        scores = scores[:, 1]
    else:
        scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    if 'RPN_NORMALIZE_TARGETS' in cfg.TRAIN.keys() \
            and cfg.TRAIN.RPN_NORMALIZE_TARGETS:
        rpn_bbox_pred *= cfg.TRAIN.RPN_NORMALIZE_STDS
        rpn_bbox_pred += cfg.TRAIN.RPN_NORMALIZE_MEANS

    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])


    # filter boxes
    if 'RPN_MIN_SIZE' in cfg[cfg_key].keys():
        min_size = cfg[cfg_key].RPN_MIN_SIZE
        if min_size > 0:
            keep = _filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            scores = scores[keep]

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores.flatten()
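
The example above relies on a _filter_boxes helper that is not shown. Below is a minimal sketch consistent with how it is called here (and with the classic py-faster-rcnn helper of the same name); treat it as an assumption rather than this repository's exact code.

import numpy as np

def _filter_boxes(boxes, min_size):
    """Return indices of boxes whose width and height are both >= min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep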
Example #2
def inference(val_func, inputs, data_dict):
    image = data_dict['data']
    ori_shape = image.shape

    if not config.eval_resize:
        resized_img, scale = image, 1
    else:
        resized_img, scale = dataset.resize_img_by_short_and_max_size(
            image, config.eval_image_short_size, config.eval_image_max_size)
    height, width = resized_img.shape[0:2]

    resized_img = resized_img.astype(np.float32) - config.image_mean
    resized_img = np.ascontiguousarray(resized_img[:, :, [2, 1, 0]])

    im_info = np.array([[height, width, scale, ori_shape[0], ori_shape[1], 0]],
                       dtype=np.float32)

    feed_dict = {inputs[0]: resized_img[None, :, :, :], inputs[1]: im_info}
    _, scores, pred_boxes, rois = val_func(feed_dict=feed_dict)

    boxes = rois[:, 1:5] / scale

    if cfg.TEST.BBOX_REG:
        pred_boxes = bbox_transform_inv(boxes, pred_boxes)
        pred_boxes = clip_boxes(pred_boxes, ori_shape)

    pred_boxes = pred_boxes.reshape(-1, config.num_classes, 4)
    result_boxes = []
    for j in range(1, config.num_classes):
        inds = np.where(scores[:, j] > config.test_cls_threshold)[0]
        cls_scores = scores[inds, j]
        cls_bboxes = pred_boxes[inds, j, :]
        cls_dets = np.hstack(
            (cls_bboxes, cls_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)

        keep = nms(cls_dets, config.test_nms)
        cls_dets = np.array(cls_dets[keep, :], dtype=np.float64, copy=False)
        for i in range(cls_dets.shape[0]):
            db = cls_dets[i, :]
            dbox = DetBox(db[0],
                          db[1],
                          db[2] - db[0],
                          db[3] - db[1],
                          tag=config.class_names[j],
                          score=db[-1])
            result_boxes.append(dbox)
    if len(result_boxes) > config.test_max_boxes_per_image:
        result_boxes = sorted(
            result_boxes, reverse=True, key=lambda t_res: t_res.score) \
            [:config.test_max_boxes_per_image]

    result_dict = data_dict.copy()
    result_dict['result_boxes'] = result_boxes
    return result_dict
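
The nms call used above takes rows of [x1, y1, x2, y2, score] plus an IoU threshold; the actual kernel is usually a Cython or GPU implementation imported elsewhere. The following pure-NumPy reference with the same interface (the classic py_cpu_nms) is shown only to make the semantics explicit, not as the project's real implementation.

import numpy as np

def nms(dets, thresh):
    """Greedy IoU suppression on rows of [x1, y1, x2, y2, score]."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # overlap of the current best box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # keep only boxes with low overlap for the next round
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep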
Example #3
def proposal_without_nms_layer(rpn_cls_prob,
                               rpn_bbox_pred,
                               im_info,
                               cfg_key,
                               feat_stride,
                               anchors,
                               num_anchors,
                               is_tfchannel=False):
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')

    if cfg_key == 'TRAIN':
        pre_nms_topN = cfg.TRAIN.RPN_PRE_NMS_TOP_N
    else:
        pre_nms_topN = cfg.TEST.RPN_PRE_NMS_TOP_N
    im_info = im_info[0]
    # Get the scores and bounding boxes
    if is_tfchannel:
        scores = rpn_cls_prob.reshape(-1, 2)
        scores = scores[:, 1]
    else:
        scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))

    if 'RPN_NORMALIZE_TARGETS' in cfg.TRAIN.keys() \
            and cfg.TRAIN.RPN_NORMALIZE_TARGETS:
        rpn_bbox_pred *= cfg.TRAIN.RPN_NORMALIZE_STDS
        rpn_bbox_pred += cfg.TRAIN.RPN_NORMALIZE_MEANS

    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # filter boxes
    min_size = 0
    if cfg_key == 'TRAIN':
        if 'RPN_MIN_SIZE' in cfg.TRAIN.keys():
            min_size = cfg.TRAIN.RPN_MIN_SIZE
    elif cfg_key == 'TEST':
        if 'RPN_MIN_SIZE' in cfg.TEST.keys():
            min_size = cfg.TEST.RPN_MIN_SIZE
    if min_size > 0:
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order].flatten()

    # Add one because TF's NMS assumes x2, y2 do not include the border.
    proposals_addone = np.array(proposals)
    proposals_addone[:, 2] += 1
    proposals_addone[:, 3] += 1
    return proposals, scores, proposals_addone
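
The comment about adding one pixel refers to TensorFlow's NMS op, which computes box areas without the +1 convention used elsewhere in this code. Below is a hypothetical sketch of how the three returned arrays might be consumed (TF 2.x eager mode; the dummy boxes and the 2000 / 0.7 limits are made up for illustration).

import numpy as np
import tensorflow as tf

# Dummy stand-ins for the proposals, scores, proposals_addone returned above.
proposals = np.array([[0., 0., 9., 9.],
                      [1., 1., 10., 10.],
                      [50., 50., 70., 70.]], dtype=np.float32)
scores = np.array([0.9, 0.8, 0.7], dtype=np.float32)
proposals_addone = proposals.copy()
proposals_addone[:, 2:4] += 1  # make x2, y2 exclusive, as the layer does

# tf.image.non_max_suppression treats the max coordinates as exclusive when
# computing areas, which is why the extra pixel was added before suppression.
keep = tf.image.non_max_suppression(proposals_addone, scores,
                                    max_output_size=2000,
                                    iou_threshold=0.7).numpy()
final_proposals = proposals[keep]
print(final_proposals)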
Example #4
def inference(val_func, inputs, data_dict):
    image = data_dict['data']
    ori_shape = image.shape

    if not config.eval_resize:
        resized_img, scale = image, 1
    else:
        resized_img, scale = dataset.resize_img_by_short_and_max_size(
            image, config.eval_image_short_size, config.eval_image_max_size)
    height, width = resized_img.shape[0:2]

    resized_img = resized_img.astype(np.float32) - config.image_mean
    resized_img = np.ascontiguousarray(resized_img[:, :, [2, 1, 0]])

    im_info = np.array(
        [[height, width, scale, ori_shape[0], ori_shape[1], 0]],
        dtype=np.float32)

    feed_dict = {inputs[0]: resized_img[None, :, :, :], inputs[1]: im_info}

    _, scores, pred_boxes, rois = val_func(feed_dict=feed_dict)

    boxes = rois[:, 1:5] / scale

    if cfg.TEST.BBOX_REG:
        pred_boxes = bbox_transform_inv(boxes, pred_boxes)
        pred_boxes = clip_boxes(pred_boxes, ori_shape)

    pred_boxes = pred_boxes.reshape(-1, config.num_classes, 4)
    result_boxes = []
    for j in range(1, config.num_classes):
        inds = np.where(scores[:, j] > config.test_cls_threshold)[0]
        cls_scores = scores[inds, j]
        cls_bboxes = pred_boxes[inds, j, :]
        cls_dets = np.hstack((cls_bboxes, cls_scores[:, np.newaxis])).astype(
            np.float32, copy=False)

        keep = nms(cls_dets, config.test_nms)
        cls_dets = np.array(cls_dets[keep, :], dtype=np.float64, copy=False)
        for i in range(cls_dets.shape[0]):
            db = cls_dets[i, :]
            dbox = DetBox(
                db[0], db[1], db[2] - db[0], db[3] - db[1],
                tag=config.class_names[j], score=db[-1])
            result_boxes.append(dbox)
    if len(result_boxes) > config.test_max_boxes_per_image:
        result_boxes = sorted(
            result_boxes, reverse=True, key=lambda t_res: t_res.score) \
            [:config.test_max_boxes_per_image]

    result_dict = data_dict.copy()
    result_dict['result_boxes'] = result_boxes
    return result_dict
def proposal_without_nms_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                               feat_stride, anchors, num_anchors,
                               is_tfchannel=False):
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')

    if cfg_key == 'TRAIN':
        pre_nms_topN = cfg.TRAIN.RPN_PRE_NMS_TOP_N
    else:
        pre_nms_topN = cfg.TEST.RPN_PRE_NMS_TOP_N
    im_info = im_info[0]
    # Get the scores and bounding boxes
    if is_tfchannel:
        scores = rpn_cls_prob.reshape(-1, 2)
        scores = scores[:, 1]
    else:
        scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))

    if 'RPN_NORMALIZE_TARGETS' in cfg.TRAIN.keys() \
            and cfg.TRAIN.RPN_NORMALIZE_TARGETS:
        rpn_bbox_pred *= cfg.TRAIN.RPN_NORMALIZE_STDS
        rpn_bbox_pred += cfg.TRAIN.RPN_NORMALIZE_MEANS

    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # filter boxes
    min_size = 0
    if cfg_key == 'TRAIN':
        if 'RPN_MIN_SIZE' in cfg.TRAIN.keys():
            min_size = cfg.TRAIN.RPN_MIN_SIZE
    elif cfg_key == 'TEST':
        if 'RPN_MIN_SIZE' in cfg.TEST.keys():
            min_size = cfg.TEST.RPN_MIN_SIZE
    if min_size > 0:
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order].flatten()

    # Add one because TF's NMS assumes x2, y2 do not include the border.
    proposals_addone = np.array(proposals)
    proposals_addone[:, 2] += 1
    proposals_addone[:, 3] += 1
    return proposals, scores, proposals_addone
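
clip_boxes appears in every example but is never defined here. The sketch below follows the standard Faster R-CNN utility, which clamps x1, y1, x2, y2 (and every fourth column, so per-class box layouts also work) to the image extent; assume the real implementation may differ in detail.

import numpy as np

def clip_boxes(boxes, im_shape):
    """Clip boxes with columns repeating (x1, y1, x2, y2) to the image bounds."""
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)  # x1
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)  # y1
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)  # x2
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)  # y2
    return boxes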
Example #6
def coco_results_one_category_kernel(data_pack):
    cat_id = data_pack['cat_id']
    ann_type = data_pack['ann_type']
    binary_thresh = data_pack['binary_thresh']
    all_im_info = data_pack['all_im_info']
    boxes = data_pack['boxes']
    if ann_type == 'bbox':
        masks = []
    elif ann_type == 'segm':
        masks = data_pack['masks']
    else:
        raise ValueError('unimplemented ann_type: ' + ann_type)
    cat_results = []
    for im_ind, im_info in enumerate(all_im_info):
        index = im_info['index']

        dets = boxes[im_ind].astype(np.float64)
        if len(dets) == 0:
            continue
        scores = dets[:, -1]
        if ann_type == 'bbox':
            xs = dets[:, 0]
            ys = dets[:, 1]
            ws = dets[:, 2] - xs + 1
            hs = dets[:, 3] - ys + 1
            result = [{
                'image_id': index,
                'category_id': cat_id,
                'bbox': [xs[k], ys[k], ws[k], hs[k]],
                'score': scores[k]
            } for k in range(dets.shape[0])]
        elif ann_type == 'segm':
            width = im_info['width']
            height = im_info['height']
            dets[:, :4] = clip_boxes(dets[:, :4], [height, width])
            mask_encode = mask_voc2coco(masks[im_ind], dets[:, :4], height,
                                        width, binary_thresh)
            result = [{
                'image_id': index,
                'category_id': cat_id,
                'segmentation': mask_encode[k],
                'score': scores[k]
            } for k in range(len(mask_encode))]
        cat_results.extend(result)
    return cat_results
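
The dictionaries built here follow the COCO results format, so the usual downstream step is to dump the concatenated per-category lists to JSON and score them with pycocotools. The sketch below shows that step; the file names and the all_results variable are hypothetical, and numpy scalars should be cast to plain Python floats before dumping.

import json
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

all_results = []  # concatenation of the per-category lists returned above

with open('results.json', 'w') as f:
    json.dump(all_results, f)

coco_gt = COCO('instances_val.json')            # ground-truth annotations
coco_dt = coco_gt.loadRes('results.json')       # detections in COCO format
coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')  # or 'segm' for masks
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()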
Example #7
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info,
                       _feat_stride, anchors, num_anchors):
    """A layer that just selects the top region proposals
       without using non-maximal suppression,
       For details please see the technical report
    """
    rpn_top_n = cfg.TEST.RPN_TOP_N
    im_info = im_info[0]
    scores = rpn_cls_prob[:, :, :, num_anchors:]

    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))

    length = scores.shape[0]
    if length < rpn_top_n:
        # Random selection, maybe unnecessary and loses good proposals,
        # but such a case rarely happens
        top_inds = npr.choice(length, size=rpn_top_n, replace=True)
    else:
        top_inds = scores.argsort(0)[::-1]
        top_inds = top_inds[:rpn_top_n]
        top_inds = top_inds.reshape(rpn_top_n, )

    # Do the selection here
    anchors = anchors[top_inds, :]
    rpn_bbox_pred = rpn_bbox_pred[top_inds, :]
    scores = scores[top_inds]

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)

    # Clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
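
bbox_transform_inv is the delta decoder shared by all of these layers: it turns (dx, dy, dw, dh) regressions into absolute boxes around the anchors. The sketch below follows the standard Faster R-CNN implementation and is offered as an assumption about what the imported function does; the exp on dw, dh is also why one of the examples clamps those deltas to at most 20 before decoding.

import numpy as np

def bbox_transform_inv(boxes, deltas):
    """Decode (dx, dy, dw, dh) deltas against anchor/proposal boxes."""
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]

    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes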
Example #8
def proposal_without_nms_layer(rpn_cls_prob_fg, rpn_bbox_pred, im_info,
                               anchors):

    pre_nms_topN = cfg.TRAIN.RPN_PRE_NMS_TOP_N
    im_info = im_info[0]

    scores = rpn_cls_prob_fg
    scores = scores.reshape((-1, 1))
    rpn_bbox_pred[:, 2:4] = np.minimum(20, rpn_bbox_pred[:, 2:4])
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order].flatten()

    # Add one because TF's NMS assumes x2, y2 do not include the border.
    proposals_addone = np.array(proposals)
    proposals_addone[:, 2] += 1
    proposals_addone[:, 3] += 1
    return proposals, scores, proposals_addone, order
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride,
                   anchors, num_anchors, is_tfchannel=False):
    """A simplified version compared to fast/er RCNN
       For details please see the technical report
    """
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    if cfg_key == 'TRAIN':
        pre_nms_topN = cfg.TRAIN.RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg.TRAIN.RPN_POST_NMS_TOP_N
        nms_thresh = cfg.TRAIN.RPN_NMS_THRESH
    else:
        pre_nms_topN = cfg.TEST.RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg.TEST.RPN_POST_NMS_TOP_N
        nms_thresh = cfg.TEST.RPN_NMS_THRESH

    im_info = im_info[0]
    # Get the scores and bounding boxes
    if is_tfchannel:
        scores = rpn_cls_prob.reshape(-1, 2)
        scores = scores[:, 1]
    else:
        scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    if 'RPN_NORMALIZE_TARGETS' in cfg.TRAIN.keys() \
            and cfg.TRAIN.RPN_NORMALIZE_TARGETS:
        rpn_bbox_pred *= cfg.TRAIN.RPN_NORMALIZE_STDS
        rpn_bbox_pred += cfg.TRAIN.RPN_NORMALIZE_MEANS

    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # filter boxes
    min_size = 0
    if cfg_key == 'TRAIN':
        if 'RPN_MIN_SIZE' in cfg.TRAIN.keys():
            min_size = cfg.TRAIN.RPN_MIN_SIZE
    elif cfg_key == 'TEST':
        if 'RPN_MIN_SIZE' in cfg.TEST.keys():
            min_size = cfg.TEST.RPN_MIN_SIZE

    if min_size > 0:
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores.flatten()
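
For completeness, the anchors and _feat_stride arguments these layers receive are typically produced by tiling a small set of base anchors over the feature-map grid. The sketch below shows that step under the usual convention (one base-anchor set per cell, stride in input pixels); the function name is made up here, and the base anchors would come from a generate_anchors-style routine.

import numpy as np

def shift_anchors_over_grid(base_anchors, height, width, feat_stride):
    """Tile A base anchors over every cell of an H x W feature map."""
    shift_x = np.arange(0, width) * feat_stride
    shift_y = np.arange(0, height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    A = base_anchors.shape[0]
    K = shifts.shape[0]
    # broadcast (1, A, 4) + (K, 1, 4) -> (K, A, 4), then flatten to (K * A, 4)
    anchors = (base_anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    return anchors.reshape((K * A, 4)).astype(np.float32)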