Example #1
def draw_densecap(image, scores, rois, im_info, cap_probs, bbox_pred):
    """
    bbox_pred: [None, 4]
    rois: [None, 5]

    """
    # for bbox unnormalization

    bbox_mean = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS).reshape((1, 4))
    bbox_stds = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS).reshape((1, 4))

    boxes = rois[:, 1:5] / im_info[2]
    # [None, 12]
    cap_ids = np.argmax(cap_probs, axis=1).reshape((-1, cfg.TIME_STEPS))

    # bbox target unnormalization
    box_deltas = bbox_pred * bbox_stds + bbox_mean

    # do the transformation
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, image.shape)

    pos_dets = np.hstack(
        (pred_boxes, scores[:, 1][:, np.newaxis])).astype(np.float32,
                                                          copy=False)
    keep = nms(pos_dets, cfg.TEST.NMS)
    pos_boxes = boxes[keep, :]
    cap_ids = cap_ids[keep, :]
    im_info[2] = 1.
    img_cap = draw_bounding_boxes(image, pos_boxes, im_info, cap_ids)

    return img_cap
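
Every example on this page routes its boxes through `clip_boxes`. For reference, here is a minimal sketch of that helper in the usual py-faster-rcnn style (an assumption about the implementation these snippets import, not code taken from them):

import numpy as np

def clip_boxes(boxes, im_shape):
    """Clip (x1, y1, x2, y2) boxes to an image of shape (H, W, ...)."""
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)  # x1 in [0, W-1]
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)  # y1 in [0, H-1]
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)  # x2 in [0, W-1]
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)  # y2 in [0, H-1]
    return boxes

The 0::4 striding lets the same code clip both the [N, 4] arrays used here and the per-class [N, 4*K] layout from Fast R-CNN.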
Example #2
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """A simplified version compared to fast/er RCNN
     For details please see the technical report
  """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    if cfg.DEBUG_ALL:
        print('number of proposals before clipping boxes to the image border: {}'.format(
            proposals.shape[0]
        ))
    proposals = clip_boxes(proposals, im_info[:2])

    # remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    if cfg.FILTER_SMALL_BOX:
        min_size = cfg[cfg_key].RPN_MIN_SIZE
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    if cfg.DEBUG_ALL:
        print("number of proposals before nms: {}".format(proposals.shape[0]))
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if cfg.DEBUG_ALL:
        print("number of proposals after nms: {}".format(len(keep)))

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
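
The `nms` call above consumes an (N, 5) array of [x1, y1, x2, y2, score] rows and returns the indices to keep. A pure-NumPy sketch in the style of the classic py_cpu_nms, assuming that is the variant these examples bind to:

def nms(dets, thresh):
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the kept box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # suppress boxes whose IoU with the kept box exceeds thresh
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep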
Example #3
def interpret_objects(cls_prob,
                      bbox_pred,
                      rois,
                      im_info,
                      nms_thres=-1.,
                      min_score=0.00001,
                      use_gt_boxes=False,
                      max_per_image=2000):
    box_deltas = bbox_pred.data.cpu().numpy()
    cls_prob = cls_prob.data.cpu().numpy()
    all_boxes = [[] for _ in range(cls_prob.shape[1])]

    for j in range(1, cls_prob.shape[1]):  # skip the background
        inds = np.where(cls_prob[:, j] > min_score)[0]
        if len(inds) == 0:
            continue
        cls_scores = cls_prob[inds, j]
        if use_gt_boxes:
            cls_boxes = rois.data.cpu().numpy()[inds, 1:5] / im_info[0][2]
        else:
            t_box_deltas = np.asarray(
                [box_deltas[i, (j * 4):(j * 4 + 4)] for i in inds],
                dtype=np.float64)
            cls_boxes = bbox_transform_inv_hdn(
                rois.data.cpu().numpy()[inds, 1:5],
                t_box_deltas) / im_info[0][2]
            cls_boxes = clip_boxes(cls_boxes, im_info[0][:2] / im_info[0][2])

        cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
            .astype(np.float32, copy=False)
        if nms_thres > 0.:
            keep = nms(cls_dets, nms_thres)
            cls_dets = cls_dets[keep, :]

        all_boxes[j] = cls_dets

    if max_per_image > 0:
        image_scores = np.hstack([
            all_boxes[j][:, -1] for j in range(1, cls_prob.shape[1])
            if len(all_boxes[j]) > 0
        ])
        #print('{} detections.'.format(len(image_scores)))
        if len(image_scores) > max_per_image:
            image_thresh = np.sort(image_scores)[-max_per_image]
            for j in range(1, cls_prob.shape[1]):
                if len(all_boxes[j]) == 0:
                    continue
                keep = np.where(all_boxes[j][:, -1] >= image_thresh)[0]
                all_boxes[j] = all_boxes[j][keep, :]

    return all_boxes
Example #4
def compute_rois_offset(rois, offset, im_info=None):
    """Compute bounding-box offset for region of interests"""

    assert rois.shape[1] == 4
    assert offset.shape[1] == 4

    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev -- reverse the transformation
        offset_unnorm = offset * np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS) + \
                        np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
    else:
        offset_unnorm = offset.copy()
    rois_offset = bbox_transform_inv(rois, offset_unnorm)
    if im_info is not None:
        rois_offset = clip_boxes(rois_offset, im_info[:2])
    return rois_offset
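
The normalize/unnormalize round trip above is purely elementwise; a toy check with hypothetical means and stds (the real values come from cfg.TRAIN):

import numpy as np

means = np.array([0., 0., 0., 0.])          # hypothetical BBOX_NORMALIZE_MEANS
stds = np.array([0.1, 0.1, 0.2, 0.2])       # hypothetical BBOX_NORMALIZE_STDS
offset = np.array([[1.0, -1.0, 0.5, 0.5]])  # one normalized offset row
offset_unnorm = offset * stds + means
# offset_unnorm -> [[ 0.1, -0.1,  0.1,  0.1]]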
Example #5
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride,
                       anchors, num_anchors):
    """A layer that just selects the top region proposals
     without using non-maximal suppression,
     For details please see the technical report
  """
    rpn_top_n = cfg.TEST.RPN_TOP_N

    scores = rpn_cls_prob[:, :, :, num_anchors:]

    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))

    length = scores.shape[0]
    if length < rpn_top_n:
        # Random selection; possibly unnecessary and may lose good proposals,
        # but such cases rarely happen
        top_inds = npr.choice(length, size=rpn_top_n, replace=True)
    else:
        top_inds = scores.argsort(0)[::-1]
        top_inds = top_inds[:rpn_top_n]
        top_inds = top_inds.reshape(rpn_top_n, )

    # Do the selection here
    anchors = anchors[top_inds, :]
    rpn_bbox_pred = rpn_bbox_pred[top_inds, :]
    scores = scores[top_inds]

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)

    # Clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
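
`bbox_transform_inv` turns anchors plus predicted (dx, dy, dw, dh) deltas into absolute boxes. A sketch of the standard Faster R-CNN form, assumed to match the helper these examples import:

def bbox_transform_inv(boxes, deltas):
    # anchor widths, heights, and centers
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy = deltas[:, 0::4], deltas[:, 1::4]
    dw, dh = deltas[:, 2::4], deltas[:, 3::4]

    # shift the centers and rescale the sizes
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    # back to (x1, y1, x2, y2)
    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
    return pred_boxes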
Example #6
def im_detect(sess, net, im, boxes=None, use_box_at=-1):
    """Detect object classes in an image given object proposals.

    Arguments:
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)
        use_box_at (int32): Use predicted box at a given timestep, default to the last one (use_box_at=-1)
    Returns:
        scores (ndarray): R x 1 array of object class scores
        pred_boxes (ndarray): R x 4 array of predicted bounding boxes
        captions (list): length R list of list of word tokens (captions)
    """

    # for bbox unnormalization
    bbox_mean = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS).reshape((1, 4))
    bbox_stds = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS).reshape((1, 4))

    blobs, im_scales = _get_blobs(im, boxes)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)

    if cfg.TEST.USE_BEAM_SEARCH:
        scores, box_offsets, captions, boxes = beam_search(
            sess, net, blobs, im_scales)
    else:
        scores, box_offsets, captions, boxes = greedy_search(
            sess, net, blobs, im_scales)

    # bbox target unnormalization
    box_deltas = box_offsets * bbox_stds + bbox_mean

    # do the transformation
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, im.shape)

    return scores[:, 1], pred_boxes, captions
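
A hypothetical call, assuming `sess`, `net`, and `im` come from the surrounding DenseCap test harness (the threshold and token joining are illustrative only):

# hypothetical usage; sess, net, and im are assumed from the test harness
scores, pred_boxes, captions = im_detect(sess, net, im)
for score, box, cap in zip(scores, pred_boxes, captions):
    if score > 0.5:  # arbitrary confidence cutoff
        print(box, ' '.join(str(tok) for tok in cap))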
Example #7
def proposal_layer(rpn_cls_prob_reshape,
                   rpn_bbox_pred,
                   im_info,
                   cfg_key,
                   _feat_stride=[
                       16,
                   ],
                   anchor_scales=[
                       16,
                   ]):
    """
    Parameters
    ----------
    rpn_cls_prob_reshape: (1 , H , W , Ax2) outputs of RPN, prob of bg or fg
                         NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
    rpn_bbox_pred: (1 , H , W , Ax4), rgs boxes output of RPN
    im_info: a list of [image_height, image_width, scale_ratios]
    cfg_key: 'TRAIN' or 'TEST'
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    Returns
    ----------
    rpn_rois : (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    #layer_params = yaml.load(self.param_str_)

    """
    cfg_key = cfg_key.decode('ascii')
    # TODO: the anchor scales may need revising later; the text regions are fairly dense, so this needs further refinement
    # _anchors value
    # [[0    2   15   13]
    #  [0    0   15   15]
    #  [0   -4   15   19]
    #  [0   -9   15   24]
    #  [0  -16   15   31]
    #  [0  -26   15   41]
    #  [0  -41   15   56]
    #  [0  -62   15   77]
    #  [0  -91   15  106]
    #  [0 -134   15  149]]
    _anchors = generate_anchors(
        scales=np.array(anchor_scales))  # generate the 10 base anchors
    _num_anchors = _anchors.shape[0]  # 10 anchors

    im_info = im_info[0]  # original image height/width and scale factor

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N  # 12000, max candidate boxes kept before NMS
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # 2000, max boxes kept after NMS
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH  # NMS threshold, 0.7
    min_size = cfg[cfg_key].RPN_MIN_SIZE  # minimum candidate box size, currently 16; height and width must both exceed it

    height, width = rpn_cls_prob_reshape.shape[1:3]  # feature map height and width

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    # (1, H, W, A)
    # take the foreground classification scores
    scores = np.reshape(
        np.reshape(rpn_cls_prob_reshape,
                   [1, height, width, _num_anchors, 2])[:, :, :, :, 1],
        [1, height, width, _num_anchors])
    # extract the object scores (we do not care about the non-object ones)
    # and reshape to 1*H*W*10

    bbox_deltas = rpn_bbox_pred  # the predictions are relative offsets that must still be converted to real image coordinates
    #im_info = bottom[2].data[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    # as in anchor-target-layer-tf, generate the anchor shifts to obtain all anchors over the whole image
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    #print('w,h,x',width,height,width*height)

    # shift_x shape = [height, width]
    # generate two matrices of the same shape
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    # print("shift_x", shift_x.shape)
    # print("shift_y", shift_y.shape)
    # shifts shape = [height*width,4]
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    #print("shift shape", shifts.shape)

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors  # 10
    K = shifts.shape[0]  # height*width,[height*width,4]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    # print('_anchors.reshape((1, A, 4))',np.shape(_anchors.reshape((1, A, 4))))
    # print('shifts.reshape((1, K, 4)).transpose((1, 0, 2))',np.shape(shifts.reshape((1, K, 4)).transpose((1, 0, 2))))
    anchors = anchors.reshape((K * A, 4))  # these are now all anchors over the whole image
    # print(anchors)

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.reshape((-1, 4))  #(HxWxA, 4)

    # Same story for the scores:
    scores = scores.reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)  # apply the inverse transform to get real box coordinates in the image

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals,
                           im_info[:2])  # trim all proposals; parts outside the image are clipped away

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals,
                         min_size * im_info[2])  # remove proposals below the minimum size
    proposals = proposals[keep, :]  # keep the remaining proposals
    scores = scores[keep]
    bbox_deltas = bbox_deltas[keep, :]

    # # remove irregular boxes, too fat too tall
    # keep = _filter_irregular_boxes(proposals)
    # proposals = proposals[keep, :]
    # scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]  # sort by score from highest to lowest
    if pre_nms_topN > 0:  # keep up to 12000 proposals for NMS
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    bbox_deltas = bbox_deltas[order, :]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)),
               nms_thresh)  # run NMS, keeping up to 2000 proposals
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    bbox_deltas = bbox_deltas[keep, :]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    blob = np.hstack((scores.astype(np.float32, copy=False),
                      proposals.astype(np.float32, copy=False)))

    return blob, bbox_deltas
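
`_filter_boxes` above drops proposals whose sides fall below `min_size` (scaled into input-image coordinates via im_info[2]). A sketch of the usual implementation, stated as an assumption:

def _filter_boxes(boxes, min_size):
    """Keep only boxes whose width and height are both >= min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep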
Example #8
def interpret_relationships(cls_prob,
                            bbox_pred,
                            rois,
                            cls_prob_predicate,
                            mat_phrase,
                            im_info,
                            nms=-1.,
                            clip=True,
                            min_score=0.01,
                            top_N=100,
                            use_gt_boxes=False,
                            triplet_nms=-1.,
                            topk=10,
                            reranked_score=None):

    scores, inds = cls_prob[:, 1:].data.max(1)
    if reranked_score is not None:
        if isinstance(reranked_score, Variable):
            reranked_score = reranked_score.data
        scores *= reranked_score.squeeze()
    inds += 1
    scores, inds = scores.cpu().numpy(), inds.cpu().numpy()

    predicate_scores, predicate_inds = cls_prob_predicate[:, 1:].data.topk(
        dim=1, k=topk)
    predicate_inds += 1
    predicate_scores = predicate_scores.cpu().numpy().reshape(-1)
    predicate_inds = predicate_inds.cpu().numpy().reshape(-1)

    # Apply bounding-box regression deltas
    box_deltas = bbox_pred.data.cpu().numpy()
    box_deltas = np.asarray(
        [box_deltas[i, (inds[i] * 4):(inds[i] * 4 + 4)] for i in range(len(inds))],
        dtype=np.float64)
    keep = range(scores.shape[0])
    if use_gt_boxes:
        triplet_nms = -1.
        pred_boxes = rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]
    else:
        pred_boxes = bbox_transform_inv_hdn(rois.data.cpu().numpy()[:, 1:5],
                                            box_deltas) / im_info[0][2]
        pred_boxes = clip_boxes(pred_boxes, im_info[0][:2] / im_info[0][2])

        # nms
        if nms > 0. and pred_boxes.shape[0] > 0:
            assert nms < 1., 'Wrong nms parameters'
            pred_boxes, scores, inds, keep = nms_detections(pred_boxes,
                                                            scores,
                                                            nms,
                                                            inds=inds)

    sub_list = np.array([], dtype=int)
    obj_list = np.array([], dtype=int)
    pred_list = np.array([], dtype=int)

    # mapping the object id
    mapping = np.ones(cls_prob.size(0), dtype=np.int64) * -1
    mapping[keep] = range(len(keep))

    sub_list = mapping[mat_phrase[:, 0]]
    obj_list = mapping[mat_phrase[:, 1]]
    pred_remain = np.logical_and(sub_list >= 0, obj_list >= 0)
    pred_list = np.where(pred_remain)[0]
    sub_list = sub_list[pred_remain]
    obj_list = obj_list[pred_remain]

    # expand the sub/obj and pred list to k-column
    pred_list = np.vstack([pred_list * topk + i
                           for i in range(topk)]).transpose().reshape(-1)
    sub_list = np.vstack([sub_list
                          for i in range(topk)]).transpose().reshape(-1)
    obj_list = np.vstack([obj_list
                          for i in range(topk)]).transpose().reshape(-1)

    if use_gt_boxes:
        total_scores = predicate_scores[pred_list]
    else:
        total_scores = predicate_scores[pred_list] * \
            scores[sub_list] * scores[obj_list]

    top_N_list = total_scores.argsort()[::-1][:10000]
    total_scores = total_scores[top_N_list]
    pred_ids = predicate_inds[pred_list[top_N_list]]  # category of predicates
    sub_assignment = sub_list[top_N_list]  # subjects assignments
    obj_assignment = obj_list[top_N_list]  # objects assignments
    sub_ids = inds[sub_assignment]  # category of subjects
    obj_ids = inds[obj_assignment]  # category of objects
    sub_boxes = pred_boxes[sub_assignment]  # boxes of subjects
    obj_boxes = pred_boxes[obj_assignment]  # boxes of objects

    if triplet_nms > 0.:
        sub_ids, obj_ids, pred_ids, sub_boxes, obj_boxes, keep = triplet_nms_py(
            sub_ids, obj_ids, pred_ids, sub_boxes, obj_boxes, triplet_nms)
        sub_assignment = sub_assignment[keep]
        obj_assignment = obj_assignment[keep]
        total_scores = total_scores[keep]
    if len(sub_list) == 0:
        print('No Relationship remains')
        # pdb.set_trace()

    return pred_boxes, scores, inds, sub_ids, obj_ids, sub_boxes, obj_boxes, pred_ids, sub_assignment, obj_assignment, total_scores
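
The `mapping` array above re-indexes objects after NMS so that phrase endpoints can be looked up in the filtered set; dropped objects map to -1 and are pruned by the `pred_remain` mask. A toy illustration with made-up sizes:

import numpy as np

n_objects = 5
keep = [0, 2, 4]                  # indices that survived NMS
mapping = np.ones(n_objects, dtype=np.int64) * -1
mapping[keep] = range(len(keep))  # old index -> new index, -1 if dropped
# mapping == [0, -1, 1, -1, 2]; a phrase whose subject was object 2
# now points at row 1 of the kept boxes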
Example #9
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_infos, 
                    _feat_stride, opts, anchor_scales, anchor_ratios,
                    mappings):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    # layer_params = yaml.load(self.param_str_)
    batch_size = rpn_cls_prob_reshape.shape[0]
    _anchors = generate_anchors.generate_anchors(scales=anchor_scales, ratios=anchor_ratios)
    _num_anchors = _anchors.shape[0]
    pre_nms_topN = opts['num_box_pre_NMS']
    post_nms_topN = opts['num_box_post_NMS']
    nms_thres = opts['nms_thres']
    min_size = opts['min_size']

    blob = []
    
    for i in range(batch_size):
        im_info = im_infos[i]
        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        height = mappings[int(im_info[0])]
        width = mappings[int(im_info[1])]
        scores = rpn_cls_prob_reshape[i, _num_anchors:, :height, :width]
        bbox_deltas = rpn_bbox_pred[i, :, :height, :width]

        if DEBUG:
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))
        if DEBUG:
            print('score map size: {}'.format(scores.shape))

        # Enumerate all shifts
        shift_x = np.arange(0, width) * _feat_stride
        shift_y = np.arange(0, height) * _feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = _num_anchors
        K = shifts.shape[0]
        anchors = _anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        if opts['dropout_box_runoff_image']:
            _allowed_border = 16
            inds_inside = np.where(
                (proposals[:, 0] >= -_allowed_border) &
                (proposals[:, 1] >= -_allowed_border) &
                (proposals[:, 2] < im_info[1] + _allowed_border) &  # width
                (proposals[:, 3] < im_info[0] + _allowed_border)  # height
            )[0]
            proposals = proposals[inds_inside, :]
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        # print 'proposals', proposals
        # print 'scores', scores
        keep = nms(np.hstack((proposals, scores)).astype(np.float32), nms_thres)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]
        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.ones((proposals.shape[0], 1), dtype=np.float32) * i
        blob.append(np.hstack((batch_inds, proposals.astype(np.float32, copy=False), scores.astype(np.float32, copy=False))))

    return np.concatenate(blob, axis=0)
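
The shift/broadcast step is identical in every proposal layer on this page; a minimal check with a toy 2x2 feature map and two hypothetical base anchors:

import numpy as np

feat_stride = 16
height, width = 2, 2
shift_x, shift_y = np.meshgrid(np.arange(width) * feat_stride,
                               np.arange(height) * feat_stride)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()  # (K, 4), K = 4
base = np.array([[-8, -8, 8, 8], [-16, -16, 16, 16]])  # toy anchors, A = 2
anchors = (base.reshape((1, 2, 4)) +
           shifts.reshape((1, 4, 4)).transpose((1, 0, 2))).reshape((-1, 4))
assert anchors.shape == (4 * 2, 4)  # K * A anchors over the whole map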
Example #10
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, _feat_stride=[16,], anchor_scales=[8, 16, 32]):
    '''
    input[0], input[1], input[2], cfg_key, _feat_stride, anchor_scales
    :param rpn_cls_prob_reshape: keep/discard scores for the extracted bboxes, shape (N, W, H, 18); in effect this just distinguishes foreground (fg) from background (bg)
    :param rpn_bbox_pred: shape (N, W, H, 36), the extracted bbox coordinates (not ground truth)
    :param im_info:
    :param cfg_key:
    :param _feat_stride:
    :param anchor_scales:
    :return:
    '''
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    # layer_params = yaml.load(self.param_str_)
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]
    rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape, [0,3,1,2])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0,3,1,2])
    #rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,[1,rpn_cls_prob_reshape.shape[0],rpn_cls_prob_reshape.shape[1],rpn_cls_prob_reshape.shape[2]]),[0,3,2,1])
    #rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,2,1])
    im_info = im_info[0]
    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
    #cfg_key = 'TEST'
    # maximum number of proposals kept before NMS (non-maximum suppression)
    pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    # maximum number of proposals kept after NMS
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    # threshold used by NMS
    nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
    # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
    min_size      = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    #im_info = bottom[2].data[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]
    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    # the above maps feature-map coordinates back to positions in the original image, which makes computing IoU straightforward
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    # note that anchors and bounding boxes still differ: an anchor becomes a proposal (a bounding box)
    # only after being adjusted, and the adjustment coefficients are the predicted bbox_deltas
    proposals = bbox_transform_inv(anchors, bbox_deltas)
    # 2. clip predicted boxes to image; trim proposals to valid sizes
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
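
The `[:, _num_anchors:, :, :]` slice above relies on the first A channels being bg probs and the second A being fg probs. A quick shape check with a hypothetical A = 9:

import numpy as np

A = 9                                   # anchors per location (hypothetical)
prob = np.random.rand(1, 2 * A, 4, 6)   # toy (1, 2A, H, W) class probabilities
fg_scores = prob[:, A:, :, :]           # second half of the channels = fg probs
assert fg_scores.shape == (1, A, 4, 6)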
Example #11
File: demo.py Project: zhefan/TFFRCNN
def im_detect(sess, net, im, boxes=None):
    """Detect object classes in an image given object proposals.
    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals
    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """

    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)
    # forward pass
    if cfg.TEST.HAS_RPN:
        feed_dict = {
            net.data: blobs['data'],
            net.im_info: blobs['im_info'],
            net.keep_prob: 1.0
        }
    else:
        feed_dict = {
            net.data: blobs['data'],
            net.rois: blobs['rois'],
            net.keep_prob: 1.0
        }

    cls_score, cls_prob, bbox_pred, rois = \
        sess.run([net.get_output('cls_score'), net.get_output('cls_prob'), net.get_output('bbox_pred'),net.get_output('rois')],\
                 feed_dict=feed_dict)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = cls_score
    else:
        # use softmax estimated probabilities
        scores = cls_prob

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
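
The DEDUP_BOXES trick above hashes each RoI down to a single number so np.unique can spot duplicates. A toy run with a hypothetical DEDUP_BOXES of 1/16:

import numpy as np

dedup = 1. / 16  # hypothetical cfg.DEDUP_BOXES
rois = np.array([[0, 0, 0, 15, 15],
                 [0, 0, 0, 15, 15],     # duplicate of the first row
                 [0, 16, 16, 31, 31]], dtype=np.float32)
v = np.array([1, 1e3, 1e6, 1e9, 1e12])
hashes = np.round(rois * dedup).dot(v)
_, index, inv_index = np.unique(hashes, return_index=True, return_inverse=True)
# index selects the unique rows; inv_index maps results back onto all rows
assert len(index) == 2 and list(inv_index) == [0, 0, 1]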
Example #12
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, _feat_stride=[16,], anchor_scales=[16,]):
    """
    Parameters
    ----------
    rpn_cls_prob_reshape: (1 , H , W , Ax2) outputs of RPN, prob of bg or fg
                         NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
    rpn_bbox_pred: (1 , H , W , Ax4), rgs boxes output of RPN
    im_info: a list of [image_height, image_width, scale_ratios]
    cfg_key: 'TRAIN' or 'TEST'
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    Returns
    ----------
    rpn_rois : (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    #layer_params = yaml.load(self.param_str_)

    """
    # cfg_key=cfg_key.decode('ascii')
    _anchors = generate_anchors(scales=np.array(anchor_scales))  # generate the 9 base anchors
    _num_anchors = _anchors.shape[0]  # 9 anchors

    im_info = im_info[0]  # original image height/width and scale factor

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N  # 12000, max candidate boxes kept before NMS
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # 2000, max boxes kept after NMS
    nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH  # NMS threshold, 0.7
    min_size      = cfg[cfg_key].RPN_MIN_SIZE  # minimum candidate box size, currently 16; height and width must both exceed it
    # TODO: this minimum size may need changing later, perhaps to 8?

    height, width = rpn_cls_prob_reshape.shape[1:3]  # feature map height and width

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    # (1, H, W, A)
    scores = np.reshape(np.reshape(rpn_cls_prob_reshape, [1, height, width, _num_anchors, 2])[:,:,:,:,1],
                        [1, height, width, _num_anchors])
    # extract the object scores (we do not care about the non-object ones)
    # and reshape to 1*H*W*9

    bbox_deltas = rpn_bbox_pred  # the predictions are relative offsets that must still be converted to real image coordinates
    #im_info = bottom[2].data[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    # as in anchor-target-layer-tf, generate the anchor shifts to obtain all anchors over the whole image
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))  # these are now all anchors over the whole image

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.reshape((-1, 4)) #(HxWxA, 4)

    # Same story for the scores:
    scores = scores.reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)  # apply the inverse transform to get real box coordinates in the image

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])  # trim all proposals; parts outside the image are clipped away

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])  # remove proposals below the minimum size
    proposals = proposals[keep, :]  # keep the remaining proposals
    scores = scores[keep]
    bbox_deltas = bbox_deltas[keep, :]


    # # remove irregular boxes, too fat too tall
    # keep = _filter_irregular_boxes(proposals)
    # proposals = proposals[keep, :]
    # scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]  # sort by score from highest to lowest
    if pre_nms_topN > 0:  # keep up to 12000 proposals for NMS
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    bbox_deltas = bbox_deltas[order, :]


    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)  # run NMS, keeping up to 2000 proposals
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    bbox_deltas = bbox_deltas[keep, :]


    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    blob = np.hstack((scores.astype(np.float32, copy=False), proposals.astype(np.float32, copy=False)))

    return blob, bbox_deltas
Example #13
def im_detect(sess, net, im, boxes=None, use_box_at=-1):
    """Detect object classes in an image given object proposals.

    Arguments:
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)
        use_box_at (int32): Use predicted box at a given timestep, default to the last one (use_box_at=-1)
    Returns:
        scores (ndarray): R x 1 array of object class scores
        pred_boxes (ndarray): R x 4 array of predicted bounding boxes
        captions (list): length R list of list of word tokens (captions)
    """

    # for bbox unnormalization
    bbox_mean = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS).reshape((1, 4))
    bbox_stds = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS).reshape((1, 4))

    blobs, im_scales = _get_blobs(im, boxes)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)

    # (TODO wu) for now it only works with "concat" mode

    # get initial states and rois
    cap_state, loc_state, scores, rois = net.feed_image(
        sess, blobs['data'], blobs['im_info'][0])

    # proposal boxes
    boxes = rois[:, 1:5] / im_scales[0]
    proposal_n = rois.shape[0]

    cap_probs = np.ones((proposal_n, 1), dtype=np.int32)
    # index of <EOS> in vocab
    end_idx = 2
    # captions = np.empty([proposal_n, 1], dtype=np.int32)
    bbox_offsets_list = []
    box_offsets = np.zeros((proposal_n, 4), dtype=np.float32)
    bbox_pred = np.zeros((proposal_n, 4), dtype=np.float32)
    for i in range(cfg.TIME_STEPS - 1):
        # dim: [proposal_n, ]
        input_feed = np.argmax(cap_probs, axis=1)
        if i == 0:
            captions = input_feed[:, None]
        else:
            captions = np.concatenate((captions, input_feed[:, None]), axis=1)
        # dim: [proposal_n, i+1]
        end_ids = np.where(input_feed == end_idx)[0]
        # prepare for seq length in dynamic rnn
        input_feed[end_ids] = 0
        box_offsets[end_ids] = bbox_pred[end_ids]

        cap_probs, bbox_pred, cap_state, loc_state = net.inference_step(
            sess, input_feed, cap_state, loc_state)
        bbox_offsets_list.append(bbox_pred)

    # bbox target unnormalization
    box_deltas = box_offsets * bbox_stds + bbox_mean

    # do the transformation
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, im.shape)

    return scores[:, 1], pred_boxes, captions
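
The loop above is greedy decoding: at each timestep the argmax token is fed back in, and rows that have emitted <EOS> freeze their box offsets. A stripped-down sketch of the same control flow, with `step_fn` standing in for net.inference_step (names and shapes assumed):

import numpy as np

def decode_greedy(step_fn, proposal_n, time_steps, end_idx=2):
    """Greedy decoder mirroring the loop above; step_fn maps a batch of
    token ids to next-token probabilities of shape (proposal_n, vocab_n)."""
    cap_probs = np.ones((proposal_n, 1))
    captions = None
    for i in range(time_steps - 1):
        input_feed = np.argmax(cap_probs, axis=1)
        captions = (input_feed[:, None] if captions is None else
                    np.concatenate((captions, input_feed[:, None]), axis=1))
        input_feed[input_feed == end_idx] = 0  # finished rows feed a dummy token
        cap_probs = step_fn(input_feed)
    return captions

# e.g. decode_greedy(lambda tokens: np.random.rand(3, 10), 3, 5)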