Example #1
def detect(net, im):
    # Detect all object classes and regress object bounds
    ims = []
    ims.append(im)
    scores, boxes = im_detect(net, ims)
    scores = scores[0]
    boxes = boxes[0]
    # filter boxes according to prob scores
    keeps = np.where(scores[:, 0] > cfg.TEST.PROB)[0]
    scores = scores[keeps, :]
    boxes = boxes[keeps, :]

    # change boxes according to input size and the original image size
    im_shape = np.array(im.shape[0:2])
    im_scales = float(cfg.TEST.SCALES[0]) / im_shape
    
    boxes[:, 0::2] = boxes[:, 0::2] / im_scales[1]
    boxes[:, 1::2] = boxes[:, 1::2] / im_scales[0]

    # filter boxes with small sizes
    boxes = clip_boxes(boxes, im_shape)
    keeps = filter_boxes(boxes, cfg.TEST.RON_MIN_SIZE)
    scores = scores[keeps, :]
    boxes = boxes[keeps, :]

    scores = np.tile(scores[:, 0], (len(CLASSES), 1)).transpose() * scores

    return scores, boxes
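
Several of these examples call clip_boxes (and filter_boxes) from the py-faster-rcnn utilities. For orientation, a minimal sketch of what clip_boxes does, modeled on fast_rcnn/bbox_transform.py; treat it as a reference sketch, not the exact vendored code:

import numpy as np

def clip_boxes_sketch(boxes, im_shape):
    # Clip (x1, y1, x2, y2) columns to the image; im_shape is (height, width).
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)  # x1
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)  # y1
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)  # x2
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)  # y2
    return boxes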
Example #2
def im_detect(net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    # at this point blobs['data'] holds the image and blobs['rois'] the proposals

    # reshape network inputs to match blobs['data'].shape and blobs['rois'].shape
    net.blobs['data'].reshape(*(blobs['data'].shape))
    net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward with blobs['data'] and blobs['rois']
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False),
                      'rois': blobs['rois'].astype(np.float32, copy=False)}
    blobs_out = net.forward(**forward_kwargs)

    # use softmax estimated probabilities (net output)
    scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
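
The deduplication block above hashes each quantized ROI row down to a single number so np.unique can spot duplicates. A small self-contained demo of the trick; the values are made up, and DEDUP_BOXES is 1/16 (the feature stride) in the stock py-faster-rcnn config:

import numpy as np

# Toy ROIs as (batch_idx, x1, y1, x2, y2); rows 0 and 2 collide once
# quantized at the feature stride.
rois = np.array([[0.,  17.,  9., 100., 200.],
                 [0., 300., 50., 400., 500.],
                 [0.,  20., 10., 104., 198.]])
v = np.array([1, 1e3, 1e6, 1e9, 1e12])
hashes = np.round(rois * (1. / 16)).dot(v)
_, index, inv_index = np.unique(hashes, return_index=True, return_inverse=True)
unique_rois = rois[index, :]            # run the network only on these
restored = unique_rois[inv_index, :]    # broadcast results back to all rows
assert len(index) == 2 and restored.shape == rois.shape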
Example #3
def im_detectreg(net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # This model uses an RPN, so there are no precomputed proposals to
    # deduplicate; build the im_info blob the RPN expects instead.
    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
        dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    net.blobs['im_info'].reshape(*(blobs['im_info'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    assert len(im_scales) == 1, "Only single-image batch implemented"
    rois = net.blobs['rois'].data.copy()
    # unscale back to raw image space
    boxes = rois[:, 1:5] / im_scales[0]

    # The output blob is not named 'bbox_pred' and the train-time snapshot
    # code was not modified to fold in the normalization, so un-normalize
    # the deltas with the (hard-coded) means and stds here.
    # TODO: replace with the global average means and stds
    box_deltas = blobs_out['one_bbox_pred']
    bbox_means = np.array([0, 0, 0, 0], dtype=np.float32)
    bbox_stds = np.array([0.1, 0.1, 0.2, 0.2], dtype=np.float32)
    box_deltas = box_deltas * bbox_stds + bbox_means

    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, im.shape)

    return pred_boxes
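
bbox_transform_inv, used throughout these examples, decodes (dx, dy, dw, dh) deltas back into corner boxes. A sketch of the standard py-faster-rcnn decoding is reproduced here for reference; check your vendored fast_rcnn/bbox_transform.py for the authoritative version:

import numpy as np

def bbox_transform_inv_sketch(boxes, deltas):
    # boxes: (N, 4) proposals as (x1, y1, x2, y2)
    # deltas: (N, 4*K) per-class regression targets (dx, dy, dw, dh)
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]

    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w   # x1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h   # y1
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w   # x2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h   # y2
    return pred_boxes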
Example #4
def gao(net):
    from fast_rcnn.bbox_transform import clip_boxes, bbox_transform_inv

    im = net.blobs['data'].data.copy()
    im = im[0, :, :, :]
    im = im.transpose(1, 2, 0)
    im += cfg.PIXEL_MEANS
    im = im.astype(np.uint8, copy=False)

    cls_prob = net.blobs['cls_prob'].data.copy()
    cls_prob_repool_head = net.blobs['cls_prob_repool_head'].data.copy()

    rois = net.blobs['head_repool'].data.copy()
    boxes = rois[:, 1:5]

    # bbox_targets_hard's shape : (128, 8)
    # labels_hard's shape : (128,)
    bbox_targets_hard = net.blobs['head_pred_repool'].data.copy()

    pred_boxes = bbox_transform_inv(boxes, bbox_targets_hard)
    pred_boxes = clip_boxes(pred_boxes, im.shape)
    #    cls_boxes = pred_boxes[:, 4:]

    inds = np.where(cls_prob_repool_head[:, 1] > 0.05)[0]
    cls_scores_head = cls_prob_repool_head[inds, 1]
    cls_head = pred_boxes[inds, 4:8]
    cls_head_dets = np.hstack((cls_head, cls_scores_head[:, np.newaxis])) \
                .astype(np.float32, copy=False)

    cls_boxes = cls_head_dets
    print(cls_head_dets.shape)
    print(cls_head_dets[0])
    '''
    keep = nms(cls_head_dets, cfg.TEST.NMS)
    head_NMSed = cls_head_dets[keep, :]
    cls_boxes = head_NMSed
    '''

    print(cls_head_dets.shape)
    print(cls_head_dets[0:10])
    print(cls_prob[0:10])
    print(cls_prob_repool_head[0:10])
    '''
    plt.figure()
    plt.plot(cls_prob[:, 1])
    plt.figure()
    plt.plot(cls_prob_repool_head[:, 1])
    plt.show()
    '''

    vis_detections(im, cls_boxes)
Example #5
def im_detect(net, im, boxes):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    # use softmax estimated probabilities
    scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
Example #6
def im_detect(net, im):
    """Detect object classes in an image given object proposals.

	Arguments:
		net (caffe.Net): Fast R-CNN network to use
		im (ndarray): color image to test (in BGR order)
		boxes (ndarray): R x 4 array of object proposals or None (for RPN)

	Returns:
		scores (ndarray): R x K array of object class scores (K includes
			background as object category 0)
		boxes (ndarray): R x (4*K) array of predicted bounding boxes
	"""
    blobs, im_scale = _get_blobs(im)
    resized_shape = (int(im.shape[0] * im_scale), int(im.shape[1] * im_scale),
                     im.shape[2])

    net.blobs['data'].reshape(*(blobs['data'].shape))
    net.blobs['im_info'].reshape(*(blobs['im_info'].shape))

    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    rois = net.blobs['rois'].data.copy()
    boxes = rois[:, 1:5]

    #return proposal roi
    pred_boxes = boxes
    scores = net.blobs['rpn_scores'].data.copy()
    # scores = blobs_out['cls_prob']
    # box_deltas = blobs_out['bbox_pred']
    # pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, resized_shape)
    pred_boxes = pred_boxes / im_scale
    color = (0, 0, 255)
    for i in range(len(pred_boxes)):
        bbox = pred_boxes[i, :]
        score = scores[i]
        if score < 0.95:
            continue
        # OpenCV drawing functions need integer pixel coordinates
        x1, y1, x2, y2 = [int(v) for v in bbox]
        cv2.rectangle(im, (x1, y1), (x2, y2), color, 2)
        info = "{}".format(str(score)[:4])
        cv2.putText(im, info, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, color, 1)

    window = "test"
    cv2.imshow(window, im)
    if cv2.waitKey(-1) == 27:
        sys.exit(0)
    return scores, pred_boxes
Example #7
def im_detect(net, im, boxes):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    # use softmax estimated probabilities
    scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
Example #8
    def forward(self, bottom, top):
        proposals = bottom[0].data
        proposals = proposals[:, 1:]
        predicted_box_deltas = bottom[1].data
        predicted_box_deltas = predicted_box_deltas.reshape((-1, 8))
        im_info = bottom[2].data

        pred_boxes = bbox_transform_inv(proposals, predicted_box_deltas[:, 4:])
        pred_boxes = clip_boxes(pred_boxes, [im_info[0, 0], im_info[0, 1]])

        pred_boxes_final = np.zeros((pred_boxes.shape[0], 5))
        pred_boxes_final[:, 1:] = pred_boxes

        top[0].reshape(*pred_boxes_final.shape)
        top[0].data[...] = pred_boxes_final
Example #9
def im_detect_split(net, im, boxes, use_wzctx):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs_all, im_scales = _get_blobs(im, boxes)
    num_boxes = boxes.shape[0]
    scores = np.zeros((num_boxes, 201), dtype=np.float32)
    box_deltas = np.zeros((num_boxes, 4*201), dtype=np.float32)
    for i in xrange(blobs_all['data'].shape[0]):
        # load blobs
        inds = np.where(blobs_all['rois'][:, 0] == i)[0]
        if inds.shape[0] == 0:
            continue
        blobs = {'data' : None, 'rois' : None}
        blobs['data'] = blobs_all['data'][[i]]
        blobs['rois'] = blobs_all['rois'][inds]
        blobs['rois'][:, 0] = 0

        # reshape network inputs
        net.blobs['data'].reshape(*(blobs['data'].shape))
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

        # do forward
        forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
        blobs_out = net.forward(**forward_kwargs)

        # use softmax estimated probabilities
        score = blobs_out['cls_prob']
        scores[inds] = score

        box_delta = blobs_out['bbox_pred_avg']
        box_deltas[inds] = box_delta

    # Apply bounding-box regression deltas
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, im.shape)

    return scores, pred_boxes
Example #10
def transform_kp_to_box(gt_keyPoints, gt_boxes, im, fh=3):
    # kp_num is needed on both branches below, so compute it up front
    kp_num = cfg.TRAIN.ATTRIBUTES[0]['gt_keyPoints'] / 2
    if cfg.FILTER_INVALID_BOX:
        gt_keyPoints = gt_keyPoints.reshape([-1, kp_num, 2])
        x1 = np.min(gt_keyPoints[:, :, 0], 1).reshape(-1, 1)
        y1 = np.min(gt_keyPoints[:, :, 1], 1).reshape(-1, 1)
        x2 = np.max(gt_keyPoints[:, :, 0], 1).reshape(-1, 1)
        y2 = np.max(gt_keyPoints[:, :, 1], 1).reshape(-1, 1)
        if cfg.WIDER_FACE_STYLE == 1:
            offset = (y2 - y1) / fh
            y1 = y1 - offset
        elif cfg.WIDER_FACE_STYLE == 2:
            if kp_num == 19:  # aflw-full
                y_offset = (y2 - y1) / fh
                y1 = y1 - y_offset
                x_offset = (gt_keyPoints[:, 1, 0] - gt_keyPoints[:, 0, 0]
                            )  # 2, 1
                x1 = x1 - x_offset
                x_offset = (gt_keyPoints[:, 5, 0] - gt_keyPoints[:, 4, 0]
                            )  # 5, 4
                x2 = x2 + x_offset
            elif kp_num == 29:  # cofw
                y_offset = (y2 - y1) / fh
                y1 = y1 - y_offset
                x_offset = (gt_keyPoints[:, 4, 0] - gt_keyPoints[:, 0, 0])
                x1 = x1 - x_offset
                x_offset = (gt_keyPoints[:, 1, 0] - gt_keyPoints[:, 6, 0])
                x2 = x2 + x_offset
        boxes = np.hstack([x1, y1, x2, y2])
    else:
        boxes = np.zeros([gt_keyPoints.shape[0], 4])
        for i, gt_keyPoint in enumerate(gt_keyPoints):
            if sum(gt_keyPoint) != 0:
                gt_keyPoint = gt_keyPoint.reshape([kp_num, 2])
                x1 = np.min(gt_keyPoint[:, 0])
                y1 = np.min(gt_keyPoint[:, 1])
                x2 = np.max(gt_keyPoint[:, 0])
                y2 = np.max(gt_keyPoint[:, 1])
                if cfg.WIDER_FACE_STYLE:
                    offset = (y2 - y1) / fh
                    y1 = y1 - offset
                boxes[i] = [x1, y1, x2, y2]
            else:
                boxes[i] = gt_boxes[i]
    if cfg.CLIP_BOXES:
        im_shape = cv2.imread(im).shape[0:2]
        boxes = clip_boxes(boxes, im_shape)
    return boxes
Example #11
def compute_rois_offset(rois, offset, im_info=None):
    """Compute bounding-box offset for region of interests"""

    
    assert rois.shape[1] == 4
    assert offset.shape[1] == 4
    
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Targets were normalized by a precomputed mean and stdev;
        # reverse that transformation here.
        offset_unnorm = offset * np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS) + \
                        np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
    else:
        offset_unnorm = offset.copy()
    rois_offset = bbox_transform_inv(rois, offset_unnorm)
    if im_info is not None:
        rois_offset = clip_boxes(rois_offset, im_info[:2])
    return rois_offset
Example #12
    def forward(self, bottom, top):
        """Compute loss, select RoIs using OHEM. Use RoIs to get blobs and copy them into this layer's top blob vector."""

        boxes = bottom[0].data.copy()[:, 1:5]
        box_deltas = bottom[1].data.copy()
        im_info = bottom[2].data.copy()
        im_shape = (im_info[0, 0], im_info[0, 1])

        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape)

        rois_repool = pred_boxes[:, 4:]
        zeros = np.zeros((rois_repool.shape[0], 1), dtype=np.float32)
        rois_repool = np.hstack((zeros, rois_repool))

        top[0].reshape(*(rois_repool.shape))
        top[0].data[...] = rois_repool.astype(np.float32, copy=False)
Example #13
def unnormalize_box(Phi_labels, bbox_targets, boxes, stds, means, M,
                    im_shape_w, im_shape_h):
    """
    un-normalize boxes by using stds and means
    """
    Phi_argmax = 4 * Phi_labels
    bbox_target = bbox_targets[np.tile(range(M), 4),
                               np.hstack((4 * Phi_labels, 4 * Phi_labels + 1,
                                          4 * Phi_labels + 2, 4 * Phi_labels + 3))]
    bbox_target = np.reshape(bbox_target, (M, 4), order='F')
    bbox_target = bbox_target * stds[Phi_argmax / 4, :] + \
                  means[Phi_argmax / 4, :]
    unnormalized_bbox_targets = bbox_transform_inv(boxes, bbox_target)
    unnormalized_bbox_targets = clip_boxes(unnormalized_bbox_targets,
                                           (im_shape_w, im_shape_h))
    return unnormalized_bbox_targets
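
The fancy indexing in Example #13 is compact but opaque: np.tile(range(M), 4) paired with the hstack of column offsets, followed by a Fortran-order reshape, selects the 4 delta columns belonging to each row's label. A small check with made-up shapes, showing it matches the straightforward per-row gather:

import numpy as np

M = 3
bbox_targets = np.arange(M * 12, dtype=np.float32).reshape(M, 12)  # 3 classes x 4 cols
Phi_labels = np.array([2, 0, 1])
rows = np.tile(range(M), 4)
cols = np.hstack((4 * Phi_labels, 4 * Phi_labels + 1,
                  4 * Phi_labels + 2, 4 * Phi_labels + 3))
picked = np.reshape(bbox_targets[rows, cols], (M, 4), order='F')
expected = np.stack([bbox_targets[i, 4 * l:4 * l + 4]
                     for i, l in enumerate(Phi_labels)])
assert np.array_equal(picked, expected)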
Example #14
def im_detect(net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # This model uses an RPN, so there are no precomputed proposals to
    # deduplicate; build the im_info blob the RPN expects instead.

    im_blob = blobs['data']
    blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    net.blobs['im_info'].reshape(*(blobs['im_info'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)

    blobs_out = net.forward(**forward_kwargs) 

    rois = net.blobs['rois'].data.copy()
    boxes = rois[:, 1:5] / im_scales[0]
    scores = blobs_out['cls_prob']

    # Apply bounding-box regression deltas
    box_deltas = blobs_out['bbox_pred']
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, im.shape)
    return scores, pred_boxes
Example #15
def run_single(sess, net, inputs, outputs, im, boxes, relations, bbox_reg,
               multi_iter):
    blobs, im_scales = _get_blobs(im, boxes)

    relations = np.array(relations,
                         dtype=np.int32)  # all possible combinations
    num_roi = blobs['rois'].shape[0]
    num_rel = relations.shape[0]

    inputs_feed = data_utils.create_graph_data(num_roi, num_rel, relations)

    feed_dict = {
        inputs['ims']: blobs['data'],
        inputs['rois']: blobs['rois'],
        inputs['relations']: relations,
        net.keep_prob: 1
    }

    for k in inputs_feed:
        feed_dict[inputs[k]] = inputs_feed[k]

    # compute relation rois
    feed_dict[inputs['rel_rois']] = \
        data_utils.compute_rel_rois(num_rel, blobs['rois'], relations)

    ops_value = sess.run(outputs, feed_dict=feed_dict)

    mi = multi_iter[-1]
    rel_probs_flat = ops_value['rel_probs'][mi]
    rel_probs = np.zeros([num_roi, num_roi, rel_probs_flat.shape[1]])
    for i, rel in enumerate(relations):
        rel_probs[rel[0], rel[1], :] = rel_probs_flat[i, :]

    cls_probs = ops_value['cls_probs'][mi]

    if bbox_reg:
        # Apply bounding-box regression deltas
        pred_boxes = bbox_transform_inv(boxes, ops_value['bbox_deltas'][mi])
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, cls_probs.shape[1]))

    return {'scores': cls_probs, 'boxes': pred_boxes, 'relations': rel_probs}
Example #16
def im_detect_tensorflow(sess_tuple, im):
    """Detect object classes in an image given object proposals.

    Arguments:
        sess_tuple: the tuple containing tensorflow sessions and
            input placeholders and output tensors
        im (ndarray): color image to test (in BGR order)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, rois=None)

    sess, ph_data, ph_im_info, out_rois, out_cls_prob, out_bbox_pred = sess_tuple

    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]], dtype=np.float32)

    # Convert Caffe format to tensorflow format
    # BGR to RGB, N x C x H x W to N x H x W x C
    blobs['data'] = np.transpose(blobs['data'][:, ::-1, :, :], (0, 2, 3, 1))

    rois, scores, box_deltas = sess.run(
        (out_rois, out_cls_prob, out_bbox_pred), {
            ph_data: blobs['data'],
            ph_im_info: blobs['im_info']
        })

    assert len(im_scales) == 1, "Only single-image batch implemented"
    # unscale back to raw image space
    boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    return scores, pred_boxes
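
The one-liner in Example #16 that converts the blob layout deserves a closer look: Caffe blobs are N x C x H x W with BGR channels, while the TensorFlow graph expects N x H x W x C with RGB. A tiny sanity check of the same transpose-and-flip:

import numpy as np

caffe_blob = np.random.rand(1, 3, 4, 5).astype(np.float32)   # N, C (BGR), H, W
tf_blob = np.transpose(caffe_blob[:, ::-1, :, :], (0, 2, 3, 1))
assert tf_blob.shape == (1, 4, 5, 3)                         # N, H, W, C
# channel 0 of the TF blob is the R channel (Caffe channel 2)
assert np.array_equal(tf_blob[0, :, :, 0], caffe_blob[0, 2, :, :])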
Example #17
def pred_box_trans(rois, cls_pred, bbox_deltas, im_scale, im_shape):
    """
	input:
		rois: output get from RPN, [256, 5],[prob, x1, x2, y1, y2] for top 256 proposals
		cls_pred: output get from the detection net, [256, 21] for top 256 proposals
		bbox_deltas: output get from the detection net, [256, 21*4] for top 256 proposals
		im_scale: 
	output:
		Given cls_pred, get the exact pred bboxes on scaled im
	"""
    boxes = rois[:, 1:5] / im_scale  # prob first, then bbox
    boxes = bbox_transform_inv(boxes, bbox_deltas)
    boxes = clip_boxes(boxes, im_shape)  # [num_box]
    cat_ids = np.argmax(cls_pred, axis=1)
    pred_boxes = np.zeros([0, 4])
    for box_id, cat_id in enumerate(cat_ids):
        pred_boxes = np.vstack(
            (pred_boxes, boxes[box_id, cat_id * 4:(cat_id + 1) * 4]))
    pred_boxes *= im_scale
    return pred_boxes
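
A side note on Example #17: the vstack loop grows an array row by row, which is quadratic in copies. Assuming the same shapes (boxes is R x 4K after decoding, and the clip_boxes / bbox_transform_inv helpers imported by the surrounding examples), the same gather can be done with one fancy-indexing expression; a sketch, not the original author's code:

import numpy as np

def pred_box_trans_vectorized(rois, cls_pred, bbox_deltas, im_scale, im_shape):
    # Same inputs as pred_box_trans above, but gathers each proposal's
    # 4 box columns for its argmax class without a Python loop.
    boxes = rois[:, 1:5] / im_scale
    boxes = bbox_transform_inv(boxes, bbox_deltas)
    boxes = clip_boxes(boxes, im_shape)
    cat_ids = np.argmax(cls_pred, axis=1)                 # (R,) top class per proposal
    rows = np.arange(boxes.shape[0])[:, np.newaxis]       # (R, 1) row indices
    cols = 4 * cat_ids[:, np.newaxis] + np.arange(4)      # (R, 4) column indices
    return boxes[rows, cols] * im_scale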
Example #18
    def interpret_faster_rcnn(self,
                              cls_prob,
                              bbox_pred,
                              rois,
                              im_info,
                              im_shape,
                              nms=True,
                              clip=True,
                              min_score=0.0):
        # find class
        scores, inds = cls_prob.data.max(1)
        scores, inds = scores.cpu().numpy(), inds.cpu().numpy()

        keep = np.where((inds > 0) & (scores >= min_score))
        scores, inds = scores[keep], inds[keep]

        # Apply bounding-box regression deltas
        keep = keep[0]
        box_deltas = bbox_pred.data.cpu().numpy()[keep]
        """
        box_deltas = np.asarray([
            box_deltas[i, (inds[i] * 4): (inds[i] * 4 + 4)] for i in range(len(inds))
        ], dtype=np.float)
        """
        box_deltas = np.asarray([box_deltas[i, 4:] for i in range(len(inds))],
                                dtype=np.float)
        boxes = rois.data.cpu().numpy()[keep, 1:5] / im_info[0][2]
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        if clip:
            pred_boxes = clip_boxes(pred_boxes, im_shape)

        # nms
        if nms and pred_boxes.shape[0] > 0:
            pred_boxes, scores, inds = nms_detections(pred_boxes,
                                                      scores,
                                                      0.3,
                                                      inds=inds)

        return pred_boxes, scores, self.classes[inds]
Example #19
    def caption(self,
                im_path,
                gt_objects=None,
                gt_regions=None,
                thr=0.0,
                nms=False,
                top_N=100,
                clip=True,
                use_beam_search=False):
        image = cv2.imread(im_path)
        # print 'image.shape', image.shape
        im_data, im_scales = self.get_image_blob_noscale(image)
        # print 'im_data.shape', im_data.shape
        # print 'im_scales', im_scales
        if gt_objects is not None:
            gt_objects[:, :4] = gt_objects[:, :4] * im_scales[0]
        if gt_regions is not None:
            gt_regions[:, :4] = gt_regions[:, :4] * im_scales[0]

        im_info = np.array(
            [[im_data.shape[1], im_data.shape[2], im_scales[0]]],
            dtype=np.float32)
        # pdb.set_trace()
        region_result = self(im_data,
                             im_info,
                             gt_objects,
                             gt_regions=gt_regions,
                             use_beam_search=use_beam_search)[2]
        region_caption, bbox_pred, region_rois, logprobs = region_result[:]

        boxes = region_rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]
        box_deltas = bbox_pred.data.cpu().numpy()
        pred_boxes = bbox_transform_inv_hdn(boxes, box_deltas)
        if clip:
            pred_boxes = clip_boxes(pred_boxes, image.shape)

        # print 'im_scales[0]', im_scales[0]
        return (region_caption.numpy(), logprobs.numpy(), pred_boxes)
Example #20
    def object_detection(self, image_path, gt_boxes=None):
        min_score = 1 / 150.
        image = cv2.imread(image_path)
        # print 'image.shape', image.shape
        im_data, im_scales = self.get_image_blob_noscale(image)
        if gt_boxes is not None:
            gt_boxes[:, :4] = gt_boxes[:, :4] * im_scales[0]
        # print 'im_data.shape', im_data.shape
        # print 'im_scales', im_scales
        im_info = np.array(
            [[im_data.shape[1], im_data.shape[2], im_scales[0]]],
            dtype=np.float32)
        object_result = self(im_data, im_info)[0]
        cls_prob_object, bbox_object, object_rois = object_result[:]

        prob_object = F.softmax(cls_prob_object)
        prob = prob_object.cpu().data.numpy()
        boxes = object_rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]
        fg_id = np.where(prob > min_score)
        box_id = fg_id[0]
        cls_id = fg_id[1]
        box_id = box_id[cls_id > 0]
        cls_id = cls_id[cls_id > 0]
        box_deltas = bbox_object.data.cpu().numpy()
        new_box_delta = np.asarray(
            [box_deltas[box_id[i], (cls_id[i] * 4):(cls_id[i] * 4 + 4)]
             for i in range(len(cls_id))],
            dtype=np.float)
        regressed_boxes = bbox_transform_inv_hdn(boxes[box_id], new_box_delta)
        regressed_boxes = clip_boxes(regressed_boxes, image.shape)

        object_score = np.asarray(
            [prob[box_id[i], cls_id[i]] for i in range(len(cls_id))],
            dtype=np.float)

        # print 'im_scales[0]', im_scales[0]
        return (cls_id, object_score, regressed_boxes)
Example #21
    def compute_kernel(self, labels, boxes, Phi, loc_argmax,
                       unnormalized_bbox_targets, im_shape_w, im_shape_h):
        """
        Compute DPP Kernel Matrix
        """
        M = boxes.shape[0]  # number of rois of 1 image in the minibatch

        pred_boxes = bbox_transform_inv(boxes, loc_argmax)
        pred_boxes = clip_boxes(pred_boxes, (im_shape_w, im_shape_h))

        IoU_with_gt_all = IoU_target(pred_boxes, unnormalized_bbox_targets)
        # nonzero argmax labels for background images will have wrong target boxes
        IoU_with_gt_all[np.where(labels == 0)[0]] = 0.5
        sim_images = self.sim_classes[(labels - 1), :][:, (labels - 1)]

        # Compute IoU, S, Phi, L
        IoU = pair_IoU(pred_boxes)
        S = np.multiply(IoU, sim_images) + self.epsilon * np.eye(M, M)
        Phi = np.multiply(IoU_with_gt_all, Phi)
        L = np.reshape(np.repeat(Phi, M),
                       (M, M)) * S * np.reshape(np.tile(Phi, M), (M, M))
        det_L_I = np.linalg.det(L + np.eye(M))
        return IoU, S, L, IoU_with_gt_all, pred_boxes, det_L_I
Example #22
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        cfg_key = 'TRAIN' if self.phase == 0 else 'TEST'
        enable_nms = cfg[cfg_key].ENABLE_NMS
        nms_thresh = cfg[cfg_key].NMS
        pre_nms_topN = cfg[cfg_key].PRE_RON_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RON_NMS_TOP_N

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        # scores = bottom[0].data[:, 1].reshape(-1, 1)
        # bbox_deltas = bottom[1].data[:, 4:]
        im_info = bottom[-1].data[0, :]
        # rois = bottom[3].data[:, 1:5]

        # RON
        rois = np.zeros((0, 4), dtype=np.float32)
        rois_scores = np.zeros((0, 1), dtype=np.float32)  # 2 class
        rois_rpn_nos = np.zeros((0, 1), dtype=np.int)
        RPN_NO_sum = len(cfg.MULTI_SCALE_RPN_NO)

        for used_rpn_no in cfg.USED_RPN_NO:
            if used_rpn_no in cfg.MULTI_SCALE_RPN_NO:
                rpn_no = cfg.MULTI_SCALE_RPN_NO.index(used_rpn_no)
                rois = np.concatenate((rois, bottom[rpn_no].data[0]), axis=0)
                rois_scores = np.concatenate(
                    (rois_scores, bottom[rpn_no + RPN_NO_sum].data[0]), axis=0)
                rois_rpn_nos = np.concatenate(
                    (rois_rpn_nos,
                     np.repeat([int(used_rpn_no)],
                               bottom[rpn_no].data[0].shape[0]).reshape(-1,
                                                                        1)),
                    axis=0)

        # reshape rois (-1, 4)
        if len(rois.shape) == 4:
            rois = rois.reshape(rois.shape[0], rois.shape[1])

        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox deltas and shifted anchors

        # Convert anchors into proposals via bbox transformations
        # proposals = bbox_transform_inv(rois, bbox_deltas)
        proposals = rois.copy()
        scores = rois_scores.copy()

        # 1.5 filter boxes according to prob scores; if nothing passes,
        # relax the threshold in 0.1 steps until something does (or it hits 0)
        pro_thresh = cfg[cfg_key].PROB
        while True:
            keeps = np.where(scores[:, 0] > pro_thresh)[0]
            if len(keeps) == 0 and pro_thresh - 0.1 >= 0:
                pro_thresh = pro_thresh - 0.1
            else:
                # print pro_thresh
                break

        scores = scores[keeps, :]
        proposals = proposals[keeps, :]
        rois_rpn_nos = rois_rpn_nos[keeps, :]

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(
            proposals, cfg[cfg_key].RON_MIN_SIZE)  # min_size * im_info[2]
        proposals = proposals[keep, :]
        scores = scores[keep]
        rois_rpn_nos = rois_rpn_nos[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)

        if enable_nms:
            nms_keep = nms(np.hstack((proposals, scores)), nms_thresh)
            nms_keep = nms_keep[:post_nms_topN]
            proposals = proposals[nms_keep, :]
            scores = scores[nms_keep]
            rois_rpn_nos = rois_rpn_nos[nms_keep]
        else:
            order = scores.ravel().argsort()[::-1]
            order = order[:pre_nms_topN]
            proposals = proposals[order, :]
            scores = scores[order]
            rois_rpn_nos = rois_rpn_nos[order]

        # concat several groups of proposals from other rpn maps

        # using gt as roi
        if cfg[cfg_key].USING_GT:
            gt_kps = cfg.TRAIN.ANNOINFOS[:, 5:]
            gt_boxes = cfg.TRAIN.ANNOINFOS[:, :4]
            if cfg.TRANSFORM_KP_TO_BOX:
                gt_boxes = transform_kp_to_box(gt_kps, gt_boxes,
                                               cfg.TRAIN.VISUAL_ANCHORS_IMG)
            proposals = gt_boxes * cfg.TRAIN.VISUAL_ANCHORS_IMG_SCALE
            scores = cfg.TRAIN.ANNOINFOS[:, 4].reshape([1, 1])

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        # print blob.shape
        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores

        if len(top) > 2:
            top[2].reshape(*(rois_rpn_nos.shape))
            top[2].data[...] = rois_rpn_nos
Example #23
    def forward(self, bottom, top):
        # prep incoming data==========
        rpn_boxes = bottom[0].data.copy()
        bbox_pred = bottom[1].data
        scores = bottom[2].data
        im_info = bottom[3].data[0]
        im_idx = int(bottom[4].data)
        im_data = bottom[5].data[0, :, :, :].transpose((1, 2, 0)).copy()
        m = self.meta
        im_id = self._image_id[im_idx]
        r_anno = self.r_anno[im_id]
        # prep done============

        # prep blobs for forward
        blobs = {}
        s_classeme = []
        s_rois = []
        s_rois_encoded = []
        o_classeme = []
        o_rois = []
        o_rois_encoded = []
        relation_label = []

        gt_boxes = []
        # defaults so the visualization calls at the bottom do not hit a
        # NameError when this image has no relationship annotation
        rpn_boxes_img_coor = rpn_boxes[:, 1:5] / im_info[2]
        boxes_tosort = []
        thresh_final = .5
        if hasattr(r_anno, 'relationship'):
            boxes = rpn_boxes_img_coor
            boxes = bbox_transform_inv(boxes, bbox_pred)
            boxes = clip_boxes(
                boxes, (im_info[0] / im_info[2], im_info[1] / im_info[2]))

            cv2.normalize(im_data, im_data, 255, 0, cv2.NORM_MINMAX)
            im_data = im_data.astype(np.uint8)

            # cv2.resize expects an integer (width, height) tuple
            origsz = (int(im_info[1] / im_info[2]), int(im_info[0] / im_info[2]))
            im_data = cv2.resize(im_data, origsz)

            res_locations = []
            res_classemes = []
            res_cls_confs = []
            boxes_tosort = []
            for j in xrange(1, 101):
                inds = np.where(scores[:, j] > .3)[0]
                cls_scores = scores[inds, j]
                cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis], inds[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                # pred_boxes = clip_boxes(pred_boxes, im.shape)
                if len(cls_scores) <= 0:
                    boxes_tosort.append(cls_dets)
                    continue

                res_loc = np.hstack((cls_boxes, inds[:, np.newaxis]))
                res_classeme = scores[inds]
                res_cls_conf = np.column_stack(
                    (np.zeros(cls_scores.shape[0]) + j, cls_scores))
                keep = nms(cls_dets[:, :5], .3)  # nms threshold
                cls_dets = cls_dets[keep, :]
                res_loc = res_loc[keep]
                res_classeme = res_classeme[keep]
                res_cls_conf = res_cls_conf[keep]
                res_classemes.extend(res_classeme)
                res_locations.extend(res_loc)
                res_cls_confs.extend(res_cls_conf)
                boxes_tosort.append(cls_dets)
            try:
                # final class confidence
                inds = np.where(
                    np.array(res_cls_confs)[:, 1] > thresh_final)[0]

                classemes = np.array(res_classemes)[inds]
                locations = np.array(res_locations)[inds]
                cls_confs = np.array(res_cls_confs)[inds]
                # decide what to pass to top

                # limit max
                w, h = self.meta['train/' + im_id +
                                 '/w'][...], self.meta['train/' + im_id +
                                                       '/h'][...]
                if not isinstance(r_anno.relationship, np.ndarray):
                    r_anno.relationship = [r_anno.relationship]
                for r in xrange(len(r_anno.relationship)):
                    if not hasattr(r_anno.relationship[r], 'phrase'):
                        continue
                    predicate = r_anno.relationship[r].phrase[1]
                    ymin, ymax, xmin, xmax = r_anno.relationship[r].subBox
                    sub_bbox = [xmin, ymin, xmax, ymax]
                    gt_boxes.append(sub_bbox)

                    ymin, ymax, xmin, xmax = r_anno.relationship[r].objBox

                    obj_bbox = [xmin, ymin, xmax, ymax]
                    gt_boxes.append(obj_bbox)
                    overlaps = bbox_overlaps(
                        np.ascontiguousarray([sub_bbox, obj_bbox],
                                             dtype=np.float),
                        np.ascontiguousarray(locations, dtype=np.float))
                    if overlaps.shape[0] == 0:
                        continue

                    sub_sorted = overlaps[0].argsort()[-40:][::-1]
                    obj_sorted = overlaps[1].argsort()[-40:][::-1]
                    while len(sub_sorted) > 0 and overlaps[0][
                            sub_sorted[-1]] < .6:
                        sub_sorted = sub_sorted[:-1]
                    while len(obj_sorted) > 0 and overlaps[1][
                            obj_sorted[-1]] < .6:
                        obj_sorted = obj_sorted[:-1]

                    if len(sub_sorted) <= 0 or len(obj_sorted) <= 0:
                        continue

                    cnt = 0
                    for s in sub_sorted[:1]:  # sub_idx:
                        for o in obj_sorted[:1]:  # obj_idx:
                            if s != o and cnt < 20:
                                sub_clsmemes = classemes[s]
                                obj_clsmemes = classemes[o]
                                sub_box_encoded = bbox_transform(
                                    np.array([[0, 0, w, h]]),
                                    np.array([locations[s]]))[0]
                                obj_box_encoded = bbox_transform(
                                    np.array([[0, 0, w, h]]),
                                    np.array([locations[o]]))[0]
                                relation = self.meta['meta/pre/name2idx/' +
                                                     predicate][...]
                                # all done, now we put forward
                                s_classeme.append(sub_clsmemes)
                                o_classeme.append(obj_clsmemes)
                                s_rois.append(rpn_boxes[locations[s][-1]])
                                o_rois.append(rpn_boxes[locations[o][-1]])
                                s_rois_encoded.append(sub_box_encoded)
                                o_rois_encoded.append(obj_box_encoded)
                                relation_label.append(np.float32(relation))
                                cnt += 1
                # final step copy all the stuff for forward
                blobs['s_classeme'] = np.array(s_classeme)
                blobs['o_classeme'] = np.array(o_classeme)
                blobs['s_rois'] = np.array(s_rois)
                blobs['o_rois'] = np.array(o_rois)
                blobs['s_rois_encoded'] = np.array(s_rois_encoded)
                blobs['o_rois_encoded'] = np.array(o_rois_encoded)
                blobs['relation_label'] = np.array(relation_label)
            except Exception:
                blobs = self._prev_blob
            if blobs['s_classeme'].shape[0] == 0:
                blobs = self._prev_blob
        else:
            blobs = self._prev_blob
        visualize_gt(im_data, gt_boxes)
        visualize(im_data, boxes_tosort, rpn_boxes_img_coor, m, thresh_final)
        for blob_name, blob in blobs.iteritems():
            top_ind = self._name_to_top_map[blob_name]
            # Reshape net's input blobs
            top[top_ind].reshape(*(blob.shape))
            # Copy data into net's input blobs
            top[top_ind].data[...] = blob.astype(np.float32, copy=False)

        # this becomes a dummy for forward in case things fail
        if blobs['relation_label'][0] != -1:
            for blob_name, blob in blobs.iteritems():
                blobs[blob_name] = blob[0, np.newaxis]
                if blob_name == 'relation_label':
                    blobs[blob_name][...] = -1
        self._prev_blob = blobs
Example #24
    def _generate_rpn_rois(self, scores, bbox_deltas, im_info):
        cfg_key = 'TRAIN' if self.phase == 0 else 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print 'score map size: {}'.format(scores.shape)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove small predicted boxes (we removed this step)

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # assert len(keep) == post_nms_topN, \
        #         '{} vs {}'.format(len(keep), post_nms_topN)
        proposals = proposals[keep, :]
        return proposals
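
The anchor-enumeration block in Example #24 broadcasts A base anchors over K spatial shifts. A miniature version with a 2x2 score map, stride 16, and a single square base anchor makes the shapes concrete:

import numpy as np

feat_stride = 16
base_anchors = np.array([[-8., -8., 8., 8.]])     # A = 1 base anchor (x1, y1, x2, y2)
height, width = 2, 2                              # toy score-map size

shift_x = np.arange(0, width) * feat_stride
shift_y = np.arange(0, height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()  # (K, 4), K = 4

A, K = base_anchors.shape[0], shifts.shape[0]
anchors = (base_anchors.reshape((1, A, 4)) +
           shifts.reshape((1, K, 4)).transpose((1, 0, 2))).reshape((K * A, 4))
assert anchors.shape == (4, 4)  # one shifted copy of the anchor per feature cell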
Example #25
def im_detect2(net, im, boxes=None):

    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)

    blobs_out = net.forward(**forward_kwargs)
    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        boxes = rois[:, 1:5] / im_scales[0]
    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data.copy()
    else:
        # use softmax estimated probabilities
        # scores = blobs_out['cls_prob']
        scores = net.blobs['cls_prob'].data.copy()
        if cfg.TEST.MASK_REG:
            rois_class_score = blobs_out['rois_class_score']
            rois_class_ind = blobs_out['rois_class_ind']
            rois_final = blobs_out['rois_final']

    if cfg.TEST.BBOX_REG:
        box_deltas = net.blobs['bbox_pred'].data.copy()
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
        if cfg.TEST.MASK_REG:
            # N x 2 x 14 x 14, where N is the number of boxes; keep the
            # class channel (do NOT collapse to masks_out[:, 1, :, :])
            masks_out = blobs_out['mask_prob']
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    if cfg.TEST.MASK_REG:
        #return scores, pred_boxes, pred_boxes_before_clip, masks
        return rois_final, rois_class_score, rois_class_ind, masks_out, scores, pred_boxes
    else:
        return scores, pred_boxes
Example #26
    def predict(self, inputs):
        # class_prediction, inputs[0] (1, 38, 63, 24))
        # box_encodings, inputs[1] (1, 38, 63, 48))
        # image_shape
        print('input_0.shape=', inputs[0].shape)
        print('input_1.shape=', inputs[1].shape)
        print('input_2.shape=', inputs[2])
        image_shape = inputs[2]
        scales = config.cfg.POSTPROCESSOR.SCALES
        aspect_ratios = config.cfg.POSTPROCESSOR.ASPECT_RATIOS
        height_stride = config.cfg.POSTPROCESSOR.HEIGHT_STRIDE
        width_stride = config.cfg.POSTPROCESSOR.WIDTH_STRIDE

        _num_anchors = len(scales) * len(aspect_ratios)
        print("_num_anchors:", _num_anchors)
        scores = inputs[0][:, :, :, _num_anchors:]
        bbox_deltas = inputs[1]
        # box
        bbox_deltas = bbox_deltas.reshape((-1, 4))

        # scores
        scores = scores.reshape((-1, 1))
        print("scores:", scores.shape)
        # anchors
        height, width = inputs[0].shape[1], inputs[0].shape[2]

        feature_map_shape_list = [(height, width)]

        anchors = generate_anchors(
            scales=list(scales),
            aspect_ratios=list(aspect_ratios),
            base_anchor_size=None,
            anchor_stride=[height_stride, width_stride],
            anchor_offset=None,
            feature_map_shape_list=feature_map_shape_list)

        pre_nms_topN = 6000
        post_nms_topN = 100
        nms_thresh = 0.7  # was hard-coded as 0.699999988079 (float32 0.7)
        min_size = 16
        #  box_encodings, inputs[1] (1, 38, 63, 48))
        # bbox_deltas:', (28728, 4)
        # clip_window:', array([   0,    0,  600, 1002]))

        # tf clip_to_window
        print("============== clip_to_window ===================")

        proposals = bbox_transform_inv_tf(anchors, bbox_deltas)

        boxdecode = proposals

        clip_window = np.array([0, 0, height, width])
        print("clip_window:", clip_window)
        # proposals_clip = clip_to_window(proposals, clip_window)

        im_info = np.array([height, width, 0])

        proposals = proposals[:, (1, 0, 3, 2)]

        proposals = clip_boxes(proposals, im_info[:2])
        print("proposals_clip clip_to_window :", proposals.shape)
        print("proposals_clip clip_to_window[0] :", proposals[0])
        print("proposals clip_boxes :", proposals.shape)
        print("proposals clip_boxes [0]:", proposals[0])

        print("im_info:[:2]", im_info[:2])
        keep = self._filter_boxes(proposals, min_size * im_info[2])

        proposals = proposals[keep, :]
        print("proposals3:", proposals.shape)

        print("scores.shape1", scores.shape)
        scores = scores[keep]
        print("scores.shape2", scores.shape)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]

        print("proposals4 pre:", proposals.shape)
        scores = scores[order]
        # TODO: check whether the nms method needs to be reimplemented
        keep = nms(np.hstack((proposals, scores)), nms_thresh)

        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        print("proposals final:", proposals.shape)
        return proposals, boxdecode, anchors
Example #27
def im_detect_array(net, imgs):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(imgs)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']

        images = []
        for i in xrange(blobs['data'].shape[0]):
            images.append(np.array(
                [[im_blob.shape[2], im_blob.shape[3], im_scales[i]]],
                dtype=np.float32))

        blobs['im_info'] = np.array(images)


    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    deviders = []
    boxes = []
    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == len(imgs), "Only one scale per image implemented"
        rois = net.blobs['rois'].data.copy()


        for idx in xrange(im_scales.shape[0] - 1):
            deviders.append(np.searchsorted(rois[:, 0], idx + 1, 'left'))

        boxes = np.split(rois[:, 1:5], deviders)

        # unscale back to raw image space, each image by its own scale
        for idx in xrange(len(imgs)):
            boxes[idx] = boxes[idx] / im_scales[idx]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = np.split(net.blobs['cls_score'].data, deviders)

    else:
        # use softmax estimated probabilities
        scores = np.split(blobs_out['cls_prob'], deviders)

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = np.split(blobs_out['bbox_pred'], deviders)

        pred_boxes = []
        for idx in xrange(im_scales.shape[0]):
            pred_boxes.append(bbox_transform_inv(boxes[idx], box_deltas[idx]))
            pred_boxes[idx] = clip_boxes(pred_boxes[idx], imgs[idx].shape)

    else:
        # Simply repeat the boxes, once for each class (boxes and scores are
        # per-image lists here, so tile each entry)
        pred_boxes = [np.tile(boxes[idx], (1, scores[idx].shape[1]))
                      for idx in xrange(len(boxes))]

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
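Several examples in this collection repeat the same ROI de-duplication trick seen above: quantize each ROI to the feature-map grid, hash the five columns (batch index plus four coordinates) into one scalar, and keep the unique hashes. A standalone sketch of the idea, assuming the (R, 5) rois layout used here (dedup_rois is a hypothetical helper name, not part of the original code):

import numpy as np

def dedup_rois(rois, dedup_scale=0.0625):
    # rois: (R, 5) rows of (batch_ind, x1, y1, x2, y2) in network-input scale
    v = np.array([1, 1e3, 1e6, 1e9, 1e12])
    # quantize to the feature grid and hash each row to a single scalar
    hashes = np.round(rois * dedup_scale).dot(v)
    _, index, inv_index = np.unique(hashes, return_index=True,
                                    return_inverse=True)
    # rois[index] is the unique subset; inv_index maps results back to all rows
    return rois[index, :], index, inv_index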
Beispiel #28
0
def im_detect(feature_net, embed_net, recurrent_net, im, boxes=None, use_box_at = -1):
    """Detect object classes in an image given object proposals.

    Arguments:
        feature_net (caffe.Net): CNN model for extracting features
        embed_net (caffe.Net): A word embedding layer
        recurrent_net (caffe.Net): Recurrent model for generating captions and locations
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)
        use_box_at (int32): Use predicted box at a given timestep, default to the last one (use_box_at=-1)
    Returns:
        scores (ndarray): R x 1 array of object class scores 
        pred_boxes (ndarray)): R x 4 array of predicted bounding boxes
        captions (list): length R list of list of word tokens (captions)
    """

    # for bbox unnormalization
    bbox_mean = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS).reshape((1,4))
    bbox_stds = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS).reshape((1,4))

    blobs, im_scales = _get_blobs(im, boxes)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
        dtype=np.float32)

    # reshape network inputs
    feature_net.blobs['data'].reshape(*(blobs['data'].shape))
    feature_net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    
    feature_net.forward(data = im_blob, im_info = blobs['im_info'])
    region_features = feature_net.blobs['region_features'].data.copy()
    rois = feature_net.blobs['rois'].data.copy()
    # detection scores
    scores = feature_net.blobs['cls_probs'].data[:,1].copy()
    # proposal boxes
    boxes = rois[:, 1:5] / im_scales[0]
    proposal_n = rois.shape[0]
    feat_args = {'input_features': region_features}
    opt_args = {}
    # global feature as an optional input: context
    if 'global_features' in feature_net.blobs and 'global_features' in recurrent_net.blobs:
        # tile the global feature so every proposal receives the same context
        opt_args['global_features'] = np.tile(feature_net.blobs['global_features'].data, (1, proposal_n, 1))
    
    bbox_pred_direct = ('bbox_pred' in feature_net.blobs)

    if bbox_pred_direct:
        # do greedy search
        captions, _, logprobs = _greedy_search(embed_net, recurrent_net, feat_args, opt_args, proposal_n, pred_bbox = False)
        #bbox target unnormalization
        box_offsets = feature_net.blobs['bbox_pred'].data
    else:

        captions, box_offsets, logprobs = _greedy_search(embed_net, recurrent_net, feat_args, opt_args, proposal_n, \
            pred_bbox = True, use_box_at = use_box_at)

    #bbox target unnormalization
    box_deltas = box_offsets * bbox_stds + bbox_mean

    #do the transformation
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, im.shape)
    
    return scores, pred_boxes, captions
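Both branches above end by unnormalizing the predicted offsets (box_offsets * bbox_stds + bbox_mean) and mapping them through bbox_transform_inv. For reference, a sketch of that transform in its standard py-faster-rcnn form, which the dimensions above assume:

import numpy as np

def bbox_transform_inv(boxes, deltas):
    # apply (dx, dy, dw, dh) regression deltas to (x1, y1, x2, y2) boxes
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]

    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
    return pred_boxes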
    def forward(self, bottom, top):
        # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
        # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
        all_rois = bottom[0].data
        # keep an untouched view of the incoming proposals for visualization
        aaa = all_rois[:]
        # GT boxes (x1, y1, x2, y2, label)
        # TODO(rbg): it's annoying that sometimes I have extra info before
        # and other times after box coordinates -- normalize to one format
        gt_boxes = bottom[1].data
        im = bottom[2].data
        # Include ground-truth boxes in the set of candidate rois
        zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
        all_rois = np.vstack(
            (all_rois, np.hstack((zeros, gt_boxes[:, :-1])))
        )
        
        num_images = 1
        rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images
        fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)

        rois, labels, bbox_targets, bbox_weights ,layer_indexs = _sample_rois(
            all_rois, gt_boxes, fg_rois_per_image,
            rois_per_image, self._num_classes,sample_type='fpn', k0 = 4)
        vis = False
        if vis:
            ind = np.where(labels != 0)[0]
            im_shape = im.shape
            # unnormalize targets by the precomputed mean and stdev
            means = np.tile(
                     np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (21, 1)).ravel()
            stds = np.tile(
                    np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (21, 1)).ravel()
            bbox_targets = bbox_targets * stds + means

            pred_boxes = bbox_transform_inv(rois[:, 1:], bbox_targets)
            pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
            l = labels[ind]
            ro = rois[ind, 1:]
            b = bbox_targets[ind, :]
            p = pred_boxes[ind, :] * bbox_weights[ind, :]
            r = []
            for i in range(p.shape[0]):
                r.append(p[i, l[i] * 4:l[i] * 4 + 4])
            r_ = np.vstack(r)

            vis_all_detection(im, aaa[:, 1:], l, 1)

        rois_ = np.zeros((self._batch_rois*4, 5), dtype=rois.dtype)
        labels_all = np.ones((self._batch_rois*4, ), dtype=labels.dtype)*-1
        bbox_targets_all = np.zeros((self._batch_rois*4, self._num_classes * 4), dtype=bbox_targets.dtype)
        bbox_weights_all = np.zeros((self._batch_rois*4, self._num_classes * 4), dtype=bbox_weights.dtype)
        rois_all =[]
        for i in range(4):
            index = (layer_indexs == (i + 2))
            num_index = sum(index)
           
            start = self._batch_rois*i
            end = start+num_index
            index_range = range(start, end)
            rois_[index_range, :] = rois[index, :]
            rois_all.append(rois_[range(start,start + self._batch_rois), :])
            labels_all[index_range] = labels[index]  
            bbox_targets_all[index_range,:] = bbox_targets[index, :]
            bbox_weights_all[index_range,:] = bbox_weights[index, :]


        rois_p2 = rois_all[0]
        rois_p3 = rois_all[1]
        rois_p4 = rois_all[2]
        rois_p5 = rois_all[3]    
  


        top[0].reshape(*rois_p2.shape)
        top[0].data[...] = rois_p2
    
        top[1].reshape(*rois_p3.shape)
        top[1].data[...] = rois_p3

        top[2].reshape(*rois_p4.shape)
        top[2].data[...] = rois_p4
        
        top[3].reshape(*rois_p5.shape)
        top[3].data[...] = rois_p5
        
        # classification labels
        top[4].reshape(*labels_all.shape)
        top[4].data[...] = labels_all

        # bbox_targets
        top[5].reshape(*bbox_targets_all.shape)
        top[5].data[...] = bbox_targets_all

        # bbox_inside_weights
        top[6].reshape(*bbox_weights_all.shape)
        top[6].data[...] = bbox_weights_all

        # bbox_outside_weights
        top[7].reshape(*bbox_weights_all.shape)
        top[7].data[...] = np.array(bbox_weights_all > 0).astype(np.float32)
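The _sample_rois call above (sample_type='fpn', k0=4) distributes sampled RoIs over pyramid levels P2..P5 before they are split into the four rois_p* tops. A sketch of the usual FPN assignment heuristic such a sampler applies, assuming (R, 5) rois with a leading batch-index column (assign_pyramid_level is a hypothetical name):

import numpy as np

def assign_pyramid_level(rois, k0=4, canonical_size=224.0):
    # FPN heuristic: k = floor(k0 + log2(sqrt(w * h) / canonical_size))
    w = rois[:, 3] - rois[:, 1] + 1.0
    h = rois[:, 4] - rois[:, 2] + 1.0
    k = np.floor(k0 + np.log2(np.sqrt(w * h) / canonical_size))
    # clamp to the available levels P2..P5
    return np.clip(k, 2, 5).astype(np.int32)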
    def __call__(self, x, bbox_deltas, im_info):
        if isinstance(bbox_deltas.data, chainer.cuda.ndarray):
            bbox_deltas = chainer.cuda.to_cpu(bbox_deltas.data)
        if isinstance(x.data, chainer.cuda.ndarray):
            x = chainer.cuda.to_cpu(x.data)

        assert x.shape[0] == 1, 'Only single item batches are supported'

        if self.train:
            pre_nms_topN = self.TRAIN_RPN_PRE_NMS_TOP_N
            post_nms_topN = self.TRAIN_RPN_POST_NMS_TOP_N
            nms_thresh = self.TRAIN_RPN_NMS_THRESH
            min_size = self.TRAIN_RPN_MIN_SIZE
        else:
            pre_nms_topN = self.TEST_RPN_PRE_NMS_TOP_N
            post_nms_topN = self.TEST_RPN_POST_NMS_TOP_N
            nms_thresh = self.TEST_RPN_NMS_THRESH
            min_size = self.TEST_RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = x[:, self.num_anchors:, :, :]

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self.feat_stride
        shift_y = np.arange(0, height) * self.feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self.num_anchors
        K = shifts.shape[0]
        anchors = self.anchors.reshape((1, A, 4)) + \
            shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack(
            (batch_inds, proposals.astype(np.float32, copy=False)))
        blob = chainer.cuda.cupy.asarray(blob, np.float32)
        rois = chainer.Variable(blob, volatile=not self.train)

        return rois
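The shift/broadcast idiom in the middle of this layer appears in every RPN-style proposal layer in this collection. A self-contained version, for clarity (enumerate_anchors is a hypothetical helper name):

import numpy as np

def enumerate_anchors(base_anchors, height, width, feat_stride):
    # one (x, y, x, y) shift per feature-map cell
    shift_x = np.arange(0, width) * feat_stride
    shift_y = np.arange(0, height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # broadcast A base anchors (1, A, 4) against K cell shifts (K, 1, 4)
    A = base_anchors.shape[0]
    K = shifts.shape[0]
    anchors = (base_anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    # rows ordered by (h, w, a), slowest to fastest
    return anchors.reshape((K * A, 4))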
Beispiel #31
0
def im_detect(net, im, boxes=None,num_classes=21):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)



    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] 
        index= np.where(np.sum(boxes,axis=1)!=0)[0]
        boxes = boxes[index,:]
     
    # NOTE: boxes stay in network-input coordinates here; division by
    # im_scales[0] happens once at the return statement below
    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']
        scores = scores[index]

      #  print scores[0:10]
    
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
    
        box_deltas = box_deltas[index,:]
     

        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # unnormalize targets by the precomputed mean and stdev
            means = np.tile(
                    np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1)).ravel()
            stds = np.tile(
                    np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1)).ravel()
            box_deltas = box_deltas * stds + means
        

      #  print boxes.shape,box_deltas.shape
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        # clip against the network input size (H, W, C of the data blob)
        data_shape = blobs['data'].shape
        s = (data_shape[2], data_shape[3], data_shape[1])
 
        pred_boxes = clip_boxes(pred_boxes, s)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    vis = False
    if vis:
        vis_rois_detection(blobs['data'].astype(np.float32, copy=False),pred_boxes/ im_scales[0])
  

    return scores, pred_boxes/ im_scales[0]
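Note that this variant clips pred_boxes against the scaled network input s rather than the raw image, and divides by im_scales[0] only at the return. For reference, a sketch of clip_boxes in its standard py-faster-rcnn form, which the calls in these examples assume:

import numpy as np

def clip_boxes(boxes, im_shape):
    # clamp every (x1, y1, x2, y2) group to the image; im_shape is (H, W, ...)
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
    return boxes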
Beispiel #32
0
def proposal_layer_3d(rpn_cls_prob_reshape,
                      rpn_bbox_pred,
                      im_info,
                      calib,
                      cfg_key,
                      _feat_stride=[
                          8,
                      ],
                      anchor_scales=[1.0, 1.0]):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    #layer_params = yaml.load(self.param_str_)

    _anchors = generate_anchors_bv()
    #  _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    #print "aaaaaaa",_anchors.shape (4,4)
    #print "bbbbbbb",im_info          (601,601,1)
    #print "ccccccc", calib.shape   (4,12)

    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    # print rpn_cls_prob_reshape.shape

    height, width = rpn_cls_prob_reshape.shape[1:3]
    # scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    scores = np.reshape(
        np.reshape(rpn_cls_prob_reshape,
                   [1, height, width, _num_anchors, 2])[:, :, :, :, 1],
        [1, height, width, _num_anchors])

    bbox_deltas = rpn_bbox_pred

    if DEBUG:
        print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
        print 'scale: {}'.format(im_info[2])

    # 1. Generate proposals from bbox deltas and shifted anchors

    if DEBUG:
        print 'score map size: {}'.format(scores.shape)

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    # bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 6))
    bbox_deltas = bbox_deltas.reshape((-1, 6))

    # print "bbox_deltas",bbox_deltas.shape
    # print anchors.shape
    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    # scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
    scores = scores.reshape((-1, 1))

    # print np.sort(scores.ravel())[-30:]

    # convert anchors bv to anchors_3d
    anchors_3d = bv_anchor_to_lidar(anchors)
    # Convert anchors into proposals via bbox transformations
    proposals_3d = bbox_transform_inv_3d(anchors_3d, bbox_deltas)
    # convert back to lidar_bv
    proposals_bv = lidar_3d_to_bv(proposals_3d)

    lidar_corners = lidar_3d_to_corners(proposals_3d)
    proposals_img = lidar_cnr_to_img(lidar_corners, calib[3], calib[2],
                                     calib[0])

    if DEBUG:
        # print "bbox_deltas: ", bbox_deltas[:10]
        # print "proposals number: ", proposals_3d[:10]
        print "proposals_bv shape: ", proposals_bv.shape
        print "proposals_3d shape: ", proposals_3d.shape
        print "proposals_img shape:", proposals_img.shape

    # 2. clip predicted boxes to image
    proposals_bv = clip_boxes(proposals_bv, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals_bv, min_size * im_info[2])
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    proposals_img = proposals_img[keep, :]
    scores = scores[keep]

    # TODO: pass real image_info
    keep = _filter_img_boxes(proposals_img, [375, 1242])
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    proposals_img = proposals_img[keep, :]
    scores = scores[keep]

    if DEBUG:
        print "proposals after clip"
        print "proposals_bv shape: ", proposals_bv.shape
        print "proposals_3d shape: ", proposals_3d.shape
        print "proposals_img shape: ", proposals_img.shape
    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals_bv = proposals_bv[order, :]
    proposals_3d = proposals_3d[order, :]
    proposals_img = proposals_img[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals_bv, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    proposals_img = proposals_img[keep, :]
    scores = scores[keep]

    if DEBUG:
        print "proposals after nms"
        print "proposals_bv shape: ", proposals_bv.shape
        print "proposals_3d shape: ", proposals_3d.shape

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals_bv.shape[0], 1), dtype=np.float32)
    blob_bv = np.hstack((batch_inds, proposals_bv.astype(np.float32,
                                                         copy=False)))
    blob_img = np.hstack(
        (batch_inds, proposals_img.astype(np.float32, copy=False)))
    blob_3d = np.hstack((batch_inds, proposals_3d.astype(np.float32,
                                                         copy=False)))

    if DEBUG:
        print "blob shape ====================:"
        print blob_bv.shape
        print blob_img.shape
        # print '3d', blob_3d[:10]
        # print lidar_corners[:10]
        # print 'bv', blob_bv[:10]
        # print 'img', blob_img[:10]

    return blob_bv, blob_img, blob_3d
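Beispiel #32 keeps three parallel proposal sets (bird's-eye view, 3D, image plane) aligned by applying the same keep indices to all of them, so a single NMS pass on the BV boxes prunes all three. A pure-NumPy greedy NMS equivalent to the nms call used throughout these examples, as a reference sketch:

import numpy as np

def py_nms(dets, thresh):
    # dets: (N, 5) rows of (x1, y1, x2, y2, score); returns kept row indices
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # overlap of the current best box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # keep only boxes that overlap the winner by at most thresh
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]
    return keep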
Beispiel #33
0
    def forward_t(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        cfg_key = str(
            'TRAIN' if self.phase == 0 else 'TEST')  # either 'TRAIN' or 'TEST'
        # cfg_key = 'TRAIN'
        pre_nms_topN = cfg[cfg_key].Frozen_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].Frozen_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].Frozen_NMS_THRESH
        min_size = cfg[cfg_key].Frozen_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        # scores = bottom[0].data[:, 1].reshape(-1, 1)
        # bbox_deltas = bottom[1].data[:, 4:]
        im_info = bottom[6].data[0, :]
        # rois = bottom[3].data[:, 1:5]

        # RON
        rois = np.zeros((0, 4), dtype=np.float32)
        scores = np.zeros((0, 1), dtype=np.float32)  # 2 class
        RPN_NO_sum = len(cfg.MULTI_SCALE_RPN_NO)

        for rpn_no in range(RPN_NO_sum):
            rois = np.concatenate((rois, bottom[rpn_no].data[0]), axis=0)
            scores = np.concatenate(
                (scores, bottom[rpn_no + RPN_NO_sum].data[0]), axis=0)

        # reshape rois (-1, 4)
        if len(rois.shape) == 4:
            rois = rois.reshape(rois.shape[0], rois.shape[1])

        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox deltas and shifted anchors

        # Convert anchors into proposals via bbox transformations
        # proposals = bbox_transform_inv(rois, bbox_deltas)
        proposals = rois

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals,
                             cfg.TEST.RON_MIN_SIZE)  # min_size * im_info[2]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        if cfg[cfg_key].Frozen_NMS:
            nms_keep = nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                nms_keep = nms_keep[:post_nms_topN]
            proposals = proposals[nms_keep, :]
            scores = scores[nms_keep]

        # concat several groups of proposals from other rpn maps

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        # print blob.shape
        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores
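Every proposal layer in this collection emits rois in the same blob format: a batch-index column (always 0 for single-image batches) prepended to the four box coordinates. As a small sketch (proposals_to_blob is a hypothetical helper name):

import numpy as np

def proposals_to_blob(proposals, batch_idx=0):
    # pack (N, 4) proposals into the (N, 5) rois format (ind, x1, y1, x2, y2)
    batch_inds = np.full((proposals.shape[0], 1), batch_idx, dtype=np.float32)
    return np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))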
Beispiel #34
0
def im_detect(net, im, feat_blob, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        feat_blob (str): name of the feature blob to be extracted
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
        features (ndarray): R x D array of features
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # the last column of the pid_prob is the non-person box score
        scores = blobs_out['pid_prob'][:, -1]
        scores = scores[:, np.newaxis]
        scores = np.hstack([scores, 1. - scores])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    features = net.blobs[feat_blob].data.copy()

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]
        features = features[inv_index, :]

    return scores, pred_boxes, features
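Note the two person-search variants disagree on where the non-person bin lives: the example above reads the last column of pid_prob, while _im_detect below reads the first. The reduction to two-column (background, person) scores is the same either way; a sketch (person_scores is a hypothetical name):

import numpy as np

def person_scores(pid_prob, nonperson_col):
    # pid_prob: (R, P+1) distribution over P identities plus one non-person
    # bin at column nonperson_col (-1 above, 0 in the variant below)
    bg = pid_prob[:, nonperson_col][:, np.newaxis]
    return np.hstack([bg, 1.0 - bg])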
def _im_detect(net, im, roidb, blob_names=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        roidb (an roidb item): to provide gt_boxes if necessary
        blob_names (list of str): list of feature blob names to be extracted

    Returns:
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        features (dict of ndarray): {blob name: R x D array of features}
    """
    im_blob, im_scales = get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"

    blobs = {
        'data': im_blob,
        'im_info': np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32),
    }

    if 'gt_boxes' in net.blobs:
        # Supply gt_boxes as input. Used to get pid_labels for proposals.
        blobs['gt_boxes'] = get_gt_boxes_blob(
            roidb['boxes'], roidb['gt_classes'], roidb['gt_pids'], im_scales)

    # reshape network inputs
    for k, v in blobs.iteritems():
        net.blobs[k].reshape(*(v.shape))

    # do forward
    forward_kwargs = {k: v.astype(np.float32, copy=False)
                      for k, v in blobs.iteritems()}
    blobs_out = net.forward(**forward_kwargs)

    # unscale rois back to raw image space
    rois = net.blobs['rois'].data.copy()
    boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # the first column of the pid_prob is the non-person box score
        scores = blobs_out['pid_prob'][:, 0]
        scores = scores[:, np.newaxis]
        scores = np.hstack([scores, 1. - scores])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        # As we no longer scale and shift the bbox_pred weights when snapshot,
        # we need to manually do this during test.
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS and \
                cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            num_classes = box_deltas.shape[1] // 4
            stds = np.tile(cfg.TRAIN.BBOX_NORMALIZE_STDS, num_classes)
            means = np.tile(cfg.TRAIN.BBOX_NORMALIZE_MEANS, num_classes)
            box_deltas = box_deltas * stds + means
        boxes = bbox_transform_inv(boxes, box_deltas)
        boxes = clip_boxes(boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        boxes = np.tile(boxes, (1, scores.shape[1]))

    features = {blob: net.blobs[blob].data.copy() for blob in blob_names} \
               if blob_names is not None else {}

    return boxes, scores, features
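As the comment in _im_detect notes, the scale/shift of the bbox_pred weights is no longer folded into the snapshot, so the normalization of the regression targets must be inverted at test time. A standalone sketch of that step (unnormalize_box_deltas is a hypothetical name):

import numpy as np

def unnormalize_box_deltas(box_deltas, means, stds):
    # tile the per-coordinate (4,) means/stds across all classes and invert
    # the training-time normalization t_norm = (t - mean) / std
    num_classes = box_deltas.shape[1] // 4
    return box_deltas * np.tile(stds, num_classes) + np.tile(means, num_classes)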
Beispiel #36
0
def im_detect(net,
              im,
              boxes=None,
              extra_boxes=np.zeros((0, 4), dtype=np.float32),
              dc_boxes=np.zeros((0, 4), dtype=np.float32)):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    if extra_boxes.shape[0] > 0:
        assert cfg.TEST.EXTERNAL_ROIS, ("To use external proposals, the "
                                        "proper configuration parameter must be set")

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        assert cfg.TEST.EXTERNAL_ROIS == False
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
        if cfg.TEST.EXTERNAL_ROIS:
            net.blobs['extra_rois'].reshape(*(extra_boxes.shape))
            sc_extra_boxes, _ = _project_im_rois(extra_boxes, im_scales)
            net.blobs['dc_rois'].reshape(*(dc_boxes.shape))
            sc_dc_boxes, _ = _project_im_rois(dc_boxes, im_scales)
    else:
        assert cfg.TEST.EXTERNAL_ROIS == False
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
        if cfg.TEST.EXTERNAL_ROIS:
            forward_kwargs['extra_rois'] = sc_extra_boxes
            forward_kwargs['dc_rois'] = sc_dc_boxes
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.VIEWPOINTS:
        try:
            viewpoints = blobs_out['viewpoint_pred']
        except KeyError, e:
            viewpoints = blobs_out['viewpoints_pd']
        except:
Beispiel #37
0
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        # scores = bottom[0].data[:, self._num_anchors:, :, :]
        # bbox_deltas = bottom[1].data
        im_info = bottom[0].data[0, :]
        cls_prob_dict = {
            'stride32': bottom[8].data,
            'stride16': bottom[7].data,
            'stride8': bottom[6].data,
            'stride4': bottom[5].data,
        }
        bbox_pred_dict = {
            'stride32': bottom[4].data,
            'stride16': bottom[3].data,
            'stride8': bottom[2].data,
            'stride4': bottom[1].data,
        }

        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox deltas and shifted anchors
        proposal_list = []
        score_list = []
        for s in self._feat_stride:
            stride = int(s)
            sub_anchors = generate_anchors(base_size=stride,
                                           scales=np.array(self._scales),
                                           ratios=self._ratios)

            scores = cls_prob_dict['stride' + str(s)][:,
                                                      self._num_anchors:, :, :]
            bbox_deltas = bbox_pred_dict['stride' + str(s)]

            # 1. Generate proposals from bbox_deltas and shifted anchors
            # use real image size instead of padded feature map sizes
            #height, width = int(im_info[0] / stride), int(im_info[1] / stride)
            height, width = scores.shape[-2:]

            if DEBUG:
                print 'score map size: {}'.format(scores.shape)

            # Enumerate all shifts
            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = self._num_anchors
            K = shifts.shape[0]
            anchors = sub_anchors.reshape((1, A, 4)) + \
                    shifts.reshape((1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))

            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            # Convert anchors into proposals via bbox transformations
            proposals = bbox_transform_inv(anchors, bbox_deltas)

            # 2. clip predicted boxes to image
            proposals = clip_boxes(proposals, im_info[:2])

            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            keep = _filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            scores = scores[keep]

            proposal_list.append(proposals)
            score_list.append(scores)

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]
        #print "keep len is ", len(keep)

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        w = (proposals[:, 2] - proposals[:, 0])
        h = (proposals[:, 3] - proposals[:, 1])
        s = w * h
        s[s <= 0] = 1e-6
        # layer_index = np.floor(k0+np.log2(np.sqrt(s)/224))
        image_area = im_info[0] * im_info[1]
        alpha = np.sqrt(h * w) / (224.0 / np.sqrt(image_area))
        layer_index_ = np.log(alpha) / np.log(2.0)

        layer_index = []
        for i in layer_index_:
            layer_index.append(
                np.min([5, np.max([2, 4 + np.round(i).astype(np.int32)])]))

        # clamp assignments to pyramid levels P2..P5 on the array form
        layer_indexs = np.clip(np.array(layer_index), 2, 5)

        rois_layers = []

        for i in xrange(4):
            index = (layer_indexs == (i + 2))
            if not np.any(index):
                rois_layers.append(np.array([]))
            else:
                rois_layers.append(proposals[index, :])

        for i in xrange(4):
            if len(rois_layers[i]) == 0:
                index = i
                if index - 1 >= 0 and rois_layers[index - 1].shape[0] > 1:
                    len_rois_layers = rois_layers[index - 1].shape[0]
                    rois_layers[i] = rois_layers[index - 1][len_rois_layers -
                                                            1, :].reshape(
                                                                1, 4)
                    rois_layers[index - 1] = rois_layers[index -
                                                         1][0:len_rois_layers -
                                                            1, :]
                elif index + 1 < 4 and rois_layers[index + 1].shape[0] > 1:
                    rois_layers[i] = rois_layers[index + 1][0, :].reshape(1, 4)
                    rois_layers[index + 1] = rois_layers[index + 1][1:, :]
                elif index - 2 >= 0 and rois_layers[index - 2].shape[0] > 1:
                    len_rois_layers = rois_layers[index - 2].shape[0]
                    # print len_rois_layers,'eeeeeeeeeeeee',index
                    rois_layers[i] = rois_layers[index - 1][0, :].reshape(1, 4)
                    rois_layers[index - 1] = rois_layers[index -
                                                         2][len_rois_layers -
                                                            1, :].reshape(
                                                                1, 4)
                    # rois_layers[i]=rois_layers[index-2][0,:].reshape(1,5)
                    rois_layers[index - 2] = rois_layers[index -
                                                         2][0:len_rois_layers -
                                                            1, :]
                elif index + 2 < 4 and rois_layers[index + 2].shape[0] > 1:
                    # print rois_layers[index+1]
                    # print rois_layers[index+1][0,:]
                    # print rois_layers[index+2][0,:]
                    # print rois_layers[index+2]
                    if rois_layers[index + 1].shape[0] == 0:
                        rois_layers[i + 1] = rois_layers[index +
                                                         2][1, :].reshape(
                                                             1, 4)
                        rois_layers[i] = rois_layers[index + 2][0, :].reshape(
                            1, 4)
                        rois_layers[index + 2] = rois_layers[index + 2][2:, :]
                    else:
                        rois_layers[i] = rois_layers[index + 1][0, :].reshape(
                            1, 4)
                        rois_layers[i + 1] = rois_layers[index +
                                                         2][0, :].reshape(
                                                             1, 4)
                        rois_layers[index + 2] = rois_layers[index + 2][1:, :]
                elif index - 3 >= 0 and rois_layers[index - 3].shape[0] > 1:
                    len_rois_layers = rois_layers[index - 3].shape[0]
                    # print len_rois_layers,'ddddddddddddd',index
                    rois_layers[i] = rois_layers[index - 1][0, :].reshape(1, 4)
                    rois_layers[index - 1] = rois_layers[index -
                                                         2][0, :].reshape(
                                                             1, 4)
                    rois_layers[index - 2] = rois_layers[index -
                                                         3][len_rois_layers -
                                                            1, :]
                    # rois_layers[i]=rois_layers[index-2][0,:].reshape(1,5)
                    rois_layers[index - 3] = rois_layers[index -
                                                         3][0:len_rois_layers -
                                                            1, :]
                elif index + 3 < 4 and rois_layers[index + 3].shape[0] > 1:
                    len_rois_layers = rois_layers[index + 3].shape[0]
                    rois_layers[i] = rois_layers[index + 1][0, :].reshape(1, 4)
                    rois_layers[index + 1] = rois_layers[index +
                                                         2][0, :].reshape(
                                                             1, 4)
                    rois_layers[index + 2] = rois_layers[index +
                                                         3][0, :].reshape(
                                                             1, 4)
                    # rois_layers[i]=rois_layers[index-2][0,:].reshape(1,5)
                    rois_layers[index + 3] = rois_layers[index + 3][1:, :]

        # [Optional] output scores blob
        # if len(top) > 1:
        #     top[1].reshape(*(scores.shape))
        #     top[1].data[...] = scores
        rpn_rois = np.zeros((proposals.shape[0], proposals.shape[1]),
                            dtype=np.float32)
        count = 0
        for i in xrange(4):
            batch_inds_i = np.zeros((rois_layers[i].shape[0], 1),
                                    dtype=np.float32)
            blob_i = np.hstack(
                (batch_inds_i, rois_layers[i].astype(np.float32, copy=False)))
            top[i].reshape(*(blob_i.shape))
            top[i].data[...] = blob_i
            rpn_rois[count:rois_layers[i].shape[0] + count, :] = rois_layers[i]
            count += rois_layers[i].shape[0]

        batch_inds = np.zeros((rpn_rois.shape[0], 1), dtype=np.float32)
        blob_rpn_rois = np.hstack(
            (batch_inds, rpn_rois.astype(np.float32, copy=False)))
        top[4].reshape(*(blob_rpn_rois.shape))
        top[4].data[...] = blob_rpn_rois
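Unlike the earlier proposal-target layer, which assigns levels via k0 + log2(sqrt(w*h) / 224), this layer first normalizes box scale by the image area before taking the log. A compact equivalent of the level computation above (fpn_level_from_area is a hypothetical name):

import numpy as np

def fpn_level_from_area(proposals, im_h, im_w, k0=4):
    # box scale relative to a 224 px canonical size, normalized by image area
    w = proposals[:, 2] - proposals[:, 0]
    h = proposals[:, 3] - proposals[:, 1]
    alpha = np.sqrt(np.maximum(w * h, 1e-6)) / (224.0 / np.sqrt(im_h * im_w))
    k = k0 + np.round(np.log2(alpha)).astype(np.int32)
    # clamp assignments to pyramid levels P2..P5
    return np.clip(k, 2, 5)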
Beispiel #38
0
def im_detect(net, im, boxes=None, num_classes=21):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5]
        index = np.where(np.sum(boxes, axis=1) != 0)[0]
        boxes = boxes[index, :]


    # NOTE: boxes stay in network-input coordinates here; division by
    # im_scales[0] happens once at the return statement below
    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']
        scores = scores[index]

    #  print scores[0:10]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']

        box_deltas = box_deltas[index, :]

        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # unnormalize targets by the precomputed mean and stdev
            means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS),
                            (num_classes, 1)).ravel()
            stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS),
                           (num_classes, 1)).ravel()
            box_deltas = box_deltas * stds + means

    #  print boxes.shape,box_deltas.shape
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        # clip against the network input size (H, W, C of the data blob)
        data_shape = blobs['data'].shape
        s = (data_shape[2], data_shape[3], data_shape[1])

        pred_boxes = clip_boxes(pred_boxes, s)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    vis = False
    if vis:
        vis_rois_detection(blobs['data'].astype(np.float32, copy=False),
                           pred_boxes / im_scales[0])

    return scores, pred_boxes / im_scales[0]
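This example and the near-identical Beispiel #31 both drop the all-zero rows that the RON rois blob is padded with, and reuse the resulting index to subset scores and box_deltas so the arrays stay aligned. As a sketch (drop_padded_rois is a hypothetical name):

import numpy as np

def drop_padded_rois(boxes, scores, box_deltas):
    # keep rows whose coordinates are not all zero (zero rows are padding)
    index = np.where(np.sum(boxes, axis=1) != 0)[0]
    return boxes[index, :], scores[index], box_deltas[index, :], index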
def im_detect(net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)
    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    # if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
    # v = np.array([1, 1e3, 1e6, 1e9, 1e12])
    # hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
    # _, index, inv_index = np.unique(hashes, return_index=True,
    # return_inverse=True)
    # blobs['rois'] = blobs['rois'][index, :]
    # boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']

    # Bounding-box regression is hard-disabled in this variant
    # (the original condition was `if cfg.TEST.BBOX_REG:`)
    if False:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    fc7 = net.blobs['fc7'].data
    return net.blobs['cls_score'].data[:, :], scores, fc7, pred_boxes
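
clip_boxes is called throughout these examples but not defined in the snippets. For reference, a sketch matching the standard py-faster-rcnn helper, which clamps every (x1, y1, x2, y2) column group to the image bounds:

import numpy as np

def clip_boxes(boxes, im_shape):
    """Clip boxes (x1, y1, x2, y2 per class) to image boundaries."""
    # x1 >= 0
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
    # y1 >= 0
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
    # x2 < im_shape[1]
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
    # y2 < im_shape[0]
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
    return boxes
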
Example #40
def test_net(net, imdb, vis = 0):
    """Test RON network on an image database."""
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # timers
    _t = {'im_detect' : Timer(), 'misc' : Timer()}
    # NOTE: assumes num_images is divisible by cfg.TEST.BATCH_SIZE
    for i in xrange(0, num_images, cfg.TEST.BATCH_SIZE):
        _t['misc'].tic()
        ims = []
        for im_i in xrange(cfg.TEST.BATCH_SIZE):
            im = cv2.imread(imdb.image_path_at(i+im_i))
            ims.append(im)
        _t['im_detect'].tic()
        batch_scores, batch_boxes = im_detect(net,ims)
        _t['im_detect'].toc()

        for im_i in xrange(cfg.TEST.BATCH_SIZE):
            im = ims[im_i]
            scores = batch_scores[im_i]
            boxes = batch_boxes[im_i]

            # filter boxes according to prob scores
            keeps = np.where(scores[:,0] > cfg.TEST.PROB)[0]
            scores = scores[keeps, :]
            boxes = boxes[keeps, :]

            # change boxes according to input size and the original image size
            im_shape = im.shape[0:2]
            im_scales = float(cfg.TEST.SCALES[0]) / np.array(im_shape)

            boxes[:, 0::2] = boxes[:, 0::2] / im_scales[1]
            boxes[:, 1::2] = boxes[:, 1::2] / im_scales[0]

            # filter boxes with small sizes
            boxes = clip_boxes(boxes, im_shape)
            keep = filter_boxes(boxes, cfg.TEST.RON_MIN_SIZE)
            scores = scores[keep,:]
            boxes = boxes[keep, :]

            scores = np.tile(scores[:, 0], (imdb.num_classes, 1)).transpose() * scores

            for j in xrange(1, imdb.num_classes):
                inds = np.where(scores[:, j] > cfg.TEST.DET_MIN_PROB)[0]
                cls_scores = scores[inds, j]
                cls_boxes = boxes[inds, :]
                cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)

                keep = nms(cls_dets, cfg.TEST.NMS)
                cls_dets = cls_dets[keep, :]
                if len(keep) > cfg.TEST.BOXES_PER_CLASS:
                    cls_dets = cls_dets[:cfg.TEST.BOXES_PER_CLASS,:]
                all_boxes[j][i+im_i] = cls_dets
            
                if vis:
                    vis_detections(im, imdb.classes[j], cls_dets)
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
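
The scores recombination in test_net (np.tile(scores[:, 0], ...) * scores) multiplies every class column by the objectness column. A small self-contained sketch with made-up scores:

import numpy as np

# two proposals, three classes; column 0 holds the objectness score
scores = np.array([[0.9, 0.2, 0.7],
                   [0.4, 0.6, 0.1]])
num_classes = scores.shape[1]

# multiply every class column by the objectness column, as in test_net above
combined = np.tile(scores[:, 0], (num_classes, 1)).transpose() * scores
print combined
# [[ 0.81  0.18  0.63]
#  [ 0.16  0.24  0.04]]
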
Example #41
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        # self.phase is an int here: 0 for TRAIN, 1 for TEST
        if self.phase == 0:
            cfg_ = cfg.TRAIN
        else:
            cfg_ = cfg.TEST
        pre_nms_topN  = cfg_.RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg_.RPN_POST_NMS_TOP_N
        nms_thresh    = cfg_.RPN_NMS_THRESH
        min_size      = cfg_.RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print 'score map size: {}'.format(scores.shape)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores
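
_filter_boxes is used by the proposal layer above but not shown. A sketch matching the standard py-faster-rcnn helper:

import numpy as np

def _filter_boxes(boxes, min_size):
    """Keep only boxes whose width and height are both >= min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep
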
Example #42
    def forward(self, bottom, top):
        cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
        pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
        min_size = self._min_sizes
        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
    
        im_info = bottom[0].data[0, :]
        batch_size = bottom[1].data.shape[0]
        if batch_size > 1:
            raise ValueError("Sorry, multiple images each device is not implemented")

        cls_prob_dict = {
            'stride64': bottom[10].data,
            'stride32': bottom[9].data,
            'stride16': bottom[8].data,
            'stride8': bottom[7].data,
            'stride4': bottom[6].data,
        }
        bbox_pred_dict = {
            'stride64': bottom[5].data,
            'stride32': bottom[4].data,
            'stride16': bottom[3].data,
            'stride8': bottom[2].data,
            'stride4': bottom[1].data,
        }
      
        proposal_list = []
        score_list = []
        for s in self._feat_stride:
            stride = int(s)
            sub_anchors = generate_anchors(base_size=stride, scales=self._scales, ratios=self._ratios)
    
            scores = cls_prob_dict['stride' + str(s)][:, self._num_anchors:, :, :]
            bbox_deltas = bbox_pred_dict['stride' + str(s)]
          
            # 1. Generate proposals from bbox_deltas and shifted anchors
            # use real image size instead of padded feature map sizes
            height, width = int(im_info[0] / stride), int(im_info[1] / stride)

            # Enumerate all shifts
            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = self._num_anchors
            K = shifts.shape[0]
    
            anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))

            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order
            bbox_deltas = _clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            scores = _clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            # Convert anchors into proposals via bbox transformations
            proposals = bbox_transform_inv(anchors, bbox_deltas)

            # 2. clip predicted boxes to image
            proposals = clip_boxes(proposals, im_info[:2])

            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            keep = _filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            scores = scores[keep]

            proposal_list.append(proposals)
            score_list.append(scores)

        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det, nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            try:
                pad = npr.choice(keep, size=post_nms_topN - len(keep))
            except ValueError:
                # no proposals survived NMS: emit dummy 16x16 boxes instead
                proposals = np.zeros((post_nms_topN, 4), dtype=np.float32)
                proposals[:, 2] = 16
                proposals[:, 3] = 16
                batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
                blob = np.hstack((batch_inds, proposals))
                top[0].reshape(*(blob.shape))
                top[0].data[...] = blob
                return
            keep = np.hstack((keep, pad))

        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob
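
The anchors computation above relies on numpy broadcasting: (1, A, 4) base anchors plus (K, 1, 4) cell shifts yield all K*A shifted anchors. A self-contained sketch for a 2x2 feature map with stride 16 and a single hypothetical 16x16 base anchor:

import numpy as np

# 2x2 feature map with stride 16 -> K = 4 cell shifts
stride = 16
shift_x, shift_y = np.meshgrid(np.arange(2) * stride, np.arange(2) * stride)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()  # (4, 4)

# a single hypothetical 16x16 base anchor (A = 1)
base_anchors = np.array([[0, 0, 15, 15]])
A = base_anchors.shape[0]
K = shifts.shape[0]

# (1, A, 4) + (K, 1, 4) broadcasts to (K, A, 4): every anchor at every cell
anchors = (base_anchors.reshape((1, A, 4)) +
           shifts.reshape((1, K, 4)).transpose((1, 0, 2))).reshape((K * A, 4))
print anchors
# [[ 0  0 15 15]
#  [16  0 31 15]
#  [ 0 16 15 31]
#  [16 16 31 31]]
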
Example #43
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print 'score map size: {}'.format(scores.shape)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores
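
bbox_transform_inv decodes the predicted (dx, dy, dw, dh) deltas against the anchors. For reference, a sketch following the standard py-faster-rcnn implementation:

import numpy as np

def bbox_transform_inv(boxes, deltas):
    """Apply (dx, dy, dw, dh) deltas to (x1, y1, x2, y2) boxes."""
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]

    # shift the center, scale the size (log-space deltas)
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes
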
Example #44
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
        pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
        min_size      = cfg[cfg_key].RPN_MIN_SIZE

        self._num_anchors = bottom[0].shape[0] / 2
        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[1].data[:, 1:, :, :]
        bbox_deltas = bottom[2].data
        im_info = bottom[3].data[0, :]
        anchors = bottom[0].data
        # Reshape the predicted bbox transformations to match the anchor
        # order; in this variant the deltas arrive already flattened, so a
        # plain reshape to (-1, 4) is enough
        bbox_deltas = bbox_deltas[0, :, :, 0]
        bbox_deltas = bbox_deltas.reshape((-1, 4))

        # Same story for the scores: flatten to (N, 1) in anchor order
        scores = scores.reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores
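
The nms calls in these examples normally resolve to py-faster-rcnn's compiled Cython module; its pure-Python reference version is a useful sketch of the contract (dets is an (N, 5) array of [x1, y1, x2, y2, score]):

import numpy as np

def py_cpu_nms(dets, thresh):
    """Pure-Python NMS baseline."""
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the current top box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        # keep only boxes that overlap the top box by at most thresh
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]
    return keep
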