Beispiel #1
0
import copy

import mxnet as mx
import pprint
from common.vis.vis_im import draw_bbox, show_text, draw_points
from common.processing.image_roi import  get_roi_images
from common.processing.bbox_transform import bbox_pred, clip_boxes
from common.processing.generate_anchor import generate_base_anchors
from common.processing.image import resize, transform
from common.predict import MutablePredictor
from common.processing.nms import py_nms_wrapper

#from rcnn_symbol_fpn import get_rcnn_symbol
from rcnn.rcnn_symbol_fpn_corr import get_rcnn_symbol

# Module-level NMS callable, built once at import time by py_nms_wrapper.
# NOTE(review): 0.3 is presumably the overlap threshold -- confirm against
# common.processing.nms.
nms = py_nms_wrapper(0.3)

class RCNNPredictor(MutablePredictor):
    """Predictor built on the symbol returned by ``get_rcnn_symbol``."""
    def __init__(self, network, output_folder, epoch, ctx, config, has_json_symbol=False, fpn=False, step='rcnn'):
        """
        Build the network symbol and record the settings read from ``config``.

        :param network: forwarded to ``get_rcnn_symbol``
        :param output_folder: used to build the model prefix
            ``<output_folder>/model/<output_folder>`` when the proposal type
            is 'rpn'
        :param epoch: not referenced in the visible part of the constructor
        :param ctx: not referenced in the visible part of the constructor
        :param config: supplies proposal_type, num_classes, num_anchors,
            RPN_FEAT_STRIDE, input_channel, target_size and max_size
        :param has_json_symbol: not referenced in the visible part of the
            constructor
        :param fpn: when true, the per-level ``rpn_cls_score_p<k>_output``
            internals of the symbol are collected into ``self.feat_sym``
        :param step: not referenced in the visible part of the constructor
        """
        symbol = get_rcnn_symbol(network, config.proposal_type, config.num_classes, config.num_anchors, config)
        self.proposal_type = config.proposal_type
        # Stays empty unless fpn is set.
        self.feat_sym = []
        if fpn:
            # One internal output per entry of RPN_FEAT_STRIDE; the level
            # suffix starts at p2 (i + 2).
            for i in range(len(config.RPN_FEAT_STRIDE)):
                inter_feat_sym = symbol.get_internals()["rpn_cls_score_p%d_output"%(i+2)]
                (self.feat_sym).append(inter_feat_sym)
             
        if self.proposal_type == 'rpn':
            # NOTE(review): prefix and input_shapes are built here but not
            # consumed in the lines shown -- confirm how the rest of the
            # constructor uses them.
            prefix = '{0}/model/{0}'.format(output_folder)
            # Declared input shapes for a batch of one image, its im_info row
            # and one 4-value feat_shape row per RPN_FEAT_STRIDE entry.
            input_shapes = [('data', (1, config.input_channel,
                            config.target_size, config.max_size)), ('im_info', (1, 3)), ('feat_shape', (1, len(config.RPN_FEAT_STRIDE), 4))]
Beispiel #2
0
def pred_eval(predictor, test_data, imdb, vis=False, thresh=1e-3):
    """
    Run detection over a test set, apply per-class NMS, then dump and evaluate.

    For every batch yielded by ``test_data`` the predictor is run through
    ``im_detect``.  For each class index from 1 upwards, detections scoring
    above ``thresh`` are filtered with NMS and stored.  The collected
    detections are pickled to ``<imdb.cache_path>/<imdb.name>_detections.pkl``
    and passed to ``imdb.evaluate_detections``.

    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle unless ``vis`` is set
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :return: None
    """
    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data]

    nms = py_nms_wrapper(config.TEST.NMS)

    # limit detections to max_per_image over all classes
    # (a value <= 0 disables the cap, which is the current setting)
    max_per_image = -1

    num_images = imdb.num_images
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    # `range` is used instead of `xrange` so this runs on Python 2 and 3.
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    t = time.time()
    for i, (im_info, data_batch) in enumerate(test_data):
        # t1: time spent waiting for the iterator to produce this batch
        t1 = time.time() - t
        t = time.time()

        scale = im_info[0, 2]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scale)

        # t2: time spent in the network forward pass
        t2 = time.time() - t
        t = time.time()

        # class index 0 is skipped; boxes holds 4 columns per class
        for j in range(1, imdb.num_classes):
            indexes = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[indexes, j, np.newaxis]
            cls_boxes = boxes[indexes, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            all_boxes[j][i] = cls_dets[keep, :]

        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                # score of the max_per_image-th best detection in this image
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        if vis:
            # empty placeholder for class index 0 keeps list positions aligned
            # with imdb.classes
            boxes_this_image = [[]] + [
                all_boxes[j][i] for j in range(1, imdb.num_classes)
            ]
            vis_all_detection(data_dict['data'].asnumpy(), boxes_this_image,
                              imdb.classes, scale)

        # t3: time spent in post-processing (NMS, capping, visualization)
        t3 = time.time() - t
        t = time.time()
        print('testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
            i, imdb.num_images, t1, t2, t3))

    det_file = os.path.join(imdb.cache_path, imdb.name + '_detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)

    imdb.evaluate_detections(all_boxes)
    def forward(self, is_train, req, in_data, out_data, aux):
        """
        Turn RPN scores and box deltas into a fixed-size set of proposals.

        Shifts the base anchors over the feature grid, applies the predicted
        deltas, clips to the image, drops boxes below the minimum size, keeps
        the top-scoring ``pre_nms_topN``, runs NMS, keeps ``post_nms_topN``
        and pads by random re-sampling when fewer boxes survive.

        :param is_train: not used by this method
        :param req: write requests forwarded to ``self.assign``
        :param in_data: ``in_data[0]`` class scores, ``in_data[1]`` bbox
            deltas, ``in_data[2]`` im_info whose first row is read as
            (height, width, scale)
        :param out_data: ``out_data[0]`` receives rows of (batch index,
            proposal box); ``out_data[1]`` receives the matching scores when
            ``self._output_score`` is set
        :param aux: not used by this method
        :raises ValueError: if the batch holds more than one image
        """
        #nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)
        nms = py_nms_wrapper(self._threshold)
        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError(
                "Sorry, multiple images each device is not implemented")

        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        # the first set of anchors are background probabilities
        # keep the second part
        scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
        bbox_deltas = in_data[1].asnumpy()

        im_info = in_data[2].asnumpy()[0, :]

        # Debug output uses the call form of print with a single argument so
        # the method parses on Python 3 and prints the same text on Python 2.
        if DEBUG:
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))
        # feat_shape = in_data[3].asnumpy()
        # # 1. Generate proposals from bbox_deltas and shifted anchors
        # # use real image size instead of padded feature map sizes
        # height = feat_shape[0,i,2]
        # width = feat_shape[0,i,3]
        height, width = int(im_info[0] / self._feat_stride), int(
            im_info[1] / self._feat_stride)

        if DEBUG:
            print('score map size: {}'.format(scores.shape))
            print("resudial: {}".format(
                (scores.shape[2] - height, scores.shape[3] - width)))

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order

        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_pred(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = self._filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det)

        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        # (padding re-samples indices that survived NMS, so rows may repeat)
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))

        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        self.assign(out_data[0], req[0], blob)

        if self._output_score:
            self.assign(out_data[1], req[1],
                        scores.astype(np.float32, copy=False))