Esempio n. 1
0
def demo_net(detector, image_name):
    """
    wrapper for detector
    :param detector: Detector
    :param image_name: image name
    :return: None
    """
    # load demo data
    im = cv2.imread(image_name + '.jpg')
    im_array, im_scale = resize(im, config.TEST.SCALES[0], config.TRAIN.MAX_SIZE)
    im_array = transform(im_array, config.PIXEL_MEANS)
    roi_array = sio.loadmat(image_name + '_boxes.mat')['boxes']
    batch_index_array = np.zeros((roi_array.shape[0], 1))
    projected_rois = roi_array * im_scale
    roi_array = np.hstack((batch_index_array, projected_rois))

    scores, boxes = detector.im_detect(im_array, roi_array)

    all_boxes = [[] for _ in CLASSES]
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in CLASSES:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        all_boxes[cls_ind] = dets[keep, :]

    boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(CLASSES))]
    vis_all_detection(im_array, boxes_this_image, CLASSES, 0)
Esempio n. 2
0
def demo_net(detector, image_name):
    """
    wrapper for detector
    :param detector: Detector
    :param image_name: image name
    :return: None
    """
    config.TEST.HAS_RPN = True
    assert os.path.exists(image_name), image_name + ' not found'
    im = cv2.imread(image_name)
    im_array, im_scale = resize(im, config.SCALES[0], config.MAX_SIZE)
    im_array = transform(im_array, config.PIXEL_MEANS)
    im_info = np.array([[im_array.shape[2], im_array.shape[3], im_scale]], dtype=np.float32)

    scores, boxes = detector.im_detect(im_array, im_info)

    all_boxes = [[] for _ in CLASSES]
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in CLASSES:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets.astype(np.float32), NMS_THRESH)
        all_boxes[cls_ind] = dets[keep, :]

    boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(CLASSES))]
    vis_all_detection(im_array, boxes_this_image, CLASSES, 0)
Esempio n. 3
0
def demo_net(detector, image_name):
    """
    wrapper for detector
    :param detector: Detector
    :param image_name: image name
    :return: None
    """
    config.TEST.HAS_RPN = True
    assert os.path.exists(image_name), image_name + ' not found'
    im = cv2.imread(image_name)
    im_array, im_scale = resize(im, config.SCALES[0], config.MAX_SIZE)
    im_array = transform(im_array, config.PIXEL_MEANS)
    im_info = np.array([[im_array.shape[2], im_array.shape[3], im_scale]],
                       dtype=np.float32)

    scores, boxes = detector.im_detect(im_array, im_info)

    all_boxes = [[] for _ in CLASSES]
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in CLASSES:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets.astype(np.float32), NMS_THRESH)
        all_boxes[cls_ind] = dets[keep, :]

    boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(CLASSES))]
    vis_all_detection(im_array, boxes_this_image, CLASSES, 0)
Esempio n. 4
0
def main():
    color = cv2.imread(args.img)  # read image in b,g,r order
    img, scale = resize(color.copy(), 640, 1024)
    im_info = np.array([[img.shape[0], img.shape[1], scale]],
                       dtype=np.float32)  # (h, w, scale)
    img = np.swapaxes(img, 0, 2)
    img = np.swapaxes(img, 1, 2)  # change to r,g,b order
    img = img[np.newaxis, :]  # extend to (n, c, h, w)

    ctx = mx.gpu(args.gpu)
    _, arg_params, aux_params = mx.model.load_checkpoint(
        args.prefix, args.epoch)
    arg_params, aux_params = ch_dev(arg_params, aux_params, ctx)
    if 'resnet' in args.prefix:
        sym = resnet_50(num_class=2,
                        bn_mom=0.99,
                        bn_global=True,
                        is_train=False)
    else:
        sym = get_vgg_test(num_classes=2)
    arg_params["data"] = mx.nd.array(img, ctx)
    arg_params["im_info"] = mx.nd.array(im_info, ctx)

    exe = sym.bind(ctx,
                   arg_params,
                   args_grad=None,
                   grad_req="null",
                   aux_states=aux_params)
    exe.forward(is_train=False)
    output_dict = {
        name: nd
        for name, nd in zip(sym.list_outputs(), exe.outputs)
    }
    rois = output_dict['rpn_rois_output'].asnumpy(
    )[:, 1:]  # first column is index
    scores = output_dict['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output_dict['bbox_pred_reshape_output'].asnumpy()[0]
    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, (im_info[0][0], im_info[0][1]))

    cls_boxes = pred_boxes[:, 4:8]
    cls_scores = scores[:, 1]
    keep = np.where(cls_scores >= args.thresh)[0]
    cls_boxes = cls_boxes[keep, :]
    cls_scores = cls_scores[keep]
    dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
    keep = nms(dets.astype(np.float32), args.nms_thresh)
    dets = dets[keep, :]
    keep = nest(dets, thresh=args.nest_thresh)
    dets = dets[keep, :]

    for i in range(dets.shape[0]):
        bbox = dets[i, :4]
        cv2.rectangle(
            color, (int(round(bbox[0] / scale)), int(round(bbox[1] / scale))),
            (int(round(bbox[2] / scale)), int(round(bbox[3] / scale))),
            (0, 255, 0), 2)
    cv2.imwrite("result.jpg", color)
Esempio n. 5
0
def main():
    color = cv2.imread(args.img)  # read image in b,g,r order
    img, scale = resize(color.copy(), 640, 1024)
    im_info = np.array([[img.shape[0], img.shape[1], scale]], dtype=np.float32)  # (h, w, scale)
    img = np.swapaxes(img, 0, 2)
    img = np.swapaxes(img, 1, 2)  # change to r,g,b order
    img = img[np.newaxis, :]  # extend to (n, c, h, w)

    ctx = mx.gpu(args.gpu)
    _, arg_params, aux_params = mx.model.load_checkpoint(args.prefix, args.epoch)
    arg_params, aux_params = ch_dev(arg_params, aux_params, ctx)
    if 'resnet' in args.prefix:
        sym = resnet_50(num_class=2, bn_mom=0.99, bn_global=True, is_train=False)
    else:
        sym = get_vgg_test(num_classes=2)
    arg_params["data"] = mx.nd.array(img, ctx)
    arg_params["im_info"] = mx.nd.array(im_info, ctx)

    exe = sym.bind(ctx, arg_params, args_grad=None, grad_req="null", aux_states=aux_params)
    exe.forward(is_train=False)
    output_dict = {name: nd for name, nd in zip(sym.list_outputs(), exe.outputs)}
    rois = output_dict['rpn_rois_output'].asnumpy()[:, 1:]  # first column is index
    scores = output_dict['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output_dict['bbox_pred_reshape_output'].asnumpy()[0]
    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, (im_info[0][0], im_info[0][1]))

    cls_boxes = pred_boxes[:, 4:8]
    cls_scores = scores[:, 1]
    keep = np.where(cls_scores >= args.thresh)[0]
    cls_boxes = cls_boxes[keep, :]
    cls_scores = cls_scores[keep]
    dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
    keep = nms(dets.astype(np.float32), args.nms_thresh)
    dets = dets[keep, :]
    keep = nest(dets, thresh=args.nest_thresh)
    dets = dets[keep, :]

    for i in range(dets.shape[0]):
        bbox = dets[i, :4]
        cv2.rectangle(color, (int(round(bbox[0]/scale)), int(round(bbox[1]/scale))),
                      (int(round(bbox[2]/scale)), int(round(bbox[3]/scale))),  (0, 255, 0), 2)
    cv2.imwrite("result.jpg", color)
Esempio n. 6
0
def demo_net(detector, image_name):
    """
    wrapper for detector
    :param detector: Detector
    :param image_name: image name
    :return: None
    """
    # load demo data
    im = cv2.imread(image_name + '.jpg')
    im_array, im_scale = resize(im, config.TEST.SCALES[0],
                                config.TRAIN.MAX_SIZE)
    im_array = transform(im_array, config.PIXEL_MEANS)
    roi_array = sio.loadmat(image_name + '_boxes.mat')['boxes']
    batch_index_array = np.zeros((roi_array.shape[0], 1))
    projected_rois = roi_array * im_scale
    roi_array = np.hstack((batch_index_array, projected_rois))

    scores, boxes = detector.im_detect(im_array, roi_array)

    all_boxes = [[] for _ in CLASSES]
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in CLASSES:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        all_boxes[cls_ind] = dets[keep, :]

    boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(CLASSES))]
    vis_all_detection(im_array, boxes_this_image, CLASSES, 0)
Esempio n. 7
0
def pred_eval(detector, test_data, imdb, vis=False):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand
    :param detector: Detector
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :return:
    """
    assert not test_data.shuffle

    thresh = 0.1
    # limit detections to max_per_image over all classes
    max_per_image = 100

    num_images = imdb.num_images
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    i = 0
    for databatch in test_data:
        if i % 10 == 0:
            print 'testing {}/{}'.format(i, imdb.num_images)

        scores, boxes = detector.im_detect(databatch.data['data'], databatch.data['rois'])

        # we used scaled image & roi to train, so it is necessary to transform them back
        # visualization should also be from the original size
        im_path = imdb.image_path_from_index(imdb.image_set_index[i])
        im = cv2.imread(im_path)
        im_height = im.shape[0]
        scale = float(databatch.data['data'].shape[2]) / float(im_height)
        im = image_processing.transform(im, config.PIXEL_MEANS)

        for j in range(1, imdb.num_classes):
            indexes = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[indexes, j]
            cls_boxes = boxes[indexes, j * 4:(j + 1) * 4] / scale
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis]))
            keep = nms(cls_dets, config.TEST.NMS)
            all_boxes[j][i] = cls_dets[keep, :]

        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        boxes_this_image = [[]] + [all_boxes[j][i] for j in range(1, imdb.num_classes)]
        if vis:
            vis_all_detection(im, boxes_this_image,
                              imdb_classes=imdb.classes)
        i += 1

    cache_folder = os.path.join(imdb.cache_path, imdb.name)
    if not os.path.exists(cache_folder):
        os.mkdir(cache_folder)
    det_file = os.path.join(cache_folder, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f)

    imdb.evaluate_detections(all_boxes)
Esempio n. 8
0
    def forward(self, is_train, req, in_data, out_data, aux):
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        pre_nms_topN = config[self.cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = config[self.cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = config[self.cfg_key].RPN_NMS_THRESH
        min_size = config[self.cfg_key].RPN_MIN_SIZE

        # the first set of anchors are background probabilities
        # keep the second part
        scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
        bbox_deltas = in_data[1].asnumpy()
        im_info = in_data[2].asnumpy()[0, :]

        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox_deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print 'score map size: {}'.format(scores.shape)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_pred(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = ProposalOperator._filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        self.assign(out_data[0], req[0], blob)

        if self._output_score:
            self.assign(out_data[1], req[1],
                        scores.astype(np.float32, copy=False))
Esempio n. 9
0
    def forward(self, is_train, req, in_data, out_data, aux):
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)
        pre_nms_topN = config[self.cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = config[self.cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = config[self.cfg_key].RPN_NMS_THRESH
        min_size = config[self.cfg_key].RPN_MIN_SIZE

        # the first set of anchors are background probabilities
        # keep the second part
        scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
        if np.isnan(scores).any():
            raise ValueError("there is nan in input scores")
        bbox_deltas = in_data[1].asnumpy()
        if np.isnan(bbox_deltas).any():
            raise ValueError("there is nan in input bbox_deltas")
        im_info = in_data[2].asnumpy()[0, :]
        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox_deltas and shifted anchors
        height, width = scores.shape[-2:]
        if self.cfg_key == 'TRAIN':
            height, width = int(im_info[0] / self._feat_stride), int(im_info[1] / self._feat_stride)

        if DEBUG:
            print 'score map size: {}'.format(scores.shape)
            print "resudial = ", scores.shape[2] - height, scores.shape[3] - width
        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_pred(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = ProposalOperator._filter_boxes(proposals, min_size * im_info[2])

        proposals = proposals[keep, :]
        scores = scores[keep]
        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            if len(keep) == 0:
                logging.log(logging.ERROR, "currently len(keep) is zero")
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]
        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        self.assign(out_data[0], req[0], blob)
        if self._output_score:
            self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
Esempio n. 10
0
def pred_eval(detector, test_data, imdb, vis=False):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand
    :param detector: Detector
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :return:
    """
    assert not test_data.shuffle

    thresh = 0.05
    # limit detections to max_per_image over all classes
    max_per_image = 100

    num_images = imdb.num_images
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    i = 0
    for databatch in test_data:
        if i % 10 == 0:
            print 'testing {}/{}'.format(i, imdb.num_images)

        if config.TEST.HAS_RPN:
            scores, boxes = detector.im_detect(databatch.data['data'], im_info=databatch.data['im_info'])
            scale = databatch.data['im_info'][0, 2]
        else:
            scores, boxes = detector.im_detect(databatch.data['data'], roi_array=databatch.data['rois'])
            # we used scaled image & roi to train, so it is necessary to transform them back
            # visualization should also be from the original size
            im_path = imdb.image_path_from_index(imdb.image_set_index[i])
            im = cv2.imread(im_path)
            im_height = im.shape[0]
            scale = float(databatch.data['data'].shape[2]) / float(im_height)

        for j in range(1, imdb.num_classes):
            indexes = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[indexes, j]
            cls_boxes = boxes[indexes, j * 4:(j + 1) * 4] / scale
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis]))
            keep = nms(cls_dets, config.TEST.NMS)
            all_boxes[j][i] = cls_dets[keep, :]

        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        boxes_this_image = [[]] + [all_boxes[j][i] for j in range(1, imdb.num_classes)]
        if vis:
            # visualize the testing scale
            for box in boxes_this_image:
                if isinstance(box, np.ndarray):
                    box[:, :4] *= scale
            vis_all_detection(databatch.data['data'], boxes_this_image,
                              imdb_classes=imdb.classes)
        i += 1

    cache_folder = os.path.join(imdb.cache_path, imdb.name)
    if not os.path.exists(cache_folder):
        os.mkdir(cache_folder)
    det_file = os.path.join(cache_folder, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f)

    imdb.evaluate_detections(all_boxes)