Exemple #1
0
def test_proposals(net, imdb):
    """Generate proposals using AZ-Net on an image database.

    Every image of ``imdb`` is read with OpenCV and handed to ``im_propose``.
    The per-image results, the average proposal time and a recall field are
    pickled to ``proposals.pkl`` in the output directory derived from
    ``imdb`` and ``net['full']``; a short summary is printed afterwards.

    Args:
        net: mapping of networks; ``net['full']`` selects the output
            directory and the whole mapping is forwarded to ``im_propose``.
        imdb: image database providing ``image_index`` and
            ``image_path_at(i)``.

    Note:
        Recall is not evaluated here (the ground-truth comparison is
        disabled), so the stored and printed recall is the placeholder 0.
    """
    num_images = len(imdb.image_index)
    # all proposals are collected into:
    #    prop_boxes[image] = whatever im_propose returns for that image
    #    (presumably an N x 5 array of (x1, y1, x2, y2, score) -- TODO confirm)
    prop_boxes = [[] for _ in xrange(num_images)]

    output_dir = get_output_dir(imdb, net['full'])
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # timers
    _t = {'im_prop': Timer()}

    # running total of proposals, reported as a per-image average below
    num_boxes = 0.0

    for i in xrange(num_images):
        im = cv2.imread(imdb.image_path_at(i))
        _t['im_prop'].tic()
        prop_boxes[i] = im_propose(net, im)
        _t['im_prop'].toc()

        # Count the proposals of this image; without this the
        # "boxes per image" report below was always 0.
        # NOTE(review): assumes im_propose returns one row per proposal.
        num_boxes += len(prop_boxes[i])

        print('im_prop: {:d}/{:d} {:.3f}s'
              .format(i + 1, num_images, _t['im_prop'].average_time))

    # Placeholder: recall needs ground truth, which is not loaded here.
    recall = 0
    avg_time = _t['im_prop'].average_time
    prop = {'boxes': prop_boxes, 'time': avg_time, 'recall': recall}
    prop_file = os.path.join(output_dir, 'proposals.pkl')
    with open(prop_file, 'wb') as f:
        cPickle.dump(prop, f, cPickle.HIGHEST_PROTOCOL)

    print('The recall is {:.3f}'.format(recall))
    # max(..., 1) avoids a ZeroDivisionError on an empty image database
    print('On average, {0} boxes per image are generated'
          .format(num_boxes / max(num_images, 1)))
    print('The average proposal generation time is {:.3f}s'.format(avg_time))
Exemple #2
0
def test_proposals(net, imdb):
    """Record the data needed for fine-grained analysis of the proposals.

    For every image the proposals and anchor regions returned by
    ``im_propose``, the ground-truth boxes, the image file name and the
    image shape are collected. Together with ``cfg.SEAR.Tz`` and
    ``cfg.SEAR.NUM_PROPOSALS`` they are saved to ``AZ_results.mat`` in the
    output directory derived from ``imdb`` and ``net['full']``.

    Args:
        net: mapping of networks; ``net['full']`` selects the output
            directory and the whole mapping is forwarded to ``im_propose``.
        imdb: image database providing ``image_index``,
            ``image_path_at(i)`` and ``gt_roidb()``.

    Raises:
        IOError: if an image cannot be read from disk.
    """
    num_images = len(imdb.image_index)
    # One slot per image.  Object arrays let every slot hold an arbitrary
    # per-image value (arrays of different sizes, strings, tuples).
    # `object` is used instead of the removed `np.object` alias.
    prop_boxes = np.zeros((num_images,), dtype=object)
    anchor_boxes = np.zeros((num_images,), dtype=object)
    gt_boxes = np.zeros((num_images,), dtype=object)
    fn = np.zeros((num_images,), dtype=object)
    im_shapes = np.zeros((num_images,), dtype=object)

    output_dir = get_output_dir(imdb, net['full'])
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # timers
    _t = {'im_prop': Timer()}

    gt_roidb = imdb.gt_roidb()

    for i in xrange(num_images):
        im_path = imdb.image_path_at(i)
        im = cv2.imread(im_path)
        if im is None:
            # cv2.imread reports failure by returning None, not by raising
            raise IOError('Failed to read image {}'.format(im_path))
        im_shapes[i] = im.shape

        # only the proposal call itself is timed
        _t['im_prop'].tic()
        prop_boxes[i], anchor_boxes[i] = im_propose(net, im)
        _t['im_prop'].toc()

        gt_boxes[i] = gt_roidb[i]['boxes']
        fn[i] = os.path.basename(im_path)

        print('im_prop: {:d}/{:d} {:.3f}s'
              .format(i + 1, num_images, _t['im_prop'].average_time))

    # search settings stored alongside the results
    Tz = cfg.SEAR.Tz
    num_proposals = cfg.SEAR.NUM_PROPOSALS

    outfile = os.path.join(output_dir, 'AZ_results.mat')
    sio.savemat(
        outfile,
        dict(prop_boxes=prop_boxes,
             anchor_boxes=anchor_boxes,
             gt_boxes=gt_boxes,
             fn=fn,
             Tz=Tz,
             num_proposals=num_proposals,
             im_shapes=im_shapes))
Exemple #3
0
def test_proposals(net, imdb):
    """Record the data needed for fine-grained analysis of the proposals.

    For every image the proposals and anchor regions returned by
    ``im_propose``, the ground-truth boxes, the image file name and the
    image shape are collected. Together with ``cfg.SEAR.Tz`` and
    ``cfg.SEAR.NUM_PROPOSALS`` they are saved to ``AZ_results.mat`` in the
    output directory derived from ``imdb`` and ``net['full']``.

    Args:
        net: mapping of networks; ``net['full']`` selects the output
            directory and the whole mapping is forwarded to ``im_propose``.
        imdb: image database providing ``image_index``,
            ``image_path_at(i)`` and ``gt_roidb()``.

    Raises:
        IOError: if an image cannot be read from disk.
    """
    num_images = len(imdb.image_index)
    # One slot per image.  Object arrays let every slot hold an arbitrary
    # per-image value (arrays of different sizes, strings, tuples).
    # `object` is used instead of the removed `np.object` alias.
    prop_boxes = np.zeros((num_images,), dtype=object)
    anchor_boxes = np.zeros((num_images,), dtype=object)
    gt_boxes = np.zeros((num_images,), dtype=object)
    fn = np.zeros((num_images,), dtype=object)
    im_shapes = np.zeros((num_images,), dtype=object)

    output_dir = get_output_dir(imdb, net['full'])
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # timers
    _t = {'im_prop': Timer()}

    gt_roidb = imdb.gt_roidb()

    for i in xrange(num_images):
        im_path = imdb.image_path_at(i)
        im = cv2.imread(im_path)
        if im is None:
            # cv2.imread reports failure by returning None, not by raising
            raise IOError('Failed to read image {}'.format(im_path))
        im_shapes[i] = im.shape

        # only the proposal call itself is timed
        _t['im_prop'].tic()
        prop_boxes[i], anchor_boxes[i] = im_propose(net, im)
        _t['im_prop'].toc()

        gt_boxes[i] = gt_roidb[i]['boxes']
        fn[i] = os.path.basename(im_path)

        print('im_prop: {:d}/{:d} {:.3f}s'
              .format(i + 1, num_images, _t['im_prop'].average_time))

    # search settings stored alongside the results
    Tz = cfg.SEAR.Tz
    num_proposals = cfg.SEAR.NUM_PROPOSALS

    outfile = os.path.join(output_dir, 'AZ_results.mat')
    sio.savemat(outfile, dict(prop_boxes=prop_boxes,
                              anchor_boxes=anchor_boxes,
                              gt_boxes=gt_boxes,
                              fn=fn,
                              Tz=Tz,
                              num_proposals=num_proposals,
                              im_shapes=im_shapes))
Exemple #4
0
def tune_thresh(net, imdb):
    """Pick a score threshold for zoom indicators from a whole image set.

    The last column of the anchor regions returned by ``im_propose`` is
    treated as the score.  Scores from every image are fed into a min-heap
    that is trimmed to ``num_images * cfg.TRAIN.ANCHORS_PER_IMG`` entries;
    the smallest surviving score becomes the threshold.  The result is
    printed and pickled to ``thresh.pkl`` in the output directory derived
    from ``imdb`` and ``net['full']``.
    """
    image_count = len(imdb.image_index)
    # total number of anchor scores kept over the whole set
    budget = image_count * cfg.TRAIN.ANCHORS_PER_IMG
    # min-heap of the best scores seen so far, and the current cut-off
    score_heap = []
    thresh = -np.inf

    out_dir = get_output_dir(imdb, net['full'])
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    timer = Timer()

    # Loaded as in the original flow; neither value is read below.
    gt_roidb = imdb.gt_roidb()
    roidb = imdb.roidb

    for idx in xrange(image_count):
        image = cv2.imread(imdb.image_path_at(idx))
        timer.tic()
        _unused, anchors = im_propose(net, image)

        scores = anchors[:, -1]
        # only scores above the current cut-off can enter the heap
        for score in scores[scores > thresh]:
            heapq.heappush(score_heap, score)

        # once over budget, drop the lowest scores and raise the cut-off
        # to the smallest score that survived
        overfull = len(score_heap) > budget
        while len(score_heap) > budget:
            heapq.heappop(score_heap)
        if overfull:
            thresh = score_heap[0]

        timer.toc()
        print('im_tune: {:d}/{:d} {:.3f}s'
              .format(idx + 1, image_count, timer.average_time))

    print('the threshold is set to {0}'.format(thresh))

    with open(os.path.join(out_dir, 'thresh.pkl'), 'wb') as f:
        cPickle.dump(thresh, f, cPickle.HIGHEST_PROTOCOL)
Exemple #5
0
def tune_thresh(net, imdb):
    """Pick a score threshold for zoom indicators from a whole image set.

    The last column of the anchor regions returned by ``im_propose`` is
    treated as the score.  Scores from every image are fed into a min-heap
    that is trimmed to ``num_images * cfg.TRAIN.ANCHORS_PER_IMG`` entries;
    the smallest surviving score becomes the threshold.  The result is
    printed and pickled to ``thresh.pkl`` in the output directory derived
    from ``imdb`` and ``net['full']``.
    """
    image_count = len(imdb.image_index)
    # total number of anchor scores kept over the whole set
    budget = image_count * cfg.TRAIN.ANCHORS_PER_IMG
    # min-heap of the best scores seen so far, and the current cut-off
    score_heap = []
    thresh = -np.inf

    out_dir = get_output_dir(imdb, net['full'])
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    timer = Timer()

    # Loaded as in the original flow; neither value is read below.
    gt_roidb = imdb.gt_roidb()
    roidb = imdb.roidb

    for idx in xrange(image_count):
        image = cv2.imread(imdb.image_path_at(idx))
        timer.tic()
        _unused, anchors = im_propose(net, image)

        scores = anchors[:, -1]
        # only scores above the current cut-off can enter the heap
        for score in scores[scores > thresh]:
            heapq.heappush(score_heap, score)

        # once over budget, drop the lowest scores and raise the cut-off
        # to the smallest score that survived
        overfull = len(score_heap) > budget
        while len(score_heap) > budget:
            heapq.heappop(score_heap)
        if overfull:
            thresh = score_heap[0]

        timer.toc()
        print('im_tune: {:d}/{:d} {:.3f}s'
              .format(idx + 1, image_count, timer.average_time))

    print('the threshold is set to {0}'.format(thresh))

    with open(os.path.join(out_dir, 'thresh.pkl'), 'wb') as f:
        cPickle.dump(thresh, f, cPickle.HIGHEST_PROTOCOL)
Exemple #6
0
def prepare_roidb(imdb, net):
    """Attach example (proposal) boxes and ground truth to each roidb entry.

    For every non-flipped image the ground-truth boxes and labels are taken
    from the entry's ``gt_overlaps``/``boxes`` and combined with region
    proposals.  Proposals are loaded from the ``proposals.pkl`` cache in the
    output directory when it exists; otherwise they are generated with the
    given network and written to that cache at the end.  Flipped entries
    reuse the boxes of their original counterpart, passed through
    ``_flip_boxes`` with the image width.

    Each ``roidb[i]`` gains the keys ``image``, ``ex_boxes``, ``gt_boxes``
    and ``gt_labels``.

    Args:
        imdb: image database exposing ``image_index``, ``roidb``, ``name``,
            ``image_path_at(i)`` and ``image_size(i)``.
        net: mapping of networks; ``net['full']`` selects the cache
            directory and the mapping is forwarded to
            ``_compute_ex_rois_with_net``.

    Raises:
        IOError: if proposals must be generated and an image cannot be read.
        AssertionError: if a ground-truth box has class 0 or an image ends
            up with no example boxes.
    """
    num_entries = len(imdb.image_index)
    if cfg.TRAIN.USE_FLIPPED:
        # Explicit floor division: the value is used as a list length and
        # as an index offset, so it must stay an integer.
        num_images = num_entries // 2
    else:
        num_images = num_entries
    prop = [[] for _ in xrange(num_images)]

    use_loaded = False
    output_dir = get_output_dir(imdb, net['full'])
    cache_file = os.path.join(output_dir, 'proposals.pkl')
    if os.path.exists(cache_file):
        # NOTE(review): the cache is expected to be a per-image list as
        # written at the end of this function -- confirm no other writer
        # uses the same file name with a different layout.
        with open(cache_file, 'rb') as fid:
            prop = cPickle.load(fid)
        print('{} proposals loaded from {}'.format(imdb.name, cache_file))
        use_loaded = True

    roidb = imdb.roidb
    for i in xrange(num_entries):
        if i % 20 == 0:
            print('Processing {}/{} ...'.format(i, num_entries))

        roidb[i]['image'] = imdb.image_path_at(i)
        im_size = imdb.image_size(i)

        # Flipped entries copy the boxes of the original image.
        # Assumes flipped entries follow all originals, so the original
        # sits num_images positions earlier -- TODO confirm.
        if roidb[i]['flipped']:
            im_width = im_size[1]
            original = roidb[i - num_images]
            roidb[i]['ex_boxes'] = (
                _flip_boxes(original['ex_boxes'], im_width)
                .astype(np.float32, copy=False))
            roidb[i]['gt_boxes'] = (
                _flip_boxes(original['gt_boxes'], im_width)
                .astype(np.float32, copy=False))
            roidb[i]['gt_labels'] = original['gt_labels']
            continue

        # need gt_overlaps as a dense array for argmax
        gt_overlaps = roidb[i]['gt_overlaps'].toarray()
        # max overlap with gt over classes (columns)
        max_overlaps = gt_overlaps.max(axis=1)
        # rows with overlap exactly 1 are the ground-truth boxes themselves
        gt_inds = np.where(max_overlaps == 1)[0]
        gt_rois = roidb[i]['boxes'][gt_inds, :]
        # gt class of the objects
        max_classes = gt_overlaps.argmax(axis=1)
        roidb[i]['gt_labels'] = max_classes[gt_inds]

        # use trained AZ-Net to generate region proposals
        if use_loaded:
            ex_rois = np.vstack((prop[i], gt_rois))
        else:
            im = cv2.imread(roidb[i]['image'])
            if im is None:
                # cv2.imread reports failure by returning None
                raise IOError(
                    'Failed to read image {}'.format(roidb[i]['image']))
            ex_rois, prop[i] = _compute_ex_rois_with_net(im, net, gt_rois)

        prop[i] = prop[i].astype(np.float32, copy=False)
        roidb[i]['ex_boxes'] = ex_rois.astype(np.float32, copy=False)
        roidb[i]['gt_boxes'] = gt_rois.astype(np.float32, copy=False)

        # sanity checks
        # gt boxes => class should not be zero (must be a fg class)
        assert all(max_classes[gt_inds] != 0)

        assert roidb[i]['ex_boxes'].shape[0] > 0, 'no example boxes'

    if not use_loaded:
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        with open(cache_file, 'wb') as fid:
            cPickle.dump(prop, fid, cPickle.HIGHEST_PROTOCOL)
        print('wrote roidb (proposals) to {}'.format(cache_file))
Exemple #7
0
    # set up caffe
    caffe.set_mode_gpu()
    if args.gpu_id is not None:
        caffe.set_device(args.gpu_id)

    # Set up the proposal nets, if a caffemodel is provided.  All three
    # names are bound up front so the `del` further down cannot raise a
    # NameError when no caffemodel is given.
    nets = None
    net = None
    net_fc = None
    if args.caffemodel is not None:
        model_name = os.path.splitext(os.path.basename(args.caffemodel))[0]
        # full AZ-net
        net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST)
        net.name = model_name
        # pooling layers of SC-Net
        net_fc = caffe.Net(args.prototxt_fc, args.caffemodel, caffe.TEST)
        net_fc.name = model_name
        nets = {'full': net, 'fc': net_fc}

    imdb = get_imdb(args.imdb_name)
    print('Loaded dataset `{:s}` for training'.format(imdb.name))
    roidb = get_training_roidb(imdb, nets)

    # Drop the references to the proposal nets before training starts
    # (presumably to release the resources they hold -- TODO confirm).
    del nets, net, net_fc

    output_dir = get_output_dir(imdb, None)
    print('Output will be saved to `{:s}`'.format(output_dir))

    train_net(args.solver, roidb, output_dir,
              pretrained_model=args.pretrained_model,
              max_iters=args.max_iters)
Exemple #8
0
def test_net_shared(sc_net, frcnn_net, imdb):
    """Run detection with shared convolutional layers over an image database.

    Each image is passed to ``im_detect_shared``.  For every class index
    from 1 upward (index 0 is skipped, presumably background) the best
    detections are kept, with a per-class score threshold that rises
    adaptively so that at most ``max_per_set`` scores survive over the whole
    set.  The filtered detections are pickled to ``detections.pkl``, passed
    through ``apply_nms`` with ``cfg.TEST.NMS`` and handed to
    ``imdb.evaluate_detections``.
    """
    num_images = len(imdb.image_index)
    num_classes = imdb.num_classes
    # heuristic: budget of 800 scores per image split evenly over the
    # non-zero classes, accumulated over the whole set (prior to NMS)
    max_per_set = 800 / (num_classes - 1) * num_images
    # heuristic: keep at most 100 detection per class per image prior to NMS
    max_per_image = 100
    # per-class detection threshold, raised adaptively to honour max_per_set
    thresh = np.zeros(num_classes) - np.inf
    # one min-heap of scores per class, used to enforce max_per_set
    top_scores = [[] for _ in xrange(num_classes)]
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(num_classes)]
    # running total of scored boxes, reported as a per-image average
    num_boxes = 0.0

    output_dir = get_output_dir(imdb, sc_net['full'])
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    detect_timer = Timer()
    misc_timer = Timer()
    # debugging switch for the per-class visualisation below
    visualize = False

    for i in xrange(num_images):
        im = cv2.imread(imdb.image_path_at(i))

        detect_timer.tic()
        scores, boxes = im_detect_shared(sc_net, frcnn_net, im, num_classes)
        num_boxes += scores.shape[0]
        detect_timer.toc()

        misc_timer.tic()
        for j in xrange(1, num_classes):
            heap = top_scores[j]
            # candidates of class j above the current class threshold
            inds = np.flatnonzero(scores[:, j] > thresh[j])
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            # keep only the highest scoring max_per_image of them
            order = np.argsort(-cls_scores)[:max_per_image]
            cls_scores = cls_scores[order]
            cls_boxes = cls_boxes[order, :]

            # push new scores onto the minheap
            for val in cls_scores:
                heapq.heappush(heap, val)
            # over budget: pop the lowest scores and raise the class
            # threshold to the smallest score that survived
            overfull = len(heap) > max_per_set
            while len(heap) > max_per_set:
                heapq.heappop(heap)
            if overfull:
                thresh[j] = heap[0]

            dets = np.hstack((cls_boxes, cls_scores.reshape(-1, 1)))
            all_boxes[j][i] = dets.astype(np.float32, copy=False)

            if visualize:
                keep = nms(all_boxes[j][i], 0.3)
                vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :])
        misc_timer.toc()

        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(i + 1, num_images, detect_timer.average_time,
                      misc_timer.average_time))

    # apply the final per-class thresholds to everything collected so far
    for j in xrange(1, num_classes):
        per_image = all_boxes[j]
        for i in xrange(num_images):
            dets = per_image[i]
            inds = np.where(dets[:, -1] > thresh[j])[0]
            per_image[i] = dets[inds, :]

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print('Applying NMS to all detections')
    nms_dets = apply_nms(all_boxes, cfg.TEST.NMS)

    print('Evaluating detections')
    imdb.evaluate_detections(nms_dets, output_dir)

    print('The average detection time is {:.3f}s'
          .format(detect_timer.average_time))

    print('On average, {0} boxes per image are proposed'
          .format(num_boxes / num_images))
Exemple #9
0
def test_proposals(net, imdb):
    """Generate proposals using AZ-Net on an image database.

    Every image of ``imdb`` is read with OpenCV and handed to ``im_propose``.
    The per-image results, the average proposal time and a recall field are
    pickled to ``proposals.pkl`` in the output directory derived from
    ``imdb`` and ``net['full']``; a short summary is printed afterwards.

    Args:
        net: mapping of networks; ``net['full']`` selects the output
            directory and the whole mapping is forwarded to ``im_propose``.
        imdb: image database providing ``image_index`` and
            ``image_path_at(i)``.

    Note:
        Recall is not evaluated here (the ground-truth comparison is
        disabled), so the stored and printed recall is the placeholder 0.
    """
    num_images = len(imdb.image_index)
    # all proposals are collected into:
    #    prop_boxes[image] = whatever im_propose returns for that image
    #    (presumably an N x 5 array of (x1, y1, x2, y2, score) -- TODO confirm)
    prop_boxes = [[] for _ in xrange(num_images)]

    output_dir = get_output_dir(imdb, net['full'])
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # timers
    _t = {'im_prop': Timer()}

    # running total of proposals, reported as a per-image average below
    num_boxes = 0.0

    for i in xrange(num_images):
        im = cv2.imread(imdb.image_path_at(i))
        _t['im_prop'].tic()
        prop_boxes[i] = im_propose(net, im)
        _t['im_prop'].toc()

        # Count the proposals of this image; without this the
        # "boxes per image" report below was always 0.
        # NOTE(review): assumes im_propose returns one row per proposal.
        num_boxes += len(prop_boxes[i])

        print('im_prop: {:d}/{:d} {:.3f}s'
              .format(i + 1, num_images, _t['im_prop'].average_time))

    # Placeholder: recall needs ground truth, which is not loaded here.
    recall = 0
    avg_time = _t['im_prop'].average_time
    prop = {
        'boxes': prop_boxes,
        'time': avg_time,
        'recall': recall
    }
    prop_file = os.path.join(output_dir, 'proposals.pkl')
    with open(prop_file, 'wb') as f:
        cPickle.dump(prop, f, cPickle.HIGHEST_PROTOCOL)

    print('The recall is {:.3f}'.format(recall))
    # max(..., 1) avoids a ZeroDivisionError on an empty image database
    print('On average, {0} boxes per image are generated'
          .format(num_boxes / max(num_images, 1)))
    print('The average proposal generation time is {:.3f}s'.format(avg_time))
Exemple #10
0
def test_net_shared(sc_net, frcnn_net, imdb):
    """Run detection with shared convolutional layers over an image database.

    Each image is passed to ``im_detect_shared``.  For every class index
    from 1 upward (index 0 is skipped, presumably background) the best
    detections are kept, with a per-class score threshold that rises
    adaptively so that at most ``max_per_set`` scores survive over the whole
    set.  The filtered detections are pickled to ``detections.pkl``, passed
    through ``apply_nms`` with ``cfg.TEST.NMS`` and handed to
    ``imdb.evaluate_detections``.
    """
    num_images = len(imdb.image_index)
    num_classes = imdb.num_classes
    # heuristic: budget of 800 scores per image split evenly over the
    # non-zero classes, accumulated over the whole set (prior to NMS)
    max_per_set = 800 / (num_classes - 1) * num_images
    # heuristic: keep at most 100 detection per class per image prior to NMS
    max_per_image = 100
    # per-class detection threshold, raised adaptively to honour max_per_set
    thresh = np.zeros(num_classes) - np.inf
    # one min-heap of scores per class, used to enforce max_per_set
    top_scores = [[] for _ in xrange(num_classes)]
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(num_classes)]
    # running total of scored boxes, reported as a per-image average
    num_boxes = 0.0

    output_dir = get_output_dir(imdb, sc_net['full'])
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    detect_timer = Timer()
    misc_timer = Timer()
    # debugging switch for the per-class visualisation below
    visualize = False

    for i in xrange(num_images):
        im = cv2.imread(imdb.image_path_at(i))

        detect_timer.tic()
        scores, boxes = im_detect_shared(sc_net, frcnn_net, im, num_classes)
        num_boxes += scores.shape[0]
        detect_timer.toc()

        misc_timer.tic()
        for j in xrange(1, num_classes):
            heap = top_scores[j]
            # candidates of class j above the current class threshold
            inds = np.flatnonzero(scores[:, j] > thresh[j])
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            # keep only the highest scoring max_per_image of them
            order = np.argsort(-cls_scores)[:max_per_image]
            cls_scores = cls_scores[order]
            cls_boxes = cls_boxes[order, :]

            # push new scores onto the minheap
            for val in cls_scores:
                heapq.heappush(heap, val)
            # over budget: pop the lowest scores and raise the class
            # threshold to the smallest score that survived
            overfull = len(heap) > max_per_set
            while len(heap) > max_per_set:
                heapq.heappop(heap)
            if overfull:
                thresh[j] = heap[0]

            dets = np.hstack((cls_boxes, cls_scores.reshape(-1, 1)))
            all_boxes[j][i] = dets.astype(np.float32, copy=False)

            if visualize:
                keep = nms(all_boxes[j][i], 0.3)
                vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :])
        misc_timer.toc()

        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(i + 1, num_images, detect_timer.average_time,
                      misc_timer.average_time))

    # apply the final per-class thresholds to everything collected so far
    for j in xrange(1, num_classes):
        per_image = all_boxes[j]
        for i in xrange(num_images):
            dets = per_image[i]
            inds = np.where(dets[:, -1] > thresh[j])[0]
            per_image[i] = dets[inds, :]

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print('Applying NMS to all detections')
    nms_dets = apply_nms(all_boxes, cfg.TEST.NMS)

    print('Evaluating detections')
    imdb.evaluate_detections(nms_dets, output_dir)

    print('The average detection time is {:.3f}s'
          .format(detect_timer.average_time))

    print('On average, {0} boxes per image are proposed'
          .format(num_boxes / num_images))
Exemple #11
0
def prepare_roidb(imdb, net):
    """Attach example (proposal) boxes and ground truth to each roidb entry.

    For every non-flipped image the ground-truth boxes and labels are taken
    from the entry's ``gt_overlaps``/``boxes`` and combined with region
    proposals.  Proposals are loaded from the ``proposals.pkl`` cache in the
    output directory when it exists; otherwise they are generated with the
    given network and written to that cache at the end.  Flipped entries
    reuse the boxes of their original counterpart, passed through
    ``_flip_boxes`` with the image width.

    Each ``roidb[i]`` gains the keys ``image``, ``ex_boxes``, ``gt_boxes``
    and ``gt_labels``.

    Args:
        imdb: image database exposing ``image_index``, ``roidb``, ``name``,
            ``image_path_at(i)`` and ``image_size(i)``.
        net: mapping of networks; ``net['full']`` selects the cache
            directory and the mapping is forwarded to
            ``_compute_ex_rois_with_net``.

    Raises:
        IOError: if proposals must be generated and an image cannot be read.
        AssertionError: if a ground-truth box has class 0 or an image ends
            up with no example boxes.
    """
    num_entries = len(imdb.image_index)
    if cfg.TRAIN.USE_FLIPPED:
        # Explicit floor division: the value is used as a list length and
        # as an index offset, so it must stay an integer.
        num_images = num_entries // 2
    else:
        num_images = num_entries
    prop = [[] for _ in xrange(num_images)]

    use_loaded = False
    output_dir = get_output_dir(imdb, net['full'])
    cache_file = os.path.join(output_dir, 'proposals.pkl')
    if os.path.exists(cache_file):
        # NOTE(review): the cache is expected to be a per-image list as
        # written at the end of this function -- confirm no other writer
        # uses the same file name with a different layout.
        with open(cache_file, 'rb') as fid:
            prop = cPickle.load(fid)
        print('{} proposals loaded from {}'.format(imdb.name, cache_file))
        use_loaded = True

    roidb = imdb.roidb
    for i in xrange(num_entries):
        if i % 20 == 0:
            print('Processing {}/{} ...'.format(i, num_entries))

        roidb[i]['image'] = imdb.image_path_at(i)
        im_size = imdb.image_size(i)

        # Flipped entries copy the boxes of the original image.
        # Assumes flipped entries follow all originals, so the original
        # sits num_images positions earlier -- TODO confirm.
        if roidb[i]['flipped']:
            im_width = im_size[1]
            original = roidb[i - num_images]
            roidb[i]['ex_boxes'] = (
                _flip_boxes(original['ex_boxes'], im_width)
                .astype(np.float32, copy=False))
            roidb[i]['gt_boxes'] = (
                _flip_boxes(original['gt_boxes'], im_width)
                .astype(np.float32, copy=False))
            roidb[i]['gt_labels'] = original['gt_labels']
            continue

        # need gt_overlaps as a dense array for argmax
        gt_overlaps = roidb[i]['gt_overlaps'].toarray()
        # max overlap with gt over classes (columns)
        max_overlaps = gt_overlaps.max(axis=1)
        # rows with overlap exactly 1 are the ground-truth boxes themselves
        gt_inds = np.where(max_overlaps == 1)[0]
        gt_rois = roidb[i]['boxes'][gt_inds, :]
        # gt class of the objects
        max_classes = gt_overlaps.argmax(axis=1)
        roidb[i]['gt_labels'] = max_classes[gt_inds]

        # use trained AZ-Net to generate region proposals
        if use_loaded:
            ex_rois = np.vstack((prop[i], gt_rois))
        else:
            im = cv2.imread(roidb[i]['image'])
            if im is None:
                # cv2.imread reports failure by returning None
                raise IOError(
                    'Failed to read image {}'.format(roidb[i]['image']))
            ex_rois, prop[i] = _compute_ex_rois_with_net(im, net, gt_rois)

        prop[i] = prop[i].astype(np.float32, copy=False)
        roidb[i]['ex_boxes'] = ex_rois.astype(np.float32, copy=False)
        roidb[i]['gt_boxes'] = gt_rois.astype(np.float32, copy=False)

        # sanity checks
        # gt boxes => class should not be zero (must be a fg class)
        assert all(max_classes[gt_inds] != 0)

        assert roidb[i]['ex_boxes'].shape[0] > 0, 'no example boxes'

    if not use_loaded:
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        with open(cache_file, 'wb') as fid:
            cPickle.dump(prop, fid, cPickle.HIGHEST_PROTOCOL)
        print('wrote roidb (proposals) to {}'.format(cache_file))