Example #1
0
 def evaluate_detections(self, detections, **kwargs):
     """Pickle the raw detections into the cache folder, then evaluate them.

     :param detections: detection results; pickled as-is and forwarded
     :param kwargs: passed through unchanged to ``_evaluate_detections``
     """
     pkl_name = '{}_{}.pkl'.format(self._name, 'detections')
     dump_file = os.path.join(self._root_path, 'cache', pkl_name)
     logger.info('saving cache {}'.format(dump_file))
     # The dump happens first, so it is on disk even if evaluation raises.
     with open(dump_file, 'wb') as sink:
         pickle.dump(detections, sink, pickle.HIGHEST_PROTOCOL)
     self._evaluate_detections(detections, **kwargs)
    def __init__(self, image_set, root_path, devkit_path):
        """
        Set up file locations and the ground-truth roidb for one VOC split.

        :param image_set: '<year>_<split>', e.g. 2007_trainval, 2007_test
        :param root_path: base folder handed to the parent class, e.g. 'data'
        :param devkit_path: e.g. 'data/VOCdevkit'; images and annotations are
            located under it and the results folder is created under it
        """
        super(PascalVOC, self).__init__('voc_' + image_set, root_path)

        # The combined name was needed above; from here on use its two parts.
        year, image_set = image_set.split('_')
        voc_dir = os.path.join(devkit_path, 'VOC' + year)

        self._config = dict(comp_id='comp4', use_diff=False, min_size=2)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))

        # dataset inputs
        index_name = image_set + '.txt'
        self._image_index_file = os.path.join(voc_dir, 'ImageSets', 'Main',
                                              index_name)
        self._image_file_tmpl = os.path.join(voc_dir, 'JPEGImages', '{}.jpg')
        self._image_anno_tmpl = os.path.join(voc_dir, 'Annotations', '{}.xml')

        # results: one file per class, the class name fills the '{}' slot
        result_folder = os.path.join(devkit_path, 'results', 'VOC' + year,
                                     'Main')
        if not os.path.exists(result_folder):
            os.makedirs(result_folder)
        result_name = 'comp4_det_{}_{{}}.txt'.format(image_set)
        self._result_file_tmpl = os.path.join(result_folder, result_name)

        # get roidb
        self._roidb = self._get_cached('roidb', self._load_gt_roidb)
        logger.info('%s num_images %d' % (self.name, self.num_images))
Example #3
0
    def __init__(self, image_set, root_path, devkit_path):
        """
        Set up file locations and the ground-truth roidb for one VOC split.

        :param image_set: '<year>_<split>', e.g. 2007_trainval, 2007_test
        :param root_path: base folder handed to the parent class, e.g. 'data'
        :param devkit_path: e.g. 'data/VOCdevkit'; images and annotations are
            located under it and the results folder is created under it
        """
        super(PascalVOC, self).__init__('voc_' + image_set, root_path)

        # The combined name was needed above; from here on use its two parts.
        year, image_set = image_set.split('_')
        voc_dir = os.path.join(devkit_path, 'VOC' + year)

        self._config = dict(comp_id='comp4', use_diff=False, min_size=2)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))

        # dataset inputs
        index_name = image_set + '.txt'
        self._image_index_file = os.path.join(voc_dir, 'ImageSets', 'Main',
                                              index_name)
        self._image_file_tmpl = os.path.join(voc_dir, 'JPEGImages', '{}.jpg')
        self._image_anno_tmpl = os.path.join(voc_dir, 'Annotations', '{}.xml')

        # results: one file per class, the class name fills the '{}' slot
        result_folder = os.path.join(devkit_path, 'results', 'VOC' + year,
                                     'Main')
        if not os.path.exists(result_folder):
            os.makedirs(result_folder)
        result_name = 'comp4_det_{}_{{}}.txt'.format(image_set)
        self._result_file_tmpl = os.path.join(result_folder, result_name)

        # get roidb
        self._roidb = self._get_cached('roidb', self._load_gt_roidb)
        logger.info('%s num_images %d' % (self.name, self.num_images))
Example #4
0
 def filter_roidb(self):
     """Drop every roidb record whose 'gt_classes' entry is empty."""
     before = len(self._roidb)
     kept = []
     for record in self._roidb:
         if len(record['gt_classes']):
             kept.append(record)
     self._roidb = kept
     logger.info('filter roidb: {} -> {}'.format(before, len(kept)))
Example #5
0
def test_net(sym, imdb, args):
    """Run the detector over every image in ``imdb`` and evaluate the output.

    :param sym: network symbol that is bound for inference
    :param imdb: dataset object supplying ``roidb``, ``num_images``,
        ``num_classes`` and ``evaluate_detections``
    :param args: option namespace; the fields read here are ``gpu``,
        ``params``, ``img_short_side``, ``img_long_side``, ``img_pixel_means``,
        ``img_pixel_stds``, ``rcnn_bbox_stds``, ``rcnn_nms_thresh`` and
        ``rcnn_conf_thresh``
    """
    # print config
    logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))

    # setup context
    ctx = mx.gpu(args.gpu)

    # load testing data
    test_data = TestLoader(imdb.roidb, batch_size=1, short=args.img_short_side, max_size=args.img_long_side,
                           mean=args.img_pixel_means, std=args.img_pixel_stds)

    # load params
    arg_params, aux_params = load_param(args.params, ctx=ctx)

    # produce shape max possible
    data_names = ['data', 'im_info']
    label_names = None
    data_shapes = [('data', (1, 3, args.img_long_side, args.img_long_side)), ('im_info', (1, 3))]
    label_shapes = None

    # check shapes
    check_shape(sym, data_shapes, arg_params, aux_params)

    # create and bind module
    mod = Module(sym, data_names, label_names, context=ctx)
    mod.bind(data_shapes, label_shapes, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(imdb.num_images)]
                 for _ in range(imdb.num_classes)]

    # start detection
    with tqdm(total=imdb.num_images) as pbar:
        for i, data_batch in enumerate(test_data):
            # forward
            im_info = data_batch.data[1][0]
            mod.forward(data_batch)
            rois, scores, bbox_deltas = mod.get_outputs()
            rois = rois[:, 1:]
            scores = scores[0]
            bbox_deltas = bbox_deltas[0]

            det = im_detect(rois, scores, bbox_deltas, im_info,
                            bbox_stds=args.rcnn_bbox_stds, nms_thresh=args.rcnn_nms_thresh,
                            conf_thresh=args.rcnn_conf_thresh)

            # Column 0 of det is matched against the class id, column 1 goes
            # into the score slot and the last four columns are the box.
            # The reordered array does not depend on the class, so build it
            # once per image instead of once per class.
            boxes_scores = np.concatenate((det[:, -4:], det[:, [1]]), axis=-1)
            cls_ids = det[:, 0]
            for j in range(1, imdb.num_classes):
                indexes = np.where(cls_ids == j)[0]
                all_boxes[j][i] = boxes_scores[indexes, :]
            pbar.update(data_batch.data[0].shape[0])

    # evaluate model
    imdb.evaluate_detections(all_boxes)
Example #6
0
    def _do_python_eval(self, all_boxes, use_07_metric):
        """Compute and log the AP of every non-background class and the mean AP.

        :param all_boxes: all_boxes[cls][image] holds that image's detections
            for that class; columns 0-3 are read as the box and the last
            column as the confidence. Empty entries are skipped.
        :param use_07_metric: forwarded unchanged to ``voc_eval``
        """
        evaluated = []  # class names, aligned one-to-one with `aps`
        aps = []
        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__':
                continue
            # class_anno is a dict [image_index, [bbox, difficult, det]]
            class_anno = {}
            npos = 0
            for roi_rec in self.roidb:
                index = roi_rec['index']
                objects = [
                    obj for obj in roi_rec['objs'] if obj['name'] == cls
                ]
                bbox = np.array([x['bbox'] for x in objects])
                # builtin bool: the np.bool alias was removed in NumPy 1.24
                difficult = np.array([x['difficult']
                                      for x in objects]).astype(bool)
                det = [False] * len(objects)  # stand for detected
                npos += int(np.count_nonzero(~difficult))
                class_anno[index] = {
                    'bbox': bbox,
                    'difficult': difficult,
                    'det': det
                }

            # bbox is 2d array of all detections, corresponding to each image_id
            image_ids = []
            bbox = []
            confidence = []
            for im_ind, dets in enumerate(all_boxes[cls_ind]):
                # An image without detections may still hold its empty
                # placeholder, which has no .shape; skip it.
                if len(dets) == 0:
                    continue
                for k in range(dets.shape[0]):
                    image_ids.append(self.roidb[im_ind]['index'])
                    bbox.append([
                        dets[k, 0] + 1, dets[k, 1] + 1, dets[k, 2] + 1,
                        dets[k, 3] + 1
                    ])
                    confidence.append(dets[k, -1])
            bbox = np.array(bbox)
            confidence = np.array(confidence)

            _, _, ap = self.voc_eval(class_anno,
                                     npos,
                                     image_ids,
                                     bbox,
                                     confidence,
                                     ovthresh=0.5,
                                     use_07_metric=use_07_metric)
            evaluated.append(cls)
            aps.append(ap)

        # Pair each AP with the class it was computed for; zipping against
        # self.classes would shift every label by the skipped background entry.
        for cls, ap in zip(evaluated, aps):
            logger.info('AP for {} = {:.4f}'.format(cls, ap))
        logger.info('Mean AP = {:.4f}'.format(np.mean(aps)))
Example #7
0
    def _print_detection_metrics(self, coco_eval):
        """Log overall and per-category AP for IoU 0.50 to 0.95, then the summary.

        :param coco_eval: evaluator object; this method reads
            ``params.iouThrs`` and ``eval['precision']`` and calls
            ``summarize()``
        """
        IoU_lo_thresh = 0.5
        IoU_hi_thresh = 0.95

        def _get_thr_ind(coco_eval, thr):
            """Index of the IoU threshold equal to ``thr`` within 1e-5."""
            ind = np.where((coco_eval.params.iouThrs > thr - 1e-5)
                           & (coco_eval.params.iouThrs < thr + 1e-5))[0][0]
            iou_thr = coco_eval.params.iouThrs[ind]
            assert np.isclose(iou_thr, thr)
            return ind

        def _mean_valid(values):
            """Mean of the entries greater than -1; NaN when there are none."""
            valid = values[values > -1]
            # np.mean of an empty selection is also NaN but emits a warning.
            return np.mean(valid) if valid.size else float('nan')

        ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
        ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)

        # precision has dims (iou, recall, cls, area range, max dets)
        # area range index 0: all area ranges
        # max dets index 2: 100 per image
        precision = \
            coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
        ap_default = _mean_valid(precision)
        logger.info('~~~~ Mean and per-category AP @ IoU=[%.2f,%.2f] ~~~~' %
                    (IoU_lo_thresh, IoU_hi_thresh))
        logger.info('%-15s %5.1f' % ('all', 100 * ap_default))
        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__':
                continue
            # minus 1 because of __background__
            ap = _mean_valid(precision[:, :, cls_ind - 1])
            logger.info('%-15s %5.1f' % (cls, 100 * ap))

        logger.info('~~~~ Summary metrics ~~~~')
        coco_eval.summarize()
Example #8
0
    def _print_detection_metrics(self, coco_eval):
        """Log overall and per-category AP for IoU 0.50 to 0.95, then the summary.

        :param coco_eval: evaluator object; this method reads
            ``params.iouThrs`` and ``eval['precision']`` and calls
            ``summarize()``
        """
        IoU_lo_thresh = 0.5
        IoU_hi_thresh = 0.95

        def _get_thr_ind(coco_eval, thr):
            """Index of the IoU threshold equal to ``thr`` within 1e-5."""
            ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
                           (coco_eval.params.iouThrs < thr + 1e-5))[0][0]
            iou_thr = coco_eval.params.iouThrs[ind]
            assert np.isclose(iou_thr, thr)
            return ind

        def _mean_valid(values):
            """Mean of the entries greater than -1; NaN when there are none."""
            valid = values[values > -1]
            # np.mean of an empty selection is also NaN but emits a warning.
            return np.mean(valid) if valid.size else float('nan')

        ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
        ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)

        # precision has dims (iou, recall, cls, area range, max dets)
        # area range index 0: all area ranges
        # max dets index 2: 100 per image
        precision = \
            coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
        ap_default = _mean_valid(precision)
        logger.info('~~~~ Mean and per-category AP @ IoU=[%.2f,%.2f] ~~~~' % (IoU_lo_thresh, IoU_hi_thresh))
        logger.info('%-15s %5.1f' % ('all', 100 * ap_default))
        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__':
                continue
            # minus 1 because of __background__
            ap = _mean_valid(precision[:, :, cls_ind - 1])
            logger.info('%-15s %5.1f' % (cls, 100 * ap))

        logger.info('~~~~ Summary metrics ~~~~')
        coco_eval.summarize()
Example #9
0
 def append_flipped_images(self):
     """Append a horizontally mirrored copy of every roidb record.

     Only the x coordinates of the boxes are mirrored here and the copy is
     tagged with 'flipped'; no image data is touched by this method.
     """
     logger.info('%s append flipped images to roidb' % self._name)
     mirrored = []
     for record in self._roidb:
         width = record['width']
         new_boxes = record['boxes'].copy()
         # keep the untouched columns: each one is needed after the other
         # has already been overwritten
         left = new_boxes[:, 0].copy()
         right = new_boxes[:, 2].copy()
         new_boxes[:, 0] = width - right - 1
         new_boxes[:, 2] = width - left - 1
         assert (new_boxes[:, 2] >= new_boxes[:, 0]).all()

         twin = record.copy()
         twin['boxes'] = new_boxes
         twin['flipped'] = True
         mirrored.append(twin)
     self._roidb.extend(mirrored)
Example #10
0
 def append_flipped_images(self):
     """Append a horizontally mirrored copy of every roidb record.

     Only the x coordinates of the boxes are mirrored here and the copy is
     tagged with 'flipped'; no image data is touched by this method.
     """
     logger.info('%s append flipped images to roidb' % self._name)
     mirrored = []
     for record in self._roidb:
         width = record['width']
         new_boxes = record['boxes'].copy()
         # keep the untouched columns: each one is needed after the other
         # has already been overwritten
         left = new_boxes[:, 0].copy()
         right = new_boxes[:, 2].copy()
         new_boxes[:, 0] = width - right - 1
         new_boxes[:, 2] = width - left - 1
         assert (new_boxes[:, 2] >= new_boxes[:, 0]).all()

         twin = record.copy()
         twin['boxes'] = new_boxes
         twin['flipped'] = True
         mirrored.append(twin)
     self._roidb.extend(mirrored)
Example #11
0
 def _write_pascal_results(self, all_boxes):
     """Write one detection result file per non-background class.

     Each output line is: image index, score, then the four box coordinates
     shifted by +1.

     :param all_boxes: all_boxes[cls][image] holds that image's detections
         for that class; columns 0-3 are the box, the last column the score
     """
     line_tmpl = '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'
     for cls_ind, cls in enumerate(self.classes):
         if cls == '__background__':
             continue
         logger.info('Writing %s VOC results file' % cls)
         with open(self._result_file_tmpl.format(cls), 'wt') as out:
             for im_ind, roi_rec in enumerate(self.roidb):
                 index = roi_rec['index']
                 dets = all_boxes[cls_ind][im_ind]
                 if len(dets) == 0:
                     continue
                 for k in range(dets.shape[0]):
                     score = dets[k, -1]
                     # the VOCdevkit expects 1-based indices
                     x1, y1, x2, y2 = [dets[k, col] + 1 for col in range(4)]
                     out.write(line_tmpl.format(index, score, x1, y1, x2, y2))
Example #12
0
 def __init__(self, image_set, root_path, data_path):
     """
     Record the dataset file locations and load the ground-truth roidb.

     :param image_set: split name such as train2017 or val2017
     :param root_path: base folder handed to the parent class, e.g. 'data'
     :param data_path: dataset folder, e.g. 'data/coco'; annotations and
         images are located under it, as is the results json path
     """
     super(coco, self).__init__('coco_' + image_set, root_path)

     # e.g. annotations/instances_train2017.json
     anno_name = 'instances_{}.json'.format(image_set)
     self._anno_file = os.path.join(data_path, 'annotations', anno_name)

     # e.g. train2017/000000119993.jpg -- the file name fills the '{}' slot
     self._image_file_tmpl = os.path.join(data_path, image_set, '{}')

     # e.g. detections_val2017_results.json
     result_name = 'detections_' + image_set + '_results.json'
     self._result_file = os.path.join(data_path, result_name)

     # get roidb
     self._roidb = self._get_cached('roidb', self._load_gt_roidb)
     logger.info('%s num_images %d' % (self.name, self.num_images))
Example #13
0
 def _write_pascal_results(self, all_boxes):
     """Write one detection result file per non-background class.

     Each output line is: image index, score, then the four box coordinates
     shifted by +1.

     :param all_boxes: all_boxes[cls][image] holds that image's detections
         for that class; columns 0-3 are the box, the last column the score
     """
     line_tmpl = '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'
     for cls_ind, cls in enumerate(self.classes):
         if cls == '__background__':
             continue
         logger.info('Writing %s VOC results file' % cls)
         with open(self._result_file_tmpl.format(cls), 'wt') as out:
             for im_ind, roi_rec in enumerate(self.roidb):
                 index = roi_rec['index']
                 dets = all_boxes[cls_ind][im_ind]
                 if len(dets) == 0:
                     continue
                 for k in range(dets.shape[0]):
                     score = dets[k, -1]
                     # the VOCdevkit expects 1-based indices
                     x1, y1, x2, y2 = [dets[k, col] + 1 for col in range(4)]
                     out.write(line_tmpl.format(index, score, x1, y1, x2, y2))
Example #14
0
 def _write_coco_results(self, _coco, detections):
     """Collect the detections of every class and dump them as one json file.

     example results
     [{"image_id": 42,
       "category_id": 18,
       "bbox": [258.15,41.29,348.26,243.78],
       "score": 0.236}, ...]

     :param _coco: annotation API object; ``getCatIds`` and ``loadCats`` are
         used to map class names to category ids
     :param detections: detections[cls] is handed to
         ``_coco_results_one_category`` together with the category id
     """
     # class name -> category id, as reported by the annotation API
     name_to_cat_id = {}
     for cat, cat_id in zip(_coco.loadCats(_coco.getCatIds()), _coco.getCatIds()):
         name_to_cat_id[cat['name']] = cat_id

     results = []
     for cls_ind, cls in enumerate(self.classes):
         if cls == '__background__':
             continue
         logger.info('collecting %s results (%d/%d)' % (cls, cls_ind, self.num_classes - 1))
         per_class = self._coco_results_one_category(detections[cls_ind],
                                                     name_to_cat_id[cls])
         results.extend(per_class)

     logger.info('writing results json to %s' % self._result_file)
     with open(self._result_file, 'w') as out:
         json.dump(results, out, sort_keys=True, indent=4)
Example #15
0
    def __init__(self, image_set, root_path, data_path):
        """
        fill basic information to initialize imdb

        The class list is '__background__' followed by the names found in
        <data_path>/annotations/classes.txt, one name per line.

        :param image_set: train2017, val2017
        :param root_path: 'data', will write 'cache'
        :param data_path: 'data/coco', load data and write results
        """
        super(coco, self).__init__('coco_' + image_set, root_path)
        # example: annotations/instances_train2017.json
        self._anno_file = os.path.join(data_path, 'annotations',
                                       'instances_' + image_set + '.json')
        # example train2017/000000119993.jpg
        self._image_file_tmpl = os.path.join(data_path, image_set, '{}')
        # example detections_val2017_results.json
        self._result_file = os.path.join(
            data_path, 'detections_{}_results.json'.format(image_set))

        # class names
        self._classes_file = os.path.join(data_path, 'annotations',
                                          'classes.txt')
        with open(self._classes_file) as f:
            # Strip only the line terminator: slicing off the last character
            # would truncate the final name when the file does not end with a
            # newline, and would leave '\r' behind on CRLF files.
            names = [line.rstrip('\r\n') for line in f]
        # Blank lines (typically a trailing one) are not class names.
        self.classes = ['__background__'] + [name for name in names if name]

        # get roidb
        self._roidb = self._get_cached('roidb', self._load_gt_roidb)
        logger.info('%s num_images %d' % (self.name, self.num_images))
Example #16
0
    def _do_python_eval(self, all_boxes, use_07_metric):
        """Compute and log the AP of every non-background class and the mean AP.

        :param all_boxes: all_boxes[cls][image] holds that image's detections
            for that class; columns 0-3 are read as the box and the last
            column as the confidence. Empty entries are skipped.
        :param use_07_metric: forwarded unchanged to ``voc_eval``
        """
        evaluated = []  # class names, aligned one-to-one with `aps`
        aps = []
        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__':
                continue
            # class_anno is a dict [image_index, [bbox, difficult, det]]
            class_anno = {}
            npos = 0
            for roi_rec in self.roidb:
                index = roi_rec['index']
                objects = [obj for obj in roi_rec['objs'] if obj['name'] == cls]
                bbox = np.array([x['bbox'] for x in objects])
                # builtin bool: the np.bool alias was removed in NumPy 1.24
                difficult = np.array([x['difficult'] for x in objects]).astype(bool)
                det = [False] * len(objects)  # stand for detected
                npos += int(np.count_nonzero(~difficult))
                class_anno[index] = {'bbox': bbox,
                                     'difficult': difficult,
                                     'det': det}

            # bbox is 2d array of all detections, corresponding to each image_id
            image_ids = []
            bbox = []
            confidence = []
            for im_ind, dets in enumerate(all_boxes[cls_ind]):
                # An image without detections may still hold its empty
                # placeholder, which has no .shape; skip it.
                if len(dets) == 0:
                    continue
                for k in range(dets.shape[0]):
                    image_ids.append(self.roidb[im_ind]['index'])
                    bbox.append([dets[k, 0] + 1, dets[k, 1] + 1, dets[k, 2] + 1, dets[k, 3] + 1])
                    confidence.append(dets[k, -1])
            bbox = np.array(bbox)
            confidence = np.array(confidence)

            _, _, ap = self.voc_eval(class_anno, npos, image_ids, bbox, confidence,
                                     ovthresh=0.5, use_07_metric=use_07_metric)
            evaluated.append(cls)
            aps.append(ap)

        # Pair each AP with the class it was computed for; zipping against
        # self.classes would shift every label by the skipped background entry.
        for cls, ap in zip(evaluated, aps):
            logger.info('AP for {} = {:.4f}'.format(cls, ap))
        logger.info('Mean AP = {:.4f}'.format(np.mean(aps)))
Example #17
0
 def _get_cached(self, cache_item, fn):
     """Return the cached value for ``cache_item``, computing it on a miss.

     :param cache_item: label used in the cache file name
         (<root_path>/cache/<name>_<cache_item>.pkl)
     :param fn: zero-argument callable that produces the value when no
         cache file exists; its result is pickled and returned
     :return: the unpickled cache content, or the fresh result of ``fn()``
     """
     pkl_name = '{}_{}.pkl'.format(self._name, cache_item)
     cache_path = os.path.join(self._root_path, 'cache', pkl_name)

     if not os.path.exists(cache_path):
         logger.info('computing cache {}'.format(cache_path))
         value = fn()
         logger.info('saving cache {}'.format(cache_path))
         with open(cache_path, 'wb') as sink:
             pickle.dump(value, sink, pickle.HIGHEST_PROTOCOL)
         return value

     logger.info('loading cache {}'.format(cache_path))
     with open(cache_path, 'rb') as source:
         value = pickle.load(source)
     return value
Example #18
0
 def _get_cached(self, cache_item, fn):
     """Return the cached value for ``cache_item``, computing it on a miss.

     :param cache_item: label used in the cache file name
         (<root_path>/cache/<name>_<cache_item>.pkl)
     :param fn: zero-argument callable that produces the value when no
         cache file exists; its result is pickled and returned
     :return: the unpickled cache content, or the fresh result of ``fn()``
     """
     pkl_name = '{}_{}.pkl'.format(self._name, cache_item)
     cache_path = os.path.join(self._root_path, 'cache', pkl_name)

     if not os.path.exists(cache_path):
         logger.info('computing cache {}'.format(cache_path))
         value = fn()
         logger.info('saving cache {}'.format(cache_path))
         with open(cache_path, 'wb') as sink:
             pickle.dump(value, sink, pickle.HIGHEST_PROTOCOL)
         return value

     logger.info('loading cache {}'.format(cache_path))
     with open(cache_path, 'rb') as source:
         value = pickle.load(source)
     return value
Example #19
0
def test_net(sym, imdb, args):
    """Run box and mask inference over every image in ``imdb`` and evaluate.

    :param sym: network symbol that is bound for inference
    :param imdb: dataset object supplying ``roidb``, ``num_images``,
        ``num_classes`` and ``evaluate_mask``
    :param args: option namespace; the fields read here are ``gpu``,
        ``params``, ``img_short_side``, ``img_long_side``, ``img_pixel_means``,
        ``img_pixel_stds``, ``rcnn_bbox_stds``, ``rcnn_nms_thresh`` and
        ``rcnn_conf_thresh``
    """
    # print config
    logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))

    # setup context
    ctx = mx.gpu(args.gpu)

    # load testing data
    test_data = TestLoader(imdb.roidb,
                           batch_size=1,
                           short=args.img_short_side,
                           max_size=args.img_long_side,
                           mean=args.img_pixel_means,
                           std=args.img_pixel_stds)

    # load params
    arg_params, aux_params = load_param(args.params, ctx=ctx)

    # produce shape max possible
    data_names = ['data', 'im_info']
    label_names = None
    data_shapes = [('data', (1, 3, args.img_long_side, args.img_long_side)),
                   ('im_info', (1, 3))]
    label_shapes = None

    # check shapes
    check_shape(sym, data_shapes, arg_params, aux_params)

    # create and bind module
    mod = Module(sym, data_names, label_names, context=ctx)
    mod.bind(data_shapes, label_shapes, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    # all_masks and all_rois use the same [cls][image] layout.
    results_list = []
    all_boxes = [[[] for _ in range(imdb.num_images)]
                 for _ in range(imdb.num_classes)]
    all_masks = [[[] for _ in range(imdb.num_images)]
                 for _ in range(imdb.num_classes)]
    all_rois = [[[] for _ in range(imdb.num_images)]
                for _ in range(imdb.num_classes)]

    # start detection
    with tqdm(total=imdb.num_images) as pbar:
        for i, data_batch in enumerate(test_data):
            # forward
            im_info = data_batch.data[1][0]
            mod.forward(data_batch)
            rois, scores, bbox_deltas, mask_prob = mod.get_outputs()
            rois = rois[:, 1:]
            scores = scores[0]
            bbox_deltas = bbox_deltas[0]

            det, masks, rois_out = im_detect(rois,
                                             scores,
                                             bbox_deltas,
                                             mask_prob,
                                             im_info,
                                             bbox_stds=args.rcnn_bbox_stds,
                                             nms_thresh=args.rcnn_nms_thresh,
                                             conf_thresh=args.rcnn_conf_thresh)

            # Column 0 of det is matched against the class id, column 1 goes
            # into the score slot and the last four columns are the box.
            # The reordered array does not depend on the class, so build it
            # once per image instead of once per class.
            boxes_scores = np.concatenate((det[:, -4:], det[:, [1]]), axis=-1)
            cls_ids = det[:, 0]
            for j in range(1, imdb.num_classes):
                indexes = np.where(cls_ids == j)[0]
                all_boxes[j][i] = boxes_scores[indexes, :]
                all_masks[j][i] = masks[indexes]
                all_rois[j][i] = rois_out[indexes]

            # per-image view of the results; slot 0 (class index 0) stays empty
            boxes_this_image = [[]] + [
                all_boxes[cls_ind][i]
                for cls_ind in range(1, imdb.num_classes)
            ]
            masks_this_image = [[]] + [
                all_masks[cls_ind][i]
                for cls_ind in range(1, imdb.num_classes)
            ]
            rois_this_image = [[]] + [
                all_rois[cls_ind][i] for cls_ind in range(1, imdb.num_classes)
            ]
            results_list.append({
                'image': '{}.png'.format(i),
                'im_info': im_info.asnumpy(),
                'boxes': boxes_this_image,
                'masks': masks_this_image,
                'rois': rois_this_image
            })

            pbar.update(data_batch.data[0].shape[0])

    # evaluate model
    results_pack = {
        'all_boxes': all_boxes,
        'all_masks': all_masks,
        'results_list': results_list
    }
    imdb.evaluate_mask(results_pack)
Example #20
0
def test_net(sym, imdb, args, config):
    """Run the detector over every image in ``imdb`` and evaluate the output.

    A file handler writing to <args.prefix>/test.log is attached to the
    module logger on every call.

    :param sym: network symbol that is bound for inference
    :param imdb: dataset object supplying ``roidb``, ``num_images``,
        ``num_classes`` and ``evaluate_detections``
    :param args: option namespace; the fields read here are ``prefix``,
        ``gpu``, ``params``, ``img_short_side``, ``img_long_side``,
        ``rcnn_bbox_stds``, ``rcnn_nms_thresh`` and ``rcnn_conf_thresh``
    :param config: object whose ``transform`` mapping provides
        'img_pixel_means' and 'img_pixel_stds'
    """
    logger.addHandler(
        logging.FileHandler("{0}/{1}".format(args.prefix, 'test.log')))
    # print config
    logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))

    # setup context
    ctx = mx.gpu(args.gpu)

    # load testing data
    test_data = TestLoader(imdb.roidb,
                           batch_size=1,
                           short=args.img_short_side,
                           max_size=args.img_long_side,
                           mean=config.transform['img_pixel_means'],
                           std=config.transform['img_pixel_stds'])

    # load params
    arg_params, aux_params = load_param(args.params, ctx=ctx)

    # produce shape max possible
    data_names = ['data', 'im_info']
    label_names = None
    data_shapes = [('data', (1, 3, args.img_long_side, args.img_long_side)),
                   ('im_info', (1, 3))]
    label_shapes = None

    # check shapes
    check_shape(sym, data_shapes, arg_params, aux_params)

    # create and bind module
    mod = Module(sym, data_names, label_names, context=ctx)
    mod.bind(data_shapes, label_shapes, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(imdb.num_images)]
                 for _ in range(imdb.num_classes)]

    # start detection
    with tqdm(total=imdb.num_images) as pbar:
        for i, data_batch in enumerate(test_data):
            # forward
            im_info = data_batch.data[1][0]
            mod.forward(data_batch)
            rois, scores, bbox_deltas = mod.get_outputs()
            rois = rois[:, 1:]
            scores = scores[0]
            bbox_deltas = bbox_deltas[0]

            det = im_detect(rois,
                            scores,
                            bbox_deltas,
                            im_info,
                            bbox_stds=args.rcnn_bbox_stds,
                            nms_thresh=args.rcnn_nms_thresh,
                            conf_thresh=args.rcnn_conf_thresh)

            # Column 0 of det is matched against the class id, column 1 goes
            # into the score slot and the last four columns are the box.
            # The reordered array does not depend on the class, so build it
            # once per image instead of once per class.
            boxes_scores = np.concatenate((det[:, -4:], det[:, [1]]), axis=-1)
            cls_ids = det[:, 0]
            for j in range(1, imdb.num_classes):
                indexes = np.where(cls_ids == j)[0]
                all_boxes[j][i] = boxes_scores[indexes, :]
            pbar.update(data_batch.data[0].shape[0])

    # evaluate model
    imdb.evaluate_detections(all_boxes)
Example #21
0
def train(sym, roidb):
    """
    User function: Start training

    All hyper-parameters are read from the module-level ``system_dict``.

    Args:
        sym (mxnet model): network symbol to train; its
            'rpn_cls_score_output' internal is used to size the RPN labels
        roidb: training records; handed to AnchorLoader, and its length
            is used to convert decay epochs into iterations

    Returns:
        None
    """
    cfg = system_dict

    # one context per configured GPU, or a single CPU context when none given
    if len(cfg["gpus"]) == 0:
        ctx = [mx.cpu(0)]
    else:
        ctx = [mx.gpu(int(gpu_id)) for gpu_id in cfg["gpus"]]
    batch_size = cfg["rcnn_batch_size"] * len(ctx)

    # load training data
    feat_sym = sym.get_internals()['rpn_cls_score_output']
    anchor_gen = AnchorGenerator(feat_stride=cfg["rpn_feat_stride"],
                                 anchor_scales=cfg["rpn_anchor_scales"],
                                 anchor_ratios=cfg["rpn_anchor_ratios"])
    anchor_sampler = AnchorSampler(allowed_border=cfg["rpn_allowed_border"],
                                   batch_rois=cfg["rpn_batch_rois"],
                                   fg_fraction=cfg["rpn_fg_fraction"],
                                   fg_overlap=cfg["rpn_fg_overlap"],
                                   bg_overlap=cfg["rpn_bg_overlap"])
    train_data = AnchorLoader(roidb, batch_size,
                              cfg["img_short_side"], cfg["img_long_side"],
                              cfg["img_pixel_means"], cfg["img_pixel_stds"],
                              feat_sym, anchor_gen, anchor_sampler,
                              shuffle=True)

    # produce shape max possible: a square image of the long side
    long_side = cfg["img_long_side"]
    _, out_shape, _ = feat_sym.infer_shape(data=(1, 3, long_side, long_side))
    feat_height, feat_width = out_shape[0][-2:]
    rpn_num_anchors = len(cfg["rpn_anchor_scales"]) * len(cfg["rpn_anchor_ratios"])
    bbox_map_shape = (batch_size, 4 * rpn_num_anchors, feat_height, feat_width)

    data_names = ['data', 'im_info', 'gt_boxes']
    label_names = ['label', 'bbox_target', 'bbox_weight']
    data_shapes = [
        ('data', (batch_size, 3, long_side, long_side)),
        ('im_info', (batch_size, 3)),
        ('gt_boxes', (batch_size, 100, 5)),
    ]
    label_shapes = [
        ('label', (batch_size, 1, rpn_num_anchors * feat_height, feat_width)),
        ('bbox_target', bbox_map_shape),
        ('bbox_weight', bbox_map_shape),
    ]
    all_shapes = data_shapes + label_shapes

    # print shapes
    data_shape_dict, out_shape_dict = infer_data_shape(sym, all_shapes)
    logger.info('max input shape\n%s' % pprint.pformat(data_shape_dict))
    logger.info('max output shape\n%s' % pprint.pformat(out_shape_dict))

    # load and initialize params: a resume checkpoint is used as-is, while
    # pretrained weights additionally go through initialize_frcnn
    if cfg["resume"]:
        arg_params, aux_params = load_param(cfg["resume"])
    else:
        arg_params, aux_params = load_param(cfg["pretrained"])
        arg_params, aux_params = initialize_frcnn(sym, data_shapes, arg_params, aux_params)

    # check parameter shapes
    check_shape(sym, all_shapes, arg_params, aux_params)

    # check fixed params
    fixed_param_names = get_fixed_params(sym, cfg["net_fixed_params"])
    logger.info('locking params\n%s' % pprint.pformat(fixed_param_names))

    # metric: three RPN metrics followed by three RCNN metrics
    child_metrics = [
        RPNAccMetric(),
        RPNLogLossMetric(),
        RPNL1LossMetric(),
        RCNNAccMetric(),
        RCNNLogLossMetric(),
        RCNNL1LossMetric(),
    ]
    eval_metrics = mx.metric.CompositeEvalMetric()
    for metric in child_metrics:
        eval_metrics.add(metric)

    # callback
    batch_end_callback = mx.callback.Speedometer(batch_size,
                                                 frequent=cfg["log_interval"],
                                                 auto_reset=False)
    epoch_end_callback = mx.callback.do_checkpoint(cfg["save_prefix"])

    # learning schedule
    start_epoch = cfg["start_epoch"]
    lr_factor = 0.1
    decay_epochs = [int(token) for token in cfg["lr_decay_epoch"].split(',')]
    # decay points still ahead, expressed relative to the starting epoch
    lr_epoch_diff = [epoch - start_epoch for epoch in decay_epochs if epoch > start_epoch]
    # decay points already passed are applied to the base rate up front
    decays_done = len(decay_epochs) - len(lr_epoch_diff)
    lr = cfg["lr"] * (lr_factor ** decays_done)
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters))
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': (1.0 / batch_size),
        'clip_gradient': 5,
    }

    # train
    mod = Module(sym,
                 data_names=data_names,
                 label_names=label_names,
                 logger=logger,
                 context=ctx,
                 work_load_list=None,
                 fixed_param_names=fixed_param_names)
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore='device',
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=start_epoch,
            num_epoch=cfg["epochs"])
Example #22
0
def train_net(sym, roidb, args):
    """
    build the loader, metrics, lr schedule and module for sym, then fit it
    :param sym: network symbol, its internals must contain
        'rpn_cls_score_output'
    :param roidb: roi records handed to AnchorLoader; len(roidb) is also
        used to turn lr decay epochs into iteration counts
    :param args: options object read by attribute (gpus, rcnn_batch_size,
        rpn_*, img_*, resume, pretrained, net_fixed_params, log_interval,
        save_prefix, lr, lr_decay_epoch, start_epoch, epochs)
    """
    # print config
    logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))

    # setup multi-gpu: one context per listed gpu id, cpu when none is given
    if args.gpus:
        ctx = [mx.gpu(int(gpu_id)) for gpu_id in args.gpus.split(',')]
    else:
        ctx = [mx.cpu()]
    batch_size = args.rcnn_batch_size * len(ctx)

    # load training data
    feat_sym = sym.get_internals()['rpn_cls_score_output']
    anchor_generator = AnchorGenerator(
        feat_stride=args.rpn_feat_stride,
        anchor_scales=args.rpn_anchor_scales,
        anchor_ratios=args.rpn_anchor_ratios)
    anchor_sampler = AnchorSampler(
        allowed_border=args.rpn_allowed_border,
        batch_rois=args.rpn_batch_rois,
        fg_fraction=args.rpn_fg_fraction,
        fg_overlap=args.rpn_fg_overlap,
        bg_overlap=args.rpn_bg_overlap)
    train_data = AnchorLoader(
        roidb, batch_size, args.img_short_side, args.img_long_side,
        args.img_pixel_means, args.img_pixel_stds, feat_sym,
        anchor_generator, anchor_sampler, shuffle=True)

    # produce shape max possible (square image with the long side on both axes)
    long_side = args.img_long_side
    max_image = (3, long_side, long_side)
    _, out_shape, _ = feat_sym.infer_shape(data=(1,) + max_image)
    feat_height, feat_width = out_shape[0][-2:]
    rpn_num_anchors = len(args.rpn_anchor_scales) * len(args.rpn_anchor_ratios)
    bbox_shape = (batch_size, 4 * rpn_num_anchors, feat_height, feat_width)
    data_shapes = [
        ('data', (batch_size,) + max_image),
        ('im_info', (batch_size, 3)),
        ('gt_boxes', (batch_size, 100, 5)),
    ]
    label_shapes = [
        ('label', (batch_size, 1, rpn_num_anchors * feat_height, feat_width)),
        ('bbox_target', bbox_shape),
        ('bbox_weight', bbox_shape),
    ]
    data_names = [shape_name for shape_name, _ in data_shapes]
    label_names = [shape_name for shape_name, _ in label_shapes]

    # print shapes
    data_shape_dict, out_shape_dict = infer_data_shape(
        sym, data_shapes + label_shapes)
    logger.info('max input shape\n%s' % pprint.pformat(data_shape_dict))
    logger.info('max output shape\n%s' % pprint.pformat(out_shape_dict))

    # load and initialize params; only a fresh (non-resumed) run goes
    # through initialize_frcnn
    resuming = bool(args.resume)
    arg_params, aux_params = load_param(
        args.resume if resuming else args.pretrained)
    if not resuming:
        arg_params, aux_params = initialize_frcnn(
            sym, data_shapes, arg_params, aux_params)

    # check parameter shapes
    check_shape(sym, data_shapes + label_shapes, arg_params, aux_params)

    # check fixed params
    fixed_param_names = get_fixed_params(sym, args.net_fixed_params)
    logger.info('locking params\n%s' % pprint.pformat(fixed_param_names))

    # metric: rpn accuracy / log loss / l1 loss followed by the rcnn ones
    metric_classes = (RPNAccMetric, RPNLogLossMetric, RPNL1LossMetric,
                      RCNNAccMetric, RCNNLogLossMetric, RCNNL1LossMetric)
    child_metrics = [metric_cls() for metric_cls in metric_classes]
    eval_metrics = mx.gluon.metric.CompositeEvalMetric()
    for child_metric in child_metrics:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = mx.callback.Speedometer(
        batch_size, frequent=args.log_interval, auto_reset=False)
    epoch_end_callback = mx.callback.do_checkpoint(args.save_prefix)

    # learning schedule: decay epochs not after start_epoch are folded into
    # the starting lr, the remaining ones become iteration milestones
    lr_factor = 0.1
    start_epoch = args.start_epoch
    decay_epochs = [int(token) for token in args.lr_decay_epoch.split(',')]
    lr_epoch_diff = [
        decay_epoch - start_epoch
        for decay_epoch in decay_epochs
        if decay_epoch > start_epoch
    ]
    num_passed = len(decay_epochs) - len(lr_epoch_diff)
    lr = args.lr * (lr_factor ** num_passed)
    num_records = len(roidb)
    lr_iters = [
        int(epoch_offset * num_records / batch_size)
        for epoch_offset in lr_epoch_diff
    ]
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' %
                (lr, lr_epoch_diff, lr_iters))
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

    # optimizer
    optimizer_params = dict(
        momentum=0.9,
        wd=0.0005,
        learning_rate=lr,
        lr_scheduler=lr_scheduler,
        rescale_grad=(1.0 / batch_size),
        clip_gradient=5)

    # train
    mod = Module(
        sym,
        data_names=data_names,
        label_names=label_names,
        logger=logger,
        context=ctx,
        work_load_list=None,
        fixed_param_names=fixed_param_names)
    mod.fit(
        train_data,
        eval_metric=eval_metrics,
        epoch_end_callback=epoch_end_callback,
        batch_end_callback=batch_end_callback,
        kvstore='device',
        optimizer='sgd',
        optimizer_params=optimizer_params,
        arg_params=arg_params,
        aux_params=aux_params,
        begin_epoch=start_epoch,
        num_epoch=args.epochs)
Example #23
0
 def filter_roidb(self):
     """
     drop every roidb record whose 'gt_classes' entry is empty and log
     the record count before and after
     """
     count_before = len(self._roidb)
     kept_records = []
     for record in self._roidb:
         # len() rather than truthiness, same check as a bare len(...) test
         if len(record['gt_classes']) > 0:
             kept_records.append(record)
     self._roidb = kept_records
     count_after = len(self._roidb)
     logger.info('filter roidb: {} -> {}'.format(count_before, count_after))
Example #24
0
 def evaluate_detections(self, detections, **kwargs):
     """
     pickle detections to '<root_path>/cache/<name>_detections.pkl', then
     hand them to self._evaluate_detections
     :param detections: object to cache and evaluate
     :param kwargs: forwarded unchanged to self._evaluate_detections
     """
     cache_file_name = '{}_{}.pkl'.format(self._name, 'detections')
     cache_path = os.path.join(self._root_path, 'cache', cache_file_name)
     logger.info('saving cache {}'.format(cache_path))
     with open(cache_path, 'wb') as cache_file:
         pickle.dump(detections, cache_file, pickle.HIGHEST_PROTOCOL)
     # the result of the evaluation is not returned to the caller
     self._evaluate_detections(detections, **kwargs)
Example #25
0
def train_net(sym, roidb, args):
    """
    build the loader, metrics, lr schedule and module for sym, then fit it
    :param sym: network symbol, its internals must contain
        'rpn_cls_score_output'
    :param roidb: roi records handed to AnchorLoader; len(roidb) is also
        used to turn lr decay epochs into iteration counts
    :param args: options object read by attribute (gpus, rcnn_batch_size,
        rpn_*, img_*, resume, pretrained, net_fixed_params, log_interval,
        save_prefix, lr, lr_decay_epoch, start_epoch, epochs); gpus is a
        comma separated string of device ids, an empty or None value
        selects the cpu
    """
    # print config
    logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))

    # setup multi-gpu; fall back to cpu when no gpu id is given instead of
    # failing on int('') / None.split
    if args.gpus:
        ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')]
    else:
        ctx = [mx.cpu()]
    batch_size = args.rcnn_batch_size * len(ctx)

    # load training data
    feat_sym = sym.get_internals()['rpn_cls_score_output']
    ag = AnchorGenerator(feat_stride=args.rpn_feat_stride,
                         anchor_scales=args.rpn_anchor_scales, anchor_ratios=args.rpn_anchor_ratios)
    asp = AnchorSampler(allowed_border=args.rpn_allowed_border, batch_rois=args.rpn_batch_rois,
                        fg_fraction=args.rpn_fg_fraction, fg_overlap=args.rpn_fg_overlap,
                        bg_overlap=args.rpn_bg_overlap)
    train_data = AnchorLoader(roidb, batch_size, args.img_short_side, args.img_long_side,
                              args.img_pixel_means, args.img_pixel_stds, feat_sym, ag, asp, shuffle=True)

    # produce shape max possible (square image with the long side on both axes)
    _, out_shape, _ = feat_sym.infer_shape(data=(1, 3, args.img_long_side, args.img_long_side))
    feat_height, feat_width = out_shape[0][-2:]
    rpn_num_anchors = len(args.rpn_anchor_scales) * len(args.rpn_anchor_ratios)
    data_names = ['data', 'im_info', 'gt_boxes']
    label_names = ['label', 'bbox_target', 'bbox_weight']
    data_shapes = [('data', (batch_size, 3, args.img_long_side, args.img_long_side)),
                   ('im_info', (batch_size, 3)),
                   ('gt_boxes', (batch_size, 100, 5))]
    label_shapes = [('label', (batch_size, 1, rpn_num_anchors * feat_height, feat_width)),
                    ('bbox_target', (batch_size, 4 * rpn_num_anchors, feat_height, feat_width)),
                    ('bbox_weight', (batch_size, 4 * rpn_num_anchors, feat_height, feat_width))]

    # print shapes
    data_shape_dict, out_shape_dict = infer_data_shape(sym, data_shapes + label_shapes)
    logger.info('max input shape\n%s' % pprint.pformat(data_shape_dict))
    logger.info('max output shape\n%s' % pprint.pformat(out_shape_dict))

    # load and initialize params; only a fresh (non-resumed) run goes
    # through initialize_frcnn
    if args.resume:
        arg_params, aux_params = load_param(args.resume)
    else:
        arg_params, aux_params = load_param(args.pretrained)
        arg_params, aux_params = initialize_frcnn(sym, data_shapes, arg_params, aux_params)

    # check parameter shapes
    check_shape(sym, data_shapes + label_shapes, arg_params, aux_params)

    # check fixed params
    fixed_param_names = get_fixed_params(sym, args.net_fixed_params)
    logger.info('locking params\n%s' % pprint.pformat(fixed_param_names))

    # metric: rpn accuracy / log loss / l1 loss followed by the rcnn ones
    rpn_eval_metric = RPNAccMetric()
    rpn_cls_metric = RPNLogLossMetric()
    rpn_bbox_metric = RPNL1LossMetric()
    eval_metric = RCNNAccMetric()
    cls_metric = RCNNLogLossMetric()
    bbox_metric = RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = mx.callback.Speedometer(batch_size, frequent=args.log_interval, auto_reset=False)
    epoch_end_callback = mx.callback.do_checkpoint(args.save_prefix)

    # learning schedule: decay epochs not after start_epoch are folded into
    # the starting lr, the remaining ones become iteration milestones
    base_lr = args.lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in args.lr_decay_epoch.split(',')]
    lr_epoch_diff = [epoch - args.start_epoch for epoch in lr_epoch if epoch > args.start_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters))
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.0005,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': (1.0 / batch_size),
                        'clip_gradient': 5}

    # train
    mod = Module(sym, data_names=data_names, label_names=label_names,
                 logger=logger, context=ctx, work_load_list=None,
                 fixed_param_names=fixed_param_names)
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore='device',
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params, begin_epoch=args.start_epoch, num_epoch=args.epochs)