Example 1
    def _load_pascal3d_voxel_exemplar_annotation(self, index):
        """
        Load image and bounding boxes info from a txt file in the PASCAL subcategory exemplar format.
        """

        if self._image_set == 'val':
            return self._load_pascal_annotation(index)

        filename = os.path.join(self._pascal3d_path, cfg.SUBCLS_NAME,
                                index + '.txt')
        assert os.path.exists(filename), \
                'Path does not exist: {}'.format(filename)

        # the annotation file contains flipped objects
        lines = []
        lines_flipped = []
        with open(filename) as f:
            for line in f:
                words = line.split()
                subcls = int(words[1])
                is_flip = int(words[2])
                if subcls != -1:
                    if is_flip == 0:
                        lines.append(line)
                    else:
                        lines_flipped.append(line)

        num_objs = len(lines)

        # store information of flipped objects
        assert (num_objs == len(lines_flipped)
                ), 'The number of flipped objects is not the same!'
        gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)

        for ix, line in enumerate(lines_flipped):
            words = line.split()
            subcls = int(words[1])
            gt_subclasses_flipped[ix] = subcls

        boxes = np.zeros((num_objs, 4), dtype=np.float32)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        gt_subclasses = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                      dtype=np.int32)

        for ix, line in enumerate(lines):
            words = line.split()
            cls = self._class_to_ind[words[0]]
            subcls = int(words[1])
            # Make pixel indexes 0-based
            boxes[ix, :] = [float(n) - 1 for n in words[3:7]]
            gt_classes[ix] = cls
            gt_subclasses[ix] = subcls
            overlaps[ix, cls] = 1.0
            subindexes[ix, cls] = subcls
            subindexes_flipped[ix, cls] = gt_subclasses_flipped[ix]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        subindexes = scipy.sparse.csr_matrix(subindexes)
        subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

        if cfg.IS_RPN:
            if cfg.IS_MULTISCALE:
                # compute overlaps between grid boxes and gt boxes in multi-scales
                # rescale the gt boxes
                boxes_all = np.zeros((0, 4), dtype=np.float32)
                for scale in cfg.TRAIN.SCALES:
                    boxes_all = np.vstack((boxes_all, boxes * scale))
                gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

                # compute grid boxes
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]
                boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

                # compute overlap
                overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float),
                                              boxes_all.astype(np.float))

                # check how many gt boxes are covered by grids
                if num_objs != 0:
                    index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes_all == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])
                    index_covered = np.unique(index[fg_inds])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[index_covered] == i)[0])
            else:
                assert len(cfg.TRAIN.SCALES_BASE) == 1
                scale = cfg.TRAIN.SCALES_BASE[0]
                feat_stride = 16
                # faster rcnn region proposal
                base_size = 16
                ratios = [3.0, 2.0, 1.5, 1.0, 0.75, 0.5, 0.25]
                scales = 2**np.arange(1, 6, 0.5)
                anchors = generate_anchors(base_size, ratios, scales)
                num_anchors = anchors.shape[0]

                # image size
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]

                # height and width of the heatmap
                height = np.round((image_height * scale - 1) / 4.0 + 1)
                height = np.floor((height - 1) / 2 + 1 + 0.5)
                height = np.floor((height - 1) / 2 + 1 + 0.5)

                width = np.round((image_width * scale - 1) / 4.0 + 1)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)

                # gt boxes
                gt_boxes = boxes * scale

                # 1. Generate proposals from bbox deltas and shifted anchors
                shift_x = np.arange(0, width) * feat_stride
                shift_y = np.arange(0, height) * feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack(
                    (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                     shift_y.ravel())).transpose()
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = num_anchors
                K = shifts.shape[0]
                all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape(
                    (1, K, 4)).transpose((1, 0, 2)))
                all_anchors = all_anchors.reshape((K * A, 4))

                # compute overlap
                overlaps_grid = bbox_overlaps(all_anchors.astype(np.float),
                                              gt_boxes.astype(np.float))

                # check how many gt boxes are covered by anchors
                if num_objs != 0:
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[fg_inds] == i)[0])

        return {
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False
        }
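
Each annotation line above is consumed positionally: words[0] is the class name, words[1] the subclass index (with -1 marking objects to skip), words[2] the flip flag, and words[3:7] the box corners. Below is a minimal illustrative parser under that assumed layout; any trailing columns are ignored by this loader, and the sample line is hypothetical.

# A minimal, illustrative parser for one annotation line, mirroring the
# indexing used above (words[0], words[1], words[2], words[3:7]); the exact
# file layout beyond these columns is an assumption.
def parse_exemplar_line(line):
    words = line.split()
    return {
        'class_name': words[0],                      # e.g. 'car'
        'subclass': int(words[1]),                   # -1 means "skip this object"
        'is_flip': int(words[2]),                    # 0: original, 1: flipped copy
        'box': [float(n) - 1 for n in words[3:7]],   # 0-based (x1, y1, x2, y2)
    }

# Hypothetical line: parse_exemplar_line('car 12 0 35.0 60.0 120.0 140.0')
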
Example 2
    def _load_kitti_voxel_exemplar_annotation(self, index):
        """
        Load image and bounding boxes info from a txt file in the KITTI voxel exemplar format.
        """
        if self._image_set == 'training' and self._seq_name != 'trainval':
            prefix = 'train'
        elif self._image_set == 'training':
            prefix = 'trainval'
        else:
            prefix = ''

        # the annotation file contains flipped objects
        lines = []
        lines_flipped = []
        if prefix != '':
            filename = os.path.join(self._kitti_tracking_path, cfg.SUBCLS_NAME, prefix, index + '.txt')
            if os.path.exists(filename):
                print filename

                with open(filename) as f:
                    for line in f:
                        words = line.split()
                        subcls = int(words[1])
                        is_flip = int(words[2])
                        if subcls != -1:
                            if is_flip == 0:
                                lines.append(line)
                            else:
                                lines_flipped.append(line)
        
        num_objs = len(lines)

        # store information of flipped objects
        assert (num_objs == len(lines_flipped)), 'The number of flipped objects is not the same!'
        gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
        
        for ix, line in enumerate(lines_flipped):
            words = line.split()
            subcls = int(words[1])
            gt_subclasses_flipped[ix] = subcls

        boxes = np.zeros((num_objs, 4), dtype=np.float32)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        gt_subclasses = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32)

        for ix, line in enumerate(lines):
            words = line.split()
            cls = self._class_to_ind[words[0]]
            subcls = int(words[1])
            boxes[ix, :] = [float(n) for n in words[3:7]]
            gt_classes[ix] = cls
            gt_subclasses[ix] = subcls
            overlaps[ix, cls] = 1.0
            subindexes[ix, cls] = subcls
            subindexes_flipped[ix, cls] = gt_subclasses_flipped[ix]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        subindexes = scipy.sparse.csr_matrix(subindexes)
        subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

        if cfg.IS_RPN:
            if cfg.IS_MULTISCALE:
                # compute overlaps between grid boxes and gt boxes in multi-scales
                # rescale the gt boxes
                boxes_all = np.zeros((0, 4), dtype=np.float32)
                for scale in cfg.TRAIN.SCALES:
                    boxes_all = np.vstack((boxes_all, boxes * scale))
                gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

                # compute grid boxes
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]
                boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

                # compute overlap
                overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float), boxes_all.astype(np.float))
        
                # check how many gt boxes are covered by grids
                if num_objs != 0:
                    index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                    max_overlaps = overlaps_grid.max(axis = 0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(np.where((gt_classes_all == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0])
                    index_covered = np.unique(index[fg_inds])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(np.where(gt_classes[index_covered] == i)[0])
            else:
                assert len(cfg.TRAIN.SCALES_BASE) == 1
                scale = cfg.TRAIN.SCALES_BASE[0]
                feat_stride = 16
                # faster rcnn region proposal
                base_size = 16
                ratios = [3.0, 2.0, 1.5, 1.0, 0.75, 0.5, 0.25]
                scales = 2**np.arange(1, 6, 0.5)
                anchors = generate_anchors(base_size, ratios, scales)
                num_anchors = anchors.shape[0]

                # image size
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]

                # height and width of the heatmap
                height = np.round((image_height * scale - 1) / 4.0 + 1)
                height = np.floor((height - 1) / 2 + 1 + 0.5)
                height = np.floor((height - 1) / 2 + 1 + 0.5)

                width = np.round((image_width * scale - 1) / 4.0 + 1)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)

                # gt boxes
                gt_boxes = boxes * scale

                # 1. Generate proposals from bbox deltas and shifted anchors
                shift_x = np.arange(0, width) * feat_stride
                shift_y = np.arange(0, height) * feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = num_anchors
                K = shifts.shape[0]
                all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
                all_anchors = all_anchors.reshape((K * A, 4))

                # compute overlap
                overlaps_grid = bbox_overlaps(all_anchors.astype(np.float), gt_boxes.astype(np.float))
        
                # check how many gt boxes are covered by anchors
                if num_objs != 0:
                    max_overlaps = overlaps_grid.max(axis = 0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(np.where((gt_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(np.where(gt_classes[fg_inds] == i)[0])

        return {'boxes' : boxes,
                'gt_classes': gt_classes,
                'gt_subclasses': gt_subclasses,
                'gt_subclasses_flipped': gt_subclasses_flipped,
                'gt_overlaps': overlaps,
                'gt_subindexes': subindexes, 
                'gt_subindexes_flipped': subindexes_flipped, 
                'flipped' : False}
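
Both exemplar loaders lean on bbox_overlaps to measure how well grid boxes or anchors cover the ground truth. In Fast R-CNN codebases this is usually a compiled Cython kernel; the following is only a NumPy sketch of the same pairwise IoU computation, using the inclusive-pixel (+1) convention these loaders assume.

import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    """Pairwise IoU between (N, 4) boxes and (K, 4) query_boxes, using the
    inclusive-pixel (+1) convention assumed by these loaders."""
    areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    overlaps = np.zeros((boxes.shape[0], query_boxes.shape[0]))
    for k in range(query_boxes.shape[0]):
        q = query_boxes[k]
        q_area = (q[2] - q[0] + 1) * (q[3] - q[1] + 1)
        iw = np.minimum(boxes[:, 2], q[2]) - np.maximum(boxes[:, 0], q[0]) + 1
        ih = np.minimum(boxes[:, 3], q[3]) - np.maximum(boxes[:, 1], q[1]) + 1
        inter = np.maximum(iw, 0.0) * np.maximum(ih, 0.0)
        overlaps[:, k] = inter / (areas + q_area - inter)
    return overlaps
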
Example 3
    def _load_pascal_annotation(self, index):
        """
        Load image and bounding boxes info from an XML file in the PASCAL
        VOC format.
        """
        filename = os.path.join(self._data_path, 'Annotations', index + '.xml')

        # print 'Loading: {}'.format(filename)
        def get_data_from_tag(node, tag):
            return node.getElementsByTagName(tag)[0].childNodes[0].data

        with open(filename) as f:
            data = minidom.parseString(f.read())

        objs = data.getElementsByTagName('object')
        num_objs = len(objs)

        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

        # Load object bounding boxes into a data frame.
        for ix, obj in enumerate(objs):
            # Make pixel indexes 0-based
            x1 = float(get_data_from_tag(obj, 'xmin')) - 1
            y1 = float(get_data_from_tag(obj, 'ymin')) - 1
            x2 = float(get_data_from_tag(obj, 'xmax')) - 1
            y2 = float(get_data_from_tag(obj, 'ymax')) - 1
            name = str(get_data_from_tag(obj, "name")).lower().strip()
            if name in self._classes:
                cls = self._class_to_ind[name]
            else:
                cls = 0
            boxes[ix, :] = [x1, y1, x2, y2]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0

        overlaps = scipy.sparse.csr_matrix(overlaps)
        gt_subclasses = np.zeros((num_objs), dtype=np.int32)
        gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
        subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                      dtype=np.int32)
        subindexes = scipy.sparse.csr_matrix(subindexes)
        subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

        if cfg.IS_RPN:
            if cfg.IS_MULTISCALE:
                # compute overlaps between grid boxes and gt boxes in multi-scales
                # rescale the gt boxes
                boxes_all = np.zeros((0, 4), dtype=np.float32)
                for scale in cfg.TRAIN.SCALES:
                    boxes_all = np.vstack((boxes_all, boxes * scale))
                gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

                # compute grid boxes
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]
                boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

                # compute overlap
                overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float),
                                              boxes_all.astype(np.float))

                # check how many gt boxes are covered by grids
                if num_objs != 0:
                    index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes_all == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])
                    index_covered = np.unique(index[fg_inds])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[index_covered] == i)[0])
            else:
                assert len(cfg.TRAIN.SCALES_BASE) == 1
                scale = cfg.TRAIN.SCALES_BASE[0]
                feat_stride = 16
                # faster rcnn region proposal
                anchors = generate_anchors()
                num_anchors = anchors.shape[0]

                # image size
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]

                # height and width of the heatmap
                height = np.round((image_height * scale - 1) / 4.0 + 1)
                height = np.floor((height - 1) / 2 + 1 + 0.5)
                height = np.floor((height - 1) / 2 + 1 + 0.5)

                width = np.round((image_width * scale - 1) / 4.0 + 1)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)

                # gt boxes
                gt_boxes = boxes * scale

                # 1. Generate proposals from bbox deltas and shifted anchors
                shift_x = np.arange(0, width) * feat_stride
                shift_y = np.arange(0, height) * feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack(
                    (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                     shift_y.ravel())).transpose()
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = num_anchors
                K = shifts.shape[0]
                all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape(
                    (1, K, 4)).transpose((1, 0, 2)))
                all_anchors = all_anchors.reshape((K * A, 4))

                # compute overlap
                overlaps_grid = bbox_overlaps(all_anchors.astype(np.float),
                                              gt_boxes.astype(np.float))

                # check how many gt boxes are covered by anchors
                if num_objs != 0:
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[fg_inds] == i)[0])

        return {
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False
        }
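
The round/floor cascade that computes height and width above appears to track the backbone's spatial shrinkage: one stride-4 stage followed by two stride-2 stages, for an overall stride of 16 (matching feat_stride). A compact restatement, assuming that is exactly what those three lines encode:

import numpy as np

def heatmap_size(dim, scale):
    """Feature-map extent for an image dimension rescaled by `scale`; mirrors
    the round/floor cascade above: /4 once, then /2 twice (total stride 16)."""
    d = np.round((dim * scale - 1) / 4.0 + 1)
    for _ in range(2):                      # two stride-2 stages
        d = np.floor((d - 1) / 2.0 + 1 + 0.5)
    return int(d)

# e.g. heatmap_size(600, 1.0) -> 39, roughly 600 / 16
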
Example 4
def test_net(net, imdb):
    """Test a Fast R-CNN network on an image database."""

    output_dir = get_output_dir(imdb, net)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if 'nissan' in imdb.name:
        output_dir_center = os.path.join(output_dir, 'imagesCenter')
        if not os.path.exists(output_dir_center):
            os.makedirs(output_dir_center)
        output_dir_left = os.path.join(output_dir, 'imagesLeft')
        if not os.path.exists(output_dir_left):
            os.makedirs(output_dir_left)
        output_dir_right = os.path.join(output_dir, 'imagesRight')
        if not os.path.exists(output_dir_right):
            os.makedirs(output_dir_right)

    det_file = os.path.join(output_dir, 'detections.pkl')
    print imdb.name
    if os.path.exists(det_file):
        with open(det_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        print 'Detections loaded from {}'.format(det_file)

        if cfg.IS_RPN:
            print 'Evaluating detections'
            imdb.evaluate_proposals(all_boxes, output_dir)
        else:
            print 'Applying NMS to all detections'
            nms_dets = apply_nms(all_boxes, cfg.TEST.NMS)
            print 'Evaluating detections'
            print imdb.name
            if not 'imagenet3d' in imdb.name:
                imdb.evaluate_detections(nms_dets, output_dir)
            imdb.evaluate_detections_one_file(nms_dets, output_dir)
        return
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # heuristic: keep an average of 40 detections per class per image prior
    # to NMS
    if ('voc' in imdb.name or 'pascal' in imdb.name
            or 'imagenet3d' in imdb.name) and cfg.IS_RPN == False:
        max_per_set = 40 * num_images
        max_per_image = 100
    else:
        max_per_set = np.inf
        # heuristic: keep at most 10000 detections per class per image prior to NMS
        max_per_image = 10000
        # detection threshold for each class (this is adaptively set based on the
        # max_per_set constraint)

    if cfg.IS_RPN:
        thresh = -np.inf * np.ones(imdb.num_classes)
    else:
        thresh = cfg.TEST.DET_THRESHOLD * np.ones(imdb.num_classes)
        # top_scores will hold one minheap of scores per class (used to enforce the max_per_set constraint)
        top_scores = [[] for _ in xrange(imdb.num_classes)]

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    if cfg.IS_RPN == False:
        roidb = imdb.roidb

    for i in xrange(num_images):
        im = cv2.imread(imdb.image_path_at(i))

        _t['im_detect'].tic()
        if cfg.IS_RPN:
            boxes_grid, _, _ = get_boxes_grid(im.shape[0], im.shape[1])
            scores, boxes, scores_subcls, labels, views = im_detect_proposal(
                net, im, boxes_grid, imdb.num_classes, imdb.num_subclasses,
                imdb.subclass_mapping)

            # save conv5 features
            # index = imdb._image_index[i]
            # filename = os.path.join(output_dir, index[5:] + '_conv5.pkl')
            # with open(filename, 'wb') as f:
            #    cPickle.dump(conv5, f, cPickle.HIGHEST_PROTOCOL)
        else:
            if cfg.TEST.IS_PATCH:
                scores, boxes, scores_subcls, views = im_detect_patch(
                    net, im, roidb[i]['boxes'], imdb.num_classes,
                    imdb.num_subclasses)
            else:
                scores, boxes, scores_subcls, views = im_detect(
                    net, im, roidb[i]['boxes'], imdb.num_classes,
                    imdb.num_subclasses)
        _t['im_detect'].toc()

        _t['misc'].tic()
        count = 0
        for j in xrange(1, imdb.num_classes):
            if cfg.IS_RPN:
                # inds = np.where(scores[:, j] > thresh[j])[0]
                inds = np.where(labels == j)[0]
            else:
                inds = np.where((scores[:, j] > thresh[j])
                                & (roidb[i]['gt_classes'] == 0))[0]

            cls_scores = scores[inds, j]
            subcls_scores = scores_subcls[inds, :]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_views = views[inds, j * 3:(j + 1) * 3]

            top_inds = np.argsort(-cls_scores)[:max_per_image]
            cls_scores = cls_scores[top_inds]
            subcls_scores = subcls_scores[top_inds, :]
            cls_boxes = cls_boxes[top_inds, :]
            cls_views = cls_views[top_inds, :]

            if cfg.IS_RPN == False:
                # push new scores onto the minheap
                for val in cls_scores:
                    heapq.heappush(top_scores[j], val)
                # if we've collected more than the max number of detection,
                # then pop items off the minheap and update the class threshold
                if len(top_scores[j]) > max_per_set:
                    while len(top_scores[j]) > max_per_set:
                        heapq.heappop(top_scores[j])
                    thresh[j] = top_scores[j][0]

            # select the maximum score subclass in this class
            if cfg.TEST.SUBCLS and cfg.IS_RPN == False:
                index = np.where(imdb.subclass_mapping == j)[0]
                max_indexes = subcls_scores[:, index].argmax(axis=1)
                sub_classes = index[max_indexes]
            else:
                if subcls_scores.shape[0] == 0:
                    sub_classes = cls_scores
                else:
                    sub_classes = subcls_scores.argmax(axis=1).ravel()

            all_boxes[j][i] = \
                    np.hstack((cls_boxes, cls_scores[:, np.newaxis], sub_classes[:, np.newaxis], cls_views)) \
                    .astype(np.float32, copy=False)
            count = count + len(cls_scores)

            if 0:
                keep = nms(all_boxes[j][i], cfg.TEST.NMS)
                vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :])
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:d} object detected {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, count, _t['im_detect'].average_time, _t['misc'].average_time)

    for j in xrange(1, imdb.num_classes):
        for i in xrange(num_images):
            inds = np.where(all_boxes[j][i][:, 4] > thresh[j])[0]
            all_boxes[j][i] = all_boxes[j][i][inds, :]

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    if cfg.IS_RPN:
        print 'Evaluating detections'
        imdb.evaluate_proposals(all_boxes, output_dir)
        if 'mot' in imdb.name:
            imdb.evaluate_proposals_one_file(all_boxes, output_dir)
    else:
        print 'Applying NMS to all detections'
        nms_dets = apply_nms(all_boxes, cfg.TEST.NMS)
        print 'Evaluating detections'
        if not 'imagenet3d' in imdb.name:
            imdb.evaluate_detections(nms_dets, output_dir)
        imdb.evaluate_detections_one_file(nms_dets, output_dir)
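
The per-class threshold bookkeeping in test_net uses a min-heap so that thresh[j] tracks the max_per_set-th highest score seen so far: once the heap holds more than max_per_set entries, the smallest are popped and the heap root becomes the cutoff. A standalone sketch of that trick (slightly simplified; the loop above only updates the threshold after the heap overflows):

import heapq

def update_threshold(top_scores, new_scores, max_per_set):
    """Keep only the max_per_set largest scores in the heap; return the cutoff.
    Same heap discipline as the loop over cls_scores above."""
    for val in new_scores:
        heapq.heappush(top_scores, val)
    while len(top_scores) > max_per_set:
        heapq.heappop(top_scores)          # drop the smallest scores
    # heap root = smallest retained score = detection threshold
    return top_scores[0] if len(top_scores) == max_per_set else -float('inf')

# scores = []
# update_threshold(scores, [0.9, 0.2, 0.7, 0.8], max_per_set=2)   # -> 0.8
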
Example 5
def prepare_roidb(imdb):
    """Enrich the imdb's roidb by adding some derived quantities that
    are useful for training. This function precomputes the maximum
    overlap, taken over ground-truth boxes, between each ROI and
    each ground-truth box. The class with maximum overlap is also
    recorded.
    """
    cache_file = os.path.join(imdb.cache_path, imdb.name + '_gt_roidb_prepared.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            imdb._roidb = cPickle.load(fid)
        print '{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file)
        return

    roidb = imdb.roidb
    for i in xrange(len(imdb.image_index)):
        roidb[i]['image'] = imdb.image_path_at(i)
        boxes = roidb[i]['boxes']
        labels = roidb[i]['gt_classes']
        info_boxes = np.zeros((0, 18), dtype=np.float32)

        if boxes.shape[0] == 0:
            roidb[i]['info_boxes'] = info_boxes
            continue

        # compute grid boxes
        s = PIL.Image.open(imdb.image_path_at(i)).size
        image_height = s[1]
        image_width = s[0]
        boxes_grid, cx, cy = get_boxes_grid(image_height, image_width)
        
        # for each scale
        for scale_ind, scale in enumerate(cfg.TRAIN.SCALES):
            boxes_rescaled = boxes * scale

            # compute overlap
            overlaps = bbox_overlaps(boxes_grid.astype(np.float), boxes_rescaled.astype(np.float))
            max_overlaps = overlaps.max(axis = 1)
            argmax_overlaps = overlaps.argmax(axis = 1)
            max_classes = labels[argmax_overlaps]

            # select positive boxes
            fg_inds = []
            for k in xrange(1, imdb.num_classes):
                fg_inds.extend(np.where((max_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH))[0])

            if len(fg_inds) > 0:
                gt_inds = argmax_overlaps[fg_inds]
                # bounding box regression targets
                gt_targets = _compute_targets(boxes_grid[fg_inds,:], boxes_rescaled[gt_inds,:])
                # scale mapping for RoI pooling
                scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind]
                scale_map = cfg.TRAIN.SCALES[scale_ind_map]
                # construct the list of positive boxes
                # (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target)
                info_box = np.zeros((len(fg_inds), 18), dtype=np.float32)
                info_box[:, 0] = cx[fg_inds]
                info_box[:, 1] = cy[fg_inds]
                info_box[:, 2] = scale_ind
                info_box[:, 3:7] = boxes_grid[fg_inds,:]
                info_box[:, 7] = scale_ind_map
                info_box[:, 8:12] = boxes_grid[fg_inds,:] * scale_map / scale
                info_box[:, 12] = labels[gt_inds]
                info_box[:, 14:] = gt_targets
                info_boxes = np.vstack((info_boxes, info_box))

        roidb[i]['info_boxes'] = info_boxes

    with open(cache_file, 'wb') as fid:
        cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote gt roidb prepared to {}'.format(cache_file)
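
prepare_roidb delegates the regression targets to _compute_targets, which is not shown here. In this family of codebases the targets are the standard Fast R-CNN (dx, dy, dw, dh) parameterization; the sketch below assumes that form (the actual helper may additionally normalize by precomputed means and stds).

import numpy as np

def _compute_targets_sketch(ex_rois, gt_rois):
    """Bounding-box regression targets in the standard Fast R-CNN
    (dx, dy, dw, dh) parameterization; a sketch of the helper used above."""
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h

    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h

    dx = (gt_cx - ex_cx) / ex_w            # x offset, width-normalized
    dy = (gt_cy - ex_cy) / ex_h            # y offset, height-normalized
    dw = np.log(gt_w / ex_w)               # log width scaling
    dh = np.log(gt_h / ex_h)               # log height scaling
    return np.vstack((dx, dy, dw, dh)).transpose()
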
Example 6
    def _load_pascal_annotation(self, index):
        """
        Load image and bounding boxes info from an XML file in the PASCAL
        VOC format.
        """
        filename = os.path.join(self._data_path, 'Annotations', index + '.xml')
        # print 'Loading: {}'.format(filename)
        def get_data_from_tag(node, tag):
            return node.getElementsByTagName(tag)[0].childNodes[0].data

        with open(filename) as f:
            data = minidom.parseString(f.read())

        objs = data.getElementsByTagName('object')
        num_objs = len(objs)

        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

        # Load object bounding boxes into a data frame.
        for ix, obj in enumerate(objs):
            # Make pixel indexes 0-based
            x1 = float(get_data_from_tag(obj, 'xmin')) - 1
            y1 = float(get_data_from_tag(obj, 'ymin')) - 1
            x2 = float(get_data_from_tag(obj, 'xmax')) - 1
            y2 = float(get_data_from_tag(obj, 'ymax')) - 1
            name = str(get_data_from_tag(obj, "name")).lower().strip()
            if name in self._classes:
                cls = self._class_to_ind[name]
            else:
                cls = 0
            boxes[ix, :] = [x1, y1, x2, y2]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0

        overlaps = scipy.sparse.csr_matrix(overlaps)
        gt_subclasses = np.zeros((num_objs), dtype=np.int32)
        gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
        subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes = scipy.sparse.csr_matrix(subindexes)
        subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

        if cfg.IS_RPN:
            if cfg.IS_MULTISCALE:
                # compute overlaps between grid boxes and gt boxes in multi-scales
                # rescale the gt boxes
                boxes_all = np.zeros((0, 4), dtype=np.float32)
                for scale in cfg.TRAIN.SCALES:
                    boxes_all = np.vstack((boxes_all, boxes * scale))
                gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

                # compute grid boxes
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]
                boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

                # compute overlap
                overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float), boxes_all.astype(np.float))
        
                # check how many gt boxes are covered by grids
                if num_objs != 0:
                    index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                    max_overlaps = overlaps_grid.max(axis = 0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(np.where((gt_classes_all == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0])
                    index_covered = np.unique(index[fg_inds])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(np.where(gt_classes[index_covered] == i)[0])
            else:
                assert len(cfg.TRAIN.SCALES_BASE) == 1
                scale = cfg.TRAIN.SCALES_BASE[0]
                feat_stride = 16
                # faster rcnn region proposal
                anchors = generate_anchors()
                num_anchors = anchors.shape[0]

                # image size
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]

                # height and width of the heatmap
                height = np.round((image_height * scale - 1) / 4.0 + 1)
                height = np.floor((height - 1) / 2 + 1 + 0.5)
                height = np.floor((height - 1) / 2 + 1 + 0.5)

                width = np.round((image_width * scale - 1) / 4.0 + 1)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)

                # gt boxes
                gt_boxes = boxes * scale

                # 1. Generate proposals from bbox deltas and shifted anchors
                shift_x = np.arange(0, width) * feat_stride
                shift_y = np.arange(0, height) * feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = num_anchors
                K = shifts.shape[0]
                all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
                all_anchors = all_anchors.reshape((K * A, 4))

                # compute overlap
                overlaps_grid = bbox_overlaps(all_anchors.astype(np.float), gt_boxes.astype(np.float))
        
                # check how many gt boxes are covered by anchors
                if num_objs != 0:
                    max_overlaps = overlaps_grid.max(axis = 0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(np.where((gt_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(np.where(gt_classes[fg_inds] == i)[0])

        return {'boxes' : boxes,
                'gt_classes': gt_classes,
                'gt_subclasses': gt_subclasses,
                'gt_subclasses_flipped': gt_subclasses_flipped,
                'gt_overlaps' : overlaps,
                'gt_subindexes': subindexes,
                'gt_subindexes_flipped': subindexes_flipped,
                'flipped' : False}
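
Unlike Examples 1 and 2, this loader calls generate_anchors() with its defaults. Either way the anchor count is len(ratios) * len(scales); here is a small sanity check using the explicit parameters from Examples 1 and 2 (whether the defaults match them is an assumption about this repository).

import numpy as np

# Parameters taken from Examples 1 and 2; whether generate_anchors' defaults
# match them is an assumption about this repository.
base_size = 16
ratios = [3.0, 2.0, 1.5, 1.0, 0.75, 0.5, 0.25]
scales = 2 ** np.arange(1, 6, 0.5)        # 2^1 .. 2^5.5 in half-octave steps

num_anchors = len(ratios) * len(scales)   # 7 * 10 = 70 anchors per location
print(num_anchors)
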
Example 7
    def _load_kitti_annotation(self, index):
        """
        Load image and bounding boxes info from a txt file in the KITTI format.
        """

        if self._image_set == 'test':
            lines = []
        else:
            filename = os.path.join(self._data_path, 'training', 'label_2',
                                    index + '.txt')
            lines = []
            with open(filename) as f:
                for line in f:
                    line = line.replace('Van', 'Car')
                    words = line.split()
                    cls = words[0]
                    truncation = float(words[1])
                    occlusion = int(words[2])
                    height = float(words[7]) - float(words[5])
                    if cls in self._class_to_ind and truncation < 0.5 and occlusion < 3 and height > 25:
                        lines.append(line)

        num_objs = len(lines)

        boxes = np.zeros((num_objs, 4), dtype=np.float32)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

        for ix, line in enumerate(lines):
            words = line.split()
            cls = self._class_to_ind[words[0]]
            boxes[ix, :] = [float(n) for n in words[4:8]]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0

        overlaps = scipy.sparse.csr_matrix(overlaps)
        gt_subclasses = np.zeros((num_objs), dtype=np.int32)
        gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
        subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                      dtype=np.int32)
        subindexes = scipy.sparse.csr_matrix(subindexes)
        subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

        if cfg.IS_RPN:
            if cfg.IS_MULTISCALE:
                # compute overlaps between grid boxes and gt boxes in multi-scales
                # rescale the gt boxes
                boxes_all = np.zeros((0, 4), dtype=np.float32)
                for scale in cfg.TRAIN.SCALES:
                    boxes_all = np.vstack((boxes_all, boxes * scale))
                gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

                # compute grid boxes
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]
                boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

                # compute overlap
                overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float),
                                              boxes_all.astype(np.float))

                # check how many gt boxes are covered by grids
                if num_objs != 0:
                    index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes_all == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])
                    index_covered = np.unique(index[fg_inds])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[index_covered] == i)[0])
            else:
                assert len(cfg.TRAIN.SCALES_BASE) == 1
                scale = cfg.TRAIN.SCALES_BASE[0]
                feat_stride = 16
                # faster rcnn region proposal
                anchors = generate_anchors()
                num_anchors = anchors.shape[0]

                # image size
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]

                # height and width of the heatmap
                height = np.round((image_height * scale - 1) / 4.0 + 1)
                height = np.floor((height - 1) / 2 + 1 + 0.5)
                height = np.floor((height - 1) / 2 + 1 + 0.5)

                width = np.round((image_width * scale - 1) / 4.0 + 1)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)

                # gt boxes
                gt_boxes = boxes * scale

                # 1. Generate proposals from bbox deltas and shifted anchors
                shift_x = np.arange(0, width) * feat_stride
                shift_y = np.arange(0, height) * feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack(
                    (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                     shift_y.ravel())).transpose()
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = num_anchors
                K = shifts.shape[0]
                all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape(
                    (1, K, 4)).transpose((1, 0, 2)))
                all_anchors = all_anchors.reshape((K * A, 4))

                # compute overlap
                overlaps_grid = bbox_overlaps(all_anchors.astype(np.float),
                                              gt_boxes.astype(np.float))

                # check how many gt boxes are covered by anchors
                if num_objs != 0:
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[fg_inds] == i)[0])

        return {
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False
        }
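
The KITTI loader keeps an object only when its class is known, truncation is below 0.5, occlusion is below 3, and the box height (bottom minus top, columns 5 and 7) exceeds 25 pixels. A minimal filter extracted from the loop above, with a hypothetical label_2 line for illustration:

def keep_kitti_object(line, known_classes):
    """Replicates the filtering applied in _load_kitti_annotation above."""
    words = line.replace('Van', 'Car').split()
    truncation = float(words[1])
    occlusion = int(words[2])
    height = float(words[7]) - float(words[5])   # bottom - top, in pixels
    return (words[0] in known_classes and truncation < 0.5
            and occlusion < 3 and height > 25)

# Hypothetical KITTI label_2 line (type trunc occ alpha x1 y1 x2 y2 ...):
# keep_kitti_object('Car 0.00 0 1.55 614.24 181.78 727.31 284.77', {'Car'})
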
Example 8
def prepare_roidb(imdb):
    """Enrich the imdb's roidb by adding some derived quantities(可以求导的量) that
    are useful for training. This function precomputes the maximum
    overlap, taken over ground-truth boxes, between each ROI and
    each ground-truth box. The class with maximum overlap is also
    recorded.
    """
    # if a cache file exists, load it and return immediately
    cache_file = os.path.join(imdb.cache_path, imdb.name + '_gt_roidb_prepared.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            imdb._roidb = cPickle.load(fid)
        print '{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file)
        return

    roidb = imdb.roidb
    for i in xrange(len(imdb.image_index)):
        roidb[i]['image'] = imdb.image_path_at(i)
        # these should be the gt boxes
        # note: the boxes in the roidb have not been mapped back to the original image!
        boxes = roidb[i]['boxes']
        labels = roidb[i]['gt_classes']
        # 9 boxes per feature-map point, each carrying foreground/background probabilities
        # this starts out as an empty array:
        # array([], shape=(0, 18), dtype=float32)
        info_boxes = np.zeros((0, 18), dtype=np.float32)

        if boxes.shape[0] == 0:
            roidb[i]['info_boxes'] = info_boxes
            continue

        # compute grid boxes
        s = PIL.Image.open(imdb.image_path_at(i)).size
        image_height = s[1]
        image_width = s[0]
        # input: the actual height and width of the image
        # output: boxes_grid, a large set (num feature-map points * num_aspect)
        #         of [x1, y1, x2, y2], plus centers[:, 0] and centers[:, 1],
        #         i.e. the top-left/bottom-right corners of each box in the
        #         original image and the x/y coordinate of each feature-map point
        # these boxes are not gt: several boxes (of different aspect ratios) are
        # generated for every feature-map point; the boxes in the roidb are the gt
        boxes_grid, cx, cy = get_boxes_grid(image_height, image_width)
        

        # Scales to use during training (can list multiple scales)
        # Each scale is the pixel size of an image's shortest side
        #__C.TRAIN.SCALES = (600,)
        # for each scale
        for scale_ind, scale in enumerate(cfg.TRAIN.SCALES):
            # rescale the gt boxes to this training scale
            boxes_rescaled = boxes * scale

            # compute overlap
            overlaps = bbox_overlaps(boxes_grid.astype(np.float), boxes_rescaled.astype(np.float))
            # for each grid box, find the best-matching gt box
            # the maximum IoU value
            max_overlaps = overlaps.max(axis=1)
            # index of the gt box attaining the maximum IoU
            argmax_overlaps = overlaps.argmax(axis=1)
            # class of the best-matching gt box
            max_classes = labels[argmax_overlaps]

            # select positive boxes
            fg_inds = []
            # loop over all classes and collect the boxes that qualify as fg
            for k in xrange(1, imdb.num_classes):
                # only boxes whose IoU clears the threshold count as fg
                fg_inds.extend(np.where((max_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH))[0])

            if len(fg_inds) > 0:
                # indices of the gt boxes matched to the fg boxes
                gt_inds = argmax_overlaps[fg_inds]
                # bounding box regression targets
                # compute the offsets between each fg box and its matched gt box;
                # the result is 2-D with 4 columns: col 0: x offset, col 1: y offset,
                # col 2: w scaling, col 3: h scaling
                gt_targets = _compute_targets(boxes_grid[fg_inds,:], boxes_rescaled[gt_inds,:])
                
                # scale mapping for RoI pooling
                # (is this variable actually defined in cfg?)
                scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind]
                scale_map = cfg.TRAIN.SCALES[scale_ind_map]

                # build the entries for the fg boxes
                # construct the list of positive boxes
                # (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target)
                # the 18 here is not 9 anchors: it is one anchor, described by 18 columns
                info_box = np.zeros((len(fg_inds), 18), dtype=np.float32)
                info_box[:, 0] = cx[fg_inds]
                info_box[:, 1] = cy[fg_inds]
                info_box[:, 2] = scale_ind
                info_box[:, 3:7] = boxes_grid[fg_inds,:]
                info_box[:, 7] = scale_ind_map
                info_box[:, 8:12] = boxes_grid[fg_inds,:] * scale_map / scale
                info_box[:, 12] = labels[gt_inds]
                info_box[:, 14:] = gt_targets
                info_boxes = np.vstack((info_boxes, info_box))

        roidb[i]['info_boxes'] = info_boxes

    with open(cache_file, 'wb') as fid:
        cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote gt roidb prepared to {}'.format(cache_file)
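
The 18-column info_boxes layout is spelled out piecemeal in the assignments above; collected in one place for reference (column 13, gt_sublabel, is never written by this function and stays zero):

# Column layout of info_boxes as filled in by prepare_roidb above
# (a reference map, not code from the repository):
INFO_BOX_LAYOUT = {
    'cx': 0,                  # grid-box center x on the feature map
    'cy': 1,                  # grid-box center y on the feature map
    'scale_ind': 2,           # index into cfg.TRAIN.SCALES
    'box': slice(3, 7),       # grid box (x1, y1, x2, y2) at this scale
    'scale_ind_map': 7,       # mapped scale index for RoI pooling
    'box_map': slice(8, 12),  # grid box rescaled to the mapped scale
    'gt_label': 12,           # class label of the matched gt box
    'gt_sublabel': 13,        # reserved; left at zero here
    'target': slice(14, 18),  # regression targets (dx, dy, dw, dh)
}
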
Example 9
    def _load_kitti_annotation(self, index):
        """
        Load image and bounding boxes info from a txt file in the KITTI format.
        """

        if self._image_set == 'test':
            lines = []
        else:
            filename = os.path.join(self._data_path, 'training', 'label_2', index + '.txt')
            lines = []
            with open(filename) as f:
                for line in f:
                    line = line.replace('Van', 'Car')
                    words = line.split()
                    cls = words[0]
                    truncation = float(words[1])
                    occlusion = int(words[2])
                    height = float(words[7]) - float(words[5])
                    if cls in self._class_to_ind and truncation < 0.5 and occlusion < 3 and height > 25:
                        lines.append(line)

        num_objs = len(lines)

        boxes = np.zeros((num_objs, 4), dtype=np.float32)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

        for ix, line in enumerate(lines):
            words = line.split()
            cls = self._class_to_ind[words[0]]
            boxes[ix, :] = [float(n) for n in words[4:8]]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0

        overlaps = scipy.sparse.csr_matrix(overlaps)
        gt_subclasses = np.zeros((num_objs), dtype=np.int32)
        gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
        subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes = scipy.sparse.csr_matrix(subindexes)
        subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

        if cfg.IS_RPN:
            if cfg.IS_MULTISCALE:
                # compute overlaps between grid boxes and gt boxes in multi-scales
                # rescale the gt boxes
                boxes_all = np.zeros((0, 4), dtype=np.float32)
                for scale in cfg.TRAIN.SCALES:
                    boxes_all = np.vstack((boxes_all, boxes * scale))
                gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

                # compute grid boxes
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]
                boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

                # compute overlap
                overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float), boxes_all.astype(np.float))
        
                # check how many gt boxes are covered by grids
                if num_objs != 0:
                    index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                    max_overlaps = overlaps_grid.max(axis = 0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(np.where((gt_classes_all == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0])
                    index_covered = np.unique(index[fg_inds])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(np.where(gt_classes[index_covered] == i)[0])
            else:
                assert len(cfg.TRAIN.SCALES_BASE) == 1
                scale = cfg.TRAIN.SCALES_BASE[0]
                feat_stride = 16
                # faster rcnn region proposal
                anchors = generate_anchors()
                num_anchors = anchors.shape[0]

                # image size
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]

                # height and width of the heatmap
                height = np.round((image_height * scale - 1) / 4.0 + 1)
                height = np.floor((height - 1) / 2 + 1 + 0.5)
                height = np.floor((height - 1) / 2 + 1 + 0.5)

                width = np.round((image_width * scale - 1) / 4.0 + 1)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)

                # gt boxes
                gt_boxes = boxes * scale

                # 1. Generate proposals from bbox deltas and shifted anchors
                shift_x = np.arange(0, width) * feat_stride
                shift_y = np.arange(0, height) * feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = num_anchors
                K = shifts.shape[0]
                all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
                all_anchors = all_anchors.reshape((K * A, 4))

                # compute overlap
                overlaps_grid = bbox_overlaps(all_anchors.astype(np.float), gt_boxes.astype(np.float))
        
                # check how many gt boxes are covered by anchors
                if num_objs != 0:
                    max_overlaps = overlaps_grid.max(axis = 0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(np.where((gt_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(np.where(gt_classes[fg_inds] == i)[0])

        return {'boxes' : boxes,
                'gt_classes': gt_classes,
                'gt_subclasses': gt_subclasses,
                'gt_subclasses_flipped': gt_subclasses_flipped,
                'gt_overlaps' : overlaps,
                'gt_subindexes': subindexes,
                'gt_subindexes_flipped': subindexes_flipped,
                'flipped' : False}
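
The anchor-shifting step that recurs in every RPN branch relies on NumPy broadcasting: adding (1, A, 4) anchors to (K, 1, 4) per-cell shifts produces all K*A anchors in a single operation. A tiny standalone demonstration with made-up numbers:

import numpy as np

# Two toy anchors (A = 2) centered at the origin ...
anchors = np.array([[-8.0, -8.0, 8.0, 8.0],
                    [-16.0, -4.0, 16.0, 4.0]])
# ... and a 2x2 feature map with stride 16 (K = 4 cell shifts).
feat_stride = 16
shift_x, shift_y = np.meshgrid(np.arange(2) * feat_stride,
                               np.arange(2) * feat_stride)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()

A, K = anchors.shape[0], shifts.shape[0]
all_anchors = (anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4))
print(all_anchors.shape)   # (8, 4): every anchor at every cell
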
Example 10
def test_net(net, imdb):
    """Test a Fast R-CNN network on an image database."""

    output_dir = get_output_dir(imdb, net)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if 'nissan' in imdb.name:
        output_dir_center = os.path.join(output_dir, 'imagesCenter')
        if not os.path.exists(output_dir_center):
            os.makedirs(output_dir_center)
        output_dir_left = os.path.join(output_dir, 'imagesLeft')
        if not os.path.exists(output_dir_left):
            os.makedirs(output_dir_left)
        output_dir_right = os.path.join(output_dir, 'imagesRight')
        if not os.path.exists(output_dir_right):
            os.makedirs(output_dir_right)

    det_file = os.path.join(output_dir, 'detections.pkl')
    print imdb.name
    if os.path.exists(det_file):
        with open(det_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        print 'Detections loaded from {}'.format(det_file)

        if cfg.IS_RPN:
            print 'Evaluating detections'
            imdb.evaluate_proposals(all_boxes, output_dir)
        else:
            print 'Applying NMS to all detections'
            nms_dets = apply_nms(all_boxes, cfg.TEST.NMS)
            print 'Evaluating detections'
            print imdb.name
            if not 'imagenet3d' in imdb.name:
                imdb.evaluate_detections(nms_dets, output_dir)
            imdb.evaluate_detections_one_file(nms_dets, output_dir)
        return

    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # heuristic: keep an average of 40 detections per class per image prior
    # to NMS
    if ('voc' in imdb.name or 'pascal' in imdb.name or 'imagenet3d' in imdb.name) and cfg.IS_RPN == False:
        max_per_set = 40 * num_images
        max_per_image = 100
    else:
        max_per_set = np.inf
        # heuristic: keep at most 10000 detections per class per image prior to NMS
        max_per_image = 10000
        # the detection threshold for each class is set below (and, in the
        # non-RPN case, adapted based on the max_per_set constraint)

    if cfg.IS_RPN:
        thresh = -np.inf * np.ones(imdb.num_classes)
    else:
        thresh = cfg.TEST.DET_THRESHOLD * np.ones(imdb.num_classes)
        # top_scores will hold one minheap of scores per class (used to enforce the max_per_set constraint)
        top_scores = [[] for _ in xrange(imdb.num_classes)]

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    # timers
    _t = {'im_detect' : Timer(), 'misc' : Timer()}

    if cfg.IS_RPN == False:
        roidb = imdb.roidb

    for i in xrange(num_images):
        im = cv2.imread(imdb.image_path_at(i))

        _t['im_detect'].tic()
        if cfg.IS_RPN:
            boxes_grid, _, _ = get_boxes_grid(im.shape[0], im.shape[1])
            scores, boxes, scores_subcls, labels, views = im_detect_proposal(net, im, boxes_grid, imdb.num_classes, imdb.num_subclasses, imdb.subclass_mapping)

            # save conv5 features
            # index = imdb._image_index[i]
            # filename = os.path.join(output_dir, index[5:] + '_conv5.pkl')
            # with open(filename, 'wb') as f:
            #    cPickle.dump(conv5, f, cPickle.HIGHEST_PROTOCOL)
        else:
            if cfg.TEST.IS_PATCH:
                scores, boxes, scores_subcls, views = im_detect_patch(net, im, roidb[i]['boxes'], imdb.num_classes, imdb.num_subclasses)
            else:
                scores, boxes, scores_subcls, views = im_detect(net, im, roidb[i]['boxes'], imdb.num_classes, imdb.num_subclasses)
        _t['im_detect'].toc()

        _t['misc'].tic()
        count = 0
        for j in xrange(1, imdb.num_classes):
            if cfg.IS_RPN:
                # inds = np.where(scores[:, j] > thresh[j])[0]
                inds = np.where(labels == j)[0]
            else:
                inds = np.where((scores[:, j] > thresh[j]) & (roidb[i]['gt_classes'] == 0))[0]

            cls_scores = scores[inds, j]
            subcls_scores = scores_subcls[inds, :]
            cls_boxes = boxes[inds, j*4:(j+1)*4]
            cls_views = views[inds, j*3:(j+1)*3]

            top_inds = np.argsort(-cls_scores)[:max_per_image]
            cls_scores = cls_scores[top_inds]
            subcls_scores = subcls_scores[top_inds, :]
            cls_boxes = cls_boxes[top_inds, :]
            cls_views = cls_views[top_inds, :]

            if cfg.IS_RPN == False:
                # push new scores onto the minheap
                for val in cls_scores:
                    heapq.heappush(top_scores[j], val)
                # if we've collected more than the max number of detection,
                # then pop items off the minheap and update the class threshold
                if len(top_scores[j]) > max_per_set:
                    while len(top_scores[j]) > max_per_set:
                        heapq.heappop(top_scores[j])
                    thresh[j] = top_scores[j][0]

            # select the maximum score subclass in this class
            if cfg.TEST.SUBCLS and cfg.IS_RPN == False:
                index = np.where(imdb.subclass_mapping == j)[0]
                max_indexes = subcls_scores[:,index].argmax(axis = 1)
                sub_classes = index[max_indexes]
            else:
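                # when nothing survived the filtering, reuse the (equally empty)
                # cls_scores as a placeholder so the hstack below keeps its shape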
                if subcls_scores.shape[0] == 0:
                    sub_classes = cls_scores
                else:
                    sub_classes = subcls_scores.argmax(axis = 1).ravel()

            all_boxes[j][i] = \
                    np.hstack((cls_boxes, cls_scores[:, np.newaxis], sub_classes[:, np.newaxis], cls_views)) \
                    .astype(np.float32, copy=False)
            count = count + len(cls_scores)

            if 0:
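                # debug visualization (disabled): flip the 0 above to 1 to run
                # NMS on this class and draw its detections on the image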
                keep = nms(all_boxes[j][i], cfg.TEST.NMS)
                vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :])
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:d} objects detected {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, count, _t['im_detect'].average_time, _t['misc'].average_time)

    for j in xrange(1, imdb.num_classes):
        for i in xrange(num_images):
            inds = np.where(all_boxes[j][i][:, 4] > thresh[j])[0]
            all_boxes[j][i] = all_boxes[j][i][inds, :]

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    if cfg.IS_RPN:
        print 'Evaluating detections'
        imdb.evaluate_proposals(all_boxes, output_dir)
        if 'mot' in imdb.name:
            imdb.evaluate_proposals_one_file(all_boxes, output_dir)
    else:
        print 'Applying NMS to all detections'
        nms_dets = apply_nms(all_boxes, cfg.TEST.NMS)
        print 'Evaluating detections'
        if not 'imagenet3d' in imdb.name:
            imdb.evaluate_detections(nms_dets, output_dir)
        imdb.evaluate_detections_one_file(nms_dets, output_dir)
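The subtlest part of test_net is the adaptive threshold in the non-RPN branch: top_scores[j] is a min-heap holding the max_per_set highest scores seen so far for class j, and once the heap overflows, its root (the weakest retained score) becomes the new threshold thresh[j] used in the final filtering pass over all_boxes. A stripped-down sketch of just that mechanism, with a made-up score stream and cap:

import heapq
import numpy as np

max_per_set = 5              # toy cap on detections kept for this class
top_scores = []              # min-heap of the highest scores seen so far
thresh = -np.inf             # start by accepting everything

for val in np.random.RandomState(0).rand(20):   # toy stream of scores
    heapq.heappush(top_scores, val)
    # once more than max_per_set scores are held, evict the smallest;
    # the heap root is then the weakest score still worth keeping
    if len(top_scores) > max_per_set:
        while len(top_scores) > max_per_set:
            heapq.heappop(top_scores)
        thresh = top_scores[0]

# detections scoring below thresh can now be dropped, as test_net does in
# its final loop over all_boxes
print(thresh)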
Example no. 11
    def _load_imagenet3d_annotation(self, index):
        """
        Load image and bounding boxes info from txt file in the imagenet3d format.
        """

        if self._image_set == 'test' or self._image_set == 'test_1' or self._image_set == 'test_2':
            lines = []
        else:
            filename = os.path.join(self._imagenet3d_path, 'Labels',
                                    index + '.txt')
            lines = []
            with open(filename) as f:
                for line in f:
                    lines.append(line)

        num_objs = len(lines)

        boxes = np.zeros((num_objs, 4), dtype=np.float32)
        viewpoints = np.zeros(
            (num_objs, 3),
            dtype=np.float32)  # azimuth, elevation, in-plane rotation
        viewpoints_flipped = np.zeros(
            (num_objs, 3),
            dtype=np.float32)  # azimuth, elevation, in-plane rotation
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

        for ix, line in enumerate(lines):
            words = line.split()
            assert len(words) == 5 or len(words) == 8, \
                'Wrong label format: {}'.format(index)
            cls = self._class_to_ind[words[0]]
            boxes[ix, :] = [float(n) for n in words[1:5]]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0
            if len(words) == 8:
                viewpoints[ix, :] = [float(n) for n in words[5:8]]
                # flip the viewpoint
                viewpoints_flipped[ix, 0] = -viewpoints[ix, 0]  # azimuth
                viewpoints_flipped[ix, 1] = viewpoints[ix, 1]  # elevation
                viewpoints_flipped[ix, 2] = -viewpoints[ix, 2]  # in-plane rotation
            else:
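                # no viewpoint annotation in this label file: mark all three
                # angles with inf to flag them as unavailable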
                viewpoints[ix, :] = np.inf
                viewpoints_flipped[ix, :] = np.inf

        gt_subclasses = np.zeros((num_objs), dtype=np.int32)
        gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
        subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                      dtype=np.int32)
        viewindexes_azimuth = np.zeros((num_objs, self.num_classes),
                                       dtype=np.float32)
        viewindexes_azimuth_flipped = np.zeros((num_objs, self.num_classes),
                                               dtype=np.float32)
        viewindexes_elevation = np.zeros((num_objs, self.num_classes),
                                         dtype=np.float32)
        viewindexes_elevation_flipped = np.zeros((num_objs, self.num_classes),
                                                 dtype=np.float32)
        viewindexes_rotation = np.zeros((num_objs, self.num_classes),
                                        dtype=np.float32)
        viewindexes_rotation_flipped = np.zeros((num_objs, self.num_classes),
                                                dtype=np.float32)

        overlaps = scipy.sparse.csr_matrix(overlaps)
        subindexes = scipy.sparse.csr_matrix(subindexes)
        subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)
        viewindexes_azimuth = scipy.sparse.csr_matrix(viewindexes_azimuth)
        viewindexes_azimuth_flipped = scipy.sparse.csr_matrix(
            viewindexes_azimuth_flipped)
        viewindexes_elevation = scipy.sparse.csr_matrix(viewindexes_elevation)
        viewindexes_elevation_flipped = scipy.sparse.csr_matrix(
            viewindexes_elevation_flipped)
        viewindexes_rotation = scipy.sparse.csr_matrix(viewindexes_rotation)
        viewindexes_rotation_flipped = scipy.sparse.csr_matrix(
            viewindexes_rotation_flipped)

        if cfg.IS_RPN:
            if cfg.IS_MULTISCALE:
                # compute overlaps between grid boxes and gt boxes in multi-scales
                # rescale the gt boxes
                boxes_all = np.zeros((0, 4), dtype=np.float32)
                for scale in cfg.TRAIN.SCALES:
                    boxes_all = np.vstack((boxes_all, boxes * scale))
                gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

                # compute grid boxes
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]
                boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

                # compute overlap
                overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float),
                                              boxes_all.astype(np.float))

                # check how many gt boxes are covered by grids
                if num_objs != 0:
                    index = np.tile(list(range(num_objs)),
                                    len(cfg.TRAIN.SCALES))
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in range(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes_all == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])
                    index_covered = np.unique(index[fg_inds])

                    for i in range(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[index_covered] == i)[0])
            else:
                assert len(cfg.TRAIN.SCALES_BASE) == 1
                scale = cfg.TRAIN.SCALES_BASE[0]
                feat_stride = 16
                # faster rcnn region proposal
                base_size = 16
                ratios = cfg.TRAIN.RPN_ASPECTS
                scales = cfg.TRAIN.RPN_SCALES
                anchors = generate_anchors(base_size, ratios, scales)
                num_anchors = anchors.shape[0]

                # image size
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]

                # height and width of the heatmap
                height = np.round((image_height * scale - 1) / 4.0 + 1)
                height = np.floor((height - 1) / 2 + 1 + 0.5)
                height = np.floor((height - 1) / 2 + 1 + 0.5)

                width = np.round((image_width * scale - 1) / 4.0 + 1)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)

                # gt boxes
                gt_boxes = boxes * scale

                # 1. Generate proposals from bbox deltas and shifted anchors
                shift_x = np.arange(0, width) * feat_stride
                shift_y = np.arange(0, height) * feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack(
                    (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                     shift_y.ravel())).transpose()
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = num_anchors
                K = shifts.shape[0]
                all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape(
                    (1, K, 4)).transpose((1, 0, 2)))
                all_anchors = all_anchors.reshape((K * A, 4))

                # compute overlap
                overlaps_grid = bbox_overlaps(all_anchors.astype(np.float),
                                              gt_boxes.astype(np.float))

                # check how many gt boxes are covered by anchors
                if num_objs != 0:
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in range(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])

                    for i in range(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[fg_inds] == i)[0])

        return {
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_viewpoints': viewpoints,
            'gt_viewpoints_flipped': viewpoints_flipped,
            'gt_viewindexes_azimuth': viewindexes_azimuth,
            'gt_viewindexes_azimuth_flipped': viewindexes_azimuth_flipped,
            'gt_viewindexes_elevation': viewindexes_elevation,
            'gt_viewindexes_elevation_flipped': viewindexes_elevation_flipped,
            'gt_viewindexes_rotation': viewindexes_rotation,
            'gt_viewindexes_rotation_flipped': viewindexes_rotation_flipped,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False
        }
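For horizontally mirrored training images, the loader above negates azimuth and in-plane rotation and keeps elevation; that is the whole viewpoint-flipping rule. A small vectorized sketch of the same transform via a hypothetical flip_viewpoints helper on an (N, 3) viewpoint array (the sample angles are invented; note the loader keeps inf markers for missing annotations as inf rather than negating them):

import numpy as np

def flip_viewpoints(viewpoints):
    """Mirror (azimuth, elevation, in-plane rotation) rows for a horizontally
    flipped image: negate azimuth and rotation, keep elevation."""
    flipped = viewpoints.copy()
    flipped[:, 0] = -viewpoints[:, 0]   # azimuth changes sign
    flipped[:, 2] = -viewpoints[:, 2]   # in-plane rotation changes sign
    return flipped

vp = np.array([[30.0, 10.0, -5.0],
               [-120.0, 0.0, 15.0]], dtype=np.float32)   # toy angles
print(flip_viewpoints(vp))   # azimuth and rotation negated, elevation kept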