def _load_pascal3d_voxel_exemplar_annotation(self, index):
    """
    Load image and bounding boxes info from txt file in the pascal subcategory
    exemplar format.
    """
    if self._image_set == 'val':
        return self._load_pascal_annotation(index)

    filename = os.path.join(self._pascal3d_path, cfg.SUBCLS_NAME, index + '.txt')
    assert os.path.exists(filename), \
        'Path does not exist: {}'.format(filename)

    # the annotation file contains flipped objects
    lines = []
    lines_flipped = []
    with open(filename) as f:
        for line in f:
            words = line.split()
            subcls = int(words[1])
            is_flip = int(words[2])
            if subcls != -1:
                if is_flip == 0:
                    lines.append(line)
                else:
                    lines_flipped.append(line)

    num_objs = len(lines)

    # store information of flipped objects
    assert num_objs == len(lines_flipped), 'The number of flipped objects is not the same!'
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)

    for ix, line in enumerate(lines_flipped):
        words = line.split()
        subcls = int(words[1])
        gt_subclasses_flipped[ix] = subcls

    boxes = np.zeros((num_objs, 4), dtype=np.float32)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32)

    for ix, line in enumerate(lines):
        words = line.split()
        cls = self._class_to_ind[words[0]]
        subcls = int(words[1])
        # Make pixel indexes 0-based
        boxes[ix, :] = [float(n) - 1 for n in words[3:7]]
        gt_classes[ix] = cls
        gt_subclasses[ix] = subcls
        overlaps[ix, cls] = 1.0
        subindexes[ix, cls] = subcls
        subindexes_flipped[ix, cls] = gt_subclasses_flipped[ix]

    overlaps = scipy.sparse.csr_matrix(overlaps)
    subindexes = scipy.sparse.csr_matrix(subindexes)
    subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in multi-scales
            # rescale the gt boxes
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

            # compute grid boxes
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

            # compute overlap
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float), boxes_all.astype(np.float))

            # check how many gt boxes are covered by grids
            if num_objs != 0:
                index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in xrange(1, self.num_classes):
                    fg_inds.extend(np.where((gt_classes_all == k) &
                                            (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])
                index_covered = np.unique(index[fg_inds])

                for i in xrange(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(np.where(gt_classes[index_covered] == i)[0])
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16

            # faster rcnn region proposal
            base_size = 16
            ratios = [3.0, 2.0, 1.5, 1.0, 0.75, 0.5, 0.25]
            scales = 2**np.arange(1, 6, 0.5)
            anchors = generate_anchors(base_size, ratios, scales)
            num_anchors = anchors.shape[0]

            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]

            # height and width of the heatmap
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)

            # gt boxes
            gt_boxes = boxes * scale

            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) +
                           shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))

            # compute overlap
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float), gt_boxes.astype(np.float))

            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in xrange(1, self.num_classes):
                    fg_inds.extend(np.where((gt_classes == k) &
                                            (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])

                for i in xrange(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(np.where(gt_classes[fg_inds] == i)[0])

    return {'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False}
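The `(1, A, 4) + (K, 1, 4)` broadcast above is the standard Faster R-CNN trick for materializing every base anchor at every cell of the feature map. A minimal self-contained sketch of the same arithmetic with toy numbers (the two base anchors and the 2x3 heatmap are made up; they are not the codebase's `generate_anchors` output):

import numpy as np

# toy base anchors centered near the origin: (A, 4) as (x1, y1, x2, y2)
anchors = np.array([[-8, -8, 8, 8],
                    [-16, -8, 16, 8]], dtype=np.float32)   # A = 2
feat_stride = 16
height, width = 2, 3                                       # toy heatmap size

# one (x, y, x, y) shift per feature-map cell: (K, 4) with K = height * width
shift_x = np.arange(0, width) * feat_stride
shift_y = np.arange(0, height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()

# broadcast (1, A, 4) + (K, 1, 4) -> (K, A, 4), then flatten to (K*A, 4)
A = anchors.shape[0]
K = shifts.shape[0]
all_anchors = (anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2))).reshape((K * A, 4))

print(all_anchors.shape)   # (12, 4): every anchor replicated at every cell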
def _load_kitti_voxel_exemplar_annotation(self, index):
    """
    Load image and bounding boxes info from txt file in the KITTI voxel
    exemplar format.
    """
    if self._image_set == 'training' and self._seq_name != 'trainval':
        prefix = 'train'
    elif self._image_set == 'training':
        prefix = 'trainval'
    else:
        prefix = ''

    if prefix == '':
        lines = []
        lines_flipped = []
    else:
        filename = os.path.join(self._kitti_tracking_path, cfg.SUBCLS_NAME, prefix, index + '.txt')
        if os.path.exists(filename):
            print filename
            # the annotation file contains flipped objects
            lines = []
            lines_flipped = []
            with open(filename) as f:
                for line in f:
                    words = line.split()
                    subcls = int(words[1])
                    is_flip = int(words[2])
                    if subcls != -1:
                        if is_flip == 0:
                            lines.append(line)
                        else:
                            lines_flipped.append(line)
        else:
            lines = []
            lines_flipped = []

    num_objs = len(lines)

    # store information of flipped objects
    assert num_objs == len(lines_flipped), 'The number of flipped objects is not the same!'
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)

    for ix, line in enumerate(lines_flipped):
        words = line.split()
        subcls = int(words[1])
        gt_subclasses_flipped[ix] = subcls

    boxes = np.zeros((num_objs, 4), dtype=np.float32)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32)

    for ix, line in enumerate(lines):
        words = line.split()
        cls = self._class_to_ind[words[0]]
        subcls = int(words[1])
        boxes[ix, :] = [float(n) for n in words[3:7]]
        gt_classes[ix] = cls
        gt_subclasses[ix] = subcls
        overlaps[ix, cls] = 1.0
        subindexes[ix, cls] = subcls
        subindexes_flipped[ix, cls] = gt_subclasses_flipped[ix]

    overlaps = scipy.sparse.csr_matrix(overlaps)
    subindexes = scipy.sparse.csr_matrix(subindexes)
    subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in multi-scales
            # rescale the gt boxes
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

            # compute grid boxes
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

            # compute overlap
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float), boxes_all.astype(np.float))

            # check how many gt boxes are covered by grids
            if num_objs != 0:
                index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in xrange(1, self.num_classes):
                    fg_inds.extend(np.where((gt_classes_all == k) &
                                            (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])
                index_covered = np.unique(index[fg_inds])

                for i in xrange(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(np.where(gt_classes[index_covered] == i)[0])
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16

            # faster rcnn region proposal
            base_size = 16
            ratios = [3.0, 2.0, 1.5, 1.0, 0.75, 0.5, 0.25]
            scales = 2**np.arange(1, 6, 0.5)
            anchors = generate_anchors(base_size, ratios, scales)
            num_anchors = anchors.shape[0]

            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]

            # height and width of the heatmap
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)

            # gt boxes
            gt_boxes = boxes * scale

            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) +
                           shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))

            # compute overlap
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float), gt_boxes.astype(np.float))

            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in xrange(1, self.num_classes):
                    fg_inds.extend(np.where((gt_classes == k) &
                                            (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])

                for i in xrange(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(np.where(gt_classes[fg_inds] == i)[0])

    return {'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False}
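Both voxel-exemplar loaders read the same per-line layout: class name, subclass id, flip flag, then the box corners in fields 3-6. A hedged restatement of that contract (the sample line is made up; the field meanings are inferred from the `words[...]` indexing above, not from a format spec):

# inferred line layout: class subcls is_flip x1 y1 x2 y2 ...
line = 'car 12 0 100.0 150.0 300.0 400.0'
words = line.split()

cls_name = words[0]                    # object class, e.g. 'car'
subcls = int(words[1])                 # subcategory id; -1 marks an ignored object
is_flip = int(words[2])                # 0: original object, 1: its horizontally flipped copy
box = [float(n) for n in words[3:7]]   # x1, y1, x2, y2

print('{} subcls={} flipped={} box={}'.format(cls_name, subcls, is_flip, box))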
def _load_pascal_annotation(self, index):
    """
    Load image and bounding boxes info from XML file in the PASCAL VOC format.
    """
    filename = os.path.join(self._data_path, 'Annotations', index + '.xml')
    # print 'Loading: {}'.format(filename)

    def get_data_from_tag(node, tag):
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    with open(filename) as f:
        data = minidom.parseString(f.read())

    objs = data.getElementsByTagName('object')
    num_objs = len(objs)

    boxes = np.zeros((num_objs, 4), dtype=np.uint16)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

    # Load object bounding boxes into a data frame.
    for ix, obj in enumerate(objs):
        # Make pixel indexes 0-based
        x1 = float(get_data_from_tag(obj, 'xmin')) - 1
        y1 = float(get_data_from_tag(obj, 'ymin')) - 1
        x2 = float(get_data_from_tag(obj, 'xmax')) - 1
        y2 = float(get_data_from_tag(obj, 'ymax')) - 1
        name = str(get_data_from_tag(obj, "name")).lower().strip()
        if name in self._classes:
            cls = self._class_to_ind[name]
        else:
            cls = 0
        boxes[ix, :] = [x1, y1, x2, y2]
        gt_classes[ix] = cls
        overlaps[ix, cls] = 1.0

    overlaps = scipy.sparse.csr_matrix(overlaps)

    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
    subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes = scipy.sparse.csr_matrix(subindexes)
    subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in multi-scales
            # rescale the gt boxes
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

            # compute grid boxes
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

            # compute overlap
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float), boxes_all.astype(np.float))

            # check how many gt boxes are covered by grids
            if num_objs != 0:
                index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in xrange(1, self.num_classes):
                    fg_inds.extend(np.where((gt_classes_all == k) &
                                            (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])
                index_covered = np.unique(index[fg_inds])

                for i in xrange(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(np.where(gt_classes[index_covered] == i)[0])
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16

            # faster rcnn region proposal
            anchors = generate_anchors()
            num_anchors = anchors.shape[0]

            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]

            # height and width of the heatmap
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)

            # gt boxes
            gt_boxes = boxes * scale

            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) +
                           shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))

            # compute overlap
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float), gt_boxes.astype(np.float))

            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in xrange(1, self.num_classes):
                    fg_inds.extend(np.where((gt_classes == k) &
                                            (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])

                for i in xrange(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(np.where(gt_classes[fg_inds] == i)[0])

    return {'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False}
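The repeated round/floor chain that computes `height` and `width` retraces the network's downsampling arithmetic stage by stage: one stride-4 stage followed by two stride-2 stages, which together give the overall stride of 16 used as `feat_stride`. That layer-by-layer reading is my inference from the formula, not something the code states. A sketch:

import numpy as np

def heatmap_size(pixels, scale):
    # stride-4 stage: out = round((in - 1) / 4 + 1)
    n = np.round((pixels * scale - 1) / 4.0 + 1)
    # two stride-2 stages: out = floor((in - 1) / 2 + 1), with +0.5 for rounding
    n = np.floor((n - 1) / 2.0 + 1 + 0.5)
    n = np.floor((n - 1) / 2.0 + 1 + 0.5)
    return int(n)

# a 600x1000 image at scale 1.0 yields a 39x64 heatmap (overall stride ~16)
print('{} x {}'.format(heatmap_size(600, 1.0), heatmap_size(1000, 1.0)))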
def test_net(net, imdb):
    """Test a Fast R-CNN network on an image database."""

    output_dir = get_output_dir(imdb, net)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if 'nissan' in imdb.name:
        output_dir_center = os.path.join(output_dir, 'imagesCenter')
        if not os.path.exists(output_dir_center):
            os.makedirs(output_dir_center)
        output_dir_left = os.path.join(output_dir, 'imagesLeft')
        if not os.path.exists(output_dir_left):
            os.makedirs(output_dir_left)
        output_dir_right = os.path.join(output_dir, 'imagesRight')
        if not os.path.exists(output_dir_right):
            os.makedirs(output_dir_right)

    det_file = os.path.join(output_dir, 'detections.pkl')
    print imdb.name
    if os.path.exists(det_file):
        with open(det_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        print 'Detections loaded from {}'.format(det_file)

        if cfg.IS_RPN:
            print 'Evaluating detections'
            imdb.evaluate_proposals(all_boxes, output_dir)
        else:
            print 'Applying NMS to all detections'
            nms_dets = apply_nms(all_boxes, cfg.TEST.NMS)
            print 'Evaluating detections'
            print imdb.name
            if not 'imagenet3d' in imdb.name:
                imdb.evaluate_detections(nms_dets, output_dir)
            imdb.evaluate_detections_one_file(nms_dets, output_dir)
        return

    num_images = len(imdb.image_index)

    # heuristic: keep an average of 40 detections per class per image prior
    # to NMS
    if ('voc' in imdb.name or 'pascal' in imdb.name or 'imagenet3d' in imdb.name) and cfg.IS_RPN == False:
        max_per_set = 40 * num_images
        max_per_image = 100
    else:
        max_per_set = np.inf
        # heuristic: keep at most 10000 detections per class per image prior to NMS
        max_per_image = 10000

    # detection threshold for each class (this is adaptively set based on the
    # max_per_set constraint)
    if cfg.IS_RPN:
        thresh = -np.inf * np.ones(imdb.num_classes)
    else:
        thresh = cfg.TEST.DET_THRESHOLD * np.ones(imdb.num_classes)

    # top_scores will hold one minheap of scores per class (used to enforce
    # the max_per_set constraint)
    top_scores = [[] for _ in xrange(imdb.num_classes)]

    # all detections are collected into:
    #   all_boxes[cls][image] = N x 9 array of detections in
    #   (x1, y1, x2, y2, score, subclass, view angles)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    if cfg.IS_RPN == False:
        roidb = imdb.roidb

    for i in xrange(num_images):
        im = cv2.imread(imdb.image_path_at(i))

        _t['im_detect'].tic()
        if cfg.IS_RPN:
            boxes_grid, _, _ = get_boxes_grid(im.shape[0], im.shape[1])
            scores, boxes, scores_subcls, labels, views = im_detect_proposal(
                net, im, boxes_grid, imdb.num_classes, imdb.num_subclasses,
                imdb.subclass_mapping)

            # save conv5 features
            # index = imdb._image_index[i]
            # filename = os.path.join(output_dir, index[5:] + '_conv5.pkl')
            # with open(filename, 'wb') as f:
            #     cPickle.dump(conv5, f, cPickle.HIGHEST_PROTOCOL)
        else:
            if cfg.TEST.IS_PATCH:
                scores, boxes, scores_subcls, views = im_detect_patch(
                    net, im, roidb[i]['boxes'], imdb.num_classes,
                    imdb.num_subclasses)
            else:
                scores, boxes, scores_subcls, views = im_detect(
                    net, im, roidb[i]['boxes'], imdb.num_classes,
                    imdb.num_subclasses)
        _t['im_detect'].toc()

        _t['misc'].tic()
        count = 0
        for j in xrange(1, imdb.num_classes):
            if cfg.IS_RPN:
                # inds = np.where(scores[:, j] > thresh[j])[0]
                inds = np.where(labels == j)[0]
            else:
                inds = np.where((scores[:, j] > thresh[j]) & (roidb[i]['gt_classes'] == 0))[0]
            cls_scores = scores[inds, j]
            subcls_scores = scores_subcls[inds, :]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_views = views[inds, j * 3:(j + 1) * 3]
            top_inds = np.argsort(-cls_scores)[:max_per_image]
            cls_scores = cls_scores[top_inds]
            subcls_scores = subcls_scores[top_inds, :]
            cls_boxes = cls_boxes[top_inds, :]
            cls_views = cls_views[top_inds, :]

            if cfg.IS_RPN == False:
                # push new scores onto the minheap
                for val in cls_scores:
                    heapq.heappush(top_scores[j], val)
                # if we've collected more than the max number of detections,
                # then pop items off the minheap and update the class threshold
                if len(top_scores[j]) > max_per_set:
                    while len(top_scores[j]) > max_per_set:
                        heapq.heappop(top_scores[j])
                    thresh[j] = top_scores[j][0]

            # select the maximum score subclass in this class
            if cfg.TEST.SUBCLS and cfg.IS_RPN == False:
                index = np.where(imdb.subclass_mapping == j)[0]
                max_indexes = subcls_scores[:, index].argmax(axis=1)
                sub_classes = index[max_indexes]
            else:
                if subcls_scores.shape[0] == 0:
                    sub_classes = cls_scores
                else:
                    sub_classes = subcls_scores.argmax(axis=1).ravel()

            all_boxes[j][i] = \
                np.hstack((cls_boxes, cls_scores[:, np.newaxis],
                           sub_classes[:, np.newaxis], cls_views)) \
                .astype(np.float32, copy=False)
            count = count + len(cls_scores)

            if 0:
                keep = nms(all_boxes[j][i], cfg.TEST.NMS)
                vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :])
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:d} objects detected {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, count,
                    _t['im_detect'].average_time, _t['misc'].average_time)

    for j in xrange(1, imdb.num_classes):
        for i in xrange(num_images):
            inds = np.where(all_boxes[j][i][:, 4] > thresh[j])[0]
            all_boxes[j][i] = all_boxes[j][i][inds, :]

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    if cfg.IS_RPN:
        print 'Evaluating detections'
        imdb.evaluate_proposals(all_boxes, output_dir)
        if 'mot' in imdb.name:
            imdb.evaluate_proposals_one_file(all_boxes, output_dir)
    else:
        print 'Applying NMS to all detections'
        nms_dets = apply_nms(all_boxes, cfg.TEST.NMS)
        print 'Evaluating detections'
        if not 'imagenet3d' in imdb.name:
            imdb.evaluate_detections(nms_dets, output_dir)
        imdb.evaluate_detections_one_file(nms_dets, output_dir)
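The `top_scores` min-heaps implement the adaptive per-class threshold: every score for a class is pushed, and once more than `max_per_set` have accumulated, the smallest are popped until exactly `max_per_set` remain; the smallest survivor becomes the new threshold, so only the top `max_per_set` detections per class can ultimately pass. The same trick in isolation, with made-up scores:

import heapq

max_per_set = 5
top_scores = []          # min-heap of the best scores seen so far
thresh = -float('inf')   # adaptive score threshold

for score in [0.9, 0.1, 0.8, 0.3, 0.7, 0.95, 0.2, 0.85]:
    heapq.heappush(top_scores, score)
    if len(top_scores) > max_per_set:
        while len(top_scores) > max_per_set:
            heapq.heappop(top_scores)    # drop the smallest scores
        thresh = top_scores[0]           # smallest score still in the top set

print('threshold = {}'.format(thresh))   # 0.7: only the 5 best scores survive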
def _load_kitti_annotation(self, index):
    """
    Load image and bounding boxes info from txt file in the KITTI format.
    """
    if self._image_set == 'test':
        lines = []
    else:
        filename = os.path.join(self._data_path, 'training', 'label_2', index + '.txt')
        lines = []
        with open(filename) as f:
            for line in f:
                line = line.replace('Van', 'Car')
                words = line.split()
                cls = words[0]
                truncation = float(words[1])
                occlusion = int(words[2])
                height = float(words[7]) - float(words[5])
                if cls in self._class_to_ind and truncation < 0.5 and occlusion < 3 and height > 25:
                    lines.append(line)

    num_objs = len(lines)

    boxes = np.zeros((num_objs, 4), dtype=np.float32)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

    for ix, line in enumerate(lines):
        words = line.split()
        cls = self._class_to_ind[words[0]]
        boxes[ix, :] = [float(n) for n in words[4:8]]
        gt_classes[ix] = cls
        overlaps[ix, cls] = 1.0

    overlaps = scipy.sparse.csr_matrix(overlaps)

    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
    subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes = scipy.sparse.csr_matrix(subindexes)
    subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in multi-scales
            # rescale the gt boxes
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

            # compute grid boxes
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

            # compute overlap
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float), boxes_all.astype(np.float))

            # check how many gt boxes are covered by grids
            if num_objs != 0:
                index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in xrange(1, self.num_classes):
                    fg_inds.extend(np.where((gt_classes_all == k) &
                                            (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])
                index_covered = np.unique(index[fg_inds])

                for i in xrange(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(np.where(gt_classes[index_covered] == i)[0])
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16

            # faster rcnn region proposal
            anchors = generate_anchors()
            num_anchors = anchors.shape[0]

            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]

            # height and width of the heatmap
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)

            # gt boxes
            gt_boxes = boxes * scale

            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) +
                           shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))

            # compute overlap
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float), gt_boxes.astype(np.float))

            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in xrange(1, self.num_classes):
                    fg_inds.extend(np.where((gt_classes == k) &
                                            (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])

                for i in xrange(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(np.where(gt_classes[fg_inds] == i)[0])

    return {'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False}
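The filter in `_load_kitti_annotation` keeps only relatively easy KITTI objects: 'Van' is folded into 'Car', and a label line survives only if its class is trainable, truncation is below 0.5, occlusion level is below 3, and the box is taller than 25 pixels (truncation in field 1, occlusion in field 2, the box in fields 4-7). A standalone sketch of that filter with a hypothetical label line:

class_to_ind = {'Car': 1, 'Pedestrian': 2, 'Cyclist': 3}

# hypothetical KITTI label line: type trunc occl alpha x1 y1 x2 y2 ...
line = 'Van 0.10 0 -1.57 200.0 180.0 360.0 260.0 1.5 1.6 3.9 0.0 1.7 20.0 -1.60'
line = line.replace('Van', 'Car')            # treat vans as cars
words = line.split()

cls = words[0]
truncation = float(words[1])                 # 0 (fully visible) .. 1 (fully truncated)
occlusion = int(words[2])                    # 0..3, higher means more occluded
height = float(words[7]) - float(words[5])   # box height = y2 - y1

keep = (cls in class_to_ind and truncation < 0.5
        and occlusion < 3 and height > 25)
print(keep)   # True: this object passes the filter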
def prepare_roidb(imdb):
    """Enrich the imdb's roidb by adding some derived quantities that
    are useful for training. This function precomputes the maximum
    overlap, taken over ground-truth boxes, between each ROI and
    each ground-truth box. The class with maximum overlap is also
    recorded.
    """
    # if a cache file exists, load it and return immediately
    cache_file = os.path.join(imdb.cache_path, imdb.name + '_gt_roidb_prepared.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            imdb._roidb = cPickle.load(fid)
        print '{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file)
        return

    roidb = imdb.roidb
    for i in xrange(len(imdb.image_index)):
        roidb[i]['image'] = imdb.image_path_at(i)
        # these should be the gt boxes
        # note that the boxes in the roidb do not correspond to the original image scale!
        boxes = roidb[i]['boxes']
        labels = roidb[i]['gt_classes']

        # each point of the feature map has 9 boxes, and each box carries two
        # probabilities: the probability of being fg and the probability of not being bg
        # this creates an empty array: array([], shape=(0, 18), dtype=float32)
        info_boxes = np.zeros((0, 18), dtype=np.float32)

        if boxes.shape[0] == 0:
            roidb[i]['info_boxes'] = info_boxes
            continue

        # compute grid boxes
        s = PIL.Image.open(imdb.image_path_at(i)).size
        image_height = s[1]
        image_width = s[0]
        # input: the real height and width of the image
        # output: boxes_grid: very many boxes (number of feature-map points * num_aspect)
        # as [x1, y1, x2, y2], plus centers[:, 0] and centers[:, 1]
        # i.e. the top-left and bottom-right corners of each box in the original image,
        # and the x and y coordinates corresponding to each point of the feature map
        # these boxes are not gt: multiple boxes (of different aspect ratios) are
        # generated for every point of the feature map; the boxes in the roidb are the gt
        boxes_grid, cx, cy = get_boxes_grid(image_height, image_width)

        # Scales to use during training (can list multiple scales)
        # Each scale is the pixel size of an image's shortest side
        # __C.TRAIN.SCALES = (600,)
        # for each scale
        for scale_ind, scale in enumerate(cfg.TRAIN.SCALES):
            # scale is probably 16
            boxes_rescaled = boxes * scale

            # compute overlap
            overlaps = bbox_overlaps(boxes_grid.astype(np.float), boxes_rescaled.astype(np.float))
            # for each grid box, find the gt box that matches it best:
            # the maximum IoU value
            max_overlaps = overlaps.max(axis=1)
            # the index of the gt box with the maximum IoU
            argmax_overlaps = overlaps.argmax(axis=1)
            # the class of the best-matching gt box
            max_classes = labels[argmax_overlaps]

            # select positive boxes
            fg_inds = []
            # iterate over all classes and collect the boxes that qualify as fg
            for k in xrange(1, imdb.num_classes):
                # only a box whose IoU exceeds the threshold counts as fg!
                fg_inds.extend(np.where((max_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH))[0])

            if len(fg_inds) > 0:
                # the indices of the gt boxes matched to the fg boxes
                gt_inds = argmax_overlaps[fg_inds]
                # bounding box regression targets
                # compute the offsets between each fg box and its matched gt box
                # the result is 2-D with 4 columns: column 0: the x offset;
                # column 1: the y offset; column 2: the w scaling; column 3: the h scaling
                gt_targets = _compute_targets(boxes_grid[fg_inds, :], boxes_rescaled[gt_inds, :])
                # scale mapping for RoI pooling
                # this variable does not exist in cfg???
                scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind]
                scale_map = cfg.TRAIN.SCALES[scale_ind_map]
                # construct the list of positive boxes
                # (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target)
                # the 18 here does not mean 9 anchors: it is a single anchor,
                # with 18 columns used to store the related information
                info_box = np.zeros((len(fg_inds), 18), dtype=np.float32)
                info_box[:, 0] = cx[fg_inds]
                info_box[:, 1] = cy[fg_inds]
                info_box[:, 2] = scale_ind
                info_box[:, 3:7] = boxes_grid[fg_inds, :]
                info_box[:, 7] = scale_ind_map
                info_box[:, 8:12] = boxes_grid[fg_inds, :] * scale_map / scale
                info_box[:, 12] = labels[gt_inds]
                info_box[:, 14:] = gt_targets
                info_boxes = np.vstack((info_boxes, info_box))

        roidb[i]['info_boxes'] = info_boxes

    with open(cache_file, 'wb') as fid:
        cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote gt roidb prepared to {}'.format(cache_file)
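`_compute_targets` itself is not shown in this section; the comments above describe it as returning one row per fg box with x/y offsets and w/h scalings relative to the matched gt box. Assuming it follows the standard Fast R-CNN parameterization (an assumption: this repo's version may differ, e.g. in normalization), a sketch:

import numpy as np

def compute_targets(ex_boxes, gt_boxes):
    """Standard Fast R-CNN box regression targets (dx, dy, dw, dh)."""
    ex_w = ex_boxes[:, 2] - ex_boxes[:, 0] + 1.0
    ex_h = ex_boxes[:, 3] - ex_boxes[:, 1] + 1.0
    ex_cx = ex_boxes[:, 0] + 0.5 * ex_w
    ex_cy = ex_boxes[:, 1] + 0.5 * ex_h

    gt_w = gt_boxes[:, 2] - gt_boxes[:, 0] + 1.0
    gt_h = gt_boxes[:, 3] - gt_boxes[:, 1] + 1.0
    gt_cx = gt_boxes[:, 0] + 0.5 * gt_w
    gt_cy = gt_boxes[:, 1] + 0.5 * gt_h

    dx = (gt_cx - ex_cx) / ex_w          # x offset, normalized by box width
    dy = (gt_cy - ex_cy) / ex_h          # y offset, normalized by box height
    dw = np.log(gt_w / ex_w)             # log width scaling
    dh = np.log(gt_h / ex_h)             # log height scaling
    return np.vstack((dx, dy, dw, dh)).transpose()

ex = np.array([[0.0, 0.0, 15.0, 15.0]])
gt = np.array([[2.0, 2.0, 17.0, 17.0]])
print(compute_targets(ex, gt))           # [[0.125 0.125 0. 0.]]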
def _load_imagenet3d_annotation(self, index):
    """
    Load image and bounding boxes info from txt file in the imagenet3d format.
    """
    if self._image_set == 'test' or self._image_set == 'test_1' or self._image_set == 'test_2':
        lines = []
    else:
        filename = os.path.join(self._imagenet3d_path, 'Labels', index + '.txt')
        lines = []
        with open(filename) as f:
            for line in f:
                lines.append(line)

    num_objs = len(lines)

    boxes = np.zeros((num_objs, 4), dtype=np.float32)
    viewpoints = np.zeros((num_objs, 3), dtype=np.float32)          # azimuth, elevation, in-plane rotation
    viewpoints_flipped = np.zeros((num_objs, 3), dtype=np.float32)  # azimuth, elevation, in-plane rotation
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

    for ix, line in enumerate(lines):
        words = line.split()
        assert len(words) == 5 or len(words) == 8, 'Wrong label format: {}'.format(index)
        cls = self._class_to_ind[words[0]]
        boxes[ix, :] = [float(n) for n in words[1:5]]
        gt_classes[ix] = cls
        overlaps[ix, cls] = 1.0
        if len(words) == 8:
            viewpoints[ix, :] = [float(n) for n in words[5:8]]
            # flip the viewpoint
            viewpoints_flipped[ix, 0] = -viewpoints[ix, 0]  # azimuth
            viewpoints_flipped[ix, 1] = viewpoints[ix, 1]   # elevation
            viewpoints_flipped[ix, 2] = -viewpoints[ix, 2]  # in-plane rotation
        else:
            viewpoints[ix, :] = np.inf
            viewpoints_flipped[ix, :] = np.inf

    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
    subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    viewindexes_azimuth = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    viewindexes_azimuth_flipped = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    viewindexes_elevation = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    viewindexes_elevation_flipped = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    viewindexes_rotation = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    viewindexes_rotation_flipped = np.zeros((num_objs, self.num_classes), dtype=np.float32)

    overlaps = scipy.sparse.csr_matrix(overlaps)
    subindexes = scipy.sparse.csr_matrix(subindexes)
    subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)
    viewindexes_azimuth = scipy.sparse.csr_matrix(viewindexes_azimuth)
    viewindexes_azimuth_flipped = scipy.sparse.csr_matrix(viewindexes_azimuth_flipped)
    viewindexes_elevation = scipy.sparse.csr_matrix(viewindexes_elevation)
    viewindexes_elevation_flipped = scipy.sparse.csr_matrix(viewindexes_elevation_flipped)
    viewindexes_rotation = scipy.sparse.csr_matrix(viewindexes_rotation)
    viewindexes_rotation_flipped = scipy.sparse.csr_matrix(viewindexes_rotation_flipped)

    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in multi-scales
            # rescale the gt boxes
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

            # compute grid boxes
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

            # compute overlap
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float), boxes_all.astype(np.float))

            # check how many gt boxes are covered by grids
            if num_objs != 0:
                index = np.tile(list(range(num_objs)), len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(np.where((gt_classes_all == k) &
                                            (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])
                index_covered = np.unique(index[fg_inds])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(np.where(gt_classes[index_covered] == i)[0])
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16

            # faster rcnn region proposal
            base_size = 16
            ratios = cfg.TRAIN.RPN_ASPECTS
            scales = cfg.TRAIN.RPN_SCALES
            anchors = generate_anchors(base_size, ratios, scales)
            num_anchors = anchors.shape[0]

            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]

            # height and width of the heatmap
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)

            # gt boxes
            gt_boxes = boxes * scale

            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) +
                           shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))

            # compute overlap
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float), gt_boxes.astype(np.float))

            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(np.where((gt_classes == k) &
                                            (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(np.where(gt_classes[fg_inds] == i)[0])

    return {'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_viewpoints': viewpoints,
            'gt_viewpoints_flipped': viewpoints_flipped,
            'gt_viewindexes_azimuth': viewindexes_azimuth,
            'gt_viewindexes_azimuth_flipped': viewindexes_azimuth_flipped,
            'gt_viewindexes_elevation': viewindexes_elevation,
            'gt_viewindexes_elevation_flipped': viewindexes_elevation_flipped,
            'gt_viewindexes_rotation': viewindexes_rotation,
            'gt_viewindexes_rotation_flipped': viewindexes_rotation_flipped,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False}
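The viewpoint handling above encodes the geometric effect of horizontal flipping: mirroring an image negates the azimuth and the in-plane rotation but leaves the elevation unchanged, and objects without annotated viewpoints are marked with `np.inf`. A compact restatement of that rule:

import numpy as np

def flip_viewpoint(vp):
    """vp = (azimuth, elevation, in-plane rotation)."""
    azimuth, elevation, rotation = vp
    return np.array([-azimuth, elevation, -rotation])

print(flip_viewpoint(np.array([30.0, 10.0, -5.0])))   # [-30. 10. 5.]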