def prepare_roidb(imdb):
    """Attach grid-box training targets to every entry of ``imdb.roidb``.

    For each image the boxes returned by ``get_boxes_grid`` are compared with
    the ground-truth boxes rescaled by every value in
    ``cfg.ZLRM.TRAIN.SCALES``. A grid box is kept as a positive when its
    best-overlapping ground-truth box has a class in ``1 .. num_classes - 1``
    and the overlap is at least ``cfg.ZLRM.TRAIN.FG_THRESH``.

    Two keys are written into each roidb entry:

    * ``'image'``: the path returned by ``imdb.image_path_at(i)``.
    * ``'info_boxes'``: an ``(N, 18)`` float32 array, one row per positive
      grid box, with columns

      ====== ==========================================================
      0      ``cx`` value of the grid box (from ``get_boxes_grid``)
      1      ``cy`` value of the grid box (from ``get_boxes_grid``)
      2      index of the scale in ``cfg.ZLRM.TRAIN.SCALES``
      3:7    the grid box
      7      mapped scale index (``cfg.TRAIN.SCALE_MAPPING``)
      8:12   the grid box multiplied by ``mapped_scale / scale``
      12     class label of the matched ground-truth box
      13     never written here, stays 0 (sub-label slot)
      14:    regression targets returned by ``_compute_targets``
      ====== ==========================================================

    The enriched roidb is pickled to
    ``<imdb.cache_path>/<imdb.name>_gt_roidb_prepared.pkl``. When that file
    already exists it is loaded into ``imdb._roidb`` and nothing is
    recomputed.

    Args:
        imdb: dataset object providing ``cache_path``, ``name``, ``roidb``,
            ``image_index``, ``image_path_at`` and ``num_classes``.

    Returns:
        None. ``imdb`` is updated in place.
    """
    cache_file = os.path.join(imdb.cache_path,
                              imdb.name + '_gt_roidb_prepared.pkl')
    if os.path.exists(cache_file):
        # NOTE(review): unpickling can execute arbitrary code; the cache
        # directory must only ever contain files written by this function.
        with open(cache_file, 'rb') as fid:
            imdb._roidb = cPickle.load(fid)
        print('{} gt roidb prepared loaded from {}'.format(
            imdb.name, cache_file))
        return

    roidb = imdb.roidb
    for i in range(len(imdb.image_index)):
        entry = roidb[i]
        image_path = imdb.image_path_at(i)
        entry['image'] = image_path
        boxes = entry['boxes']
        labels = entry['gt_classes']
        info_boxes = np.zeros((0, 18), dtype=np.float32)

        # Images without ground truth get an empty target table.
        if boxes.shape[0] == 0:
            entry['info_boxes'] = info_boxes
            continue

        # compute grid boxes (PIL reports size as (width, height))
        image_width, image_height = PIL.Image.open(image_path).size[:2]
        boxes_grid, cx, cy = get_boxes_grid(image_height, image_width)

        # for each scale
        for scale_ind, scale in enumerate(cfg.ZLRM.TRAIN.SCALES):
            boxes_rescaled = boxes * scale

            # compute overlap; np.float64 replaces the removed np.float alias
            overlaps = bbox_overlaps(boxes_grid.astype(np.float64),
                                     boxes_rescaled.astype(np.float64))
            max_overlaps = overlaps.max(axis=1)
            argmax_overlaps = overlaps.argmax(axis=1)
            max_classes = labels[argmax_overlaps]

            # select positive boxes, grouped by class so that the row order
            # of info_boxes stays class-major within each scale
            fg_inds = []
            for k in range(1, imdb.num_classes):
                fg_inds.extend(
                    np.where((max_classes == k) &
                             (max_overlaps >= cfg.ZLRM.TRAIN.FG_THRESH))[0])

            if len(fg_inds) == 0:
                continue

            gt_inds = argmax_overlaps[fg_inds]
            # bounding box regression targets
            gt_targets = _compute_targets(boxes_grid[fg_inds, :],
                                          boxes_rescaled[gt_inds, :])
            # scale mapping for RoI pooling
            # NOTE(review): SCALE_MAPPING is read from cfg.TRAIN while the
            # scales come from cfg.ZLRM.TRAIN -- confirm this is intended.
            scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind]
            scale_map = cfg.ZLRM.TRAIN.SCALES[scale_ind_map]

            # construct the list of positive boxes
            # (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label,
            #  gt_sublabel, target)
            info_box = np.zeros((len(fg_inds), 18), dtype=np.float32)
            info_box[:, 0] = cx[fg_inds]
            info_box[:, 1] = cy[fg_inds]
            info_box[:, 2] = scale_ind
            info_box[:, 3:7] = boxes_grid[fg_inds, :]
            info_box[:, 7] = scale_ind_map
            info_box[:, 8:12] = boxes_grid[fg_inds, :] * scale_map / scale
            info_box[:, 12] = labels[gt_inds]
            info_box[:, 14:] = gt_targets
            info_boxes = np.vstack((info_boxes, info_box))

        entry['info_boxes'] = info_boxes

    with open(cache_file, 'wb') as fid:
        cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print('wrote gt roidb prepared to {}'.format(cache_file))
def _load_pascal3d_voxel_exemplar_annotation(self, index):
    """Load boxes and subcategory labels for one image from its exemplar file.

    For the ``'val'`` image set the call is forwarded to
    ``self._load_pascal_annotation``. Otherwise the file
    ``<self._pascal3d_path>/<cfg.SUBCLS_NAME>/<index>.txt`` is parsed. Each
    non-blank line is whitespace separated and read as::

        class_name  subclass  is_flip  x1  y1  x2  y2  ...

    Lines whose subclass is ``-1`` are ignored. Lines with ``is_flip == 0``
    describe the objects; the remaining lines supply the subclass of the
    flipped version of the object at the same position, so both groups must
    have the same length. Box coordinates are shifted by -1 to make them
    0-based.

    When ``cfg.IS_RPN`` is set, the method also updates the running counters
    ``self._num_boxes_all`` and ``self._num_boxes_covered`` with how many
    ground-truth boxes of each class reach the per-class overlap threshold
    ``cfg.TRAIN.FG_THRESH[k - 1]`` against either the multi-scale grid boxes
    (``cfg.IS_MULTISCALE``) or the shifted anchors.

    Args:
        index: image identifier used to build the label and image paths.

    Returns:
        dict with keys ``'boxes'`` (N x 4 float32), ``'gt_classes'``,
        ``'gt_subclasses'``, ``'gt_subclasses_flipped'`` (int32 vectors),
        ``'gt_overlaps'``, ``'gt_subindexes'``, ``'gt_subindexes_flipped'``
        (scipy CSR matrices of shape N x num_classes) and ``'flipped'``
        (always ``False``).

    Raises:
        AssertionError: if the label file is missing or the numbers of
            original and flipped objects differ.
    """
    if self._image_set == 'val':
        return self._load_pascal_annotation(index)

    filename = os.path.join(self._pascal3d_path, cfg.SUBCLS_NAME,
                            index + '.txt')
    assert os.path.exists(filename), \
        'Path does not exist: {}'.format(filename)

    # the annotation file contains flipped objects
    lines = []
    lines_flipped = []
    with open(filename) as f:
        for line in f:
            words = line.split()
            if not words:
                # a blank (e.g. trailing) line carries no annotation
                continue
            subcls = int(words[1])
            is_flip = int(words[2])
            if subcls == -1:
                continue
            if is_flip == 0:
                lines.append(line)
            else:
                lines_flipped.append(line)

    num_objs = len(lines)

    # store information of flipped objects
    assert (num_objs == len(lines_flipped)
            ), 'The number of flipped objects is not the same!'
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
    for ix, line in enumerate(lines_flipped):
        gt_subclasses_flipped[ix] = int(line.split()[1])

    boxes = np.zeros((num_objs, 4), dtype=np.float32)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                  dtype=np.int32)

    for ix, line in enumerate(lines):
        words = line.split()
        cls = self._class_to_ind[words[0]]
        subcls = int(words[1])
        # Make pixel indexes 0-based
        boxes[ix, :] = [float(n) - 1 for n in words[3:7]]
        gt_classes[ix] = cls
        gt_subclasses[ix] = subcls
        overlaps[ix, cls] = 1.0
        subindexes[ix, cls] = subcls
        subindexes_flipped[ix, cls] = gt_subclasses_flipped[ix]

    overlaps = scipy.sparse.csr_matrix(overlaps)
    subindexes = scipy.sparse.csr_matrix(subindexes)
    subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in multi-scales
            # rescale the gt boxes
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

            # compute grid boxes (PIL reports size as (width, height))
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

            # compute overlap; np.float64 replaces the removed np.float alias
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float64),
                                          boxes_all.astype(np.float64))

            # check how many gt boxes are covered by grids
            if num_objs != 0:
                # maps every row of boxes_all back to its object number;
                # kept separate from the ``index`` argument on purpose
                obj_index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes_all == k) &
                                 (max_overlaps >=
                                  cfg.TRAIN.FG_THRESH[k - 1]))[0])
                # an object counts once even if covered at several scales
                index_covered = np.unique(obj_index[fg_inds])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += np.count_nonzero(
                        gt_classes == i)
                    self._num_boxes_covered[i] += np.count_nonzero(
                        gt_classes[index_covered] == i)
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16
            # faster rcnn region proposal
            base_size = 16
            ratios = [3.0, 2.0, 1.5, 1.0, 0.75, 0.5, 0.25]
            scales = 2**np.arange(1, 6, 0.5)
            anchors = generate_anchors(base_size, ratios, scales)
            num_anchors = anchors.shape[0]

            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]

            # height and width of the heatmap
            # (presumably mirrors the network's downsampling -- confirm)
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)

            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)

            # gt boxes
            gt_boxes = boxes * scale

            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack(
                (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                 shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))

            # compute overlap; np.float64 replaces the removed np.float alias
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float64),
                                          gt_boxes.astype(np.float64))

            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes == k) &
                                 (max_overlaps >=
                                  cfg.TRAIN.FG_THRESH[k - 1]))[0])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += np.count_nonzero(
                        gt_classes == i)
                    self._num_boxes_covered[i] += np.count_nonzero(
                        gt_classes[fg_inds] == i)

    return {
        'boxes': boxes,
        'gt_classes': gt_classes,
        'gt_subclasses': gt_subclasses,
        'gt_subclasses_flipped': gt_subclasses_flipped,
        'gt_overlaps': overlaps,
        'gt_subindexes': subindexes,
        'gt_subindexes_flipped': subindexes_flipped,
        'flipped': False
    }
def _load_imagenet3d_annotation(self, index):
    """Load boxes, classes and viewpoints for one image (imagenet3d format).

    For the image sets ``'test'``, ``'test_1'`` and ``'test_2'`` no label file
    is read and all returned arrays are empty. Otherwise every line of
    ``<self._imagenet3d_path>/Labels/<index>.txt`` is one object with either
    5 or 8 whitespace-separated fields::

        class_name  x1  y1  x2  y2  [azimuth  elevation  in_plane_rotation]

    When the three viewpoint fields are missing, the viewpoint (and its
    flipped counterpart) is filled with ``inf``. The flipped viewpoint
    negates azimuth and in-plane rotation and keeps the elevation.

    The subclass and view-index outputs are created with the right shapes but
    are not filled in by this method; they are returned as all-zero.

    When ``cfg.IS_RPN`` is set, the method also updates the running counters
    ``self._num_boxes_all`` and ``self._num_boxes_covered`` with how many
    ground-truth boxes of each class reach the per-class overlap threshold
    ``cfg.TRAIN.FG_THRESH[k - 1]`` against either the multi-scale grid boxes
    (``cfg.IS_MULTISCALE``) or the shifted anchors.

    Args:
        index: image identifier used to build the label and image paths.

    Returns:
        dict with the ground-truth arrays; ``'gt_overlaps'``, the
        ``'gt_subindexes*'`` and the ``'gt_viewindexes_*'`` entries are scipy
        CSR matrices of shape N x num_classes, ``'flipped'`` is ``False``.

    Raises:
        AssertionError: if a label line does not have 5 or 8 fields.
    """
    lines = []
    if self._image_set not in ('test', 'test_1', 'test_2'):
        filename = os.path.join(self._imagenet3d_path, 'Labels',
                                index + '.txt')
        with open(filename) as f:
            lines = list(f)

    num_objs = len(lines)

    boxes = np.zeros((num_objs, 4), dtype=np.float32)
    # azimuth, elevation, in-plane rotation
    viewpoints = np.zeros((num_objs, 3), dtype=np.float32)
    # azimuth, elevation, in-plane rotation
    viewpoints_flipped = np.zeros((num_objs, 3), dtype=np.float32)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

    for ix, line in enumerate(lines):
        words = line.split()
        assert len(words) == 5 or len(
            words) == 8, 'Wrong label format: {}'.format(index)
        cls = self._class_to_ind[words[0]]
        boxes[ix, :] = [float(n) for n in words[1:5]]
        gt_classes[ix] = cls
        overlaps[ix, cls] = 1.0
        if len(words) == 8:
            viewpoints[ix, :] = [float(n) for n in words[5:8]]
            # flip the viewpoint
            viewpoints_flipped[ix, 0] = -viewpoints[ix, 0]  # azimuth
            viewpoints_flipped[ix, 1] = viewpoints[ix, 1]  # elevation
            viewpoints_flipped[ix, 2] = -viewpoints[ix, 2]  # in-plane rotation
        else:
            # no viewpoint annotation for this object
            viewpoints[ix, :] = np.inf
            viewpoints_flipped[ix, :] = np.inf

    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
    subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                  dtype=np.int32)
    viewindexes_azimuth = np.zeros((num_objs, self.num_classes),
                                   dtype=np.float32)
    viewindexes_azimuth_flipped = np.zeros((num_objs, self.num_classes),
                                           dtype=np.float32)
    viewindexes_elevation = np.zeros((num_objs, self.num_classes),
                                     dtype=np.float32)
    viewindexes_elevation_flipped = np.zeros((num_objs, self.num_classes),
                                             dtype=np.float32)
    viewindexes_rotation = np.zeros((num_objs, self.num_classes),
                                    dtype=np.float32)
    viewindexes_rotation_flipped = np.zeros((num_objs, self.num_classes),
                                            dtype=np.float32)

    overlaps = scipy.sparse.csr_matrix(overlaps)
    subindexes = scipy.sparse.csr_matrix(subindexes)
    subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)
    viewindexes_azimuth = scipy.sparse.csr_matrix(viewindexes_azimuth)
    viewindexes_azimuth_flipped = scipy.sparse.csr_matrix(
        viewindexes_azimuth_flipped)
    viewindexes_elevation = scipy.sparse.csr_matrix(viewindexes_elevation)
    viewindexes_elevation_flipped = scipy.sparse.csr_matrix(
        viewindexes_elevation_flipped)
    viewindexes_rotation = scipy.sparse.csr_matrix(viewindexes_rotation)
    viewindexes_rotation_flipped = scipy.sparse.csr_matrix(
        viewindexes_rotation_flipped)

    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in multi-scales
            # rescale the gt boxes
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

            # compute grid boxes (PIL reports size as (width, height))
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

            # compute overlap; np.float64 replaces the removed np.float alias
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float64),
                                          boxes_all.astype(np.float64))

            # check how many gt boxes are covered by grids
            if num_objs != 0:
                # maps every row of boxes_all back to its object number;
                # kept separate from the ``index`` argument on purpose
                obj_index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes_all == k) &
                                 (max_overlaps >=
                                  cfg.TRAIN.FG_THRESH[k - 1]))[0])
                # an object counts once even if covered at several scales
                index_covered = np.unique(obj_index[fg_inds])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += np.count_nonzero(
                        gt_classes == i)
                    self._num_boxes_covered[i] += np.count_nonzero(
                        gt_classes[index_covered] == i)
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16
            # faster rcnn region proposal
            base_size = 16
            ratios = cfg.TRAIN.RPN_ASPECTS
            scales = cfg.TRAIN.RPN_SCALES
            anchors = generate_anchors(base_size, ratios, scales)
            num_anchors = anchors.shape[0]

            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]

            # height and width of the heatmap
            # (presumably mirrors the network's downsampling -- confirm)
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)

            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)

            # gt boxes
            gt_boxes = boxes * scale

            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack(
                (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                 shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))

            # compute overlap; np.float64 replaces the removed np.float alias
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float64),
                                          gt_boxes.astype(np.float64))

            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes == k) &
                                 (max_overlaps >=
                                  cfg.TRAIN.FG_THRESH[k - 1]))[0])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += np.count_nonzero(
                        gt_classes == i)
                    self._num_boxes_covered[i] += np.count_nonzero(
                        gt_classes[fg_inds] == i)

    return {
        'boxes': boxes,
        'gt_classes': gt_classes,
        'gt_viewpoints': viewpoints,
        'gt_viewpoints_flipped': viewpoints_flipped,
        'gt_viewindexes_azimuth': viewindexes_azimuth,
        'gt_viewindexes_azimuth_flipped': viewindexes_azimuth_flipped,
        'gt_viewindexes_elevation': viewindexes_elevation,
        'gt_viewindexes_elevation_flipped': viewindexes_elevation_flipped,
        'gt_viewindexes_rotation': viewindexes_rotation,
        'gt_viewindexes_rotation_flipped': viewindexes_rotation_flipped,
        'gt_subclasses': gt_subclasses,
        'gt_subclasses_flipped': gt_subclasses_flipped,
        'gt_overlaps': overlaps,
        'gt_subindexes': subindexes,
        'gt_subindexes_flipped': subindexes_flipped,
        'flipped': False
    }
def _load_pascal_annotation(self, index):
    """Load boxes and classes for one image from a PASCAL VOC XML file.

    Parses ``<self._data_path>/Annotations/<index>.xml`` and reads, for every
    ``<object>`` element, the first ``xmin``/``ymin``/``xmax``/``ymax`` and
    ``name`` tags. Coordinates are shifted by -1 to make them 0-based. The
    name is lower-cased and stripped; names not listed in ``self._classes``
    are assigned class 0.

    The subclass outputs are created with the right shapes but are not
    filled in by this method; they are returned as all-zero.

    When ``cfg.IS_RPN`` is set, the method also updates the running counters
    ``self._num_boxes_all`` and ``self._num_boxes_covered`` with how many
    ground-truth boxes of each class reach the per-class overlap threshold
    ``cfg.TRAIN.FG_THRESH[k - 1]`` against either the multi-scale grid boxes
    (``cfg.IS_MULTISCALE``) or the shifted anchors.

    Args:
        index: image identifier used to build the annotation and image paths.

    Returns:
        dict with keys ``'boxes'`` (N x 4 uint16), ``'gt_classes'``,
        ``'gt_subclasses'``, ``'gt_subclasses_flipped'`` (int32 vectors),
        ``'gt_overlaps'``, ``'gt_subindexes'``, ``'gt_subindexes_flipped'``
        (scipy CSR matrices of shape N x num_classes) and ``'flipped'``
        (always ``False``).
    """
    filename = os.path.join(self._data_path, 'Annotations', index + '.xml')

    def get_data_from_tag(node, tag):
        """Return the text of the first ``tag`` element below ``node``."""
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    with open(filename) as f:
        data = minidom.parseString(f.read())

    objs = data.getElementsByTagName('object')
    num_objs = len(objs)

    # NOTE(review): the boxes are stored as uint16, so a coordinate of 0 in
    # the XML (which becomes -1 below) cannot be represented. The dtype is
    # kept because it is part of what callers receive -- confirm whether a
    # signed/float dtype, as used by the other loaders, would be acceptable.
    boxes = np.zeros((num_objs, 4), dtype=np.uint16)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

    # Load object bounding boxes into a data frame.
    for ix, obj in enumerate(objs):
        # Make pixel indexes 0-based
        x1 = float(get_data_from_tag(obj, 'xmin')) - 1
        y1 = float(get_data_from_tag(obj, 'ymin')) - 1
        x2 = float(get_data_from_tag(obj, 'xmax')) - 1
        y2 = float(get_data_from_tag(obj, 'ymax')) - 1
        name = str(get_data_from_tag(obj, "name")).lower().strip()
        if name in self._classes:
            cls = self._class_to_ind[name]
        else:
            # unknown object names fall back to class 0
            cls = 0
        boxes[ix, :] = [x1, y1, x2, y2]
        gt_classes[ix] = cls
        overlaps[ix, cls] = 1.0

    overlaps = scipy.sparse.csr_matrix(overlaps)
    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
    subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                  dtype=np.int32)
    subindexes = scipy.sparse.csr_matrix(subindexes)
    subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in multi-scales
            # rescale the gt boxes
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

            # compute grid boxes (PIL reports size as (width, height))
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

            # compute overlap; np.float64 replaces the removed np.float alias
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float64),
                                          boxes_all.astype(np.float64))

            # check how many gt boxes are covered by grids
            if num_objs != 0:
                # maps every row of boxes_all back to its object number;
                # kept separate from the ``index`` argument on purpose
                obj_index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes_all == k) &
                                 (max_overlaps >=
                                  cfg.TRAIN.FG_THRESH[k - 1]))[0])
                # an object counts once even if covered at several scales
                index_covered = np.unique(obj_index[fg_inds])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += np.count_nonzero(
                        gt_classes == i)
                    self._num_boxes_covered[i] += np.count_nonzero(
                        gt_classes[index_covered] == i)
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16
            # faster rcnn region proposal
            anchors = generate_anchors()
            num_anchors = anchors.shape[0]

            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]

            # height and width of the heatmap
            # (presumably mirrors the network's downsampling -- confirm)
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)

            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)

            # gt boxes
            gt_boxes = boxes * scale

            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack(
                (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                 shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))

            # compute overlap; np.float64 replaces the removed np.float alias
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float64),
                                          gt_boxes.astype(np.float64))

            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes == k) &
                                 (max_overlaps >=
                                  cfg.TRAIN.FG_THRESH[k - 1]))[0])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += np.count_nonzero(
                        gt_classes == i)
                    self._num_boxes_covered[i] += np.count_nonzero(
                        gt_classes[fg_inds] == i)

    return {
        'boxes': boxes,
        'gt_classes': gt_classes,
        'gt_subclasses': gt_subclasses,
        'gt_subclasses_flipped': gt_subclasses_flipped,
        'gt_overlaps': overlaps,
        'gt_subindexes': subindexes,
        'gt_subindexes_flipped': subindexes_flipped,
        'flipped': False
    }
def _load_kitti_annotation(self, index):
    """Load boxes and classes for one image from a KITTI label file.

    For the ``'test'`` image set no label file is read and all returned
    arrays are empty. Otherwise
    ``<self._data_path>/training/label_2/<index>.txt`` is parsed. In every
    line the text ``'Van'`` is first replaced by ``'Car'``; the line is then
    split on whitespace and read as: field 0 the class name, field 1 the
    truncation, field 2 the occlusion level and fields 4..7 the box
    ``x1 y1 x2 y2``.

    An object is kept only when its class is in ``self._class_to_ind``, its
    truncation is below 0.5, its occlusion is below 3 and its box height
    (``y2 - y1``) is above 25. Blank lines are skipped.

    The subclass outputs are created with the right shapes but are not
    filled in by this method; they are returned as all-zero.

    When ``cfg.IS_RPN`` is set, the method also updates the running counters
    ``self._num_boxes_all`` and ``self._num_boxes_covered`` with how many
    ground-truth boxes of each class reach the per-class overlap threshold
    ``cfg.TRAIN.FG_THRESH[k - 1]`` against either the multi-scale grid boxes
    (``cfg.IS_MULTISCALE``) or the shifted anchors.

    Args:
        index: image identifier used to build the label and image paths.

    Returns:
        dict with keys ``'boxes'`` (N x 4 float32), ``'gt_classes'``,
        ``'gt_subclasses'``, ``'gt_subclasses_flipped'`` (int32 vectors),
        ``'gt_overlaps'``, ``'gt_subindexes'``, ``'gt_subindexes_flipped'``
        (scipy CSR matrices of shape N x num_classes) and ``'flipped'``
        (always ``False``).
    """
    lines = []
    if self._image_set != 'test':
        filename = os.path.join(self._data_path, 'training', 'label_2',
                                index + '.txt')
        with open(filename) as f:
            for line in f:
                # vans are treated as cars
                line = line.replace('Van', 'Car')
                words = line.split()
                if not words:
                    # a blank (e.g. trailing) line carries no annotation
                    continue
                cls = words[0]
                truncation = float(words[1])
                occlusion = int(words[2])
                height = float(words[7]) - float(words[5])
                if (cls in self._class_to_ind and truncation < 0.5 and
                        occlusion < 3 and height > 25):
                    lines.append(line)

    num_objs = len(lines)

    boxes = np.zeros((num_objs, 4), dtype=np.float32)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

    for ix, line in enumerate(lines):
        words = line.split()
        cls = self._class_to_ind[words[0]]
        boxes[ix, :] = [float(n) for n in words[4:8]]
        gt_classes[ix] = cls
        overlaps[ix, cls] = 1.0

    overlaps = scipy.sparse.csr_matrix(overlaps)
    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
    subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                  dtype=np.int32)
    subindexes = scipy.sparse.csr_matrix(subindexes)
    subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in multi-scales
            # rescale the gt boxes
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

            # compute grid boxes (PIL reports size as (width, height))
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

            # compute overlap; np.float64 replaces the removed np.float alias
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float64),
                                          boxes_all.astype(np.float64))

            # check how many gt boxes are covered by grids
            if num_objs != 0:
                # maps every row of boxes_all back to its object number;
                # kept separate from the ``index`` argument on purpose
                obj_index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes_all == k) &
                                 (max_overlaps >=
                                  cfg.TRAIN.FG_THRESH[k - 1]))[0])
                # an object counts once even if covered at several scales
                index_covered = np.unique(obj_index[fg_inds])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += np.count_nonzero(
                        gt_classes == i)
                    self._num_boxes_covered[i] += np.count_nonzero(
                        gt_classes[index_covered] == i)
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16
            # faster rcnn region proposal
            anchors = generate_anchors()
            num_anchors = anchors.shape[0]

            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]

            # height and width of the heatmap
            # (presumably mirrors the network's downsampling -- confirm)
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)

            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)

            # gt boxes
            gt_boxes = boxes * scale

            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack(
                (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                 shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))

            # compute overlap; np.float64 replaces the removed np.float alias
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float64),
                                          gt_boxes.astype(np.float64))

            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes == k) &
                                 (max_overlaps >=
                                  cfg.TRAIN.FG_THRESH[k - 1]))[0])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += np.count_nonzero(
                        gt_classes == i)
                    self._num_boxes_covered[i] += np.count_nonzero(
                        gt_classes[fg_inds] == i)

    return {
        'boxes': boxes,
        'gt_classes': gt_classes,
        'gt_subclasses': gt_subclasses,
        'gt_subclasses_flipped': gt_subclasses_flipped,
        'gt_overlaps': overlaps,
        'gt_subindexes': subindexes,
        'gt_subindexes_flipped': subindexes_flipped,
        'flipped': False
    }