def _load_kitti_annotation(self, index): """ Load image and bounding boxes info from txt file in the KITTI format. """ if self._image_set == 'test': lines = [] else: filename = os.path.join(self._data_path, 'training', 'label_2', index + '.txt') lines = [] with open(filename) as f: for line in f: line = line.replace('Van', 'Car') words = line.split() cls = words[0] truncation = float(words[1]) occlusion = int(words[2]) height = float(words[7]) - float(words[5]) if cls in self._class_to_ind and truncation < 0.5 and occlusion < 3 and height > 25: lines.append(line) num_objs = len(lines) boxes = np.zeros((num_objs, 4), dtype=np.float32) gt_classes = np.zeros((num_objs), dtype=np.int32) overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) for ix, line in enumerate(lines): words = line.split() cls = self._class_to_ind[words[0]] boxes[ix, :] = [float(n) for n in words[4:8]] gt_classes[ix] = cls overlaps[ix, cls] = 1.0 overlaps = scipy.sparse.csr_matrix(overlaps) gt_subclasses = np.zeros((num_objs), dtype=np.int32) gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32) subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32) subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32) subindexes = scipy.sparse.csr_matrix(subindexes) subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped) if cfg.IS_RPN: if cfg.IS_MULTISCALE: # compute overlaps between grid boxes and gt boxes in multi-scales # rescale the gt boxes boxes_all = np.zeros((0, 4), dtype=np.float32) for scale in cfg.TRAIN.SCALES: boxes_all = np.vstack((boxes_all, boxes * scale)) gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES)) # compute grid boxes s = PIL.Image.open(self.image_path_from_index(index)).size image_height = s[1] image_width = s[0] boxes_grid, _, _ = get_boxes_grid(image_height, image_width) # compute overlap overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float), boxes_all.astype(np.float)) # check how many gt boxes are covered by grids if num_objs != 0: index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES)) max_overlaps = overlaps_grid.max(axis=0) fg_inds = [] for k in xrange(1, self.num_classes): fg_inds.extend( np.where((gt_classes_all == k) & ( max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1])) [0]) index_covered = np.unique(index[fg_inds]) for i in xrange(self.num_classes): self._num_boxes_all[i] += len( np.where(gt_classes == i)[0]) self._num_boxes_covered[i] += len( np.where(gt_classes[index_covered] == i)[0]) else: assert len(cfg.TRAIN.SCALES_BASE) == 1 scale = cfg.TRAIN.SCALES_BASE[0] feat_stride = 16 # faster rcnn region proposal anchors = generate_anchors() num_anchors = anchors.shape[0] # image size s = PIL.Image.open(self.image_path_from_index(index)).size image_height = s[1] image_width = s[0] # height and width of the heatmap height = np.round((image_height * scale - 1) / 4.0 + 1) height = np.floor((height - 1) / 2 + 1 + 0.5) height = np.floor((height - 1) / 2 + 1 + 0.5) width = np.round((image_width * scale - 1) / 4.0 + 1) width = np.floor((width - 1) / 2.0 + 1 + 0.5) width = np.floor((width - 1) / 2.0 + 1 + 0.5) # gt boxes gt_boxes = boxes * scale # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * feat_stride shift_y = np.arange(0, height) * feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack( (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = num_anchors K = shifts.shape[0] all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape( (1, K, 4)).transpose((1, 0, 2))) all_anchors = all_anchors.reshape((K * A, 4)) # compute overlap overlaps_grid = bbox_overlaps(all_anchors.astype(np.float), gt_boxes.astype(np.float)) # check how many gt boxes are covered by anchors if num_objs != 0: max_overlaps = overlaps_grid.max(axis=0) fg_inds = [] for k in xrange(1, self.num_classes): fg_inds.extend( np.where((gt_classes == k) & ( max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1])) [0]) for i in xrange(self.num_classes): self._num_boxes_all[i] += len( np.where(gt_classes == i)[0]) self._num_boxes_covered[i] += len( np.where(gt_classes[fg_inds] == i)[0]) return { 'boxes': boxes, 'gt_classes': gt_classes, 'gt_subclasses': gt_subclasses, 'gt_subclasses_flipped': gt_subclasses_flipped, 'gt_overlaps': overlaps, 'gt_subindexes': subindexes, 'gt_subindexes_flipped': subindexes_flipped, 'flipped': False }
def _load_kitti_voxel_exemplar_annotation(self, index): """ Load image and bounding boxes info from txt file in the KITTI voxel exemplar format. """ if self._image_set == 'training' and self._seq_name != 'trainval': prefix = 'train' elif self._image_set == 'training': prefix = 'trainval' else: prefix = '' if prefix == '': lines = [] lines_flipped = [] else: filename = os.path.join(self._kitti_tracking_path, cfg.SUBCLS_NAME, prefix, index + '.txt') if os.path.exists(filename): print filename # the annotation file contains flipped objects lines = [] lines_flipped = [] with open(filename) as f: for line in f: words = line.split() subcls = int(words[1]) is_flip = int(words[2]) if subcls != -1: if is_flip == 0: lines.append(line) else: lines_flipped.append(line) else: lines = [] lines_flipped = [] num_objs = len(lines) # store information of flipped objects assert (num_objs == len(lines_flipped)), 'The number of flipped objects is not the same!' gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32) for ix, line in enumerate(lines_flipped): words = line.split() subcls = int(words[1]) gt_subclasses_flipped[ix] = subcls boxes = np.zeros((num_objs, 4), dtype=np.float32) gt_classes = np.zeros((num_objs), dtype=np.int32) gt_subclasses = np.zeros((num_objs), dtype=np.int32) overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32) subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32) for ix, line in enumerate(lines): words = line.split() cls = self._class_to_ind[words[0]] subcls = int(words[1]) boxes[ix, :] = [float(n) for n in words[3:7]] gt_classes[ix] = cls gt_subclasses[ix] = subcls overlaps[ix, cls] = 1.0 subindexes[ix, cls] = subcls subindexes_flipped[ix, cls] = gt_subclasses_flipped[ix] overlaps = scipy.sparse.csr_matrix(overlaps) subindexes = scipy.sparse.csr_matrix(subindexes) subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped) if cfg.IS_RPN: if cfg.IS_MULTISCALE: # compute overlaps between grid boxes and gt boxes in multi-scales # rescale the gt boxes boxes_all = np.zeros((0, 4), dtype=np.float32) for scale in cfg.TRAIN.SCALES: boxes_all = np.vstack((boxes_all, boxes * scale)) gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES)) # compute grid boxes s = PIL.Image.open(self.image_path_from_index(index)).size image_height = s[1] image_width = s[0] boxes_grid, _, _ = get_boxes_grid(image_height, image_width) # compute overlap overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float), boxes_all.astype(np.float)) # check how many gt boxes are covered by grids if num_objs != 0: index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES)) max_overlaps = overlaps_grid.max(axis = 0) fg_inds = [] for k in xrange(1, self.num_classes): fg_inds.extend(np.where((gt_classes_all == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0]) index_covered = np.unique(index[fg_inds]) for i in xrange(self.num_classes): self._num_boxes_all[i] += len(np.where(gt_classes == i)[0]) self._num_boxes_covered[i] += len(np.where(gt_classes[index_covered] == i)[0]) else: assert len(cfg.TRAIN.SCALES_BASE) == 1 scale = cfg.TRAIN.SCALES_BASE[0] feat_stride = 16 # faster rcnn region proposal base_size = 16 ratios = [3.0, 2.0, 1.5, 1.0, 0.75, 0.5, 0.25] scales = 2**np.arange(1, 6, 0.5) anchors = generate_anchors(base_size, ratios, scales) num_anchors = anchors.shape[0] # image size s = PIL.Image.open(self.image_path_from_index(index)).size image_height = s[1] image_width = s[0] # height and width of the heatmap height = np.round((image_height * scale - 1) / 4.0 + 1) height = np.floor((height - 1) / 2 + 1 + 0.5) height = np.floor((height - 1) / 2 + 1 + 0.5) width = np.round((image_width * scale - 1) / 4.0 + 1) width = np.floor((width - 1) / 2.0 + 1 + 0.5) width = np.floor((width - 1) / 2.0 + 1 + 0.5) # gt boxes gt_boxes = boxes * scale # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * feat_stride shift_y = np.arange(0, height) * feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = num_anchors K = shifts.shape[0] all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) all_anchors = all_anchors.reshape((K * A, 4)) # compute overlap overlaps_grid = bbox_overlaps(all_anchors.astype(np.float), gt_boxes.astype(np.float)) # check how many gt boxes are covered by anchors if num_objs != 0: max_overlaps = overlaps_grid.max(axis = 0) fg_inds = [] for k in xrange(1, self.num_classes): fg_inds.extend(np.where((gt_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0]) for i in xrange(self.num_classes): self._num_boxes_all[i] += len(np.where(gt_classes == i)[0]) self._num_boxes_covered[i] += len(np.where(gt_classes[fg_inds] == i)[0]) return {'boxes' : boxes, 'gt_classes': gt_classes, 'gt_subclasses': gt_subclasses, 'gt_subclasses_flipped': gt_subclasses_flipped, 'gt_overlaps': overlaps, 'gt_subindexes': subindexes, 'gt_subindexes_flipped': subindexes_flipped, 'flipped' : False}
def _load_pascal_annotation(self, index): """ Load image and bounding boxes info from XML file in the PASCAL VOC format. """ filename = os.path.join(self._data_path, 'Annotations', index + '.xml') # print 'Loading: {}'.format(filename) def get_data_from_tag(node, tag): return node.getElementsByTagName(tag)[0].childNodes[0].data with open(filename) as f: data = minidom.parseString(f.read()) objs = data.getElementsByTagName('object') num_objs = len(objs) boxes = np.zeros((num_objs, 4), dtype=np.uint16) gt_classes = np.zeros((num_objs), dtype=np.int32) overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) # Load object bounding boxes into a data frame. for ix, obj in enumerate(objs): # Make pixel indexes 0-based x1 = float(get_data_from_tag(obj, 'xmin')) - 1 y1 = float(get_data_from_tag(obj, 'ymin')) - 1 x2 = float(get_data_from_tag(obj, 'xmax')) - 1 y2 = float(get_data_from_tag(obj, 'ymax')) - 1 name = str(get_data_from_tag(obj, "name")).lower().strip() if name in self._classes: cls = self._class_to_ind[name] else: cls = 0 boxes[ix, :] = [x1, y1, x2, y2] gt_classes[ix] = cls overlaps[ix, cls] = 1.0 overlaps = scipy.sparse.csr_matrix(overlaps) gt_subclasses = np.zeros((num_objs), dtype=np.int32) gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32) subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32) subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32) subindexes = scipy.sparse.csr_matrix(subindexes) subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped) if cfg.IS_RPN: if cfg.IS_MULTISCALE: # compute overlaps between grid boxes and gt boxes in multi-scales # rescale the gt boxes boxes_all = np.zeros((0, 4), dtype=np.float32) for scale in cfg.TRAIN.SCALES: boxes_all = np.vstack((boxes_all, boxes * scale)) gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES)) # compute grid boxes s = PIL.Image.open(self.image_path_from_index(index)).size image_height = s[1] image_width = s[0] boxes_grid, _, _ = get_boxes_grid(image_height, image_width) # compute overlap overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float), boxes_all.astype(np.float)) # check how many gt boxes are covered by grids if num_objs != 0: index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES)) max_overlaps = overlaps_grid.max(axis = 0) fg_inds = [] for k in xrange(1, self.num_classes): fg_inds.extend(np.where((gt_classes_all == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0]) index_covered = np.unique(index[fg_inds]) for i in xrange(self.num_classes): self._num_boxes_all[i] += len(np.where(gt_classes == i)[0]) self._num_boxes_covered[i] += len(np.where(gt_classes[index_covered] == i)[0]) else: assert len(cfg.TRAIN.SCALES_BASE) == 1 scale = cfg.TRAIN.SCALES_BASE[0] feat_stride = 16 # faster rcnn region proposal anchors = generate_anchors() num_anchors = anchors.shape[0] # image size s = PIL.Image.open(self.image_path_from_index(index)).size image_height = s[1] image_width = s[0] # height and width of the heatmap height = np.round((image_height * scale - 1) / 4.0 + 1) height = np.floor((height - 1) / 2 + 1 + 0.5) height = np.floor((height - 1) / 2 + 1 + 0.5) width = np.round((image_width * scale - 1) / 4.0 + 1) width = np.floor((width - 1) / 2.0 + 1 + 0.5) width = np.floor((width - 1) / 2.0 + 1 + 0.5) # gt boxes gt_boxes = boxes * scale # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * feat_stride shift_y = np.arange(0, height) * feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = num_anchors K = shifts.shape[0] all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) all_anchors = all_anchors.reshape((K * A, 4)) # compute overlap overlaps_grid = bbox_overlaps(all_anchors.astype(np.float), gt_boxes.astype(np.float)) # check how many gt boxes are covered by anchors if num_objs != 0: max_overlaps = overlaps_grid.max(axis = 0) fg_inds = [] for k in xrange(1, self.num_classes): fg_inds.extend(np.where((gt_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0]) for i in xrange(self.num_classes): self._num_boxes_all[i] += len(np.where(gt_classes == i)[0]) self._num_boxes_covered[i] += len(np.where(gt_classes[fg_inds] == i)[0]) return {'boxes' : boxes, 'gt_classes': gt_classes, 'gt_subclasses': gt_subclasses, 'gt_subclasses_flipped': gt_subclasses_flipped, 'gt_overlaps' : overlaps, 'gt_subindexes': subindexes, 'gt_subindexes_flipped': subindexes_flipped, 'flipped' : False}
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, _feat_stride=[ 16, ], anchor_scales=[8, 16, 32]): # Algorithm: # # for each (H, W) location i # generate A anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the A anchors # clip predicted boxes to image # remove predicted boxes with either height or width < threshold # sort all (proposal, score) pairs by score from highest to lowest # take top pre_nms_topN proposals before NMS # apply NMS with threshold 0.7 to remaining proposals # take after_nms_topN proposals after NMS # return the top proposals (-> RoIs top, scores top) #layer_params = yaml.load(self.param_str_) _anchors = generate_anchors(scales=np.array(anchor_scales)) _num_anchors = _anchors.shape[0] rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape, [0, 3, 1, 2]) rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2]) #rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,[1,rpn_cls_prob_reshape.shape[0],rpn_cls_prob_reshape.shape[1],rpn_cls_prob_reshape.shape[2]]),[0,3,2,1]) #rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,2,1]) im_info = im_info[0] assert rpn_cls_prob_reshape.shape[0] == 1, \ 'Only single item batches are supported' # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST' #cfg_key = 'TEST' cfg_key = cfg_key.decode("utf-8") #convert byte to string pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N nms_thresh = cfg[cfg_key].RPN_NMS_THRESH min_size = cfg[cfg_key].RPN_MIN_SIZE # the first set of _num_anchors channels are bg probs # the second set are the fg probs, which we want scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :] bbox_deltas = rpn_bbox_pred #im_info = bottom[2].data[0, :] if DEBUG: print('im_size: ({}, {})'.format(im_info[0], im_info[1])) print('scale: {}'.format(im_info[2])) # 1. Generate proposals from bbox deltas and shifted anchors height, width = scores.shape[-2:] if DEBUG: print('score map size: {}'.format(scores.shape)) # Enumerate all shifts shift_x = np.arange(0, width) * _feat_stride shift_y = np.arange(0, height) * _feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # Enumerate all shifted anchors: # # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = _num_anchors K = shifts.shape[0] anchors = _anchors.reshape((1, A, 4)) + \ shifts.reshape((1, K, 4)).transpose((1, 0, 2)) anchors = anchors.reshape((K * A, 4)) # Transpose and reshape predicted bbox transformations to get them # into the same order as the anchors: # # bbox deltas will be (1, 4 * A, H, W) format # transpose to (1, H, W, 4 * A) # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) # in slowest to fastest order bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) # Same story for the scores: # # scores are (1, A, H, W) format # transpose to (1, H, W, A) # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) # Convert anchors into proposals via bbox transformations proposals = bbox_transform_inv(anchors, bbox_deltas) # 2. clip predicted boxes to image proposals = clip_boxes(proposals, im_info[:2]) # 3. remove predicted boxes with either height or width < threshold # (NOTE: convert min_size to input image scale stored in im_info[2]) keep = _filter_boxes(proposals, min_size * im_info[2]) proposals = proposals[keep, :] scores = scores[keep] # 4. sort all (proposal, score) pairs by score from highest to lowest # 5. take top pre_nms_topN (e.g. 6000) order = scores.ravel().argsort()[::-1] if pre_nms_topN > 0: order = order[:pre_nms_topN] proposals = proposals[order, :] scores = scores[order] # 6. apply nms (e.g. threshold = 0.7) # 7. take after_nms_topN (e.g. 300) # 8. return the top proposals (-> RoIs top) keep = nms(np.hstack((proposals, scores)), nms_thresh) if post_nms_topN > 0: keep = keep[:post_nms_topN] proposals = proposals[keep, :] scores = scores[keep] # Output rois blob # Our RPN implementation only supports a single input image, so all # batch inds are 0 batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) return blob
def _load_imagenet3d_annotation(self, index): """ Load image and bounding boxes info from txt file in the imagenet3d format. """ if self._image_set == 'test' or self._image_set == 'test_1' or self._image_set == 'test_2': lines = [] else: filename = os.path.join(self._imagenet3d_path, 'Labels', index + '.txt') lines = [] with open(filename) as f: for line in f: lines.append(line) num_objs = len(lines) boxes = np.zeros((num_objs, 4), dtype=np.float32) viewpoints = np.zeros( (num_objs, 3), dtype=np.float32) # azimuth, elevation, in-plane rotation viewpoints_flipped = np.zeros( (num_objs, 3), dtype=np.float32) # azimuth, elevation, in-plane rotation gt_classes = np.zeros((num_objs), dtype=np.int32) overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) for ix, line in enumerate(lines): words = line.split() assert len(words) == 5 or len( words) == 8, 'Wrong label format: {}'.format(index) cls = self._class_to_ind[words[0]] boxes[ix, :] = [float(n) for n in words[1:5]] gt_classes[ix] = cls overlaps[ix, cls] = 1.0 if len(words) == 8: viewpoints[ix, :] = [float(n) for n in words[5:8]] # flip the viewpoint viewpoints_flipped[ix, 0] = -viewpoints[ix, 0] # azimuth viewpoints_flipped[ix, 1] = viewpoints[ix, 1] # elevation viewpoints_flipped[ix, 2] = -viewpoints[ix, 2] # in-plane rotation else: viewpoints[ix, :] = np.inf viewpoints_flipped[ix, :] = np.inf gt_subclasses = np.zeros((num_objs), dtype=np.int32) gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32) subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32) subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32) viewindexes_azimuth = np.zeros((num_objs, self.num_classes), dtype=np.float32) viewindexes_azimuth_flipped = np.zeros((num_objs, self.num_classes), dtype=np.float32) viewindexes_elevation = np.zeros((num_objs, self.num_classes), dtype=np.float32) viewindexes_elevation_flipped = np.zeros((num_objs, self.num_classes), dtype=np.float32) viewindexes_rotation = np.zeros((num_objs, self.num_classes), dtype=np.float32) viewindexes_rotation_flipped = np.zeros((num_objs, self.num_classes), dtype=np.float32) overlaps = scipy.sparse.csr_matrix(overlaps) subindexes = scipy.sparse.csr_matrix(subindexes) subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped) viewindexes_azimuth = scipy.sparse.csr_matrix(viewindexes_azimuth) viewindexes_azimuth_flipped = scipy.sparse.csr_matrix( viewindexes_azimuth_flipped) viewindexes_elevation = scipy.sparse.csr_matrix(viewindexes_elevation) viewindexes_elevation_flipped = scipy.sparse.csr_matrix( viewindexes_elevation_flipped) viewindexes_rotation = scipy.sparse.csr_matrix(viewindexes_rotation) viewindexes_rotation_flipped = scipy.sparse.csr_matrix( viewindexes_rotation_flipped) if cfg.IS_RPN: if cfg.IS_MULTISCALE: # compute overlaps between grid boxes and gt boxes in multi-scales # rescale the gt boxes boxes_all = np.zeros((0, 4), dtype=np.float32) for scale in cfg.TRAIN.SCALES: boxes_all = np.vstack((boxes_all, boxes * scale)) gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES)) # compute grid boxes s = PIL.Image.open(self.image_path_from_index(index)).size image_height = s[1] image_width = s[0] boxes_grid, _, _ = get_boxes_grid(image_height, image_width) # compute overlap overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float), boxes_all.astype(np.float)) # check how many gt boxes are covered by grids if num_objs != 0: index = np.tile(list(range(num_objs)), len(cfg.TRAIN.SCALES)) max_overlaps = overlaps_grid.max(axis=0) fg_inds = [] for k in range(1, self.num_classes): fg_inds.extend( np.where((gt_classes_all == k) & ( max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1])) [0]) index_covered = np.unique(index[fg_inds]) for i in range(self.num_classes): self._num_boxes_all[i] += len( np.where(gt_classes == i)[0]) self._num_boxes_covered[i] += len( np.where(gt_classes[index_covered] == i)[0]) else: assert len(cfg.TRAIN.SCALES_BASE) == 1 scale = cfg.TRAIN.SCALES_BASE[0] feat_stride = 16 # faster rcnn region proposal base_size = 16 ratios = cfg.TRAIN.RPN_ASPECTS scales = cfg.TRAIN.RPN_SCALES anchors = generate_anchors(base_size, ratios, scales) num_anchors = anchors.shape[0] # image size s = PIL.Image.open(self.image_path_from_index(index)).size image_height = s[1] image_width = s[0] # height and width of the heatmap height = np.round((image_height * scale - 1) / 4.0 + 1) height = np.floor((height - 1) / 2 + 1 + 0.5) height = np.floor((height - 1) / 2 + 1 + 0.5) width = np.round((image_width * scale - 1) / 4.0 + 1) width = np.floor((width - 1) / 2.0 + 1 + 0.5) width = np.floor((width - 1) / 2.0 + 1 + 0.5) # gt boxes gt_boxes = boxes * scale # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * feat_stride shift_y = np.arange(0, height) * feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack( (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = num_anchors K = shifts.shape[0] all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape( (1, K, 4)).transpose((1, 0, 2))) all_anchors = all_anchors.reshape((K * A, 4)) # compute overlap overlaps_grid = bbox_overlaps(all_anchors.astype(np.float), gt_boxes.astype(np.float)) # check how many gt boxes are covered by anchors if num_objs != 0: max_overlaps = overlaps_grid.max(axis=0) fg_inds = [] for k in range(1, self.num_classes): fg_inds.extend( np.where((gt_classes == k) & ( max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1])) [0]) for i in range(self.num_classes): self._num_boxes_all[i] += len( np.where(gt_classes == i)[0]) self._num_boxes_covered[i] += len( np.where(gt_classes[fg_inds] == i)[0]) return { 'boxes': boxes, 'gt_classes': gt_classes, 'gt_viewpoints': viewpoints, 'gt_viewpoints_flipped': viewpoints_flipped, 'gt_viewindexes_azimuth': viewindexes_azimuth, 'gt_viewindexes_azimuth_flipped': viewindexes_azimuth_flipped, 'gt_viewindexes_elevation': viewindexes_elevation, 'gt_viewindexes_elevation_flipped': viewindexes_elevation_flipped, 'gt_viewindexes_rotation': viewindexes_rotation, 'gt_viewindexes_rotation_flipped': viewindexes_rotation_flipped, 'gt_subclasses': gt_subclasses, 'gt_subclasses_flipped': gt_subclasses_flipped, 'gt_overlaps': overlaps, 'gt_subindexes': subindexes, 'gt_subindexes_flipped': subindexes_flipped, 'flipped': False }
def _load_kitti_annotation(self, index): """ Load image and bounding boxes info from txt file in the KITTI format. """ if self._image_set == 'test': lines = [] else: filename = os.path.join(self._data_path, 'training', 'label_2', index + '.txt') lines = [] with open(filename) as f: for line in f: line = line.replace('Van', 'Car') words = line.split() cls = words[0] truncation = float(words[1]) occlusion = int(words[2]) height = float(words[7]) - float(words[5]) if cls in self._class_to_ind and truncation < 0.5 and occlusion < 3 and height > 25: lines.append(line) num_objs = len(lines) boxes = np.zeros((num_objs, 4), dtype=np.float32) gt_classes = np.zeros((num_objs), dtype=np.int32) overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) for ix, line in enumerate(lines): words = line.split() cls = self._class_to_ind[words[0]] boxes[ix, :] = [float(n) for n in words[4:8]] gt_classes[ix] = cls overlaps[ix, cls] = 1.0 overlaps = scipy.sparse.csr_matrix(overlaps) gt_subclasses = np.zeros((num_objs), dtype=np.int32) gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32) subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32) subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32) subindexes = scipy.sparse.csr_matrix(subindexes) subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped) if cfg.IS_RPN: if cfg.IS_MULTISCALE: # compute overlaps between grid boxes and gt boxes in multi-scales # rescale the gt boxes boxes_all = np.zeros((0, 4), dtype=np.float32) for scale in cfg.TRAIN.SCALES: boxes_all = np.vstack((boxes_all, boxes * scale)) gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES)) # compute grid boxes s = PIL.Image.open(self.image_path_from_index(index)).size image_height = s[1] image_width = s[0] boxes_grid, _, _ = get_boxes_grid(image_height, image_width) # compute overlap overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float), boxes_all.astype(np.float)) # check how many gt boxes are covered by grids if num_objs != 0: index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES)) max_overlaps = overlaps_grid.max(axis = 0) fg_inds = [] for k in xrange(1, self.num_classes): fg_inds.extend(np.where((gt_classes_all == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0]) index_covered = np.unique(index[fg_inds]) for i in xrange(self.num_classes): self._num_boxes_all[i] += len(np.where(gt_classes == i)[0]) self._num_boxes_covered[i] += len(np.where(gt_classes[index_covered] == i)[0]) else: assert len(cfg.TRAIN.SCALES_BASE) == 1 scale = cfg.TRAIN.SCALES_BASE[0] feat_stride = 16 # faster rcnn region proposal anchors = generate_anchors() num_anchors = anchors.shape[0] # image size s = PIL.Image.open(self.image_path_from_index(index)).size image_height = s[1] image_width = s[0] # height and width of the heatmap height = np.round((image_height * scale - 1) / 4.0 + 1) height = np.floor((height - 1) / 2 + 1 + 0.5) height = np.floor((height - 1) / 2 + 1 + 0.5) width = np.round((image_width * scale - 1) / 4.0 + 1) width = np.floor((width - 1) / 2.0 + 1 + 0.5) width = np.floor((width - 1) / 2.0 + 1 + 0.5) # gt boxes gt_boxes = boxes * scale # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * feat_stride shift_y = np.arange(0, height) * feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = num_anchors K = shifts.shape[0] all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) all_anchors = all_anchors.reshape((K * A, 4)) # compute overlap overlaps_grid = bbox_overlaps(all_anchors.astype(np.float), gt_boxes.astype(np.float)) # check how many gt boxes are covered by anchors if num_objs != 0: max_overlaps = overlaps_grid.max(axis = 0) fg_inds = [] for k in xrange(1, self.num_classes): fg_inds.extend(np.where((gt_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0]) for i in xrange(self.num_classes): self._num_boxes_all[i] += len(np.where(gt_classes == i)[0]) self._num_boxes_covered[i] += len(np.where(gt_classes[fg_inds] == i)[0]) return {'boxes' : boxes, 'gt_classes': gt_classes, 'gt_subclasses': gt_subclasses, 'gt_subclasses_flipped': gt_subclasses_flipped, 'gt_overlaps' : overlaps, 'gt_subindexes': subindexes, 'gt_subindexes_flipped': subindexes_flipped, 'flipped' : False}
def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride=[ 16, ], anchor_scales=[ 16, ]): """ Assign anchors to ground-truth targets. Produces anchor classification labels and bounding-box regression targets. Parameters ---------- rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class] gt_ishard: (G, 1), 1 or 0 indicates difficult or not dontcare_areas: (D, 4), some areas may contains small objs but no labelling. D may be 0 im_info: a list of [image_height, image_width, scale_ratios] _feat_stride: the downsampling ratio of feature map to the original input image anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16]) ---------- Returns ---------- rpn_labels : (HxWxA, 1), for each anchor, 0 denotes bg, 1 fg, -1 dontcare rpn_bbox_targets: (HxWxA, 4), distances of the anchors to the gt_boxes(may contains some transform) that are the regression objectives rpn_bbox_inside_weights: (HxWxA, 4) weights of each boxes, mainly accepts hyper param in cfg rpn_bbox_outside_weights: (HxWxA, 4) used to balance the fg/bg, beacuse the numbers of bgs and fgs mays significiantly different """ _anchors = generate_anchors( scales=np.array(anchor_scales)) #生成基本的anchor,一共9个 _num_anchors = _anchors.shape[0] #9个anchor if DEBUG: print('anchors:') print(_anchors) print('anchor shapes:') print( np.hstack(( _anchors[:, 2::4] - _anchors[:, 0::4], _anchors[:, 3::4] - _anchors[:, 1::4], ))) _counts = cfg.EPS _sums = np.zeros((1, 4)) _squared_sums = np.zeros((1, 4)) _fg_sum = 0 _bg_sum = 0 _count = 0 # allow boxes to sit over the edge by a small amount _allowed_border = 0 # map of shape (..., H, W) #height, width = rpn_cls_score.shape[1:3] im_info = im_info[0] #图像的高宽及通道数 #在feature-map上定位anchor,并加上delta,得到在实际图像中anchor的真实坐标 # Algorithm: # for each (H, W) location i # generate 9 anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the 9 anchors # filter out-of-image anchors # measure GT overlap assert rpn_cls_score.shape[0] == 1, \ 'Only single item batches are supported' # map of shape (..., H, W) height, width = rpn_cls_score.shape[1:3] #feature-map的高宽 if DEBUG: print('AnchorTargetLayer: height', height, 'width', width) print('') print('im_size: ({}, {})'.format(im_info[0], im_info[1])) print('scale: {}'.format(im_info[2])) print('height, width: ({}, {})'.format(height, width)) print('rpn: gt_boxes.shape', gt_boxes.shape) print('rpn: gt_boxes', gt_boxes) # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * _feat_stride shift_y = np.arange(0, height) * _feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) # in W H order # K is H x W shifts = np.vstack( (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() #生成feature-map和真实image上anchor之间的偏移量 # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = _num_anchors #9个anchor K = shifts.shape[0] #50*37,feature-map的宽乘高的大小 all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape( (1, K, 4)).transpose((1, 0, 2))) #相当于复制宽高的维度,然后相加 all_anchors = all_anchors.reshape((K * A, 4)) total_anchors = int(K * A) # only keep anchors inside the image #仅保留那些还在图像内部的anchor,超出图像的都删掉 inds_inside = np.where( (all_anchors[:, 0] >= -_allowed_border) & (all_anchors[:, 1] >= -_allowed_border) & (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width (all_anchors[:, 3] < im_info[0] + _allowed_border) # height )[0] if DEBUG: print('total_anchors', total_anchors) print('inds_inside', len(inds_inside)) # keep only inside anchors anchors = all_anchors[inds_inside, :] #保留那些在图像内的anchor if DEBUG: print('anchors.shape', anchors.shape) #至此,anchor准备好了 #-------------------------------------------------------------- # label: 1 is positive, 0 is negative, -1 is dont care # (A) labels = np.empty((len(inds_inside), ), dtype=np.float32) labels.fill(-1) #初始化label,均为-1 # overlaps between the anchors and the gt boxes # overlaps (ex, gt), shape is A x G #计算anchor和gt-box的overlap,用来给anchor上标签 overlaps = bbox_overlaps(np.ascontiguousarray( anchors, dtype=np.float), np.ascontiguousarray( gt_boxes, dtype=np.float)) #假设anchors有x个,gt_boxes有y个,返回的是一个(x,y)的数组 # 存放每一个anchor和每一个gtbox之间的overlap argmax_overlaps = overlaps.argmax( axis=1) # (A)#找到和每一个gtbox,overlap最大的那个anchor max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] gt_argmax_overlaps = overlaps.argmax( axis=0) # G#找到每个位置上9个anchor中与gtbox,overlap最大的那个 gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 #先给背景上标签,小于0.3overlap的 # fg label: for each gt, anchor with highest overlap labels[gt_argmax_overlaps] = 1 #每个位置上的9个anchor中overlap最大的认为是前景 # fg label: above threshold IOU labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 #overlap大于0.7的认为是前景 if cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # preclude dontcare areas if dontcare_areas is not None and dontcare_areas.shape[ 0] > 0: #这里我们暂时不考虑有doncare_area的存在 # intersec shape is D x A intersecs = bbox_intersections( np.ascontiguousarray(dontcare_areas, dtype=np.float), # D x 4 np.ascontiguousarray(anchors, dtype=np.float) # A x 4 ) intersecs_ = intersecs.sum(axis=0) # A x 1 labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1 #这里我们暂时不考虑难样本的问题 # preclude hard samples that are highly occlusioned, truncated or difficult to see if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[ 0] > 0: assert gt_ishard.shape[0] == gt_boxes.shape[0] gt_ishard = gt_ishard.astype(int) gt_hardboxes = gt_boxes[gt_ishard == 1, :] if gt_hardboxes.shape[0] > 0: # H x A hard_overlaps = bbox_overlaps( np.ascontiguousarray(gt_hardboxes, dtype=np.float), # H x 4 np.ascontiguousarray(anchors, dtype=np.float)) # A x 4 hard_max_overlaps = hard_overlaps.max(axis=0) # (A) labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1 max_intersec_label_inds = hard_overlaps.argmax(axis=1) # H x 1 labels[max_intersec_label_inds] = -1 # # subsample positive labels if we have too many #对正样本进行采样,如果正样本的数量太多的话 # 限制正样本的数量不超过128个 #TODO 这个后期可能还需要修改,毕竟如果使用的是字符的片段,那个正样本的数量是很多的。 num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) #随机去除掉一些正样本 labels[disable_inds] = -1 #变为-1 # subsample negative labels if we have too many #对负样本进行采样,如果负样本的数量太多的话 # 正负样本总数是256,限制正样本数目最多128, # 如果正样本数量小于128,差的那些就用负样本补上,凑齐256个样本 num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 #print "was %s inds, disabling %s, now %s inds" % ( #len(bg_inds), len(disable_inds), np.sum(labels == 0)) # 至此, 上好标签,开始计算rpn-box的真值 #-------------------------------------------------------------- bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_targets = _compute_targets( anchors, gt_boxes[argmax_overlaps, :]) #根据anchor和gtbox计算得真值(anchor和gtbox之间的偏差) bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_inside_weights[labels == 1, :] = np.array( cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) #内部权重,前景就给1,其他是0 bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: #暂时使用uniform 权重,也就是正样本是1,负样本是0 # uniform weighting of examples (given non-uniform sampling) num_examples = np.sum(labels >= 0) + 1 # positive_weights = np.ones((1, 4)) * 1.0 / num_examples # negative_weights = np.ones((1, 4)) * 1.0 / num_examples positive_weights = np.ones((1, 4)) negative_weights = np.zeros((1, 4)) else: assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / (np.sum(labels == 1)) + 1) negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / (np.sum(labels == 0)) + 1) bbox_outside_weights[labels == 1, :] = positive_weights #外部权重,前景是1,背景是0 bbox_outside_weights[labels == 0, :] = negative_weights if DEBUG: _sums += bbox_targets[labels == 1, :].sum(axis=0) _squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0) _counts += np.sum(labels == 1) means = _sums / _counts stds = np.sqrt(_squared_sums / _counts - means**2) print('means:') print(means) print('stdevs:') print(stds) # map up to original set of anchors # 一开始是将超出图像范围的anchor直接丢掉的,现在在加回来 labels = _unmap(labels, total_anchors, inds_inside, fill=-1) #这些anchor的label是-1,也即dontcare bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) #这些anchor的真值是0,也即没有值 bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) #内部权重以0填充 bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) #外部权重以0填充 if DEBUG: print('rpn: max max_overlap', np.max(max_overlaps)) print('rpn: num_positive', np.sum(labels == 1)) print('rpn: num_negative', np.sum(labels == 0)) _fg_sum += np.sum(labels == 1) _bg_sum += np.sum(labels == 0) _count += 1 print('rpn: num_positive avg', _fg_sum / _count) print('rpn: num_negative avg', _bg_sum / _count) # labels labels = labels.reshape((1, height, width, A)) #reshap一下label rpn_labels = labels # bbox_targets bbox_targets = bbox_targets \ .reshape((1, height, width, A * 4))#reshape rpn_bbox_targets = bbox_targets # bbox_inside_weights bbox_inside_weights = bbox_inside_weights \ .reshape((1, height, width, A * 4)) rpn_bbox_inside_weights = bbox_inside_weights # bbox_outside_weights bbox_outside_weights = bbox_outside_weights \ .reshape((1, height, width, A * 4)) rpn_bbox_outside_weights = bbox_outside_weights return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def _load_shapenet_annotation(self, index): """ Load class name and meta data """ # image path image_path = self.image_path_from_index(index) # depth path depth_path = self.depth_path_from_index(index) # metadata path metadata_path = self.metadata_path_from_index(index) # the first 8 digits in index synset = index[:8] cls = g_shape_synset_name_mapping[synset] gt_class = self._class_to_ind[cls] # bounding boxes num_objs = 1 boxes = np.zeros((num_objs, 4), dtype=np.float32) gt_classes = np.zeros((num_objs), dtype=np.int32) # read depth image im = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED) index_depth = np.where(im > 0) x1 = np.min(index_depth[1]) y1 = np.min(index_depth[0]) x2 = np.max(index_depth[1]) y2 = np.max(index_depth[0]) boxes[0, 0] = x1 boxes[0, 1] = y1 boxes[0, 2] = x2 boxes[0, 3] = y2 gt_classes[0] = gt_class import numpy.random as npr random_scale_ind = npr.randint(0, high=len(cfg.TRAIN.SCALES_BASE)) scale = cfg.TRAIN.SCALES_BASE[random_scale_ind] feat_stride = 16 # faster rcnn region proposal anchors = generate_anchors(cfg.TRAIN.RPN_BASE_SIZE, cfg.TRAIN.RPN_ASPECTS, cfg.TRAIN.RPN_SCALES) num_anchors = anchors.shape[0] # image size s = PIL.Image.open(self.image_path_from_index(index)).size image_height = s[1] image_width = s[0] # height and width of the heatmap height = np.floor((image_height * scale - 1) / 4.0 + 1) height = np.floor((height - 1) / 2.0 + 1 + 0.5) height = np.floor((height - 1) / 2.0 + 1 + 0.5) height = int(height * 1) width = np.floor((image_width * scale - 1) / 4.0 + 1) width = np.floor((width - 1) / 2.0 + 1 + 0.5) width = np.floor((width - 1) / 2.0 + 1 + 0.5) width = int(width * 1) # gt boxes gt_boxes = boxes * scale # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * feat_stride shift_y = np.arange(0, height) * feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = num_anchors K = shifts.shape[0] all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) all_anchors = all_anchors.reshape((K * A, 4)) # compute overlap overlaps_grid = bbox_overlaps(all_anchors.astype(np.float), gt_boxes.astype(np.float)) # check how many gt boxes are covered by anchors if num_objs != 0: max_overlaps = overlaps_grid.max(axis = 0) fg_inds = [] for k in xrange(1, self.num_classes): fg_inds.extend(np.where((gt_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0]) for i in xrange(self.num_classes): self._num_boxes_all[i] += len(np.where(gt_classes == i)[0]) self._num_boxes_covered[i] += len(np.where(gt_classes[fg_inds] == i)[0]) return {'image': image_path, 'depth': depth_path, 'meta_data': metadata_path, 'boxes': boxes, 'gt_classes': gt_classes, 'flipped': False}
def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride=[ 16, ], anchor_scales=[4, 8, 16, 32]): """ Assign anchors to ground-truth targets. Produces anchor classification labels and bounding-box regression targets. Parameters ---------- rpn_cls_score: for pytorch (1, Ax2, H, W) bg/fg scores of previous conv layer gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class] gt_ishard: (G, 1), 1 or 0 indicates difficult or not dontcare_areas: (D, 4), some areas may contains small objs but no labelling. D may be 0 im_info: a list of [image_height, image_width, scale_ratios] _feat_stride: the downsampling ratio of feature map to the original input image anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16]) ---------- Returns ---------- rpn_labels : (HxWxA, 1), for each anchor, 0 denotes bg, 1 fg, -1 dontcare rpn_bbox_targets: (HxWxA, 4), distances of the anchors to the gt_boxes(may contains some transform) that are the regression objectives rpn_bbox_inside_weights: (HxWxA, 4) weights of each boxes, mainly accepts hyper param in cfg rpn_bbox_outside_weights: (HxWxA, 4) used to balance the fg/bg, beacuse the numbers of bgs and fgs mays significiantly different """ _anchors = generate_anchors(scales=np.array(anchor_scales)) _num_anchors = _anchors.shape[0] if DEBUG: print('anchors:') print(_anchors) print('anchor shapes:') print( np.hstack(( _anchors[:, 2::4] - _anchors[:, 0::4], _anchors[:, 3::4] - _anchors[:, 1::4], ))) _counts = cfg.EPS _sums = np.zeros((1, 4)) _squared_sums = np.zeros((1, 4)) _fg_sum = 0 _bg_sum = 0 _count = 0 # allow boxes to sit over the edge by a small amount _allowed_border = 0 # map of shape (..., H, W) # height, width = rpn_cls_score.shape[1:3] im_info = im_info[0] # Algorithm: # # for each (H, W) location i # generate 9 anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the 9 anchors # filter out-of-image anchors # measure GT overlap assert rpn_cls_score.shape[0] == 1, \ 'Only single item batches are supported' # map of shape (..., H, W) # pytorch (bs, c, h, w) height, width = rpn_cls_score.shape[2:4] if DEBUG: print('AnchorTargetLayer: height', height, 'width', width) print('') print('im_size: ({}, {})'.format(im_info[0], im_info[1])) print('scale: {}'.format(im_info[2])) print('height, width: ({}, {})'.format(height, width)) print('rpn: gt_boxes.shape', gt_boxes.shape) print('rpn: gt_boxes', gt_boxes) # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * _feat_stride shift_y = np.arange(0, height) * _feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) # in W H order # K is H x W shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = _num_anchors K = shifts.shape[0] all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape( (1, K, 4)).transpose((1, 0, 2))) all_anchors = all_anchors.reshape((K * A, 4)) total_anchors = int(K * A) # only keep anchors inside the image inds_inside = np.where( (all_anchors[:, 0] >= -_allowed_border) & (all_anchors[:, 1] >= -_allowed_border) & (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width (all_anchors[:, 3] < im_info[0] + _allowed_border) # height )[0] if DEBUG: print('total_anchors', total_anchors) print('inds_inside', len(inds_inside)) # keep only inside anchors anchors = all_anchors[inds_inside, :] if DEBUG: print('anchors.shape', anchors.shape) # label: 1 is positive, 0 is negative, -1 is dont care # (A) labels = np.empty((len(inds_inside), ), dtype=np.float32) labels.fill(-1) # overlaps between the anchors and the gt boxes # overlaps (ex, gt), shape is A x G overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float)) argmax_overlaps = overlaps.argmax(axis=1) # (A) max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] gt_argmax_overlaps = overlaps.argmax(axis=0) # G gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # fg label: for each gt, anchor with highest overlap labels[gt_argmax_overlaps] = 1 # fg label: above threshold IOU labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 if cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # preclude dontcare areas if dontcare_areas is not None and dontcare_areas.shape[0] > 0: # intersec shape is D x A intersecs = bbox_intersections( np.ascontiguousarray(dontcare_areas, dtype=np.float), # D x 4 np.ascontiguousarray(anchors, dtype=np.float) # A x 4 ) intersecs_ = intersecs.sum(axis=0) # A x 1 labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1 # preclude hard samples that are highly occlusioned, truncated or difficult to see if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[ 0] > 0: assert gt_ishard.shape[0] == gt_boxes.shape[0] gt_ishard = gt_ishard.astype(int) gt_hardboxes = gt_boxes[gt_ishard == 1, :] if gt_hardboxes.shape[0] > 0: # H x A hard_overlaps = bbox_overlaps( np.ascontiguousarray(gt_hardboxes, dtype=np.float), # H x 4 np.ascontiguousarray(anchors, dtype=np.float)) # A x 4 hard_max_overlaps = hard_overlaps.max(axis=0) # (A) labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1 max_intersec_label_inds = hard_overlaps.argmax(axis=1) # H x 1 labels[max_intersec_label_inds] = -1 # # subsample positive labels if we have too many num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) labels[disable_inds] = -1 # subsample negative labels if we have too many num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 # print("was %s inds, disabling %s, now %s inds" % ( # len(bg_inds), len(disable_inds), np.sum(labels == 0)) # bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_inside_weights[labels == 1, :] = np.array( cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: # uniform weighting of examples (given non-uniform sampling) # num_examples = np.sum(labels >= 0) + 1 # positive_weights = np.ones((1, 4)) * 1.0 / num_examples # negative_weights = np.ones((1, 4)) * 1.0 / num_examples positive_weights = np.ones((1, 4)) negative_weights = np.zeros((1, 4)) else: assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / (np.sum(labels == 1)) + 1) negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / (np.sum(labels == 0)) + 1) bbox_outside_weights[labels == 1, :] = positive_weights bbox_outside_weights[labels == 0, :] = negative_weights if DEBUG: _sums += bbox_targets[labels == 1, :].sum(axis=0) _squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0) _counts += np.sum(labels == 1) means = _sums / _counts stds = np.sqrt(_squared_sums / _counts - means**2) print('means:') print(means) print('stdevs:') print(stds) # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) if DEBUG: print('rpn: max max_overlap', np.max(max_overlaps)) print('rpn: num_positive', np.sum(labels == 1)) print('rpn: num_negative', np.sum(labels == 0)) _fg_sum += np.sum(labels == 1) _bg_sum += np.sum(labels == 0) _count += 1 print('rpn: num_positive avg', _fg_sum / _count) print('rpn: num_negative avg', _bg_sum / _count) # labels # pdb.set_trace() labels = labels.reshape((1, height, width, A)) labels = labels.transpose(0, 3, 1, 2) rpn_labels = labels.reshape( (1, 1, A * height, width)).transpose(0, 2, 3, 1) # bbox_targets bbox_targets = bbox_targets \ .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) rpn_bbox_targets = bbox_targets # bbox_inside_weights bbox_inside_weights = bbox_inside_weights \ .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) # assert bbox_inside_weights.shape[2] == height # assert bbox_inside_weights.shape[3] == width rpn_bbox_inside_weights = bbox_inside_weights # bbox_outside_weights bbox_outside_weights = bbox_outside_weights \ .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) # assert bbox_outside_weights.shape[2] == height # assert bbox_outside_weights.shape[3] == width rpn_bbox_outside_weights = bbox_outside_weights return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def anchor_fv_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride=[ 16, ], anchor_scales=[8, 16, 32], DEBUG=False, num_class=2): """ Assign anchors to ground-truth targets. Produces anchor classification labels. NOTE: no box regression targets! """ #t0 = time.time() # Algorithm: # # for each (H, W) location i # generate 9 anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the 9 anchors # filter out-of-image anchors # measure GT overlap _anchors = generate_anchors(base_size=_feat_stride[0], ratios=np.array([1.54]), scales=np.array([4, 3])) # anchors by default are in bird view in bv pixels (lidar bv image coord) _num_anchors = _anchors.shape[0] #print 'time for anchors: ', time.time()-t0 #t0 = time.time() im_info = im_info[0] #print 'arrive!!' assert rpn_cls_score.shape[0] == 1, \ 'Only single item batches are supported' # map of shape (..., H, W) height, width, channels = rpn_cls_score.shape[1:4] num_scores = channels / num_class assert num_scores == _num_anchors, 'number of anchors does not match number of proposal scores' # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * _feat_stride shift_y = np.arange(0, height) * _feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() A = _num_anchors K = shifts.shape[0] all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape( (1, K, 4)).transpose((1, 0, 2))) all_anchors = all_anchors.reshape((K * A, 4)) total_anchors = int(K * A) # only keep anchors inside the image inds_inside = clip_anchors(all_anchors, im_info) #print 'time for clip: ', time.time()-t0 #t0 = time.time() # WZN: only keep gt_boxes inside the image inds_inside_box = np.where( (gt_boxes[:, 0] >= -_allowed_border) & (gt_boxes[:, 1] >= -_allowed_border) & (gt_boxes[:, 2] < im_info[1] + _allowed_border) & # width (gt_boxes[:, 3] < im_info[0] + _allowed_border) & # height (gt_boxes[:, 4] > 0) #only keep object gt_boxes )[0] inds_ignore_box = np.where(gt_boxes[:, 4] == -1)[0] gt_boxes_pos = gt_boxes[inds_inside_box, :] gt_boxes_ignore = gt_boxes[inds_ignore_box, :] if gt_boxes_pos.shape[0] == 0: no_gt = True gt_boxes_pos = np.reshape(np.array([-1000, -1000, -1000, -1000, -1]), [1, -1]) else: no_gt = False if gt_boxes_ignore.shape[0] == 0: no_ignore = True else: no_ignore = False if DEBUG: print('total_anchors: ', total_anchors) print('inds_inside: ', len(inds_inside)) # keep only inside anchors anchors = all_anchors[inds_inside, :] if DEBUG: print('anchors.shape: ', anchors.shape) #print 'time for clip2: ', time.time()-t0 #t0 = time.time() # overlaps between the anchors and the gt boxes # overlaps (ex, gt) overlaps = bbox_overlaps( np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(gt_boxes_pos, dtype=np.float)) overlaps_ignore = bbox_overlaps( np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(gt_boxes_ignore, dtype=np.float)) labels, _, _ = calc_label_and_reward(overlaps, len(inds_inside), cfg, no_gt, use_reward=False, multi_scale=True) #also for ignore labels, we need -1 for them if not (no_ignore): argmax_overlaps_ignore = overlaps_ignore.argmax(axis=1) max_overlaps_ignore = overlaps_ignore[np.arange(len(inds_inside)), argmax_overlaps_ignore] # WZN: disable those that captures the ignore windows labels[max_overlaps_ignore >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1 # print 'labels = 0:, ', np.where(labels == 0) #all_inds = np.where(labels != -1) #labels_new = labels[all_inds] #zeros = np.zeros((labels_new.shape[0], 1), dtype=np.float32) anchors = np.array([0] * 5).astype( np.float32) # np.hstack((zeros, anchors[all_inds])).astype(np.float32) #anchors_3d = np.hstack((zeros, anchors_3d[all_inds])).astype(np.float32) #print 'time for targets: ', time.time()-t0 #t0 = time.time() if DEBUG: #_sums += bbox_targets[labels >= 1, :].sum(axis=0) #_squared_sums += (bbox_targets[labels >= 1, :] ** 2).sum(axis=0) _counts += np.sum(labels == 1) means = _sums / _counts stds = np.sqrt(_squared_sums / _counts - means**2) print('labels shape before unmap: ', labels.shape) # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) if DEBUG: print('max_overlaps.shape: ', max_overlaps.shape) print('rpn: max max_overlap', np.max(max_overlaps)) max_100_ind = np.argsort(max_overlaps)[-50:] print('rpn: max 50 overlaps: ', max_overlaps[max_100_ind]) print('rpn: max 50 indices:', max_100_ind) print('rpn: num_positive', np.sum(labels >= 1)) print('rpn: num_negative', np.sum(labels == 0)) _fg_sum += np.sum(labels >= 1) _bg_sum += np.sum(labels == 0) _count += 1 print('rpn: num_positive avg', _fg_sum / _count) print('rpn: num_negative avg', _bg_sum / _count) #print 'fg inds: ', fg_inds print('label shape', labels.shape) # labels rpn_labels = labels if DEBUG: print('labels shape: ', labels.shape) #print 'time for unmap: ', time.time()-t0 #t0 = time.time() #print '--------------------' return rpn_labels, anchors # origin: anchors_3d, WZN: changed to rewards
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, _feat_stride=[ 16, ], anchor_scales=[8, 16, 32], anchor_ratios=[0.5, 1, 2]): # Algorithm: # # for each (H, W) location i # generate A anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the A anchors # clip predicted boxes to image # remove predicted boxes with either height or width < threshold # sort all (proposal, score) pairs by score from highest to lowest # take top pre_nms_topN proposals before NMS # apply NMS with threshold 0.7 to remaining proposals # take after_nms_topN proposals after NMS # return the top proposals (-> RoIs top, scores top) #layer_params = yaml.load(self.param_str_) _anchors = generate_anchors(ratios=anchor_ratios, scales=np.array(anchor_scales)) _num_anchors = _anchors.shape[0] rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape, [0, 3, 1, 2]) rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2]) #rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,[1,rpn_cls_prob_reshape.shape[0],rpn_cls_prob_reshape.shape[1],rpn_cls_prob_reshape.shape[2]]),[0,3,2,1]) #rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,2,1]) im_info = im_info[0] assert rpn_cls_prob_reshape.shape[0] == 1, \ 'Only single item batches are supported' # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST' # cfg_key = 'TRAIN' # cfg_key = str('TRAIN' if self.phase == 0 else 'TEST') cfg_key = cfg_key.decode('utf-8') pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N nms_thresh = cfg[cfg_key].RPN_NMS_THRESH min_size = cfg[cfg_key].RPN_MIN_SIZE # the first set of _num_anchors channels are bg probs # the second set are the fg probs, which we want scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :] bbox_deltas = rpn_bbox_pred #im_info = bottom[2].data[0, :] if DEBUG: print('im_size: ({}, {})'.format(im_info[0], im_info[1])) print('scale: {}'.format(im_info[2])) # 1. Generate proposals from bbox deltas and shifted anchors height, width = scores.shape[-2:] if DEBUG: print('score map size: {}'.format(scores.shape)) # Enumerate all shifts shift_x = np.arange(0, width) * _feat_stride shift_y = np.arange(0, height) * _feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # Enumerate all shifted anchors: # # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = _num_anchors K = shifts.shape[0] anchors = _anchors.reshape((1, A, 4)) + \ shifts.reshape((1, K, 4)).transpose((1, 0, 2)) anchors = anchors.reshape((K * A, 4)) # Transpose and reshape predicted bbox transformations to get them # into the same order as the anchors: # # bbox deltas will be (1, 4 * A, H, W) format # transpose to (1, H, W, 4 * A) # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) # in slowest to fastest order bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) # Same story for the scores: # # scores are (1, A, H, W) format # transpose to (1, H, W, A) # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) # Convert anchors into proposals via bbox transformations proposals = bbox_transform_inv(anchors, bbox_deltas) # 2. clip predicted boxes to image proposals = clip_boxes(proposals, im_info[:2]) # 3. remove predicted boxes with either height or width < threshold # (NOTE: convert min_size to input image scale stored in im_info[2]) keep = _filter_boxes(proposals, min_size * im_info[2]) proposals = proposals[keep, :] scores = scores[keep] # 4. sort all (proposal, score) pairs by score from highest to lowest # 5. take top pre_nms_topN (e.g. 6000) order = scores.ravel().argsort()[::-1] if pre_nms_topN > 0: order = order[:pre_nms_topN] proposals = proposals[order, :] scores = scores[order] # 6. apply nms (e.g. threshold = 0.7) # 7. take after_nms_topN (e.g. 300) # 8. return the top proposals (-> RoIs top) keep = nms(np.hstack((proposals, scores)), nms_thresh) if post_nms_topN > 0: keep = keep[:post_nms_topN] proposals = proposals[keep, :] scores = scores[keep] # remove_option = 1 # if ('TEST' == cfg_key and remove_option in [1, 2]): # # get rid of boxes that are completely inside other boxes # # with options as to which one to get rid of # # 1. always the one with lower scores, 2. always the one inside # new_proposals = [] # removed_indices = set() # num_props = proposals.shape[0] # for i in range(num_props): # if (i in removed_indices): # continue # bxA = proposals[i, :] # for j in range(num_props): # if ((j == i) or (j in removed_indices)): # continue # bxB = proposals[j, :] # if (bbox_contains(bxA, bxB)): # if ((1 == remove_option) and (scores[i] != scores[j])): # if (scores[i] > scores[j]): # removed_indices.add(j) # else: # removed_indices.add(i) # else: # remove_option == 2 or scores[i] == scores[j] # removed_indices.add(j) # nr = len(removed_indices) # if (nr > 0): # new_proposals = sorted(set(range(num_props)) - removed_indices) # proposals = proposals[new_proposals, :] # scores = scores[new_proposals] # # padding to make the total number of proposals == post_nms_topN # proposals = np.vstack((proposals, [proposals[-1, :]] * nr)) # scores = np.vstack((scores, [scores[-1]] * nr)) # Output rois blob # Our RPN implementation only supports a single input image, so all # batch inds are 0 # batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) # BUT we NOW (18-Sep-2017) abuse batch inds, and use it for carrying scores if ('TEST' == cfg_key): batch_inds = np.reshape(scores, [proposals.shape[0], 1]) else: batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) if (DEBUG): print(('blob shape: {0}'.format(blob.shape))) print(('proposal shape: {0}'.format(proposals.shape))) return blob
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, _feat_stride = [16,], anchor_scales = [16,]): """ Parameters ---------- rpn_cls_prob_reshape: (1 , H , W , Ax2) outputs of RPN, prob of bg or fg NOTICE: the old version is ordered by (1, H, W, 2, A) !!!! rpn_bbox_pred: (1 , H , W , Ax4), rgs boxes output of RPN im_info: a list of [image_height, image_width, scale_ratios] cfg_key: 'TRAIN' or 'TEST' _feat_stride: the downsampling ratio of feature map to the original input image anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16]) ---------- Returns ---------- rpn_rois : (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2] # Algorithm: # # for each (H, W) location i # generate A anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the A anchors # clip predicted boxes to image # remove predicted boxes with either height or width < threshold # sort all (proposal, score) pairs by score from highest to lowest # take top pre_nms_topN proposals before NMS # apply NMS with threshold 0.7 to remaining proposals # take after_nms_topN proposals after NMS # return the top proposals (-> RoIs top, scores top) #layer_params = yaml.load(self.param_str_) """ cfg_key=cfg_key.decode('ascii') _anchors = generate_anchors(scales=np.array(anchor_scales))#生成基本的9个anchor _num_anchors = _anchors.shape[0]#9个anchor im_info = im_info[0]#原始图像的高宽、缩放尺度 assert rpn_cls_prob_reshape.shape[0] == 1, \ 'Only single item batches are supported' pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N#12000,在做nms之前,最多保留的候选box数目 post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N#2000,做完nms之后,最多保留的box的数目 nms_thresh = cfg[cfg_key].RPN_NMS_THRESH#nms用参数,阈值是0.7 min_size = cfg[cfg_key].RPN_MIN_SIZE#候选box的最小尺寸,目前是16,高宽均要大于16 #TODO 后期需要修改这个最小尺寸,改为8? height, width = rpn_cls_prob_reshape.shape[1:3]#feature-map的高宽 # the first set of _num_anchors channels are bg probs # the second set are the fg probs, which we want # (1, H, W, A) scores = np.reshape(np.reshape(rpn_cls_prob_reshape, [1, height, width, _num_anchors, 2])[:,:,:,:,1], [1, height, width, _num_anchors]) #提取到object的分数,non-object的我们不关心 #并reshape到1*H*W*9 bbox_deltas = rpn_bbox_pred#模型输出的pred是相对值,需要进一步处理成真实图像中的坐标 #im_info = bottom[2].data[0, :] if DEBUG: print('im_size: ({}, {})'.format(im_info[0], im_info[1])) print('scale: {}'.format(im_info[2])) # 1. Generate proposals from bbox deltas and shifted anchors if DEBUG: print('score map size: {}'.format(scores.shape)) # Enumerate all shifts # 同anchor-target-layer-tf这个文件一样,生成anchor的shift,进一步得到整张图像上的所有anchor shift_x = np.arange(0, width) * _feat_stride shift_y = np.arange(0, height) * _feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # Enumerate all shifted anchors: # # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = _num_anchors K = shifts.shape[0] anchors = _anchors.reshape((1, A, 4)) + \ shifts.reshape((1, K, 4)).transpose((1, 0, 2)) anchors = anchors.reshape((K * A, 4))#这里得到的anchor就是整张图像上的所有anchor # Transpose and reshape predicted bbox transformations to get them # into the same order as the anchors: # bbox deltas will be (1, 4 * A, H, W) format # transpose to (1, H, W, 4 * A) # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) # in slowest to fastest order bbox_deltas = bbox_deltas.reshape((-1, 4)) #(HxWxA, 4) # Same story for the scores: scores = scores.reshape((-1, 1)) # Convert anchors into proposals via bbox transformations proposals = bbox_transform_inv(anchors, bbox_deltas)#做逆变换,得到box在图像上的真实坐标 # 2. clip predicted boxes to image proposals = clip_boxes(proposals, im_info[:2])#将所有的proposal修建一下,超出图像范围的将会被修剪掉 # 3. remove predicted boxes with either height or width < threshold # (NOTE: convert min_size to input image scale stored in im_info[2]) keep = _filter_boxes(proposals, min_size * im_info[2])#移除那些proposal小于一定尺寸的proposal proposals = proposals[keep, :]#保留剩下的proposal scores = scores[keep] bbox_deltas=bbox_deltas[keep,:] # # remove irregular boxes, too fat too tall # keep = _filter_irregular_boxes(proposals) # proposals = proposals[keep, :] # scores = scores[keep] # 4. sort all (proposal, score) pairs by score from highest to lowest # 5. take top pre_nms_topN (e.g. 6000) order = scores.ravel().argsort()[::-1]#score按得分的高低进行排序 if pre_nms_topN > 0: #保留12000个proposal进去做nms order = order[:pre_nms_topN] proposals = proposals[order, :] scores = scores[order] bbox_deltas=bbox_deltas[order,:] # 6. apply nms (e.g. threshold = 0.7) # 7. take after_nms_topN (e.g. 300) # 8. return the top proposals (-> RoIs top) keep = nms(np.hstack((proposals, scores)), nms_thresh)#进行nms操作,保留2000个proposal if post_nms_topN > 0: keep = keep[:post_nms_topN] proposals = proposals[keep, :] scores = scores[keep] bbox_deltas=bbox_deltas[keep,:] # Output rois blob # Our RPN implementation only supports a single input image, so all # batch inds are 0 blob = np.hstack((scores.astype(np.float32, copy=False), proposals.astype(np.float32, copy=False))) return blob,bbox_deltas
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data, _feat_stride = [16,], anchor_scales = [4 ,8, 16, 32]): """ Assign anchors to ground-truth targets. Produces anchor classification labels and bounding-box regression targets. """ # 生成基本anchor:(相当于特征图最左下角的滑动窗口生成的九个anchor在输入图片上的对应坐标位置) # array([[ -83., -39., 100., 56.], # [-175., -87., 192., 104.], # [-359., -183., 376., 200.], # [ -55., -55., 72., 72.], # [-119., -119., 136., 136.], # [-247., -247., 264., 264.], # [ -35., -79., 52., 96.], # [ -79., -167., 96., 184.], # [-167., -343., 184., 360.]]) _anchors = generate_anchors(scales=np.array(anchor_scales)) #9 _num_anchors = _anchors.shape[0] if DEBUG: print 'anchors:' print _anchors print 'anchor shapes:' # [[183. 95.] # [367. 191.] # [735. 383.] # [127. 127.] # [255. 255.] # [511. 511.] # [87. 175.] # [175. 351.] # [351. 703.]] print np.hstack(( # array([[183.], # [367.], # [735.], # [127.], # [255.], # [511.], # [87.], # [175.], # [351.]]) _anchors[:, 2::4] - _anchors[:, 0::4], # array([[95.], # [191.], # [383.], # [127.], # [255.], # [511.], # [175.], # [351.], # [703.]]) _anchors[:, 3::4] - _anchors[:, 1::4], )) _counts = cfg.EPS _sums = np.zeros((1, 4)) _squared_sums = np.zeros((1, 4)) _fg_sum = 0 _bg_sum = 0 _count = 0 # 允许boxes超出图片边界的范围,0表示不能超出图片边界 # allow boxes to sit over the edge by a small amount _allowed_border = 0 # map of shape (..., H, W) #height, width = rpn_cls_score.shape[1:3] im_info = im_info[0] # Algorithm: # # for each (H, W) location i # generate 9 anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the 9 anchors # filter out-of-image anchors # measure GT overlap # rpn_cls_score.shape=[1,height,width,depth] # TODO: 1代表一张图片? assert rpn_cls_score.shape[0] == 1, \ 'Only single item batches are supported' # rpn_cls_score.shape的第二位第三位分别存储高与宽 # rpn_cls_score.shape=[1,height,width,depth],按前提来看,depth应为18 # map of shape (..., H, W) height, width = rpn_cls_score.shape[1:3] if DEBUG: print 'AnchorTargetLayer: height', height, 'width', width print '' print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) print 'scale: {}'.format(im_info[2]) print 'height, width: ({}, {})'.format(height, width) print 'rpn: gt_boxes.shape', gt_boxes.shape print 'rpn: gt_boxes', gt_boxes # 1. Generate proposals from bbox deltas and shifted anchors # 该层对输入层的total stride为16,相当于在该层滑动1,在输入层滑动16个像素。 # shift包含着x或y方向上不同位置的每一个窗口所对应的anchors的偏移量 shift_x = np.arange(0, width) * _feat_stride shift_y = np.arange(0, height) * _feat_stride # 将坐标向量转换为坐标矩阵,新的shift_x行向量为旧shift_x,有dim(shift_y)行,每一行是相同的,新的shift_y列向量为旧shift_y,有dim(shift_x)列,每一列是相同的 # 最后生成的shift_x, shift_y的形状都是width*height,即可以包含rpn_cls_score所有点的x和y方向上的偏移量 shift_x, shift_y = np.meshgrid(shift_x, shift_y) # numpy.ravel()多维数组降为一维,组合得到一个(width*height,4)的数组。ravel列举数据内的元素。 # 因为box坐标的表示为(Xmin,Ymin,Xmax,Ymax),当需要加上坐标偏移时,加上的偏移量的形式就应该是(shift_x, shift_y,shift_x, shift_y) # vstack垂直堆叠后的形式是列向量的(shift_x, shift_y,shift_x, shift_y),因此需要转置transpose shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors # 9 A = _num_anchors # weight*height K = shifts.shape[0] # (1, A, 4)与(K, 1, 4)的数组进行相加,得到(K, A, 4)数组,实验得证,每个(K, 1, 4)的4元素都依次与(1, A, 4)中的每一个4元素相加(numpy里array相加会自动拓展,按需通过重复某子元素拓展成(K,A,4)), # 最后得到(K, A, 4)数组,这样是合理的,因为_anchors中记录的是对用于左上角可视野的9个anchor的左上角坐标与右下角坐标的4个值, # 而shifts中记录width*height个可视野相对于左上角可视野的偏移量,两者相加可得到width*height*9个预测anchor的左上角与右下角坐标信息 all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) all_anchors = all_anchors.reshape((K * A, 4)) total_anchors = int(K * A) # only keep anchors inside the image # 获取不超出边界的anchors的index inds_inside = np.where( (all_anchors[:, 0] >= -_allowed_border) & (all_anchors[:, 1] >= -_allowed_border) & (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width (all_anchors[:, 3] < im_info[0] + _allowed_border) # height )[0] if DEBUG: print 'total_anchors', total_anchors print 'inds_inside', len(inds_inside) # keep only inside anchors # 保存不超出边界的anchors anchors = all_anchors[inds_inside, :] if DEBUG: print 'anchors.shape', anchors.shape # label: 1 is positive, 0 is negative, -1 is dont care labels = np.empty((len(inds_inside), ), dtype=np.float32) labels.fill(-1) # overlaps between the anchors and the gt boxes # overlaps (ex, gt) # 返回[N,K]矩阵,记录每一个anchors和gt框的IoU。N为anchors数量,K为gt框数量。 overlaps = bbox_overlaps( np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float)) # 记录每个anchor对应的最大IoU的index(0到K-1),为每一个anchor找到与其重叠最好的GT argmax_overlaps = overlaps.argmax(axis=1) # 记录每个anchor对应的最大IoU的值 max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] # 记录和gt框IoU最高的anchors的index,为每一个GT找到与其重叠最好的一个anchor。当有多个anchor与GT框的IoU同时取到最大值时,只返回第一个的anchor的index gt_argmax_overlaps = overlaps.argmax(axis=0) # 记录和每一个gt框有最大IoU值的anchors的IoU gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] # np.where返回的是一个tuple,tuple里元素为array。在这个情况中,tuple里存着两个array,第一个array指明axis=0时的index,第二个array指明axis=1时的index。故用[0]取array,表明anchor的index # 上面所求的gt_argmax_overlaps只能指定第一个有最大IoU的anchor位置,当有多个anchor的IoU同时取到最大值时,是不能同时取到这几个anchors的。 # 通过以下这个方法,获取到全部这些anchors。 gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] # PRN网络微调训练 # 正样本:与Ground Truth相交IoU最大的anchors【以防后一种方式下没有正样本】+与Ground Truth相交IoU>0.7的anchors # 负样本:与Ground Truth相交IoU<0.3的anchors if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them # labels与max_overlaps都是[(len(inds_inside), 1]形状的,下一行代码表示将最大IoU小于TRAIN.RPN_NEGATIVE_OVERLAP的对应的anchors的labels设置为0 labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # fg label: for each gt, anchor with highest overlap # 为了防止出现空的正样本,将与Ground Truth相交IoU最大的anchors设置为正样本 labels[gt_argmax_overlaps] = 1 # fg label: above threshold IOU # 与Ground Truth相交IoU>TRAIN.RPN_POSITIVE_OVERLAP的anchors labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 # 这个参数就是看positive与negative谁比较强,先设置0说明positive强,因为0可能转1,而后设置0说明negative强,设置完1还可以设置成0 if cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # subsample positive labels if we have too many # SGD mini - batch采样方式: # 同Fast R - CNN网络,采取”image - centric”方式采样,即采用层次采样,先对图像取样,再对anchors取样,同一图像的anchors共享计算和内存。 # 每个mini-batch包含从一张图中随机提取的256个anchors,正负样本比例为1: 1【当然可以对一张图所有anchors进行优化,但由于负样本过多最终模型会对正样本预测准确率很低】 # 来计算一个mini-batch的损失函数,如果一张图中不够128个正样本,拿负样本凑齐。 # 这里默认下,TRAIN.RPN_FG_FRACTION为0.5,表示正样本占据TRAIN.RPN_BATCHSIZE的一半,即正负样本比例为1:1 num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: # choice : Generates a random sample from a given 1-D array # 从正样本中随机选出多余的部分,将他们的labels设置为-1(不关心) disable_inds = npr.choice( fg_inds, size=(len(fg_inds) - num_fg), replace=False) labels[disable_inds] = -1 # subsample negative labels if we have too many num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice( bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 #print "was %s inds, disabling %s, now %s inds" % ( #len(bg_inds), len(disable_inds), np.sum(labels == 0)) bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) # anchors : 不超出边界的anchors # argmax_overlaps:为每一个anchor找到与其IoU最大的gt,其shape为[1,len(anchors)] # gt_boxes[argmax_overlaps, :]的shape为[len(anchors),5] # 返回每一个anchor的(targets_dx, targets_dy, targets_dw, targets_dh) bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: # uniform weighting of examples (given non-uniform sampling) num_examples = np.sum(labels >= 0) positive_weights = np.ones((1, 4)) * 1.0 / num_examples negative_weights = np.ones((1, 4)) * 1.0 / num_examples else: assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / np.sum(labels == 1)) negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / np.sum(labels == 0)) bbox_outside_weights[labels == 1, :] = positive_weights bbox_outside_weights[labels == 0, :] = negative_weights if DEBUG: _sums += bbox_targets[labels == 1, :].sum(axis=0) _squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0) _counts += np.sum(labels == 1) means = _sums / _counts stds = np.sqrt(_squared_sums / _counts - means ** 2) print 'means:' print means print 'stdevs:' print stds # map up to original set of anchors # 之后可能还会用到超出图片边界的anchors的信息,所以对labels信息进行扩充,添加进去了第一次筛选出的anchor的标签(都为-1) labels = _unmap(labels, total_anchors, inds_inside, fill=-1) # 对bbox_targets信息进行扩充,将超出图片边界的anchors的reg信息添加进来,但是都设置为0 bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) if DEBUG: print 'rpn: max max_overlap', np.max(max_overlaps) print 'rpn: num_positive', np.sum(labels == 1) print 'rpn: num_negative', np.sum(labels == 0) _fg_sum += np.sum(labels == 1) _bg_sum += np.sum(labels == 0) _count += 1 print 'rpn: num_positive avg', _fg_sum / _count print 'rpn: num_negative avg', _bg_sum / _count # labels #pdb.set_trace() # height和weight为特征图里的相应位置 # 由之前anchor产生可知,anchor产生的排序方式与卷积的顺序相同,一行一行的出,每个位置产生9个anchor # NOTE:由于越往后信息归类越精确,所以labels.reshape((1, height, width, A))顺序正常的 # 之后transpose(0, 3, 1, 2),此时最精确信息为width,此时以width信息进行fastest聚类 # [1,A,height,weight] labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) labels = labels.reshape((1, 1, A * height, width)) rpn_labels = labels # bbox_targets bbox_targets = bbox_targets \ .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) rpn_bbox_targets = bbox_targets # bbox_inside_weights bbox_inside_weights = bbox_inside_weights \ .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) #assert bbox_inside_weights.shape[2] == height #assert bbox_inside_weights.shape[3] == width rpn_bbox_inside_weights = bbox_inside_weights # bbox_outside_weights bbox_outside_weights = bbox_outside_weights \ .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) #assert bbox_outside_weights.shape[2] == height #assert bbox_outside_weights.shape[3] == width rpn_bbox_outside_weights = bbox_outside_weights return rpn_labels,rpn_bbox_targets,rpn_bbox_inside_weights,rpn_bbox_outside_weights
def _load_pascal_annotation(self, index): """ Load image and bounding boxes info from XML file in the PASCAL VOC format. """ filename = os.path.join(self._data_path, 'Annotations', index + '.xml') # print 'Loading: {}'.format(filename) def get_data_from_tag(node, tag): return node.getElementsByTagName(tag)[0].childNodes[0].data with open(filename) as f: data = minidom.parseString(f.read()) objs = data.getElementsByTagName('object') num_objs = len(objs) boxes = np.zeros((num_objs, 4), dtype=np.uint16) gt_classes = np.zeros((num_objs), dtype=np.int32) overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) # Load object bounding boxes into a data frame. for ix, obj in enumerate(objs): # Make pixel indexes 0-based x1 = float(get_data_from_tag(obj, 'xmin')) - 1 y1 = float(get_data_from_tag(obj, 'ymin')) - 1 x2 = float(get_data_from_tag(obj, 'xmax')) - 1 y2 = float(get_data_from_tag(obj, 'ymax')) - 1 name = str(get_data_from_tag(obj, "name")).lower().strip() if name in self._classes: cls = self._class_to_ind[name] else: cls = 0 boxes[ix, :] = [x1, y1, x2, y2] gt_classes[ix] = cls overlaps[ix, cls] = 1.0 overlaps = scipy.sparse.csr_matrix(overlaps) gt_subclasses = np.zeros((num_objs), dtype=np.int32) gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32) subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32) subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32) subindexes = scipy.sparse.csr_matrix(subindexes) subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped) if cfg.IS_RPN: if cfg.IS_MULTISCALE: # compute overlaps between grid boxes and gt boxes in multi-scales # rescale the gt boxes boxes_all = np.zeros((0, 4), dtype=np.float32) for scale in cfg.TRAIN.SCALES: boxes_all = np.vstack((boxes_all, boxes * scale)) gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES)) # compute grid boxes s = PIL.Image.open(self.image_path_from_index(index)).size image_height = s[1] image_width = s[0] boxes_grid, _, _ = get_boxes_grid(image_height, image_width) # compute overlap overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float), boxes_all.astype(np.float)) # check how many gt boxes are covered by grids if num_objs != 0: index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES)) max_overlaps = overlaps_grid.max(axis=0) fg_inds = [] for k in xrange(1, self.num_classes): fg_inds.extend( np.where((gt_classes_all == k) & ( max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1])) [0]) index_covered = np.unique(index[fg_inds]) for i in xrange(self.num_classes): self._num_boxes_all[i] += len( np.where(gt_classes == i)[0]) self._num_boxes_covered[i] += len( np.where(gt_classes[index_covered] == i)[0]) else: assert len(cfg.TRAIN.SCALES_BASE) == 1 scale = cfg.TRAIN.SCALES_BASE[0] feat_stride = 16 # faster rcnn region proposal anchors = generate_anchors() num_anchors = anchors.shape[0] # image size s = PIL.Image.open(self.image_path_from_index(index)).size image_height = s[1] image_width = s[0] # height and width of the heatmap height = np.round((image_height * scale - 1) / 4.0 + 1) height = np.floor((height - 1) / 2 + 1 + 0.5) height = np.floor((height - 1) / 2 + 1 + 0.5) width = np.round((image_width * scale - 1) / 4.0 + 1) width = np.floor((width - 1) / 2.0 + 1 + 0.5) width = np.floor((width - 1) / 2.0 + 1 + 0.5) # gt boxes gt_boxes = boxes * scale # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * feat_stride shift_y = np.arange(0, height) * feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack( (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = num_anchors K = shifts.shape[0] all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape( (1, K, 4)).transpose((1, 0, 2))) all_anchors = all_anchors.reshape((K * A, 4)) # compute overlap overlaps_grid = bbox_overlaps(all_anchors.astype(np.float), gt_boxes.astype(np.float)) # check how many gt boxes are covered by anchors if num_objs != 0: max_overlaps = overlaps_grid.max(axis=0) fg_inds = [] for k in xrange(1, self.num_classes): fg_inds.extend( np.where((gt_classes == k) & ( max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1])) [0]) for i in xrange(self.num_classes): self._num_boxes_all[i] += len( np.where(gt_classes == i)[0]) self._num_boxes_covered[i] += len( np.where(gt_classes[fg_inds] == i)[0]) return { 'boxes': boxes, 'gt_classes': gt_classes, 'gt_subclasses': gt_subclasses, 'gt_subclasses_flipped': gt_subclasses_flipped, 'gt_overlaps': overlaps, 'gt_subindexes': subindexes, 'gt_subindexes_flipped': subindexes_flipped, 'flipped': False }
def _load_imagenet3d_annotation(self, index): """ Load image and bounding boxes info from txt file in the imagenet3d format. """ if self._image_set == 'test' or self._image_set == 'test_1' or self._image_set == 'test_2': lines = [] else: filename = os.path.join(self._imagenet3d_path, 'Labels', index + '.txt') lines = [] with open(filename) as f: for line in f: lines.append(line) num_objs = len(lines) boxes = np.zeros((num_objs, 4), dtype=np.float32) viewpoints = np.zeros((num_objs, 3), dtype=np.float32) # azimuth, elevation, in-plane rotation viewpoints_flipped = np.zeros((num_objs, 3), dtype=np.float32) # azimuth, elevation, in-plane rotation gt_classes = np.zeros((num_objs), dtype=np.int32) overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) for ix, line in enumerate(lines): words = line.split() assert len(words) == 5 or len(words) == 8, 'Wrong label format: {}'.format(index) cls = self._class_to_ind[words[0]] boxes[ix, :] = [float(n) for n in words[1:5]] gt_classes[ix] = cls overlaps[ix, cls] = 1.0 if len(words) == 8: viewpoints[ix, :] = [float(n) for n in words[5:8]] # flip the viewpoint viewpoints_flipped[ix, 0] = -viewpoints[ix, 0] # azimuth viewpoints_flipped[ix, 1] = viewpoints[ix, 1] # elevation viewpoints_flipped[ix, 2] = -viewpoints[ix, 2] # in-plane rotation else: viewpoints[ix, :] = np.inf viewpoints_flipped[ix, :] = np.inf gt_subclasses = np.zeros((num_objs), dtype=np.int32) gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32) subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32) subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32) viewindexes_azimuth = np.zeros((num_objs, self.num_classes), dtype=np.float32) viewindexes_azimuth_flipped = np.zeros((num_objs, self.num_classes), dtype=np.float32) viewindexes_elevation = np.zeros((num_objs, self.num_classes), dtype=np.float32) viewindexes_elevation_flipped = np.zeros((num_objs, self.num_classes), dtype=np.float32) viewindexes_rotation = np.zeros((num_objs, self.num_classes), dtype=np.float32) viewindexes_rotation_flipped = np.zeros((num_objs, self.num_classes), dtype=np.float32) overlaps = scipy.sparse.csr_matrix(overlaps) subindexes = scipy.sparse.csr_matrix(subindexes) subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped) viewindexes_azimuth = scipy.sparse.csr_matrix(viewindexes_azimuth) viewindexes_azimuth_flipped = scipy.sparse.csr_matrix(viewindexes_azimuth_flipped) viewindexes_elevation = scipy.sparse.csr_matrix(viewindexes_elevation) viewindexes_elevation_flipped = scipy.sparse.csr_matrix(viewindexes_elevation_flipped) viewindexes_rotation = scipy.sparse.csr_matrix(viewindexes_rotation) viewindexes_rotation_flipped = scipy.sparse.csr_matrix(viewindexes_rotation_flipped) if cfg.IS_RPN: if cfg.IS_MULTISCALE: # compute overlaps between grid boxes and gt boxes in multi-scales # rescale the gt boxes boxes_all = np.zeros((0, 4), dtype=np.float32) for scale in cfg.TRAIN.SCALES: boxes_all = np.vstack((boxes_all, boxes * scale)) gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES)) # compute grid boxes s = PIL.Image.open(self.image_path_from_index(index)).size image_height = s[1] image_width = s[0] boxes_grid, _, _ = get_boxes_grid(image_height, image_width) # compute overlap overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float), boxes_all.astype(np.float)) # check how many gt boxes are covered by grids if num_objs != 0: index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES)) max_overlaps = overlaps_grid.max(axis = 0) fg_inds = [] for k in xrange(1, self.num_classes): fg_inds.extend(np.where((gt_classes_all == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0]) index_covered = np.unique(index[fg_inds]) for i in xrange(self.num_classes): self._num_boxes_all[i] += len(np.where(gt_classes == i)[0]) self._num_boxes_covered[i] += len(np.where(gt_classes[index_covered] == i)[0]) else: assert len(cfg.TRAIN.SCALES_BASE) == 1 scale = cfg.TRAIN.SCALES_BASE[0] feat_stride = 16 # faster rcnn region proposal base_size = 16 ratios = cfg.TRAIN.RPN_ASPECTS scales = cfg.TRAIN.RPN_SCALES anchors = generate_anchors(base_size, ratios, scales) num_anchors = anchors.shape[0] # image size s = PIL.Image.open(self.image_path_from_index(index)).size image_height = s[1] image_width = s[0] # height and width of the heatmap height = np.round((image_height * scale - 1) / 4.0 + 1) height = np.floor((height - 1) / 2 + 1 + 0.5) height = np.floor((height - 1) / 2 + 1 + 0.5) width = np.round((image_width * scale - 1) / 4.0 + 1) width = np.floor((width - 1) / 2.0 + 1 + 0.5) width = np.floor((width - 1) / 2.0 + 1 + 0.5) # gt boxes gt_boxes = boxes * scale # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * feat_stride shift_y = np.arange(0, height) * feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = num_anchors K = shifts.shape[0] all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) all_anchors = all_anchors.reshape((K * A, 4)) # compute overlap overlaps_grid = bbox_overlaps(all_anchors.astype(np.float), gt_boxes.astype(np.float)) # check how many gt boxes are covered by anchors if num_objs != 0: max_overlaps = overlaps_grid.max(axis = 0) fg_inds = [] for k in xrange(1, self.num_classes): fg_inds.extend(np.where((gt_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0]) for i in xrange(self.num_classes): self._num_boxes_all[i] += len(np.where(gt_classes == i)[0]) self._num_boxes_covered[i] += len(np.where(gt_classes[fg_inds] == i)[0]) return {'boxes' : boxes, 'gt_classes': gt_classes, 'gt_viewpoints': viewpoints, 'gt_viewpoints_flipped': viewpoints_flipped, 'gt_viewindexes_azimuth': viewindexes_azimuth, 'gt_viewindexes_azimuth_flipped': viewindexes_azimuth_flipped, 'gt_viewindexes_elevation': viewindexes_elevation, 'gt_viewindexes_elevation_flipped': viewindexes_elevation_flipped, 'gt_viewindexes_rotation': viewindexes_rotation, 'gt_viewindexes_rotation_flipped': viewindexes_rotation_flipped, 'gt_subclasses': gt_subclasses, 'gt_subclasses_flipped': gt_subclasses_flipped, 'gt_overlaps' : overlaps, 'gt_subindexes': subindexes, 'gt_subindexes_flipped': subindexes_flipped, 'flipped' : False}
def _load_pascal3d_voxel_exemplar_annotation(self, index): """ Load image and bounding boxes info from txt file in the pascal subcategory exemplar format. """ if self._image_set == 'val': return self._load_pascal_annotation(index) filename = os.path.join(self._pascal3d_path, cfg.SUBCLS_NAME, index + '.txt') assert os.path.exists(filename), \ 'Path does not exist: {}'.format(filename) # the annotation file contains flipped objects lines = [] lines_flipped = [] with open(filename) as f: for line in f: words = line.split() subcls = int(words[1]) is_flip = int(words[2]) if subcls != -1: if is_flip == 0: lines.append(line) else: lines_flipped.append(line) num_objs = len(lines) # store information of flipped objects assert (num_objs == len(lines_flipped) ), 'The number of flipped objects is not the same!' gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32) for ix, line in enumerate(lines_flipped): words = line.split() subcls = int(words[1]) gt_subclasses_flipped[ix] = subcls boxes = np.zeros((num_objs, 4), dtype=np.float32) gt_classes = np.zeros((num_objs), dtype=np.int32) gt_subclasses = np.zeros((num_objs), dtype=np.int32) overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32) subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32) for ix, line in enumerate(lines): words = line.split() cls = self._class_to_ind[words[0]] subcls = int(words[1]) # Make pixel indexes 0-based boxes[ix, :] = [float(n) - 1 for n in words[3:7]] gt_classes[ix] = cls gt_subclasses[ix] = subcls overlaps[ix, cls] = 1.0 subindexes[ix, cls] = subcls subindexes_flipped[ix, cls] = gt_subclasses_flipped[ix] overlaps = scipy.sparse.csr_matrix(overlaps) subindexes = scipy.sparse.csr_matrix(subindexes) subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped) if cfg.IS_RPN: if cfg.IS_MULTISCALE: # compute overlaps between grid boxes and gt boxes in multi-scales # rescale the gt boxes boxes_all = np.zeros((0, 4), dtype=np.float32) for scale in cfg.TRAIN.SCALES: boxes_all = np.vstack((boxes_all, boxes * scale)) gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES)) # compute grid boxes s = PIL.Image.open(self.image_path_from_index(index)).size image_height = s[1] image_width = s[0] boxes_grid, _, _ = get_boxes_grid(image_height, image_width) # compute overlap overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float), boxes_all.astype(np.float)) # check how many gt boxes are covered by grids if num_objs != 0: index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES)) max_overlaps = overlaps_grid.max(axis=0) fg_inds = [] for k in xrange(1, self.num_classes): fg_inds.extend( np.where((gt_classes_all == k) & ( max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1])) [0]) index_covered = np.unique(index[fg_inds]) for i in xrange(self.num_classes): self._num_boxes_all[i] += len( np.where(gt_classes == i)[0]) self._num_boxes_covered[i] += len( np.where(gt_classes[index_covered] == i)[0]) else: assert len(cfg.TRAIN.SCALES_BASE) == 1 scale = cfg.TRAIN.SCALES_BASE[0] feat_stride = 16 # faster rcnn region proposal base_size = 16 ratios = [3.0, 2.0, 1.5, 1.0, 0.75, 0.5, 0.25] scales = 2**np.arange(1, 6, 0.5) anchors = generate_anchors(base_size, ratios, scales) num_anchors = anchors.shape[0] # image size s = PIL.Image.open(self.image_path_from_index(index)).size image_height = s[1] image_width = s[0] # height and width of the heatmap height = np.round((image_height * scale - 1) / 4.0 + 1) height = np.floor((height - 1) / 2 + 1 + 0.5) height = np.floor((height - 1) / 2 + 1 + 0.5) width = np.round((image_width * scale - 1) / 4.0 + 1) width = np.floor((width - 1) / 2.0 + 1 + 0.5) width = np.floor((width - 1) / 2.0 + 1 + 0.5) # gt boxes gt_boxes = boxes * scale # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * feat_stride shift_y = np.arange(0, height) * feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack( (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = num_anchors K = shifts.shape[0] all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape( (1, K, 4)).transpose((1, 0, 2))) all_anchors = all_anchors.reshape((K * A, 4)) # compute overlap overlaps_grid = bbox_overlaps(all_anchors.astype(np.float), gt_boxes.astype(np.float)) # check how many gt boxes are covered by anchors if num_objs != 0: max_overlaps = overlaps_grid.max(axis=0) fg_inds = [] for k in xrange(1, self.num_classes): fg_inds.extend( np.where((gt_classes == k) & ( max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1])) [0]) for i in xrange(self.num_classes): self._num_boxes_all[i] += len( np.where(gt_classes == i)[0]) self._num_boxes_covered[i] += len( np.where(gt_classes[fg_inds] == i)[0]) return { 'boxes': boxes, 'gt_classes': gt_classes, 'gt_subclasses': gt_subclasses, 'gt_subclasses_flipped': gt_subclasses_flipped, 'gt_overlaps': overlaps, 'gt_subindexes': subindexes, 'gt_subindexes_flipped': subindexes_flipped, 'flipped': False }
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data, _feat_stride=[ 16, ], anchor_scales=[4, 8, 16, 32]): """ Assign anchors to ground-truth targets. Produces anchor classification labels and bounding-box regression targets. """ _anchors = generate_anchors(scales=np.array(anchor_scales)) _num_anchors = _anchors.shape[0] if DEBUG: print('anchors:') print(_anchors) print('anchor shapes:') print( np.hstack(( _anchors[:, 2::4] - _anchors[:, 0::4], _anchors[:, 3::4] - _anchors[:, 1::4], ))) _counts = cfg.EPS _sums = np.zeros((1, 4)) _squared_sums = np.zeros((1, 4)) _fg_sum = 0 _bg_sum = 0 _count = 0 # allow boxes to sit over the edge by a small amount _allowed_border = 0 # map of shape (..., H, W) #height, width = rpn_cls_score.shape[1:3] im_info = im_info[0] # Algorithm: # # for each (H, W) location i # generate 9 anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the 9 anchors # filter out-of-image anchors # measure GT overlap assert rpn_cls_score.shape[0] == 1, \ 'Only single item batches are supported' # map of shape (..., H, W) height, width = rpn_cls_score.shape[1:3] if DEBUG: print('AnchorTargetLayer: height', height, 'width', width) print('') print('im_size: ({}, {})'.format(im_info[0], im_info[1])) print('scale: {}'.format(im_info[2])) print('height, width: ({}, {})'.format(height, width)) print('rpn: gt_boxes.shape', gt_boxes.shape) print('rpn: gt_boxes', gt_boxes) # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * _feat_stride shift_y = np.arange(0, height) * _feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = _num_anchors K = shifts.shape[0] all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape( (1, K, 4)).transpose((1, 0, 2))) all_anchors = all_anchors.reshape((K * A, 4)) total_anchors = int(K * A) # only keep anchors inside the image inds_inside = np.where( (all_anchors[:, 0] >= -_allowed_border) & (all_anchors[:, 1] >= -_allowed_border) & (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width (all_anchors[:, 3] < im_info[0] + _allowed_border) # height )[0] if DEBUG: print('total_anchors', total_anchors) print('inds_inside', len(inds_inside)) # keep only inside anchors anchors = all_anchors[inds_inside, :] if DEBUG: print('anchors.shape', anchors.shape) # label: 1 is positive, 0 is negative, -1 is dont care labels = np.empty((len(inds_inside), ), dtype=np.float32) labels.fill(-1) # overlaps between the anchors and the gt boxes # overlaps (ex, gt) overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float)) argmax_overlaps = overlaps.argmax(axis=1) max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] gt_argmax_overlaps = overlaps.argmax(axis=0) gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # fg label: for each gt, anchor with highest overlap labels[gt_argmax_overlaps] = 1 # fg label: above threshold IOU labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 if cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # subsample positive labels if we have too many num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) labels[disable_inds] = -1 # subsample negative labels if we have too many num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 #print "was %s inds, disabling %s, now %s inds" % ( #len(bg_inds), len(disable_inds), np.sum(labels == 0)) bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_inside_weights[labels == 1, :] = np.array( cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: # uniform weighting of examples (given non-uniform sampling) num_examples = np.sum(labels >= 0) positive_weights = np.ones((1, 4)) * 1.0 / num_examples negative_weights = np.ones((1, 4)) * 1.0 / num_examples else: assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / np.sum(labels == 1)) negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / np.sum(labels == 0)) bbox_outside_weights[labels == 1, :] = positive_weights bbox_outside_weights[labels == 0, :] = negative_weights if DEBUG: _sums += bbox_targets[labels == 1, :].sum(axis=0) _squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0) _counts += np.sum(labels == 1) means = _sums / _counts stds = np.sqrt(_squared_sums / _counts - means**2) print('means:') print(means) print('stdevs:') print(stds) # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) if DEBUG: print('rpn: max max_overlap', np.max(max_overlaps)) print('rpn: num_positive', np.sum(labels == 1)) print('rpn: num_negative', np.sum(labels == 0)) _fg_sum += np.sum(labels == 1) _bg_sum += np.sum(labels == 0) _count += 1 print('rpn: num_positive avg', _fg_sum / _count) print('rpn: num_negative avg', _bg_sum / _count) # labels #pdb.set_trace() labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) labels = labels.reshape((1, 1, A * height, width)) rpn_labels = labels # bbox_targets bbox_targets = bbox_targets \ .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) rpn_bbox_targets = bbox_targets # bbox_inside_weights bbox_inside_weights = bbox_inside_weights \ .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) #assert bbox_inside_weights.shape[2] == height #assert bbox_inside_weights.shape[3] == width rpn_bbox_inside_weights = bbox_inside_weights # bbox_outside_weights bbox_outside_weights = bbox_outside_weights \ .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) #assert bbox_outside_weights.shape[2] == height #assert bbox_outside_weights.shape[3] == width rpn_bbox_outside_weights = bbox_outside_weights return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights