def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Build a roidb from one array of candidate boxes per image.

    Args:
        self: object providing ``num_images`` and ``num_classes``.
        box_list: sequence with one box array per image; only ``shape[0]``
            (the number of boxes) and ``astype`` are used here.
        gt_roidb: ``None``, or a per-image sequence of dicts holding
            ``'boxes'`` and ``'gt_classes'`` for the ground truth.

    Returns:
        A list with one dict per image containing ``'boxes'``,
        ``'gt_classes'`` (all zeros, int32), ``'gt_overlaps'`` (a
        ``scipy.sparse.csr_matrix`` of shape ``(num_boxes, num_classes)``),
        ``'flipped'`` (always ``False``) and ``'seg_areas'`` (all zeros).

    Raises:
        AssertionError: if ``len(box_list)`` differs from ``self.num_images``.
    """
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'

    roidb = []
    for i in range(self.num_images):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
            gt_boxes = gt_roidb[i]['boxes']
            gt_classes = gt_roidb[i]['gt_classes']
            # np.float64 is the dtype the removed ``np.float`` alias mapped to.
            gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                        gt_boxes.astype(np.float64))
            # Best-matching ground-truth index / overlap value for every box.
            argmaxes = gt_overlaps.argmax(axis=1)
            maxes = gt_overlaps.max(axis=1)
            # Only boxes that overlap some ground truth get a non-zero entry,
            # stored in the column of the matched ground-truth class.
            matched = np.where(maxes > 0)[0]
            overlaps[matched, gt_classes[argmaxes[matched]]] = maxes[matched]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        roidb.append({
            'boxes': boxes,
            'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
        })
    return roidb
def _compute_targets(rois, overlaps, labels): """Compute bounding-box regression targets for an image.""" # Indices of ground-truth ROIs gt_inds = np.where(overlaps == 1)[0] if len(gt_inds) == 0: # Bail if the image has no ground-truth ROIs return np.zeros((rois.shape[0], 5), dtype=np.float32) # Indices of examples for which we try to make predictions ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] # Get IoU overlap between each ex ROI and gt ROI ex_gt_overlaps = bbox_overlaps( np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) # Find which gt ROI each ex ROI has max overlap with: # this will be the ex ROI's gt target gt_assignment = ex_gt_overlaps.argmax(axis=1) gt_rois = rois[gt_inds[gt_assignment], :] ex_rois = rois[ex_inds, :] targets = np.zeros((rois.shape[0], 5), dtype=np.float32) targets[ex_inds, 0] = labels[ex_inds] targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) return targets
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Exactly ``rois_per_image`` RoIs are returned; sampling falls back to
    drawing with replacement when too few candidates are available.

    Args:
        all_rois: RoI array; columns 1:5 are used as the box coordinates.
        all_scores: per-RoI scores, indexed in step with ``all_rois``.
        gt_boxes: ground-truth array; columns 0:4 are the box, column 4 the
            class label.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs to sample.
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        ``(labels, rois, roi_scores, bbox_targets, bbox_inside_weights)``.

    Raises:
        ValueError: if no RoI qualifies as foreground or background.
    """
    # overlaps: (rois x gt_boxes).
    # np.float64 is the dtype the removed ``np.float`` alias mapped to.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    if bg_inds.size == 0:
        # Nothing in the configured band: relax the lower bound to 0.
        bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                           (max_overlaps >= 0))[0]

    # Small modification to the original version where we ensure a fixed
    # number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        # The original dropped into pdb here, which hangs or aborts an
        # unattended training run; fail loudly with a usable message instead.
        raise ValueError(
            'No foreground or background RoIs available to sample from')

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0 (foreground entries come
    # first in keep_inds).
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Both groups are sampled without replacement, so fewer than
    ``rois_per_image`` RoIs are returned when candidates run short.

    Args:
        all_rois: RoI array; columns 1:5 are used as the box coordinates.
        gt_boxes: ground-truth array; columns 0:4 are the box, column 4 the
            class label.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: maximum total number of RoIs to sample.
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        ``(labels, rois, bbox_targets, bbox_inside_weights)``.
    """
    # overlaps: (rois x gt_boxes), i.e. N x K where N is the number of
    # proposals (predicted and gt) and K the number of gt boxes.
    # np.float64 is the dtype the removed ``np.float`` alias mapped to.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired). Cast to int because a float
    # rois_per_image would otherwise reach npr.choice as a float size.
    bg_rois_per_this_image = int(
        min(rois_per_image - fg_rois_per_this_image, bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights
def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Create a roidb from per-image box arrays, optionally using gt_roidb.

    Args:
        self: object providing ``num_images`` and ``num_classes``.
        box_list: one box array per image (an image may carry several boxes).
        gt_roidb: ``None``, or per-image dicts with ``'boxes'`` and
            ``'gt_classes'`` describing the ground truth.

    Returns:
        A list of dicts (one per image) with keys ``'boxes'``,
        ``'gt_classes'``, ``'gt_overlaps'``, ``'flipped'`` and
        ``'seg_areas'``. ``'gt_overlaps'`` is a sparse
        ``(num_boxes, num_classes)`` matrix; the other arrays are zero-filled.

    Raises:
        AssertionError: if ``len(box_list)`` differs from ``self.num_images``.
    """
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'

    roidb = []
    for i in range(self.num_images):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]  # number of boxes for this image
        # Per-box, per-class overlap table; stays zero without ground truth.
        overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        # Ground truth is available and non-empty for this image.
        if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
            gt_boxes = gt_roidb[i]['boxes']
            gt_classes = gt_roidb[i]['gt_classes']
            # Overlap of every box with every ground-truth box.
            # np.float64 is the dtype the removed ``np.float`` alias mapped to.
            gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                        gt_boxes.astype(np.float64))
            argmaxes = gt_overlaps.argmax(axis=1)
            maxes = gt_overlaps.max(axis=1)
            hit = np.where(maxes > 0)[0]
            # Record the best overlap under the class of the matched gt box.
            overlaps[hit, gt_classes[argmaxes[hit]]] = maxes[hit]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        roidb.append({
            'boxes': boxes,
            'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
        })
    return roidb
def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Turn a list of per-image box arrays into roidb entries.

    Args:
        self: object providing ``num_images`` and ``num_classes``.
        box_list: one box array per image.
        gt_roidb: ``None``, or per-image dicts with ``'boxes'`` and
            ``'gt_classes'`` describing the ground truth.

    Returns:
        A list of dicts (one per image) with keys ``'boxes'``,
        ``'gt_classes'`` (zeros), ``'gt_overlaps'`` (sparse
        ``(num_boxes, num_classes)`` matrix), ``'flipped'`` (``False``) and
        ``'seg_areas'`` (zeros).

    Raises:
        AssertionError: if ``len(box_list)`` differs from ``self.num_images``.
    """
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'

    roidb = []
    for i in range(self.num_images):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
            gt_boxes = gt_roidb[i]['boxes']
            gt_classes = gt_roidb[i]['gt_classes']
            # np.float64 is the dtype the removed ``np.float`` alias mapped to.
            gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                        gt_boxes.astype(np.float64))
            # For each box, the index (into the gt boxes) of the gt object
            # with which it has the largest overlap ...
            argmaxes = gt_overlaps.argmax(axis=1)
            # ... and the value of that largest overlap.
            maxes = gt_overlaps.max(axis=1)
            # Indices (into the boxes) whose best overlap is positive.
            positive = np.where(maxes > 0)[0]
            # overlaps[box_id, class_of[gt_id]] = max overlap
            overlaps[positive, gt_classes[argmaxes[positive]]] = maxes[positive]

        # Stored as CSR so that only the non-zero entries are kept.
        overlaps = scipy.sparse.csr_matrix(overlaps)
        roidb.append({
            'boxes': boxes,
            'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
        })
    return roidb
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
    """Same as the anchor target layer in original Fast/er RCNN.

    Labels every anchor as positive (1), negative (0) or "don't care" (-1)
    and produces the matching box-regression targets and loss weights.

    Labelling rules, as implemented below:
      a. for every gt box, the anchor(s) with the highest overlap are positive;
      b. anchors whose best overlap is >= ``RPN_POSITIVE_OVERLAP`` are
         positive, anchors whose best overlap is < ``RPN_NEGATIVE_OVERLAP``
         are negative;
      c. remaining anchors, and anchors crossing the image border, are -1.

    Args:
        rpn_cls_score: score map; only ``shape[1:3]`` (height, width) is used.
        gt_boxes: ground-truth boxes, one row per box.
        im_info: indexable with ``im_info[0]`` = height, ``im_info[1]`` = width.
        _feat_stride: unused here.
        all_anchors: array of all anchors, one ``(x1, y1, x2, y2)`` row each.
        num_anchors: number of anchors per feature-map position.

    Returns:
        ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)`` with shapes ``(1, 1, A * height, width)``
        for the labels and ``(1, height, width, A * 4)`` for the other three.
    """
    # number of anchors per feature-map position
    A = num_anchors
    # total number of anchors
    total_anchors = all_anchors.shape[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W): height and width of the feature map
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt): one row per anchor, one column per gt box.
    # np.float64 is the dtype the removed ``np.float`` alias mapped to.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # for each anchor: index of its best gt box, and that overlap value
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # for each gt box: index of its best anchor, and that overlap value
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # row indices of every anchor that ties for the best overlap with some gt
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap (rule a)
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU (rule b)
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # Regression targets of each anchor relative to its best-matching gt box.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors: anchors dropped for crossing the
    # image border get label -1 and zero targets / weights.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels: (1, H, W, A) -> (1, A, H, W) -> (1, 1, A * H, W)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    # bbox_inside_weights
    rpn_bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, A * 4))
    # bbox_outside_weights
    rpn_bbox_outside_weights = bbox_outside_weights.reshape((1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def prepare_roidb(imdb):
    """Enrich the imdb's roidb by adding some derived quantities that are
    useful for training.

    For every image the entry gains ``'image'`` (its path) and
    ``'info_boxes'``, an ``(n, 18)`` float32 array with one row per grid box
    that overlaps a ground-truth box of a foreground class by at least
    ``cfg.TRAIN.FG_THRESH`` at some training scale. The result is cached in
    ``<imdb.cache_path>/<imdb.name>_gt_roidb_prepared.pkl``; when that file
    exists it is loaded into ``imdb._roidb`` and nothing is recomputed.

    Args:
        imdb: dataset object providing ``cache_path``, ``name``, ``roidb``,
            ``image_index``, ``image_path_at`` and ``num_classes``.

    Returns:
        None. ``imdb.roidb`` entries are modified in place (or
        ``imdb._roidb`` is replaced from the cache).
    """
    cache_file = os.path.join(imdb.cache_path, imdb.name + '_gt_roidb_prepared.pkl')
    if os.path.exists(cache_file):
        # NOTE(review): unpickling executes arbitrary code; the cache
        # directory must only ever contain files written by this function.
        with open(cache_file, 'rb') as fid:
            imdb._roidb = cPickle.load(fid)
        print('{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file))
        return

    roidb = imdb.roidb
    for i in range(len(imdb.image_index)):
        image_path = imdb.image_path_at(i)
        roidb[i]['image'] = image_path

        boxes = roidb[i]['boxes']
        labels = roidb[i]['gt_classes']
        info_boxes = np.zeros((0, 18), dtype=np.float32)

        if boxes.shape[0] == 0:
            roidb[i]['info_boxes'] = info_boxes
            continue

        # compute grid boxes; the context manager closes the image file
        # instead of leaving the handle to the garbage collector.
        with PIL.Image.open(image_path) as img:
            image_width, image_height = img.size
        boxes_grid, cx, cy = get_boxes_grid(image_height, image_width)

        # for each scale
        for scale_ind, scale in enumerate(cfg.TRAIN.SCALES):
            boxes_rescaled = boxes * scale

            # compute overlap.
            # np.float64 is the dtype the removed ``np.float`` alias mapped to.
            overlaps = bbox_overlaps(boxes_grid.astype(np.float64),
                                     boxes_rescaled.astype(np.float64))
            max_overlaps = overlaps.max(axis=1)
            argmax_overlaps = overlaps.argmax(axis=1)
            max_classes = labels[argmax_overlaps]

            # select positive boxes (class 0 is skipped)
            fg_inds = []
            for k in range(1, imdb.num_classes):
                fg_inds.extend(
                    np.where((max_classes == k) &
                             (max_overlaps >= cfg.TRAIN.FG_THRESH))[0])

            if len(fg_inds) > 0:
                gt_inds = argmax_overlaps[fg_inds]
                # bounding box regression targets
                gt_targets = _compute_targets(boxes_grid[fg_inds, :],
                                              boxes_rescaled[gt_inds, :])
                # scale mapping for RoI pooling
                scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind]
                scale_map = cfg.TRAIN.SCALES[scale_ind_map]
                # construct the list of positive boxes
                # (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label,
                #  gt_sublabel, target); column 13 (gt_sublabel) stays 0.
                info_box = np.zeros((len(fg_inds), 18), dtype=np.float32)
                info_box[:, 0] = cx[fg_inds]
                info_box[:, 1] = cy[fg_inds]
                info_box[:, 2] = scale_ind
                info_box[:, 3:7] = boxes_grid[fg_inds, :]
                info_box[:, 7] = scale_ind_map
                info_box[:, 8:12] = boxes_grid[fg_inds, :] * scale_map / scale
                info_box[:, 12] = labels[gt_inds]
                info_box[:, 14:] = gt_targets
                info_boxes = np.vstack((info_boxes, info_box))

        roidb[i]['info_boxes'] = info_boxes

    with open(cache_file, 'wb') as fid:
        cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print('wrote gt roidb prepared to {}'.format(cache_file))
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
    """Same as the anchor target layer in original Fast/er RCNN.

    Steps: keep the anchors that lie inside the image, label them as
    foreground (1) / background (0) / ignored (-1) from their overlap with
    the gt boxes, subsample the labels, compute box-regression targets and
    loss weights, then map everything back onto the full anchor set.

    Args:
        rpn_cls_score: score map; only ``shape[1:3]`` (height, width) is used.
        gt_boxes: ground-truth boxes, one row per box.
        im_info: ``im_info[0]`` is indexed as ``[height, width, ...]``.
        _feat_stride: unused here.
        all_anchors: array of all anchors, one ``(x1, y1, x2, y2)`` row each.
        num_anchors: number of anchors per feature-map position.

    Returns:
        ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)`` with shapes ``(1, 1, A * height, width)``
        for the labels and ``(1, height, width, A * 4)`` for the other three.
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    im_info = im_info[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image: those whose x1, y1, x2, y2 do not
    # cross the image border
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    # one label per inside anchor, initialised to -1
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt): entry (n, k) is the overlap of anchor n with gt box k.
    # np.float64 is the dtype the removed ``np.float`` alias mapped to.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # for each anchor, the gt box it overlaps most and that overlap value
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # for each gt box, the anchor it overlaps most and that overlap value
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # row indices of every anchor that ties for the best overlap with some gt
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives: anchors whose best overlap is still below
        # the negative threshold
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.FLAGS.rpn_positive_overlap] = 1

    if cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # subsample positive labels if we have too many: surplus foreground
    # anchors are randomly switched to "don't care"
    num_fg = int(cfg.FLAGS.rpn_fg_fraction * cfg.FLAGS.rpn_batchsize)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.FLAGS.rpn_batchsize - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # Regression targets of each anchor relative to its best-matching gt box.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # Only foreground anchors contribute to the box-regression loss; anchors
    # that are neither foreground nor background keep zero weights.
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(cfg.FLAGS2["bbox_inside_weights"])

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.FLAGS.rpn_positive_weight < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.FLAGS.rpn_positive_weight > 0) &
                (cfg.FLAGS.rpn_positive_weight < 1))
        positive_weights = (cfg.FLAGS.rpn_positive_weight /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.FLAGS.rpn_positive_weight) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels: (1, H, W, A) -> (1, A, H, W) -> (1, 1, A * H, W)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    # bbox_inside_weights
    rpn_bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, A * 4))
    # bbox_outside_weights
    rpn_bbox_outside_weights = bbox_outside_weights.reshape((1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def evaluate_recall(self, candidate_boxes=None, thresholds=None, area='all', limit=None):
    """Evaluate detection proposal recall metrics.

    Args:
        self: object providing ``num_images`` and ``roidb`` (per-image dicts
            with ``'gt_overlaps'``, ``'gt_classes'``, ``'boxes'`` and
            ``'seg_areas'``).
        candidate_boxes: optional per-image proposal arrays. When ``None``,
            the roidb boxes whose ``gt_classes`` is 0 are used.
        thresholds: IoU thresholds; defaults to 0.5, 0.55, ..., 0.95.
        area: name of the gt-area range to evaluate ('all', 'small',
            'medium', 'large', '96-128', '128-256', '256-512', '512-inf').
        limit: if given, only the first ``limit`` proposals per image count.

    Returns:
        results: dictionary of results with keys
            'ar': average recall
            'recalls': vector recalls at each IoU overlap threshold
            'thresholds': vector of IoU overlap thresholds
            'gt_overlaps': vector of all ground-truth overlaps

    Raises:
        AssertionError: if ``area`` is not one of the known range names.
    """
    # Inclusive [min, max] of the gt box area for every named range.
    area_ranges = {
        'all': [0 ** 2, 1e5 ** 2],
        'small': [0 ** 2, 32 ** 2],
        'medium': [32 ** 2, 96 ** 2],
        'large': [96 ** 2, 1e5 ** 2],
        '96-128': [96 ** 2, 128 ** 2],
        '128-256': [128 ** 2, 256 ** 2],
        '256-512': [256 ** 2, 512 ** 2],
        '512-inf': [512 ** 2, 1e5 ** 2],
    }
    assert area in area_ranges, 'unknown area range: {}'.format(area)
    area_range = area_ranges[area]

    # Record max overlap value for each gt box
    gt_overlaps = np.zeros(0)
    num_pos = 0
    for i in range(self.num_images):
        # Checking for max_overlaps == 1 avoids including crowd annotations
        # (...pretty hacking :/)
        max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
        gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
                           (max_gt_overlaps == 1))[0]
        gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
        gt_areas = self.roidb[i]['seg_areas'][gt_inds]
        valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                 (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)

        if candidate_boxes is None:
            # If candidate_boxes is not supplied, the default is to use the
            # non-ground-truth boxes from this roidb
            non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
            boxes = self.roidb[i]['boxes'][non_gt_inds, :]
        else:
            boxes = candidate_boxes[i]
        if boxes.shape[0] == 0:
            # The gt boxes were already counted in num_pos, so with no
            # proposals they count as missed.
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]

        # np.float64 is the dtype the removed ``np.float`` alias mapped to.
        overlaps = bbox_overlaps(boxes.astype(np.float64),
                                 gt_boxes.astype(np.float64))

        # Greedy one-to-one matching: each round takes the best-covered
        # remaining gt box together with the proposal that covers it.
        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        for j in range(gt_boxes.shape[0]):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert (gt_ovr >= 0)
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert (_gt_overlaps[j] == gt_ovr)
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
            'gt_overlaps': gt_overlaps}
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Exactly ``rois_per_image`` RoIs are returned; sampling falls back to
    drawing with replacement when too few candidates are available.

    Args:
        all_rois: RoI array; columns 1:5 are used as the box coordinates.
        all_scores: per-RoI scores, indexed in step with ``all_rois``.
        gt_boxes: ground-truth array; columns 0:4 are the box, column 4 the
            class label.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs to sample.
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        ``(labels, rois, roi_scores, bbox_targets, bbox_inside_weights)``.

    Raises:
        ValueError: if no RoI qualifies as foreground or background.
    """
    # overlaps: (rois x gt_boxes).
    # np.float64 is the dtype the removed ``np.float`` alias mapped to.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those whose best overlap with a gt box
    # reaches roi_fg_threshold
    fg_inds = np.where(max_overlaps >= cfg.FLAGS.roi_fg_threshold)[0]
    # Select background RoIs as those within
    # [roi_bg_threshold_low, roi_bg_threshold_high)
    bg_inds = np.where((max_overlaps < cfg.FLAGS.roi_bg_threshold_high) &
                       (max_overlaps >= cfg.FLAGS.roi_bg_threshold_low))[0]

    # Small modification to the original version where we ensure a fixed
    # number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        # The original dropped into pdb here, which hangs or aborts an
        # unattended training run; fail loudly with a usable message instead.
        raise ValueError(
            'No foreground or background RoIs available to sample from')

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0 (foreground entries come
    # first in keep_inds).
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    # Box-regression target data for the sampled RoIs.
    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    # NOTE(review): bbox_inside_weights presumably are per-box loss weights;
    # confirm against _get_bbox_regression_labels.
    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
    """Same as the anchor target layer in original Faster RCNN.

    Labels every anchor as positive (1), negative (0) or "don't care" (-1)
    from its overlap with the gt boxes, and produces the matching
    box-regression targets and loss weights.

    Args:
        rpn_cls_score: score map; only ``shape[1:3]`` (height, width) is used.
        gt_boxes: ground-truth boxes, one row per box.
        im_info: ``im_info[0]`` is indexed as ``[height, width, ...]``.
        _feat_stride: unused here.
        all_anchors: array of all anchors, one ``(x1, y1, x2, y2)`` row each.
        num_anchors: number of anchors per feature-map position.

    Returns:
        ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)`` with shapes ``(1, 1, A * height, width)``
        for the labels and ``(1, height, width, A * 4)`` for the other three.
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    im_info = im_info[0]

    # allow boxes to sit over the edge by a small amount
    # (0: anchors may not cross the image border at all)
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt).
    # np.float64 is the dtype the removed ``np.float`` alias mapped to.
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # The message announces a shape, so report the shape rather than dumping
    # the whole matrix on every call.
    print('anchors iou with gt-boxes shape:', overlaps.shape)
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # row indices of every anchor that ties for the best overlap with some gt
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives: best overlap below rpn_negative_overlap
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU (best overlap >= rpn_positive_overlap)
    labels[max_overlaps >= cfg.FLAGS.rpn_positive_overlap] = 1

    if cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # subsample positive labels if we have too many: at most
    # rpn_fg_fraction * rpn_batchsize positives are kept, the surplus is
    # randomly switched to "don't care"
    num_fg = int(cfg.FLAGS.rpn_fg_fraction * cfg.FLAGS.rpn_batchsize)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many: negatives fill the rest
    # of the batch (rpn_batchsize minus the number of positives)
    num_bg = cfg.FLAGS.rpn_batchsize - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # Regression targets of each anchor relative to its best-matching gt box.
    # TODO: confirm that _compute_targets returns offsets.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.FLAGS2["bbox_inside_weights"])

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.FLAGS.rpn_positive_weight < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.FLAGS.rpn_positive_weight > 0) &
                (cfg.FLAGS.rpn_positive_weight < 1))
        positive_weights = (cfg.FLAGS.rpn_positive_weight /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.FLAGS.rpn_positive_weight) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    # labels: (1, H, W, A) -> (1, A, H, W) -> (1, 1, A * H, W)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    # bbox_inside_weights
    rpn_bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, A * 4))
    # bbox_outside_weights
    rpn_bbox_outside_weights = bbox_outside_weights.reshape((1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """Assign RPN labels and box-regression targets to anchors.

    Same as the anchor target layer in original Fast/er RCNN.

    Args:
        rpn_cls_score: array; only ``shape[1:3]`` is read, as the feature-map
            (height, width).
        gt_boxes: ground-truth boxes, one row per box. Passed whole to
            ``bbox_overlaps`` and ``_compute_targets``.
            NOTE(review): presumably (x1, y1, x2, y2[, cls]) -- confirm.
        im_info: indexable; ``im_info[0]`` bounds anchor column 3 (height)
            and ``im_info[1]`` bounds anchor column 2 (width).
        _feat_stride: not used by this function.
        all_anchors: 2-D array of anchors with at least four columns.
        num_anchors: number of anchors per feature-map cell (``A``).

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``. Labels have shape
        ``(1, 1, A * height, width)`` with 1 = positive, 0 = negative,
        -1 = don't care; the other three have shape
        ``(1, height, width, A * 4)``.
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # size of the feature map
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors that lie entirely inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care (initial value)
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes: (ex, gt).
    # np.float64 replaces the np.float alias, which NumPy 1.24 removed.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # for every gt box, find all anchors that tie for its best overlap
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # regression targets of every inside anchor w.r.t. its best-matching gt
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(
        bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(
        bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    rpn_labels = labels.reshape((1, 1, A * height, width))

    # bbox_targets / bbox_inside_weights / bbox_outside_weights
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))
    return (rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
            rpn_bbox_outside_weights)
def _load_rpn_roidb(self, gt_roidb, model):
    """Load pre-computed region proposals for every image and build a roidb.

    For each entry of ``self.image_index`` the file
    ``<self._imagenet3d_path>/region_proposals/<model>/<index>.txt`` is read,
    its first four columns are converted to ``(x1, y1, x2, y2)`` according to
    ``model``, and degenerate boxes (``x2 <= x1`` or ``y2 <= y1``) are
    dropped.

    Args:
        gt_roidb: per-image ground-truth entries (each with ``'boxes'`` and
            ``'gt_classes'``) or ``None``. When given, coverage statistics
            are accumulated in ``self._num_boxes_all`` and
            ``self._num_boxes_covered``.
        model: proposal source; one of ``'selective_search'``, ``'mcg'``,
            ``'edge_boxes'``, ``'rpn_caffenet'``, ``'rpn_vgg16'``.

    Returns:
        Whatever ``self.create_roidb_from_box_list(box_list, gt_roidb)``
        returns.

    Raises:
        AssertionError: if a proposal file is missing.
        ValueError: if ``model`` is not a supported proposal source (the
            original ``assert 1, ...`` could never fire and fell through to
            an unbound-variable error).
    """
    box_list = []
    for ix, index in enumerate(self.image_index):
        filename = os.path.join(self._imagenet3d_path, 'region_proposals',
                                model, index + '.txt')
        assert os.path.exists(filename), \
            '{} data not found at: {}'.format(model, filename)
        raw_data = np.loadtxt(filename, dtype=float)
        if len(raw_data.shape) == 1:
            # loadtxt flattens empty and single-row files; restore 2-D shape
            if raw_data.size == 0:
                raw_data = raw_data.reshape((0, 5))
            else:
                raw_data = raw_data.reshape((1, 5))

        if model in ('selective_search', 'mcg'):
            # columns are stored as (y1, x1, y2, x2)
            x1 = raw_data[:, 1].copy()
            y1 = raw_data[:, 0].copy()
            x2 = raw_data[:, 3].copy()
            y2 = raw_data[:, 2].copy()
        elif model == 'edge_boxes':
            # columns 2 and 3 are added to the corner, i.e. (x, y, w, h)
            x1 = raw_data[:, 0].copy()
            y1 = raw_data[:, 1].copy()
            x2 = raw_data[:, 2] + raw_data[:, 0]
            y2 = raw_data[:, 3] + raw_data[:, 1]
        elif model in ('rpn_caffenet', 'rpn_vgg16'):
            x1 = raw_data[:, 0].copy()
            y1 = raw_data[:, 1].copy()
            x2 = raw_data[:, 2].copy()
            y2 = raw_data[:, 3].copy()
        else:
            raise ValueError(
                'region proposal not supported: {}'.format(model))

        # keep only boxes with positive width and height
        inds = np.where((x2 > x1) & (y2 > y1))[0]
        raw_data = np.column_stack((x1, y1, x2, y2))[inds, :]

        self._num_boxes_proposal += raw_data.shape[0]
        box_list.append(raw_data)
        # parenthesised single-argument form works on Python 2 and 3
        print('load {}: {}'.format(model, index))

        if gt_roidb is not None:
            # compute overlaps between region proposals and gt boxes
            boxes = gt_roidb[ix]['boxes'].copy()
            gt_classes = gt_roidb[ix]['gt_classes'].copy()
            # np.float64 replaces the np.float alias removed in NumPy 1.24
            overlaps = bbox_overlaps(raw_data.astype(np.float64),
                                     boxes.astype(np.float64))

            # check how many gt boxes are covered by the proposals
            if raw_data.shape[0] != 0:
                max_overlaps = overlaps.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(np.where(
                        (gt_classes == k) &
                        (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(
                        np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[fg_inds] == i)[0])

    return self.create_roidb_from_box_list(box_list, gt_roidb)
def _load_imagenet3d_annotation(self, index):
    """Load bounding boxes and viewpoints from an imagenet3d label file.

    For the ``test``, ``test_1`` and ``test_2`` image sets no label file is
    read and all returned arrays are empty. Otherwise every line of
    ``<self._imagenet3d_path>/Labels/<index>.txt`` must hold either 5 words
    (class name + 4 box values) or 8 words (plus azimuth, elevation and
    in-plane rotation).

    When ``cfg.IS_RPN`` is set and the image has objects, the method also
    updates ``self._num_boxes_all`` and ``self._num_boxes_covered`` with how
    many ground-truth boxes are matched by grid boxes (multi-scale mode) or
    by shifted anchors (single-scale mode).

    Args:
        index: image identifier used to build the label and image paths.

    Returns:
        dict with the ground-truth arrays (``boxes``, ``gt_classes``,
        viewpoint arrays, sparse overlap/sub-index matrices) and
        ``'flipped': False``.

    Raises:
        AssertionError: if a label line has neither 5 nor 8 words.
    """
    if self._image_set in ('test', 'test_1', 'test_2'):
        lines = []
    else:
        filename = os.path.join(self._imagenet3d_path, 'Labels',
                                index + '.txt')
        with open(filename) as f:
            lines = f.readlines()

    num_objs = len(lines)
    dense_shape = (num_objs, self.num_classes)

    boxes = np.zeros((num_objs, 4), dtype=np.float32)
    # columns: azimuth, elevation, in-plane rotation
    viewpoints = np.zeros((num_objs, 3), dtype=np.float32)
    viewpoints_flipped = np.zeros((num_objs, 3), dtype=np.float32)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros(dense_shape, dtype=np.float32)

    for ix, line in enumerate(lines):
        words = line.split()
        assert len(words) == 5 or len(words) == 8, \
            'Wrong label format: {}'.format(index)
        cls = self._class_to_ind[words[0]]
        boxes[ix, :] = [float(n) for n in words[1:5]]
        gt_classes[ix] = cls
        overlaps[ix, cls] = 1.0
        if len(words) == 8:
            viewpoints[ix, :] = [float(n) for n in words[5:8]]
            # flip the viewpoint: azimuth and in-plane rotation change sign,
            # elevation is kept
            viewpoints_flipped[ix, 0] = -viewpoints[ix, 0]
            viewpoints_flipped[ix, 1] = viewpoints[ix, 1]
            viewpoints_flipped[ix, 2] = -viewpoints[ix, 2]
        else:
            # no viewpoint annotation available for this object
            viewpoints[ix, :] = np.inf
            viewpoints_flipped[ix, :] = np.inf

    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)

    # all-zero placeholders, stored sparse like the overlap matrix
    overlaps = scipy.sparse.csr_matrix(overlaps)
    subindexes = scipy.sparse.csr_matrix(
        np.zeros(dense_shape, dtype=np.int32))
    subindexes_flipped = scipy.sparse.csr_matrix(
        np.zeros(dense_shape, dtype=np.int32))
    viewindexes_azimuth = scipy.sparse.csr_matrix(
        np.zeros(dense_shape, dtype=np.float32))
    viewindexes_azimuth_flipped = scipy.sparse.csr_matrix(
        np.zeros(dense_shape, dtype=np.float32))
    viewindexes_elevation = scipy.sparse.csr_matrix(
        np.zeros(dense_shape, dtype=np.float32))
    viewindexes_elevation_flipped = scipy.sparse.csr_matrix(
        np.zeros(dense_shape, dtype=np.float32))
    viewindexes_rotation = scipy.sparse.csr_matrix(
        np.zeros(dense_shape, dtype=np.float32))
    viewindexes_rotation_flipped = scipy.sparse.csr_matrix(
        np.zeros(dense_shape, dtype=np.float32))

    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in
            # multi-scales: rescale the gt boxes once per scale
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

            # compute grid boxes
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

            # np.float64 replaces the np.float alias removed in NumPy 1.24
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float64),
                                          boxes_all.astype(np.float64))

            # check how many gt boxes are covered by grids
            if num_objs != 0:
                # maps each row of boxes_all back to its original gt box
                # (separate name so the ``index`` argument is not clobbered)
                gt_index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(np.where(
                        (gt_classes_all == k) &
                        (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])
                index_covered = np.unique(gt_index[fg_inds])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(
                        np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[index_covered] == i)[0])
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16
            # faster rcnn region proposal
            base_size = 16
            ratios = cfg.TRAIN.RPN_ASPECTS
            scales = cfg.TRAIN.RPN_SCALES
            anchors = generate_anchors(base_size, ratios, scales)
            num_anchors = anchors.shape[0]

            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]

            # height and width of the heatmap
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)

            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)

            # gt boxes
            gt_boxes = boxes * scale

            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) +
                           shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))

            # compute overlap
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float64),
                                          gt_boxes.astype(np.float64))

            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(np.where(
                        (gt_classes == k) &
                        (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(
                        np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[fg_inds] == i)[0])

    return {
        'boxes': boxes,
        'gt_classes': gt_classes,
        'gt_viewpoints': viewpoints,
        'gt_viewpoints_flipped': viewpoints_flipped,
        'gt_viewindexes_azimuth': viewindexes_azimuth,
        'gt_viewindexes_azimuth_flipped': viewindexes_azimuth_flipped,
        'gt_viewindexes_elevation': viewindexes_elevation,
        'gt_viewindexes_elevation_flipped': viewindexes_elevation_flipped,
        'gt_viewindexes_rotation': viewindexes_rotation,
        'gt_viewindexes_rotation_flipped': viewindexes_rotation_flipped,
        'gt_subclasses': gt_subclasses,
        'gt_subclasses_flipped': gt_subclasses_flipped,
        'gt_overlaps': overlaps,
        'gt_subindexes': subindexes,
        'gt_subindexes_flipped': subindexes_flipped,
        'flipped': False
    }
def _load_kitti_annotation(self, index):
    """Load bounding boxes from a KITTI-format label file.

    For the ``test`` image set no label file is read. Otherwise
    ``<self._data_path>/training/label_2/<index>.txt`` is parsed: ``Van`` is
    renamed to ``Car`` and an object is kept only when its class is in
    ``self._class_to_ind``, truncation (word 1) is below 0.5, occlusion
    (word 2) is below 3 and its box height (word 7 minus word 5) exceeds 25.
    Words 4..7 of a kept line become the box. Blank lines are skipped.

    When ``cfg.IS_RPN`` is set and the image has objects, the method also
    updates ``self._num_boxes_all`` and ``self._num_boxes_covered`` with how
    many ground-truth boxes are matched by grid boxes (multi-scale mode) or
    by shifted anchors (single-scale mode).

    Args:
        index: image identifier used to build the label and image paths.

    Returns:
        dict with ``boxes``, ``gt_classes``, sub-class placeholders, sparse
        ``gt_overlaps``/sub-index matrices and ``'flipped': False``.
    """
    # (class index, [4 box values]) for every object that passes the filter
    kept = []
    if self._image_set != 'test':
        filename = os.path.join(self._data_path, 'training', 'label_2',
                                index + '.txt')
        with open(filename) as f:
            for line in f:
                words = line.replace('Van', 'Car').split()
                if not words:
                    # tolerate blank lines instead of raising IndexError
                    continue
                cls = words[0]
                truncation = float(words[1])
                occlusion = int(words[2])
                box_height = float(words[7]) - float(words[5])
                if (cls in self._class_to_ind and truncation < 0.5 and
                        occlusion < 3 and box_height > 25):
                    kept.append((self._class_to_ind[cls],
                                 [float(n) for n in words[4:8]]))

    num_objs = len(kept)
    dense_shape = (num_objs, self.num_classes)

    boxes = np.zeros((num_objs, 4), dtype=np.float32)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros(dense_shape, dtype=np.float32)
    for ix, (cls_ind, box) in enumerate(kept):
        boxes[ix, :] = box
        gt_classes[ix] = cls_ind
        overlaps[ix, cls_ind] = 1.0

    overlaps = scipy.sparse.csr_matrix(overlaps)
    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
    subindexes = scipy.sparse.csr_matrix(
        np.zeros(dense_shape, dtype=np.int32))
    subindexes_flipped = scipy.sparse.csr_matrix(
        np.zeros(dense_shape, dtype=np.int32))

    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in
            # multi-scales: rescale the gt boxes once per scale
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

            # compute grid boxes
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

            # np.float64 replaces the np.float alias removed in NumPy 1.24
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float64),
                                          boxes_all.astype(np.float64))

            # check how many gt boxes are covered by grids
            if num_objs != 0:
                # maps each row of boxes_all back to its original gt box
                # (separate name so the ``index`` argument is not clobbered)
                gt_index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(np.where(
                        (gt_classes_all == k) &
                        (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])
                index_covered = np.unique(gt_index[fg_inds])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(
                        np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[index_covered] == i)[0])
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16
            # faster rcnn region proposal
            anchors = generate_anchors()
            num_anchors = anchors.shape[0]

            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]

            # height and width of the heatmap
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)

            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)

            # gt boxes
            gt_boxes = boxes * scale

            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) +
                           shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))

            # compute overlap
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float64),
                                          gt_boxes.astype(np.float64))

            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(np.where(
                        (gt_classes == k) &
                        (max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))[0])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(
                        np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[fg_inds] == i)[0])

    return {
        'boxes': boxes,
        'gt_classes': gt_classes,
        'gt_subclasses': gt_subclasses,
        'gt_subclasses_flipped': gt_subclasses_flipped,
        'gt_overlaps': overlaps,
        'gt_subindexes': subindexes,
        'gt_subindexes_flipped': subindexes_flipped,
        'flipped': False
    }
def _sample_rois(all_rois, all_scores, gt_boxes, gt_phrases,
                 fg_rois_per_image, rois_per_image):
    """Generate a random sample of RoIs comprising foreground and background
    examples, together with their matched phrases.

    Args:
        all_rois: candidate RoIs; columns 1..4 are used as box coordinates.
        all_scores: per-RoI scores, indexed the same way as ``all_rois``.
        gt_boxes: ground-truth boxes; columns 0..3 are the box and column 4
            is used as the label.
        gt_phrases: 2-D array with one row per ground-truth box.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs to sample.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets,
        bbox_inside_weights, phrases)`` for the sampled RoIs; foreground
        RoIs come first, and labels/phrases of background RoIs are zeroed.

    Raises:
        ValueError: if ``cfg.SAMPLE_NUM_FIXED_REGIONS`` is set and there are
            neither foreground nor background candidates (the original code
            dropped into ``pdb`` here, which hangs non-interactive runs).
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 replaces the np.float alias removed in NumPy 1.24
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]
    phrases = gt_phrases[gt_assignment]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

    # ``num_fg`` is the number of foreground RoIs actually placed at the
    # front of ``keep_inds``; everything after it is background.
    if cfg.SAMPLE_NUM_FIXED_REGIONS:
        # Small modification to the original version where we ensure a
        # fixed number of regions are sampled
        if fg_inds.size > 0 and bg_inds.size > 0:
            num_fg = min(fg_rois_per_image, fg_inds.size)
            fg_inds = npr.choice(fg_inds, size=int(num_fg), replace=False)
            num_bg = rois_per_image - num_fg
            # sample with replacement only when the pool is too small
            bg_inds = npr.choice(bg_inds, size=int(num_bg),
                                 replace=bg_inds.size < num_bg)
        elif fg_inds.size > 0:
            fg_inds = npr.choice(fg_inds, size=int(rois_per_image),
                                 replace=fg_inds.size < rois_per_image)
            num_fg = rois_per_image
        elif bg_inds.size > 0:
            bg_inds = npr.choice(bg_inds, size=int(rois_per_image),
                                 replace=bg_inds.size < rois_per_image)
            num_fg = 0
        else:
            raise ValueError(
                'no foreground or background RoIs available to sample')
    else:
        # Sample foreground regions without replacement, guarding against
        # the image having fewer than fg_rois_per_image of them
        num_fg = int(min(fg_rois_per_image, fg_inds.size))
        if fg_inds.size > 0:
            fg_inds = npr.choice(fg_inds, size=num_fg, replace=False)
        # Compute number of background RoIs to take from this image
        # (guarding against there being fewer than desired)
        num_bg = int(min(rois_per_image - num_fg, bg_inds.size))
        # Sample background regions without replacement
        if bg_inds.size > 0:
            bg_inds = npr.choice(bg_inds, size=num_bg, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    phrases = phrases[keep_inds]
    # Clamp labels for the background RoIs to 0. Uses the number of
    # foreground RoIs actually sampled: clamping at the requested
    # fg_rois_per_image left background RoIs with foreground labels whenever
    # fewer foreground RoIs were available.
    labels[int(num_fg):] = 0
    phrases[int(num_fg):, :] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    if cfg.DEBUG_ALL:
        # sanity check: applying the targets to the RoIs should recover the
        # matched ground-truth boxes
        target_boxes = compute_rois_offset(rois[:, 1:5],
                                           bbox_target_data[:, 1:5])
        match_boxes = gt_boxes[gt_assignment[keep_inds], :4]
        print('boxes consistency check')
        print(target_boxes[:2, :])
        print(match_boxes[:2, :])
        assert np.linalg.norm(target_boxes - match_boxes) < 0.01

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data)

    return (labels, rois, roi_scores, bbox_targets, bbox_inside_weights,
            phrases)
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image,
                 rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Exactly ``rois_per_image`` RoIs are returned; when a pool is too small
    it is sampled with replacement.

    Args:
        all_rois: candidate RoIs; columns 1..4 are used as box coordinates.
        all_scores: per-RoI scores, indexed the same way as ``all_rois``.
        gt_boxes: ground-truth boxes; columns 0..3 are the box and column 4
            is used as the label.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs to sample.
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets,
        bbox_inside_weights)`` for the sampled RoIs, foreground first.

    Raises:
        ValueError: if there are neither foreground nor background
            candidates.
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 replaces the np.float alias removed in NumPy 1.24
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= roi_fg_threshold overlap
    fg_inds = np.where(max_overlaps >= cfg.FLAGS.roi_fg_threshold)[0]
    # Select background RoIs as those within
    # [roi_bg_threshold_low, roi_bg_threshold_high)
    bg_inds = np.where(
        (max_overlaps < cfg.FLAGS.roi_bg_threshold_high) &
        (max_overlaps >= cfg.FLAGS.roi_bg_threshold_low))[0]

    # Small modification to the original version where we ensure a fixed
    # number of regions are sampled
    have_fg = fg_inds.size > 0
    have_bg = bg_inds.size > 0
    if have_fg and have_bg:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(
            fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        # sample with replacement only when the pool is too small
        bg_inds = npr.choice(
            bg_inds, size=int(bg_rois_per_image),
            replace=bg_inds.size < bg_rois_per_image)
    elif have_fg:
        fg_inds = npr.choice(
            fg_inds, size=int(rois_per_image),
            replace=fg_inds.size < rois_per_image)
        fg_rois_per_image = rois_per_image
    elif have_bg:
        bg_inds = npr.choice(
            bg_inds, size=int(rois_per_image),
            replace=bg_inds.size < rois_per_image)
        fg_rois_per_image = 0
    else:
        # ValueError is still an Exception, so existing handlers keep working
        raise ValueError(
            'no foreground or background RoIs available to sample')

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image,
                 rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Exactly ``rois_per_image`` RoIs are returned; when a pool is too small
    it is sampled with replacement.

    Args:
        all_rois: candidate RoIs; columns 1..4 are used as box coordinates.
        all_scores: per-RoI scores, indexed the same way as ``all_rois``.
        gt_boxes: ground-truth boxes; columns 0..3 are the box and column 4
            is used as the label.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs to sample.
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets,
        bbox_inside_weights)`` for the sampled RoIs, foreground first.

    Raises:
        ValueError: if there are neither foreground nor background
            candidates.
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 replaces the np.float alias removed in NumPy 1.24
    roi_boxes = np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64)
    gt_coords = np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64)
    overlaps = bbox_overlaps(roi_boxes, gt_coords)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # labels are computed here: each RoI takes the label of its best gt box
    labels = gt_boxes[gt_assignment, 4]

    # Flag definitions quoted by the original author for reference:
    #   roi_fg_threshold      0.5  ROI is foreground if overlap >= FG_THRESH
    #   roi_bg_threshold_high 0.5  ROI is background (class = 0) if overlap
    #   roi_bg_threshold_low  0.1  is in [LO, HI)

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.FLAGS.roi_fg_threshold)[0]
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    below_high = max_overlaps < cfg.FLAGS.roi_bg_threshold_high
    at_least_low = max_overlaps >= cfg.FLAGS.roi_bg_threshold_low
    bg_inds = np.where(below_high & at_least_low)[0]

    # Small modification to the original version where we ensure a fixed
    # number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(
            fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        # True when the background pool is smaller than what is still
        # needed, in which case it is sampled with replacement
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(
            bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        # only foreground candidates
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(
            fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        # only background candidates
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(
            bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        # ValueError is still an Exception, so existing handlers keep working
        raise ValueError(
            'no foreground or background RoIs available to sample')

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    # (original author's note: labels are true class labels, 0-20 -- confirm)
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    # original author's note: first column is the label, the remaining four
    # are the box offsets -- confirm against _compute_targets
    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """Assign RPN labels and box-regression targets to anchors.

    Same as the anchor target layer in original Fast/er RCNN.

    Args:
        rpn_cls_score: array; only ``shape[1:3]`` is read, as the feature-map
            (height, width).
        gt_boxes: ground-truth boxes, one row per box. Passed whole to
            ``bbox_overlaps`` and ``_compute_targets``.
            NOTE(review): presumably (x1, y1, x2, y2[, cls]) -- confirm.
        im_info: sequence whose first element holds the image info; of that
            element, item 0 bounds anchor column 3 (height) and item 1
            bounds anchor column 2 (width).
        _feat_stride: not used by this function.
        all_anchors: 2-D array of anchors with at least four columns.
        num_anchors: number of anchors per feature-map cell (``A``).

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``. Labels have shape
        ``(1, 1, A * height, width)`` with 1 = positive, 0 = negative,
        -1 = don't care; the other three have shape
        ``(1, height, width, A * 4)``.
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    im_info = im_info[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]
    flat_shape = (1, height, width, A * 4)

    # only keep anchors inside the image
    inside_mask = (
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )
    inds_inside = np.where(inside_mask)[0]
    num_inside = len(inds_inside)

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((num_inside, ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes: (ex, gt).
    # np.float64 replaces the np.float alias, which NumPy 1.24 removed.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
    # for every gt box, find all anchors that tie for its best overlap
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    is_negative = max_overlaps < cfg.FLAGS.rpn_negative_overlap
    if not cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels first so that positive labels can clobber them
        labels[is_negative] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.FLAGS.rpn_positive_overlap] = 1

    if cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels last so that negative labels can clobber positives
        labels[is_negative] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.FLAGS.rpn_fg_fraction * cfg.FLAGS.rpn_batchsize)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.FLAGS.rpn_batchsize - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # regression targets of every inside anchor w.r.t. its best-matching gt
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.FLAGS2["bbox_inside_weights"])

    bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    if cfg.FLAGS.rpn_positive_weight < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.FLAGS.rpn_positive_weight > 0) &
                (cfg.FLAGS.rpn_positive_weight < 1))
        positive_weights = (cfg.FLAGS.rpn_positive_weight /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.FLAGS.rpn_positive_weight) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(
        bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(
        bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    rpn_labels = labels.reshape((1, 1, A * height, width))

    # bbox_targets / bbox_inside_weights / bbox_outside_weights
    rpn_bbox_targets = bbox_targets.reshape(flat_shape)
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(flat_shape)
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(flat_shape)
    return (rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
            rpn_bbox_outside_weights)
def anchor_target_layer(rpn_cls_prob, gt_boxes, gt_points, im_info, _feat_stride,
                        all_anchors, num_anchors, target_name):
    """Assign RPN labels, box targets and keypoint targets to every anchor.

    Anchors are labelled 1 (positive), 0 (negative) or -1 (ignored) from their
    IoU with ``gt_boxes``, sub-sampled to the configured batch size (randomly,
    or by online hard example mining when ``cfg.TRAIN.HARD_POSITIVE_MINING``
    is set), and paired with regression targets and loss weights.

    Args:
        rpn_cls_prob: RPN class probabilities; axes 1 and 2 are read as the
            feature-map height and width, and channels ``num_anchors:`` of the
            last axis are used as the OHEM scores.
        gt_boxes: ground-truth boxes, passed to ``bbox_overlaps`` and
            ``_compute_bboxes_targets``.
        gt_points: ground-truth keypoints; columns ``:10`` are the keypoint
            targets' source and column ``10`` is a flag (0 disables the
            keypoint loss for anchors matched to that ground truth).
        im_info: ``im_info[0]`` is used as image height, ``im_info[1]`` as
            image width.
        _feat_stride: unused here; kept for interface compatibility.
        all_anchors: ``(total_anchors, 4)`` array of anchor boxes.
        num_anchors: number of anchors per feature-map cell.
        target_name: detection module name; ``"M3"`` allows anchors to extend
            512 px past the image border, all other values allow 0 px.

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights, kpoints_targets, rpn_kpoints_inside_weights,
        rpn_kpoints_outside_weights)``. Labels have shape
        ``(1, 1, A * height, width)``; the box arrays
        ``(1, height, width, A * 4)``; the keypoint arrays
        ``(1, height, width, A * 10)``.
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    hard_mining = cfg.TRAIN.HARD_POSITIVE_MINING

    # Follow the SSH setting: only the M3 module lets anchors hang over the
    # image border.
    _allowed_border = 512 if target_name == "M3" else 0

    # Feature-map extent is read from axes 1 and 2 of the probability map.
    height, width = rpn_cls_prob.shape[1:3]

    # Only keep anchors inside the (border-extended) image.
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)    # height
    )[0]

    # Avoids "ValueError: attempt to get argmax of an empty sequence" during
    # training: if no anchor is inside, fall back to every anchor.
    if inds_inside.shape[0] == 0:
        inds_inside = np.arange(0, total_anchors)

    anchors = all_anchors[inds_inside, :]
    num_inside = len(inds_inside)

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((num_inside,), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes, shape (ex, gt).
    # np.float64 replaces the np.float alias that NumPy 1.24 removed.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    if cfg.TRAIN.FORCE_FG_FOR_EACH_GT:
        # fg label: for each gt, every anchor that ties for the highest overlap
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    def _inside_ohem_scores():
        # Scores from channels num_anchors: flattened to one row per anchor,
        # restricted to the inside anchors.
        scores = rpn_cls_prob[:, :, :, num_anchors:].reshape((-1, 1))
        return scores[inds_inside]

    # Subsample positive labels if we have too many.
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        if hard_mining:
            # OHEM: keep the positives with the lowest score (hardest).
            pos_ohem_scores = 1 - _inside_ohem_scores()[fg_inds]
            order_pos_ohem_scores = pos_ohem_scores.ravel().argsort()[::-1]
            ohem_sampled_fgs = fg_inds[order_pos_ohem_scores[:num_fg]]
            labels[fg_inds] = -1
            labels[ohem_sampled_fgs] = 1
        else:
            disable_inds = npr.choice(
                fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            labels[disable_inds] = -1

    # Subsample negative labels if we have too many.
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        if not hard_mining:
            # randomly sub-sampling negatives
            disable_inds = npr.choice(
                bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            labels[disable_inds] = -1
        else:
            # OHEM: keep the negatives with the highest score (hardest).
            neg_ohem_scores = _inside_ohem_scores()[bg_inds]
            order_neg_ohem_scores = neg_ohem_scores.ravel().argsort()[::-1]
            ohem_sampled_bgs = bg_inds[order_neg_ohem_scores[:num_bg]]
            labels[bg_inds] = -1
            labels[ohem_sampled_bgs] = 0

    # Regression targets against the best-matching ground truth per anchor.
    bbox_targets = _compute_bboxes_targets(anchors,
                                           gt_boxes[argmax_overlaps, :])
    kpoints_targets = _compute_kpoints_targets(
        anchors, gt_points[argmax_overlaps, :10])

    # Only the positive anchors have regression targets.
    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    kpoints_inside_weights = np.zeros((num_inside, 10), dtype=np.float32)
    kpoints_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_KPOINTS_POSITIVE_WEIGHTS)

    bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    kpoints_outside_weights = np.zeros((num_inside, 10), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        kpoints_positive_weights = np.ones((1, 10)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
        kpoints_positive_weights = (cfg.TRAIN.RPN_KPOINTS_POSITIVE_WEIGHTS /
                                    np.sum(labels == 1))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights
    kpoints_outside_weights[labels == 1, :] = kpoints_positive_weights

    if gt_points.size > 0:
        # Anchors matched to a ground truth whose flag column is 0 get the
        # "no keypoints" weights, regardless of their label.
        gt_points_flag = gt_points[argmax_overlaps, 10]
        gt_points_flag = np.array(gt_points_flag, np.int32)
        kpoints_inside_weights[gt_points_flag == 0, :] = np.array(
            cfg.TRAIN.RPN_KPOINTS_WEIGHTS_NON)
        kpoints_outside_weights[gt_points_flag == 0, :] = np.array(
            cfg.TRAIN.RPN_KPOINTS_WEIGHTS_NON)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    kpoints_targets = _unmap(kpoints_targets, total_anchors, inds_inside,
                             fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors,
                                 inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors,
                                  inds_inside, fill=0)
    kpoints_inside_weights = _unmap(kpoints_inside_weights, total_anchors,
                                    inds_inside, fill=0)
    kpoints_outside_weights = _unmap(kpoints_outside_weights, total_anchors,
                                     inds_inside, fill=0)

    # labels: (1, H, W, A) -> (1, A, H, W) -> (1, 1, A * H, W)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    rpn_labels = labels.reshape((1, 1, A * height, width))

    box_shape = (1, height, width, A * 4)
    kpoints_shape = (1, height, width, A * 10)

    rpn_bbox_targets = bbox_targets.reshape(box_shape)
    kpoints_targets = kpoints_targets.reshape(kpoints_shape)
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(box_shape)
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(box_shape)
    rpn_kpoints_inside_weights = kpoints_inside_weights.reshape(kpoints_shape)
    rpn_kpoints_outside_weights = kpoints_outside_weights.reshape(
        kpoints_shape)

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights, kpoints_targets, \
        rpn_kpoints_inside_weights, rpn_kpoints_outside_weights
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image,
                 rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: proposal array; columns ``1:5`` are the box coordinates.
        all_scores: per-proposal scores, indexed in step with ``all_rois``.
        gt_boxes: ground-truth array; columns ``:4`` are the box coordinates
            and column ``4`` is the class label.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs to sample.
        num_classes: number of classes, forwarded to
            ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets,
        bbox_inside_weights)`` for the sampled RoIs, foreground first.

    Raises:
        ValueError: if no proposal qualifies as foreground or background.
    """
    # overlaps: (rois x gt_boxes) IoU between proposals and ground truth.
    # np.float64 replaces the np.float alias that NumPy 1.24 removed.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # Class label of the best-matching ground-truth box for each proposal.
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap.
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI).
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

    # Small modification to the original version where we ensure a fixed
    # number of regions are sampled (sampling with replacement if needed).
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image),
                             replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image),
                             replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Previously dropped into pdb here, which stalls or aborts an
        # unattended training run; fail with a descriptive error instead.
        raise ValueError(
            'No foreground or background RoIs found: every proposal has '
            'max overlap outside the configured FG/BG thresholds')

    # The indices that we're selecting (both fg and bg).
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays.
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0.
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    # Offsets between the kept proposals and their assigned ground-truth
    # boxes, together with the labels.
    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    # Expand into per-class regression targets and their inside weights.
    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
def _load_pascal_annotation(self, index):
    """Load image and bounding boxes info from XML file in the PASCAL VOC
    format.

    When ``cfg.IS_RPN`` is set, also accumulates into
    ``self._num_boxes_all`` / ``self._num_boxes_covered`` how many
    ground-truth boxes per class are covered by the grid boxes
    (multi-scale) or by the shifted anchors (single scale).

    Args:
        index: image identifier; ``<index>.xml`` is read from the
            ``Annotations`` directory under ``self._data_path``.

    Returns:
        dict with keys ``boxes``, ``gt_classes``, ``gt_subclasses``,
        ``gt_subclasses_flipped``, ``gt_overlaps``, ``gt_subindexes``,
        ``gt_subindexes_flipped`` and ``flipped`` (always ``False``).
        The sub-class entries are all zeros for this format.
    """
    filename = os.path.join(self._data_path, 'Annotations', index + '.xml')

    def get_data_from_tag(node, tag):
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    with open(filename) as f:
        data = minidom.parseString(f.read())

    objs = data.getElementsByTagName('object')
    num_objs = len(objs)

    # NOTE(review): coordinates are shifted by -1 before being stored as
    # uint16, so a coordinate of 0 in the XML becomes -1 and cannot be
    # represented; dtype left unchanged because callers may rely on it.
    boxes = np.zeros((num_objs, 4), dtype=np.uint16)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

    # Load object bounding boxes into a data frame.
    for ix, obj in enumerate(objs):
        # Make pixel indexes 0-based
        x1 = float(get_data_from_tag(obj, 'xmin')) - 1
        y1 = float(get_data_from_tag(obj, 'ymin')) - 1
        x2 = float(get_data_from_tag(obj, 'xmax')) - 1
        y2 = float(get_data_from_tag(obj, 'ymax')) - 1
        name = str(get_data_from_tag(obj, "name")).lower().strip()
        # Unknown class names fall back to class 0.
        if name in self._classes:
            cls = self._class_to_ind[name]
        else:
            cls = 0
        boxes[ix, :] = [x1, y1, x2, y2]
        gt_classes[ix] = cls
        overlaps[ix, cls] = 1.0

    overlaps = scipy.sparse.csr_matrix(overlaps)
    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
    subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                  dtype=np.int32)
    subindexes = scipy.sparse.csr_matrix(subindexes)
    subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in multi-scales
            # rescale the gt boxes
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

            # compute grid boxes
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

            # compute overlap (np.float64 replaces the removed np.float alias)
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float64),
                                          boxes_all.astype(np.float64))

            # check how many gt boxes are covered by grids
            if num_objs != 0:
                # Maps each scaled box back to its original object index;
                # named so it no longer shadows the ``index`` parameter.
                obj_index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes_all == k) &
                                 (max_overlaps >=
                                  cfg.TRAIN.FG_THRESH[k - 1]))[0])
                index_covered = np.unique(obj_index[fg_inds])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(
                        np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[index_covered] == i)[0])
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16
            # faster rcnn region proposal
            anchors = generate_anchors()
            num_anchors = anchors.shape[0]

            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]

            # height and width of the heatmap
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)

            # gt boxes
            gt_boxes = boxes * scale

            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack(
                (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                 shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))

            # compute overlap
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float64),
                                          gt_boxes.astype(np.float64))

            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes == k) &
                                 (max_overlaps >=
                                  cfg.TRAIN.FG_THRESH[k - 1]))[0])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(
                        np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[fg_inds] == i)[0])

    return {
        'boxes': boxes,
        'gt_classes': gt_classes,
        'gt_subclasses': gt_subclasses,
        'gt_subclasses_flipped': gt_subclasses_flipped,
        'gt_overlaps': overlaps,
        'gt_subindexes': subindexes,
        'gt_subindexes_flipped': subindexes_flipped,
        'flipped': False
    }
def _sample_rois(all_rois, gt_boxes, dontcare_areas, fg_rois_per_image,
                 rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Proposals whose summed intersection with the don't-care areas exceeds
    ``cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI`` are excluded from both the
    foreground and the background pools.

    Args:
        all_rois: proposal array; columns ``1:5`` are the box coordinates.
        gt_boxes: ground-truth array; columns ``:4`` are the box coordinates
            and column ``4`` is the class label.
        dontcare_areas: boxes to ignore, or ``None``.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: maximum total number of RoIs to sample.
        num_classes: number of classes, forwarded to
            ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, bbox_targets, bbox_inside_weights)`` for the
        sampled RoIs, foreground first. Sampling is without replacement, so
        fewer than ``rois_per_image`` RoIs may be returned.
    """
    # overlaps: (rois x gt_boxes)
    gt_num = gt_boxes.shape[0]
    # np.float64 replaces the np.float alias that NumPy 1.24 removed.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    if DEBUG:
        # Single formatted argument so the output is identical whether print
        # is a statement (Python 2) or a function (Python 3).
        print('max_overlaps {}'.format(overlaps[:-gt_num, :].max(axis=0)))

    # preclude dontcare areas
    ignore_inds = np.empty(shape=(0), dtype=int)
    if dontcare_areas is not None and dontcare_areas.shape[0] > 0:
        # intersec shape is D x R
        intersecs = bbox_intersections(
            np.ascontiguousarray(dontcare_areas, dtype=np.float64),  # D x 4
            np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64)  # R x 4
        )
        intersecs_sum = intersecs.sum(axis=0)  # R x 1
        ignore_inds = np.append(
            ignore_inds,
            np.where(
                intersecs_sum > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI)[0])

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    fg_inds = np.setdiff1d(fg_inds, ignore_inds)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    bg_inds = np.setdiff1d(bg_inds, ignore_inds)

    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image,
                             replace=False)

    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired). Cast to int like the
    # foreground count: npr.choice rejects a float size, which is what a
    # true-division rois_per_image would produce.
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = int(min(bg_rois_per_this_image, bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image,
                             replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights
def _load_pascal3d_voxel_exemplar_annotation(self, index):
    """Load image and bounding boxes info from txt file in the pascal
    subcategory exemplar format.

    For the ``'val'`` image set this delegates to
    ``self._load_pascal_annotation``. When ``cfg.IS_RPN`` is set, also
    accumulates into ``self._num_boxes_all`` / ``self._num_boxes_covered``
    how many ground-truth boxes per class are covered by the grid boxes
    (multi-scale) or by the shifted anchors (single scale).

    Each annotation line is whitespace-separated: word 0 is the class name,
    word 1 the sub-class (``-1`` entries are skipped), word 2 the flip flag,
    and words 3..6 the 1-based box coordinates.

    Args:
        index: image identifier; ``<index>.txt`` is read from the
            ``cfg.SUBCLS_NAME`` directory under ``self._pascal3d_path``.

    Returns:
        dict with keys ``boxes``, ``gt_classes``, ``gt_subclasses``,
        ``gt_subclasses_flipped``, ``gt_overlaps``, ``gt_subindexes``,
        ``gt_subindexes_flipped`` and ``flipped`` (always ``False``).

    Raises:
        AssertionError: if the annotation file is missing or the numbers of
            flipped and non-flipped objects differ.
    """
    if self._image_set == 'val':
        return self._load_pascal_annotation(index)

    filename = os.path.join(self._pascal3d_path, cfg.SUBCLS_NAME,
                            index + '.txt')
    assert os.path.exists(filename), \
        'Path does not exist: {}'.format(filename)

    # the annotation file contains flipped objects
    lines = []
    lines_flipped = []
    with open(filename) as f:
        for line in f:
            words = line.split()
            subcls = int(words[1])
            is_flip = int(words[2])
            if subcls != -1:
                if is_flip == 0:
                    lines.append(line)
                else:
                    lines_flipped.append(line)

    num_objs = len(lines)

    # store information of flipped objects
    assert (num_objs == len(lines_flipped)
            ), 'The number of flipped objects is not the same!'
    gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
    for ix, line in enumerate(lines_flipped):
        words = line.split()
        subcls = int(words[1])
        gt_subclasses_flipped[ix] = subcls

    boxes = np.zeros((num_objs, 4), dtype=np.float32)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    gt_subclasses = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
    subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                  dtype=np.int32)

    for ix, line in enumerate(lines):
        words = line.split()
        cls = self._class_to_ind[words[0]]
        subcls = int(words[1])
        # Make pixel indexes 0-based
        boxes[ix, :] = [float(n) - 1 for n in words[3:7]]
        gt_classes[ix] = cls
        gt_subclasses[ix] = subcls
        overlaps[ix, cls] = 1.0
        subindexes[ix, cls] = subcls
        subindexes_flipped[ix, cls] = gt_subclasses_flipped[ix]

    overlaps = scipy.sparse.csr_matrix(overlaps)
    subindexes = scipy.sparse.csr_matrix(subindexes)
    subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

    if cfg.IS_RPN:
        if cfg.IS_MULTISCALE:
            # compute overlaps between grid boxes and gt boxes in multi-scales
            # rescale the gt boxes
            boxes_all = np.zeros((0, 4), dtype=np.float32)
            for scale in cfg.TRAIN.SCALES:
                boxes_all = np.vstack((boxes_all, boxes * scale))
            gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

            # compute grid boxes
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]
            boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

            # compute overlap (np.float64 replaces the removed np.float alias)
            overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float64),
                                          boxes_all.astype(np.float64))

            # check how many gt boxes are covered by grids
            if num_objs != 0:
                # Maps each scaled box back to its original object index;
                # named so it no longer shadows the ``index`` parameter.
                obj_index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes_all == k) &
                                 (max_overlaps >=
                                  cfg.TRAIN.FG_THRESH[k - 1]))[0])
                index_covered = np.unique(obj_index[fg_inds])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(
                        np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[index_covered] == i)[0])
        else:
            assert len(cfg.TRAIN.SCALES_BASE) == 1
            scale = cfg.TRAIN.SCALES_BASE[0]
            feat_stride = 16
            # faster rcnn region proposal
            base_size = 16
            ratios = [3.0, 2.0, 1.5, 1.0, 0.75, 0.5, 0.25]
            scales = 2**np.arange(1, 6, 0.5)
            anchors = generate_anchors(base_size, ratios, scales)
            num_anchors = anchors.shape[0]

            # image size
            s = PIL.Image.open(self.image_path_from_index(index)).size
            image_height = s[1]
            image_width = s[0]

            # height and width of the heatmap
            height = np.round((image_height * scale - 1) / 4.0 + 1)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            height = np.floor((height - 1) / 2 + 1 + 0.5)
            width = np.round((image_width * scale - 1) / 4.0 + 1)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)
            width = np.floor((width - 1) / 2.0 + 1 + 0.5)

            # gt boxes
            gt_boxes = boxes * scale

            # 1. Generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack(
                (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                 shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2)))
            all_anchors = all_anchors.reshape((K * A, 4))

            # compute overlap
            overlaps_grid = bbox_overlaps(all_anchors.astype(np.float64),
                                          gt_boxes.astype(np.float64))

            # check how many gt boxes are covered by anchors
            if num_objs != 0:
                max_overlaps = overlaps_grid.max(axis=0)
                fg_inds = []
                for k in range(1, self.num_classes):
                    fg_inds.extend(
                        np.where((gt_classes == k) &
                                 (max_overlaps >=
                                  cfg.TRAIN.FG_THRESH[k - 1]))[0])

                for i in range(self.num_classes):
                    self._num_boxes_all[i] += len(
                        np.where(gt_classes == i)[0])
                    self._num_boxes_covered[i] += len(
                        np.where(gt_classes[fg_inds] == i)[0])

    return {
        'boxes': boxes,
        'gt_classes': gt_classes,
        'gt_subclasses': gt_subclasses,
        'gt_subclasses_flipped': gt_subclasses_flipped,
        'gt_overlaps': overlaps,
        'gt_subindexes': subindexes,
        'gt_subindexes_flipped': subindexes_flipped,
        'flipped': False
    }
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image,
                 rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples, together with box and "dis" regression targets.

    Args:
        all_rois: proposal array; columns ``1:5`` are the box coordinates
            (per the original author's note, e.g. shape ``[2000, 8]`` with
            the "dis" values in the trailing columns).
        all_scores: per-proposal scores, indexed in step with ``all_rois``.
        gt_boxes: ground-truth array; columns ``:4`` are the box coordinates,
            column ``4`` the class label and columns ``5:`` the "dis" labels.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs to sample.
        num_classes: number of classes, forwarded to
            ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets,
        bbox_inside_weights, dis_inside_weights, dis_targets)`` for the
        sampled RoIs, foreground first.

    Raises:
        ValueError: if no proposal qualifies as foreground or background.
    """
    # Debug output kept from the original implementation.
    print(
        "------------------------------------------------------------------------------------------------------------\r\n"
    )
    # np.float64 replaces the np.float alias that NumPy 1.24 removed.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    # Index of the best-overlapping ground-truth box for each proposal row.
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]
    print(max_overlaps, "\r\n", gt_boxes[gt_assignment, 5:])

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.FLAGS.roi_fg_threshold)[0]
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.FLAGS.roi_bg_threshold_high) &
                       (max_overlaps >= cfg.FLAGS.roi_bg_threshold_low))[0]

    # Small modification to the original version where we ensure a fixed
    # number of regions are sampled (sampling with replacement if needed).
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image),
                             replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image),
                             replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Previously dropped into pdb here, which stalls or aborts an
        # unattended training run; fail with a descriptive error instead.
        raise ValueError(
            'No foreground or background RoIs found: every proposal has '
            'max overlap outside the configured FG/BG thresholds')

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays. The builtin ``int`` is
    # exactly what the removed np.int alias referred to.
    labels = labels[np.array(keep_inds, dtype=int)]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
    # "dis" ground-truth labels of the assigned boxes for the kept RoIs.
    gt_dis_label = gt_boxes[gt_assignment[keep_inds], 5:]

    bbox_targets, dis_targets, bbox_inside_weights, dis_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, gt_dis_label,
                                    num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights, dis_inside_weights, dis_targets