def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes,
                 fast_iou_positive_threshld):
    """Generate a random sample of RoIs comprising foreground and background examples.

    all_rois shape is [-1, 4]
    gt_boxes shape is [-1, 5]. that is [x1, y1, x2, y2, label]

    :param all_rois: candidate RoIs to sample from.
    :param gt_boxes: ground-truth boxes; the last column is the class label.
    :param fg_rois_per_image: maximum number of foreground RoIs to keep.
    :param rois_per_image: maximum total number of RoIs to keep.
    :param num_classes: forwarded to ``_get_bbox_regression_labels``.
    :param fast_iou_positive_threshld: RoIs whose best overlap with any
        ground-truth box is >= this value are treated as foreground.
    :return: ``(labels, rois, bbox_targets)`` for the sampled RoIs. Foreground
        RoIs come first; every label after them is set to 0.
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 replaces the ``np.float`` alias, which no longer exists in
    # current NumPy releases; the alias always meant 64-bit float.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois, dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :-1], dtype=np.float64))

    # Best-matching ground-truth box (and its overlap) for every RoI.
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, -1]

    # Select foreground RoIs as those with >= the positive threshold overlap
    fg_inds = np.where(max_overlaps >= fast_iou_positive_threshld)[0]

    # Select background RoIs as those within [negative threshold, positive threshold)
    bg_inds = np.where(
        (max_overlaps < fast_iou_positive_threshld) &
        (max_overlaps >= cfgs.FAST_RCNN_IOU_NEGATIVE_THRESHOLD))[0]

    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)

    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_this_image), replace=False)

    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)

    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_this_image), replace=False)

    # The indices that we're selecting (both fg and bg); fg indices come first
    keep_inds = np.append(fg_inds, bg_inds)

    # Select sampled values from various arrays.
    # Fancy indexing yields a new array, so the write below does not touch gt_boxes.
    labels = labels[keep_inds]

    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_this_image):] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois, gt_boxes[gt_assignment[keep_inds], :-1], labels)
    bbox_targets = _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets
def iou_rotate(boxes1, boxes2):
    """Return the overlap matrix between ``boxes1`` and ``boxes2``.

    ``boxes1`` is first passed through ``forward_convert`` and
    ``get_horizen_minAreaRectangle``; ``boxes2`` is handed to
    ``bbox_overlaps`` unchanged.

    NOTE(review): presumably ``boxes2`` is already in the box format that
    ``bbox_overlaps`` expects -- confirm with the callers.

    A second stage that recomputed overlaps with ``get_iou_matrix`` on the
    original ``boxes1`` used to follow here but was disabled, so only the
    result of ``bbox_overlaps`` is returned.

    :param boxes1: boxes to convert before the overlap computation.
    :param boxes2: boxes passed to ``bbox_overlaps`` as-is.
    :return: the matrix returned by ``bbox_overlaps``.
    """
    boxes1_convert = forward_convert(boxes1, False)
    boxes1_h = get_horizen_minAreaRectangle(boxes1_convert)

    # np.float64 replaces the ``np.float`` alias, which no longer exists in
    # current NumPy releases; the alias always meant 64-bit float.
    iou_h = bbox_overlaps(np.ascontiguousarray(boxes1_h, dtype=np.float64),
                          np.ascontiguousarray(boxes2, dtype=np.float64))
    return iou_h
def anchor_target_layer(gt_boxes_h, gt_boxes_r, anchors, gpu_id=0):
    """Assign class labels and box regression targets to anchors.

    Every anchor is matched to the ground-truth box it overlaps most. The
    overlap comes from ``bbox_overlaps(anchors, gt_boxes_h)`` when
    ``cfgs.METHOD == 'H'`` and from
    ``rbbx_overlaps(anchors, gt_boxes_r[:, :-1], gpu_id)`` otherwise.

    :param gt_boxes_h: ground-truth boxes used for the overlap computation
        when ``cfgs.METHOD == 'H'``.
    :param gt_boxes_r: ground-truth boxes used as regression targets; the
        last column is a 1-based class id. NOTE(review): the second-to-last
        column is compared with the last anchor column as an angle when
        ``cfgs.USE_ANGLE_COND`` is set -- presumably the layout is
        [x_c, y_c, w, h, theta, label]; confirm.
    :param anchors: anchors to label.
    :param gpu_id: forwarded to ``rbbx_overlaps``.
    :return: float32 arrays ``(labels, target_delta, anchor_states,
        target_boxes)``. ``labels`` is one-hot over ``cfgs.CLASS_NUM``
        columns; ``anchor_states`` is 1 for positive anchors, -1 for ignored
        anchors and 0 otherwise.
    """
    anchor_states = np.zeros((anchors.shape[0],))
    labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM))

    if gt_boxes_r.shape[0]:
        # [N, M]
        if cfgs.METHOD == 'H':
            # np.float64 replaces the ``np.float`` alias, which no longer
            # exists in current NumPy releases.
            overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                                     np.ascontiguousarray(gt_boxes_h, dtype=np.float64))
        else:
            overlaps = rbbx_overlaps(np.ascontiguousarray(anchors, dtype=np.float32),
                                     np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32),
                                     gpu_id)

        argmax_overlaps_inds = np.argmax(overlaps, axis=1)
        max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds]

        # compute box regression targets
        target_boxes = gt_boxes_r[argmax_overlaps_inds]

        if cfgs.USE_ANGLE_COND:
            if cfgs.METHOD == 'R':
                # A positive anchor must additionally be within 15 of the
                # matched box's second-to-last column (the angle).
                delta_theta = np.abs(target_boxes[:, -2] - anchors[:, -1])
                theta_indices = delta_theta < 15
                positive_indices = (max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD) & theta_indices
            else:
                positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD

            # Anchors rejected only by the angle test have an overlap above
            # the positive threshold, so they end up neither positive nor
            # ignored.
            ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & \
                             (max_overlaps < cfgs.IOU_POSITIVE_THRESHOLD)
        else:
            positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD
            ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices

        anchor_states[ignore_indices] = -1
        anchor_states[positive_indices] = 1

        # compute target class labels (class ids are 1-based, columns 0-based)
        labels[positive_indices, target_boxes[positive_indices, -1].astype(int) - 1] = 1
    else:
        # no annotations? then everything is background
        target_boxes = np.zeros((anchors.shape[0], gt_boxes_r.shape[1]))

    if cfgs.METHOD == 'H':
        # Convert [x1, y1, x2, y2] anchors to [x_c, y_c, w, h, theta] before
        # computing the regression deltas.
        x_c = (anchors[:, 2] + anchors[:, 0]) / 2
        y_c = (anchors[:, 3] + anchors[:, 1]) / 2
        # h is taken from the x-extent and w from the y-extent, paired with
        # theta = -90. NOTE(review): presumably this follows the rotated-box
        # convention in use -- confirm before "fixing".
        h = anchors[:, 2] - anchors[:, 0] + 1
        w = anchors[:, 3] - anchors[:, 1] + 1
        theta = -90 * np.ones_like(x_c)
        anchors = np.vstack([x_c, y_c, w, h, theta]).transpose()

    target_delta = bbox_transform.rbbox_transform(ex_rois=anchors, gt_rois=target_boxes)

    return np.array(labels, np.float32), np.array(target_delta, np.float32), \
        np.array(anchor_states, np.float32), np.array(target_boxes, np.float32)
def wending(new_center, old_cetner, k):
    """Report whether the cluster centers are still moving.

    For each of the first ``k`` indices the overlap between the new center
    and the old center at the same index is read from the diagonal of the
    ``bbox_overlaps`` matrix, and ``1 - overlap`` is summed.

    :param new_center: current center boxes.
    :param old_cetner: center boxes from the previous iteration.
    :param k: number of centers to compare.
    :return: ``False`` when the summed distance is <= 0.000001 (centers have
        settled), ``True`` otherwise.
    """
    # np.float64 replaces the ``np.float`` alias, which no longer exists in
    # current NumPy releases; the alias always meant 64-bit float.
    overlaps = bbox_overlaps(np.ascontiguousarray(new_center, dtype=np.float64),
                             np.ascontiguousarray(old_cetner, dtype=np.float64))
    total_distance = sum(1 - overlaps[i, i] for i in range(k))
    # Written as a negated "<=" so that a NaN distance still reports True,
    # exactly as the original if/else did.
    return not total_distance <= 0.000001
def anchor_target_layer(gt_boxes, anchors):
    """Assign class labels and box regression targets to anchors.

    Every anchor is matched to the ground-truth box it overlaps most.

    :param gt_boxes: np.array of shape (M, 5) for (x1, y1, x2, y2, label).
        Labels are 1-based class ids.
    :param anchors: np.array of annotations of shape (N, 4) for (x1, y1, x2, y2).
    :return: float32 arrays ``(labels, target_delta, anchor_states)``.
        ``labels`` has shape (N, cfgs.CLASS_NUM) and is one-hot for positive
        anchors; ``target_delta`` is the output of
        ``bbox_transform.bbox_transform``; ``anchor_states`` is 1 for
        positive anchors, -1 for ignored anchors and 0 otherwise.
    """
    anchor_states = np.zeros((anchors.shape[0], ))
    labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM))

    if gt_boxes.shape[0]:
        # [N, M]
        # np.float64 replaces the ``np.float`` alias, which no longer exists
        # in current NumPy releases; the alias always meant 64-bit float.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))

        argmax_overlaps_inds = np.argmax(overlaps, axis=1)
        max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds]

        positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD
        # Anchors between the two thresholds are neither positive nor background.
        ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices

        anchor_states[ignore_indices] = -1
        anchor_states[positive_indices] = 1

        # compute box regression targets
        target_boxes = gt_boxes[argmax_overlaps_inds]

        # compute target class labels (class ids are 1-based, columns 0-based)
        labels[positive_indices, target_boxes[positive_indices, 4].astype(int) - 1] = 1
    else:
        # no annotations? then everything is background
        target_boxes = np.zeros((anchors.shape[0], gt_boxes.shape[1]))

    target_delta = bbox_transform.bbox_transform(ex_rois=anchors, gt_rois=target_boxes)

    return np.array(labels, np.float32), np.array(target_delta, np.float32), np.array(
        anchor_states, np.float32)
def cluster(boxes, k):
    """Group ``boxes`` into ``k`` clusters, k-means style, using box overlap.

    ``k`` distinct boxes are drawn at random as initial centers. Each round,
    every box is assigned to the center it overlaps most and each center is
    replaced by the mean of its assigned boxes. Iteration stops when
    ``wending`` reports that the centers no longer move, or when the
    iteration cap is exceeded.

    :param boxes: array of boxes; it is indexed with a boolean mask and
        averaged along axis 0, so it must be a NumPy array.
    :param k: number of clusters; must not exceed ``len(boxes)`` because the
        initial centers are drawn without replacement.
    :return: list of ``k`` center boxes.
    """
    center_id = np.random.choice(np.arange(len(boxes)), k, replace=False)
    new_center_boxes = [boxes[idx] for idx in center_id]
    old_center_boxes = [np.zeros_like(box) for box in new_center_boxes]

    # The original reused ``i`` for both the iteration counter and the inner
    # cluster loop, so the counter was reset every round and the cap below
    # could never trigger. The two now have separate names.
    max_iterations = 1000000
    iteration = 0
    while wending(new_center_boxes, old_center_boxes, k):
        # np.float64 replaces the ``np.float`` alias, which no longer exists
        # in current NumPy releases.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(boxes, dtype=np.float64),
            np.ascontiguousarray(new_center_boxes, dtype=np.float64))
        argmax_id = np.argmax(overlaps, axis=1)

        for center_idx in range(k):
            members = boxes[argmax_id == center_idx]
            old_center_boxes[center_idx] = new_center_boxes[center_idx]
            # An empty cluster would give a NaN mean, and a NaN center keeps
            # the convergence check true forever; keep the previous center.
            if len(members):
                new_center_boxes[center_idx] = np.mean(members, axis=0)

        if iteration > max_iterations:
            break
        iteration += 1

    return new_center_boxes
def anchor_target_layer(gt_boxes, img_shape, all_anchors, is_restrict_bg=False):
    """Same as the anchor target layer in original Fast/er RCNN

    Labels every anchor as positive (1), negative (0) or "don't care" (-1)
    and computes box regression targets towards the best-overlapping
    ground-truth box.

    :param gt_boxes: ground-truth boxes; the last column (class label) is
        dropped before use.
    :param img_shape: indexable whose entries 1 and 2 are the image height
        and width.
    :param all_anchors: all anchors; columns 0..3 are compared with the image
        bounds as x1, y1, x2, y2.
    :param is_restrict_bg: when True the background budget becomes
        ``max(num_bg, num_fg * 1.5)``.
    :return: ``(rpn_labels, rpn_bbox_targets)`` with shapes (-1, 1) and
        (-1, 4), covering all anchors; anchors outside the image get label -1
        and zero targets.
    """
    total_anchors = all_anchors.shape[0]
    img_h, img_w = img_shape[1], img_shape[2]
    gt_boxes = gt_boxes[:, :-1]  # remove class label

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < img_w + _allowed_border) &  # width
        (all_anchors[:, 3] < img_h + _allowed_border)    # height
    )[0]
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    # Start with every label set to -1; there is one label per anchor that
    # lies inside the image.
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # np.float64 replaces the ``np.float`` alias, which no longer exists in
    # current NumPy releases; the alias always meant 64-bit float.
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))

    # For every in-image anchor (row): index of the gt box it overlaps most,
    # and the value of that overlap.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

    # For every gt box (column): index of the anchor that overlaps it most,
    # and the value of that overlap.
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
    # Row indices of every anchor that ties for the best overlap with some gt box.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfgs.TRAIN_RPN_CLOOBER_POSITIVES:
        # Assign background first so that positives can overwrite it.
        labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0

    # The best-overlapping anchor(s) of each gt box are positive.
    labels[gt_argmax_overlaps] = 1
    # Anchors whose best overlap reaches the positive threshold are positive too.
    labels[max_overlaps >= cfgs.RPN_IOU_POSITIVE_THRESHOLD] = 1

    if cfgs.TRAIN_RPN_CLOOBER_POSITIVES:
        # Assign background last so that it overwrites positives.
        labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0

    # Foreground budget of the minibatch.
    num_fg = int(cfgs.RPN_MINIBATCH_SIZE * cfgs.RPN_POSITIVE_RATE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        # Too many positives: randomly switch the surplus to "don't care".
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    num_bg = cfgs.RPN_MINIBATCH_SIZE - np.sum(labels == 1)
    if is_restrict_bg:
        num_bg = max(num_bg, num_fg * 1.5)
    # ``num_fg * 1.5`` is a float; the sample size passed to choice() below
    # must be an integer.
    num_bg = int(num_bg)

    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        # Too many negatives: randomly switch the surplus to "don't care".
        # With fewer negatives than the budget, all of them are kept.
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # Regression targets from each in-image anchor to the gt box it overlaps most.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # Everything above only covers anchors inside the image, while callers
    # need results for all anchors: map up to the original set of anchors.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)

    rpn_labels = labels.reshape((-1, 1))
    rpn_bbox_targets = bbox_targets.reshape((-1, 4))

    return rpn_labels, rpn_bbox_targets
def anchor_target_layer(gt_boxes, img_shape, all_anchors, is_restrict=False):
    """
    get target anchor the same as Fast/er RCNN

    Labels every anchor as positive (1), negative (0) or "don't care" (-1)
    and computes box regression targets towards the best-overlapping
    ground-truth box.

    :param gt_boxes: ground-truth boxes; the last column (class label) is
        dropped before use.
    :param img_shape: indexable whose entries 1 and 2 are the image height
        and width.
    :param all_anchors: all anchors; columns 0..3 are compared with the image
        bounds as left_up_x, left_up_y, right_down_x, right_down_y.
    :param is_restrict: when True the background budget becomes
        ``max(num_bg, num_fg * 1.5)``.
    :return: ``(rpn_labels, rpn_bbox_targets)`` with shapes (-1, 1) and
        (-1, 4), covering all anchors; anchors outside the image get label -1
        and zero targets.
    """
    anchors_num = all_anchors.shape[0]
    img_height, img_width = img_shape[1], img_shape[2]
    gt_boxes = gt_boxes[:, :-1]  # remove class label

    # how far a box may extend over the image edge
    allow_border = 0

    # only keep anchors inside the image
    indices_inside = np.where(
        (all_anchors[:, 0] >= -allow_border) &             # left_up_x
        (all_anchors[:, 1] >= -allow_border) &             # left_up_y
        (all_anchors[:, 2] < img_width + allow_border) &   # right_down_x
        (all_anchors[:, 3] < img_height + allow_border)    # right_down_y
    )[0]
    anchors = all_anchors[indices_inside, :]

    # label: 1 -> positive, 0 -> negative, -1 -> dont care
    labels = np.empty((len(indices_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gtbox
    # np.float64 replaces the ``np.float`` alias, which no longer exists in
    # current NumPy releases; the alias always meant 64-bit float.
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))

    # Per anchor: best gt box and its overlap.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(indices_inside)), argmax_overlaps]

    # Per gt box: best overlap, then every anchor row that ties for it.
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfgs.TRAIN_RPN_CLOOBER_POSITIVES:
        # Assign background first so that positives can overwrite it.
        labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0

    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps >= cfgs.RPN_IOU_POSITIVE_THRESHOLD] = 1

    if cfgs.TRAIN_RPN_CLOOBER_POSITIVES:
        # Assign background last so that it overwrites positives.
        labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0

    # reference paper(Faster RCNN) balance positive and negative ratio
    # num foreground of RPN
    num_fg = int(cfgs.RPN_MINIBATCH_SIZE * cfgs.RPN_POSITIVE_RATE)
    fg_indices = np.where(labels == 1)[0]
    if len(fg_indices) > num_fg:
        disable_indices = np.random.choice(
            fg_indices, size=(len(fg_indices) - num_fg), replace=False)
        labels[disable_indices] = -1

    # num background of RPN
    num_bg = cfgs.RPN_MINIBATCH_SIZE - np.sum(labels == 1)
    if is_restrict:
        num_bg = max(num_bg, num_fg * 1.5)
    # ``num_fg * 1.5`` is a float; the sample size passed to choice() below
    # must be an integer.
    num_bg = int(num_bg)

    bg_indices = np.where(labels == 0)[0]
    if len(bg_indices) > num_bg:
        disable_indices = np.random.choice(
            bg_indices, size=(len(bg_indices) - num_bg), replace=False)
        labels[disable_indices] = -1

    bbox_targets = compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # map up to original set of anchors
    labels = unmap_anchor(labels, anchors_num, indices_inside, fill=-1)
    bbox_targets = unmap_anchor(bbox_targets, anchors_num, indices_inside, fill=0)

    rpn_labels = labels.reshape((-1, 1))
    rpn_bbox_targets = bbox_targets.reshape((-1, 4))

    return rpn_labels, rpn_bbox_targets
def anchor_target_layer(gt_boxes, img_shape, all_anchors, is_restrict_bg=False):
    """Same as the anchor target layer in original Fast/er RCNN

    Labels every anchor as positive (1), negative (0) or "don't care" (-1)
    and computes box regression targets towards the best-overlapping
    ground-truth box.

    :param gt_boxes: ground-truth boxes; the last column (class label) is
        dropped before use.
    :param img_shape: indexable whose entries 1 and 2 are the image height
        and width.
    :param all_anchors: all anchors; columns 0..3 are compared with the image
        bounds as x1, y1, x2, y2.
    :param is_restrict_bg: when True the background budget becomes
        ``max(num_bg, num_fg * 1.5)``.
    :return: ``(rpn_labels, rpn_bbox_targets)`` with shapes (-1, 1) and
        (-1, 4), covering all anchors; anchors outside the image get label -1
        and zero targets.
    """
    total_anchors = all_anchors.shape[0]
    img_h, img_w = img_shape[1], img_shape[2]
    gt_boxes = gt_boxes[:, :-1]  # remove class label

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < img_w + _allowed_border) &  # width
        (all_anchors[:, 3] < img_h + _allowed_border)    # height
    )[0]
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # np.float64 replaces the ``np.float`` alias, which no longer exists in
    # current NumPy releases; the alias always meant 64-bit float.
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))

    # Per anchor: best gt box and its overlap.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

    # Per gt box: best overlap, then every anchor row that ties for it.
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfgs.TRAIN_RPN_CLOOBER_POSITIVES:
        # Assign background first so that positives can overwrite it.
        labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0

    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps >= cfgs.RPN_IOU_POSITIVE_THRESHOLD] = 1

    if cfgs.TRAIN_RPN_CLOOBER_POSITIVES:
        # Assign background last so that it overwrites positives.
        labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0

    # Subsample positives down to the foreground budget.
    num_fg = int(cfgs.RPN_MINIBATCH_SIZE * cfgs.RPN_POSITIVE_RATE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # Subsample negatives down to the background budget.
    num_bg = cfgs.RPN_MINIBATCH_SIZE - np.sum(labels == 1)
    if is_restrict_bg:
        num_bg = max(num_bg, num_fg * 1.5)
    # ``num_fg * 1.5`` is a float; the sample size passed to choice() below
    # must be an integer.
    num_bg = int(num_bg)

    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)

    rpn_labels = labels.reshape((-1, 1))
    rpn_bbox_targets = bbox_targets.reshape((-1, 4))

    return rpn_labels, rpn_bbox_targets
def anchor_target_layer(gt_boxes_h_batch, gt_boxes_r_batch, gt_encode_label_batch,
                        anchor_batch, gpu_id=0):
    """Assign labels, regression targets and encoded labels to anchors per image.

    The first ``cfgs.BATCH_SIZE`` entries of each batch argument are
    processed independently: every anchor is matched to the ground-truth box
    it overlaps most (``bbox_overlaps`` on ``gt_boxes_h`` when
    ``cfgs.METHOD == 'H'``, ``rbbx_overlaps`` on ``gt_boxes_r[:, :-1]``
    otherwise).

    :param gt_boxes_h_batch: per-image boxes, indexed as ``[i, :, :]``; used
        for the overlap computation when ``cfgs.METHOD == 'H'``.
    :param gt_boxes_r_batch: per-image boxes, indexed as ``[i, :, :]``; used
        as regression targets. The last column is a 1-based class id.
    :param gt_encode_label_batch: per-image encoded labels, indexed as
        ``[i, :, :]``, one row per ground-truth box.
    :param anchor_batch: per-image anchors, indexed as ``[i]``.
    :param gpu_id: forwarded to ``rbbx_overlaps``.
    :return: float32 arrays ``(labels, target_delta, anchor_states,
        target_boxes, target_encode_label)``, each stacked over the batch.
        The last column of ``target_delta`` is dropped. ``anchor_states`` is
        1 for positive anchors, -1 for ignored anchors and 0 otherwise.
    """
    all_labels, all_target_delta, all_anchor_states = [], [], []
    all_target_boxes, all_target_encode_label = [], []

    for i in range(cfgs.BATCH_SIZE):
        anchors = np.array(anchor_batch[i], np.float32)
        gt_boxes_h = gt_boxes_h_batch[i, :, :]
        gt_boxes_r = gt_boxes_r_batch[i, :, :]
        gt_encode_label = gt_encode_label_batch[i, :, :]

        anchor_states = np.zeros((anchors.shape[0], ))
        labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM))

        if gt_boxes_r.shape[0]:
            # [N, M]
            if cfgs.METHOD == 'H':
                # np.float64 replaces the ``np.float`` alias, which no longer
                # exists in current NumPy releases.
                overlaps = bbox_overlaps(
                    np.ascontiguousarray(anchors, dtype=np.float64),
                    np.ascontiguousarray(gt_boxes_h, dtype=np.float64))
            else:
                overlaps = rbbx_overlaps(
                    np.ascontiguousarray(anchors, dtype=np.float32),
                    np.ascontiguousarray(gt_boxes_r[:, :-1], dtype=np.float32),
                    gpu_id)

            argmax_overlaps_inds = np.argmax(overlaps, axis=1)
            max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds]

            # compute box regression targets
            target_boxes = gt_boxes_r[argmax_overlaps_inds]
            target_encode_label = gt_encode_label[argmax_overlaps_inds]

            positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD
            # Anchors between the two thresholds are neither positive nor background.
            ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices

            anchor_states[ignore_indices] = -1
            anchor_states[positive_indices] = 1

            # compute target class labels (class ids are 1-based, columns 0-based)
            labels[positive_indices, target_boxes[positive_indices, -1].astype(int) - 1] = 1
        else:
            # no annotations? then everything is background
            target_boxes = np.zeros((anchors.shape[0], gt_boxes_r.shape[1]))
            target_encode_label = np.zeros(
                (anchors.shape[0], gt_encode_label.shape[1]))

        if cfgs.METHOD == 'H':
            # Convert [x1, y1, x2, y2] anchors to [x_c, y_c, w, h, theta].
            x_c = (anchors[:, 2] + anchors[:, 0]) / 2
            y_c = (anchors[:, 3] + anchors[:, 1]) / 2
            # h is taken from the x-extent and w from the y-extent, paired
            # with theta = -90. NOTE(review): presumably this follows the
            # rotated-box convention in use -- confirm before "fixing".
            h = anchors[:, 2] - anchors[:, 0] + 1
            w = anchors[:, 3] - anchors[:, 1] + 1
            theta = -90 * np.ones_like(x_c)
            anchors = np.vstack([x_c, y_c, w, h, theta]).transpose()

        if cfgs.ANGLE_RANGE == 180:
            anchors = coordinate_present_convert(anchors, mode=-1)
            target_boxes = coordinate_present_convert(target_boxes, mode=-1)

        target_delta = bbox_transform.rbbox_transform(ex_rois=anchors, gt_rois=target_boxes)

        all_labels.append(labels)
        all_target_delta.append(target_delta)
        all_anchor_states.append(anchor_states)
        all_target_boxes.append(target_boxes)
        all_target_encode_label.append(target_encode_label)

    return np.array(all_labels, np.float32), np.array(all_target_delta, np.float32)[:, :, :-1], \
        np.array(all_anchor_states, np.float32), np.array(all_target_boxes, np.float32), \
        np.array(all_target_encode_label, np.float32)
def evaluate_recall(self, candidate_boxes=None, thresholds=None, area='all', limit=None):
    """Evaluate detection proposal recall metrics.

    :param candidate_boxes: per-image proposal boxes, indexed by image. When
        None, the non-ground-truth boxes of ``self.roidb`` (entries whose
        ``gt_classes`` is 0) are used.
    :param thresholds: IoU thresholds at which recall is computed. Defaults
        to 0.5 to 0.95 in steps of 0.05.
    :param area: key selecting the ground-truth area range: 'all', 'small',
        'medium', 'large', '96-128', '128-256', '256-512' or '512-inf'.
    :param limit: if given, only the first ``limit`` proposals of every image
        are considered.

    Returns:
        results: dictionary of results with keys
            'ar': average recall
            'recalls': vector recalls at each IoU overlap threshold
            'thresholds': vector of IoU overlap thresholds
            'gt_overlaps': vector of all ground-truth overlaps
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        'all': 0,
        'small': 1,
        'medium': 2,
        'large': 3,
        '96-128': 4,
        '128-256': 5,
        '256-512': 6,
        '512-inf': 7
    }
    area_ranges = [
        [0**2, 1e5**2],     # all
        [0**2, 32**2],      # small
        [32**2, 96**2],     # medium
        [96**2, 1e5**2],    # large
        [96**2, 128**2],    # 96-128
        [128**2, 256**2],   # 128-256
        [256**2, 512**2],   # 256-512
        [512**2, 1e5**2],   # 512-inf
    ]
    # ``dict.has_key`` does not exist on Python 3; ``in`` works everywhere.
    assert area in areas, 'unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = np.zeros(0)
    num_pos = 0

    for i in range(self.num_images):
        # Checking for max_overlaps == 1 avoids including crowd annotations
        # (...pretty hacking :/)
        max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
        gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
                           (max_gt_overlaps == 1))[0]
        gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
        gt_areas = self.roidb[i]['seg_areas'][gt_inds]
        valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                 (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)

        if candidate_boxes is None:
            # If candidate_boxes is not supplied, the default is to use the
            # non-ground-truth boxes from this roidb
            non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
            boxes = self.roidb[i]['boxes'][non_gt_inds, :]
        else:
            boxes = candidate_boxes[i]
        if boxes.shape[0] == 0:
            # No proposals: the gt boxes were already counted in num_pos and
            # simply contribute no overlaps.
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]

        # np.float64 replaces the ``np.float`` alias, which no longer exists
        # in current NumPy releases; the alias always meant 64-bit float.
        overlaps = bbox_overlaps(boxes.astype(np.float64),
                                 gt_boxes.astype(np.float64))

        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        # Greedy one-to-one matching. Each round consumes one proposal and
        # one gt box, so at most min(#proposals, #gt) rounds are possible;
        # looping over every gt box would hit an all -1 matrix and trip the
        # assertion below when proposals run out. Unmatched gt boxes keep an
        # overlap of 0.
        for j in range(min(boxes.shape[0], gt_boxes.shape[0])):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert (gt_ovr >= 0)
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert (_gt_overlaps[j] == gt_ovr)
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for idx, t in enumerate(thresholds):
        recalls[idx] = (gt_overlaps >= t).sum() / float(num_pos)
    ar = recalls.mean()
    return {
        'ar': ar,
        'recalls': recalls,
        'thresholds': thresholds,
        'gt_overlaps': gt_overlaps
    }
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background examples.

    all_rois shape is [-1, 4]
    gt_boxes shape is [-1, 5]. that is [x1, y1, x2, y2, label]

    :param all_rois: candidate RoIs to sample from.
    :param gt_boxes: ground-truth boxes; the last column is the class label.
    :param fg_rois_per_image: maximum number of foreground RoIs to keep.
    :param rois_per_image: maximum total number of RoIs to keep.
    :param num_classes: forwarded to ``_get_bbox_regression_labels``.
    :return: ``(labels, rois, bbox_targets)`` for the sampled RoIs. Foreground
        RoIs come first; every label after them is set to 0.
    """
    # overlaps: (rois x gt_boxes)
    # Overlap between every RoI and every ground-truth box.
    # np.float64 replaces the ``np.float`` alias, which no longer exists in
    # current NumPy releases; the alias always meant 64-bit float.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois, dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :-1], dtype=np.float64))

    # For every RoI: index of the gt box it overlaps most, and that overlap.
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # Class label of the matched gt box.
    labels = gt_boxes[gt_assignment, -1]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(
        max_overlaps >= cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD)[0]

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where(
        (max_overlaps < cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD) &
        (max_overlaps >= cfgs.FAST_RCNN_IOU_NEGATIVE_THRESHOLD))[0]

    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs: keep the smaller of the budget and what is available.
    fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)

    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_this_image), replace=False)

    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)

    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_this_image), replace=False)

    # The indices that we're selecting (both fg and bg); fg indices come
    # first. Both sets were chosen by threshold, and background additionally
    # has a lower bound on its overlap.
    keep_inds = np.append(fg_inds, bg_inds)

    # Select sampled values from various arrays.
    # Fancy indexing yields a new array, so the write below does not touch gt_boxes.
    labels = labels[keep_inds]

    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_this_image):] = 0
    rois = all_rois[keep_inds]

    # Regression target data for the kept RoIs against their matched gt boxes.
    bbox_target_data = _compute_targets(
        rois, gt_boxes[gt_assignment[keep_inds], :-1], labels)

    # Expand the target data into per-class regression targets.
    # NOTE(review): the original comment states that num_classes comes from
    # the tf-record and already includes the background class -- confirm.
    bbox_targets = _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets
def sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """
    Generate a random sample of RoIs comprising foreground and background examples.

    :param all_rois: rois shape is [-1, 4]
    :param gt_boxes: gt_boxes shape is [-1, 5]. that is [x1, y1, x2, y2, label]
    :param fg_rois_per_image: maximum number of foreground RoIs to keep.
    :param rois_per_image: maximum total number of RoIs to keep.
    :param num_classes: object_classes + 1(background)
    :return: ``(labels, rois, bbox_targets)`` for the sampled RoIs. Foreground
        RoIs come first; every label after them is set to 0.
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 replaces the ``np.float`` alias, which no longer exists in
    # current NumPy releases; the alias always meant 64-bit float.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois, dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :-1], dtype=np.float64))

    # Best-matching ground-truth box (and its overlap) for every RoI.
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, -1]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_indices = np.where(
        max_overlaps >= cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD)[0]

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indices = np.where(
        (max_overlaps < cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD) &
        (max_overlaps >= cfgs.FAST_RCNN_IOU_NEGATIVE_THRESHOLD))[0]

    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = min(fg_rois_per_image, fg_indices.size)

    # Sample foreground regions without replacement
    if fg_indices.size > 0:
        fg_indices = np.random.choice(
            fg_indices, size=int(fg_rois_per_this_image), replace=False)

    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_indices.size)

    # Sample background regions without replacement
    if bg_indices.size > 0:
        bg_indices = np.random.choice(
            bg_indices, size=int(bg_rois_per_this_image), replace=False)

    # The indices that we're selecting (both fg and bg); fg indices come first
    keep_inds = np.append(fg_indices, bg_indices)

    # Select sampled values from various arrays.
    # Fancy indexing yields a new array, so the write below does not touch gt_boxes.
    labels = labels[keep_inds]

    # Clamp labels for the background RoIs to 0; foreground RoIs keep the
    # class label of their matched ground-truth box.
    labels[int(fg_rois_per_this_image):] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = compute_targets(
        ex_rois=rois,
        gt_rois=gt_boxes[gt_assignment[keep_inds], :-1],
        labels=labels)
    bbox_targets = get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets